# GPT-SoVITS/trans_utils.py
import os
from ast import literal_eval

# Full-width Chinese punctuation stripped before aligning text with word-level
# timestamps. The characters were dropped in the copy this file was recovered
# from; the list below assumes the usual CJK punctuation set.
PUNC_LIST = ['，', '。', '！', '？', '、']

def pre_proc(text):
    """Strip punctuation and space out CJK characters so the result is a
    whitespace-delimited token sequence matching the ASR timestamp list."""
    res = ''
    for i in range(len(text)):
        if text[i] in PUNC_LIST:
            continue
        if '\u4e00' <= text[i] <= '\u9fff':
            # CJK character: surround with spaces so each character is a token.
            if len(res) and res[-1] != " ":
                res += ' ' + text[i] + ' '
            else:
                res += text[i] + ' '
        else:
            res += text[i]
    # Guard against empty input before stripping a trailing space.
    if res and res[-1] == ' ':
        res = res[:-1]
    return res
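
# Illustrative usage (not part of the original tool): pre_proc turns mixed
# Chinese/Latin text into one whitespace-separated token per CJK character,
# a sketch of the expected behaviour:
#
#     >>> pre_proc('你好，world你好。')
#     '你 好 world 你 好'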

def proc(raw_text, timestamp, dest_text):
    """Locate every occurrence of dest_text in the space-separated raw_text
    and return the matching [start, end] ranges taken from timestamp."""
    # simple matching
    ld = len(dest_text.split())
    mi, ts = [], []
    offset = 0
    while True:
        fi = raw_text.find(dest_text, offset, len(raw_text))
        if fi == -1:
            break
        # Number of tokens before the match = index of its first timestamp.
        ti = raw_text[:fi].count(' ')
        offset = fi + ld
        mi.append(fi)
        # Timestamps are in milliseconds; *16 converts to samples at 16 kHz.
        ts.append([timestamp[ti][0] * 16, timestamp[ti + ld - 1][1] * 16])
    return ts
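
# Illustrative usage (assumed values, not from the original tool): given a
# tokenised transcript and per-token [start_ms, end_ms] timestamps, proc
# returns sample ranges (ms * 16 = samples at 16 kHz) for each match:
#
#     >>> proc('我 们 今 天 去 公 园',
#     ...      [[0, 200], [200, 400], [400, 600], [600, 800],
#     ...       [800, 1000], [1000, 1200], [1200, 1400]],
#     ...      '今 天')
#     [[6400, 12800]]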

def proc_spk(dest_spk, sd_sentences):
    """Collect sample ranges for one speaker from diarized sentences."""
    ts = []
    for d in sd_sentences:
        d_start = d['ts_list'][0][0]
        d_end = d['ts_list'][-1][1]
        # dest_spk is expected to look like 'spk0'; keep only the numeric id.
        spkid = dest_spk[3:]
        # Keep this speaker's segments that last longer than ~1 s (999 ms).
        if str(d['spk']) == spkid and d_end - d_start > 999:
            # start/end are in milliseconds; *16 converts to samples at 16 kHz.
            ts.append([d['start'] * 16, d['end'] * 16])
    return ts
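
# Illustrative usage (assumed field layout, not from the original tool): each
# diarized sentence carries 'spk', 'start', 'end' (ms) and a 'ts_list' of
# per-token [start_ms, end_ms] pairs:
#
#     >>> sd = [{'spk': 0, 'start': 0, 'end': 1500,
#     ...        'ts_list': [[0, 700], [700, 1500]]}]
#     >>> proc_spk('spk0', sd)
#     [[0, 24000]]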

def generate_vad_data(data, sd_sentences, sr=16000):
    """Cut the mono waveform into per-sentence chunks using diarized timestamps."""
    assert len(data.shape) == 1
    vad_data = []
    for d in sd_sentences:
        # Sentence boundaries in seconds (ts_list entries are in milliseconds).
        d_start = round(d['ts_list'][0][0] / 1000, 2)
        d_end = round(d['ts_list'][-1][1] / 1000, 2)
        vad_data.append([d_start, d_end, data[int(d_start * sr):int(d_end * sr)]])
    return vad_data
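
# Illustrative usage (assumed input, not from the original tool): for a 1-D
# 16 kHz waveform, each returned entry is [start_s, end_s, waveform_slice]:
#
#     >>> import numpy as np
#     >>> wav = np.zeros(16000 * 10, dtype=np.float32)   # 10 s of audio
#     >>> chunks = generate_vad_data(wav, [{'ts_list': [[0, 2000]]}])
#     >>> chunks[0][0], chunks[0][1], chunks[0][2].shape
#     (0.0, 2.0, (32000,))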

def write_state(output_dir, state):
    """Dump the recognition state to plain-text files under output_dir."""
    for key in ['/recog_res_raw', '/timestamp', '/sentences']:
        with open(output_dir + key, 'w') as fout:
            fout.write(str(state[key[1:]]))
    # Diarization output is optional, so it is written only when present.
    if 'sd_sentences' in state:
        with open(output_dir + '/sd_sentences', 'w') as fout:
            fout.write(str(state['sd_sentences']))

def load_state(output_dir):
    """Read back the state written by write_state."""
    state = {}
    with open(output_dir + '/recog_res_raw') as fin:
        state['recog_res_raw'] = fin.read()
    # The remaining files hold Python literals written via str(); parse them
    # with ast.literal_eval rather than eval so arbitrary code cannot run.
    with open(output_dir + '/timestamp') as fin:
        state['timestamp'] = literal_eval(fin.read())
    with open(output_dir + '/sentences') as fin:
        state['sentences'] = literal_eval(fin.read())
    if os.path.exists(output_dir + '/sd_sentences'):
        with open(output_dir + '/sd_sentences') as fin:
            state['sd_sentences'] = literal_eval(fin.read())
    return state
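
# Minimal round-trip sketch (assumed state layout, not part of the original
# tool): write_state expects 'recog_res_raw', 'timestamp' and 'sentences',
# with 'sd_sentences' optional, and load_state reads the same files back.
if __name__ == '__main__':
    import tempfile

    demo_state = {
        'recog_res_raw': '你 好 world',
        'timestamp': [[0, 200], [200, 400], [400, 900]],
        'sentences': [{'text': '你好world', 'start': 0, 'end': 900}],
    }
    with tempfile.TemporaryDirectory() as tmp_dir:
        write_state(tmp_dir, demo_state)
        restored = load_state(tmp_dir)
        assert restored['timestamp'] == demo_state['timestamp']
        assert restored['sentences'] == demo_state['sentences']
        print(restored['recog_res_raw'])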