增加混合语音分切方式,解决无法混合推理的问题,并支持分词优化让并行推理加快推理速度

This commit is contained in:
DESKTOP-030C45B\123 2024-03-26 16:40:58 +08:00
parent ed75ecdd6d
commit 67d6243229
2 changed files with 117 additions and 2 deletions

View File

@ -146,9 +146,123 @@ def cut5(inp):
opt = "\n".join(mergeitems)
return opt
# Numerals indexed by digit value, place units inside a four-digit group,
# and the unit attached to each successive four-digit group.
_CN_DIGITS = "零一二三四五六七八九"
_CN_PLACE_UNITS = ("", "十", "百", "千")
_CN_GROUP_UNITS = ("", "万", "亿")


def _read_digit_by_digit(digits):
    """Spell a digit string one numeral at a time (e.g. "007" -> "零零七")."""
    return "".join(_CN_DIGITS[ord(d) - ord("0")] for d in digits)


def _read_group(value):
    """Read an integer in 1..9999 using the 千/百/十 place units."""
    spoken = ""
    gap = False
    for place in (3, 2, 1, 0):
        digit = value // 10 ** place % 10
        if digit == 0:
            # Interior zeros collapse into a single 零, and only once a
            # higher non-zero digit has already been emitted.
            gap = bool(spoken)
            continue
        if gap:
            spoken += _CN_DIGITS[0]
            gap = False
        spoken += _CN_DIGITS[digit] + _CN_PLACE_UNITS[place]
    return spoken


def _read_number(digits):
    """Read one run of ASCII digits as a Chinese cardinal number."""
    # Runs with a leading zero (codes, "007") and runs too long for the
    # 万/亿 units are spelled out digit by digit instead of as a quantity.
    if len(digits) > 1 and digits[0] == "0":
        return _read_digit_by_digit(digits)
    if len(digits) > 4 * len(_CN_GROUP_UNITS):
        return _read_digit_by_digit(digits)

    value = int(digits)
    if value == 0:
        return _CN_DIGITS[0]

    groups = []  # four-digit groups, least significant first
    while value:
        value, group = divmod(value, 10000)
        groups.append(group)

    spoken = ""
    gap = False
    for index in reversed(range(len(groups))):
        group = groups[index]
        if group == 0:
            gap = True
            continue
        # A 零 is needed when a whole group was skipped or when this group
        # does not fill its thousands place (e.g. 10001 -> 一万零一).
        if spoken and (gap or group < 1000):
            spoken += _CN_DIGITS[0]
        gap = False
        spoken += _read_group(group) + _CN_GROUP_UNITS[index]

    # 10-19 (and 10万-19万, ...) are read "十..." rather than "一十...".
    if spoken.startswith("一十"):
        spoken = spoken[1:]
    return spoken


def num_to_chinese(num):
    """Return *num* with every run of ASCII digits read as a Chinese number.

    Args:
        num: An ``int`` or a string. Characters that are not ASCII digits
            (spaces, punctuation, a minus sign, ...) are copied through
            unchanged, so a digit group such as ``"20 "`` is accepted.

    Returns:
        The converted string, e.g. ``"20"`` -> ``"二十"``,
        ``"1005"`` -> ``"一千零五"``, ``"0"`` -> ``"零"``.
    """
    pieces = []
    run = ""
    for char in str(num):
        if "0" <= char <= "9":
            run += char
            continue
        if run:
            pieces.append(_read_number(run))
            run = ""
        pieces.append(char)
    if run:
        pieces.append(_read_number(run))
    return "".join(pieces)
# Mixed-language splitting: break the text into groups of roughly ten
# characters so that more segments are available for batched parallel inference.
@register_method("mixed_cut")
def mixed_cut(inp):
    """Split mixed-language text into newline-separated groups.

    The text is first cut wherever the script changes (kana, Chinese
    characters, Latin letters, digits) and at punctuation once a group has
    grown past ten characters. Digit groups are then merged into the
    preceding group, and read out as Chinese numbers when either neighbour
    is Chinese text.

    Args:
        inp: The text to split.

    Returns:
        The groups joined with ``"\\n"``.
    """
    # Script classes in priority order. The Japanese class covers kana only;
    # kanji inside Japanese text match the Chinese class and rely on the
    # later short-segment merge to be joined to the preceding Japanese group.
    char_classes = (
        ("jps", re.compile('[\u3040-\u30FF\uFF66-\uFF9D]')),
        ("hans", re.compile('[\u4e00-\u9fa5]')),
        ("alpha", re.compile('[a-zA-Z]')),
        ("digit", re.compile('[0-9]')),
    )
    digit_run = re.compile('[0-9]+')

    def classify(text):
        """Return the first script class found in *text*, else "unknow"."""
        for label, pattern in char_classes:
            if pattern.search(text) is not None:
                return label
        return "unknow"

    def speak_digits(text):
        """Read each digit run in *text* as a Chinese number."""
        def convert(match):
            # Verbalising numbers is best effort: if the converter rejects
            # the run, keep the raw digits rather than abort the split.
            try:
                spoken = num_to_chinese(match.group())
            except (IndexError, KeyError, ValueError):
                return match.group()
            return spoken or match.group()
        return digit_run.sub(convert, text)

    # Drop sequences that only produce noise in the split result.
    for ignored in ("...", "~", "——", "……"):
        inp = inp.replace(ignored, "")

    # Pass 1: group consecutive characters of the same script.
    result = []
    last_s = ""
    last_c_type = ""
    for char in inp:
        if char == " ":
            # Spaces never start a new group.
            last_s += char
            continue
        c_type = classify(char)
        if c_type != "unknow" and last_c_type and c_type != last_c_type:
            result.append(last_s)
            last_s = ""
        last_s += char
        if c_type != "unknow":
            last_c_type = c_type
        elif len(last_s) > 10:
            # Punctuation (or another unclassified character) closes a group
            # once it is longer than ten characters.
            result.append(last_s)
            last_s = ""
    result.append(last_s)

    # Pass 2: merge digit groups into the preceding group, optionally reading
    # them as Chinese numbers.
    new_result = []
    n = 0
    while n < len(result):
        this_s = result[n]
        if classify(this_s) != "digit":
            new_result.append(this_s)
            n += 1
            continue

        before_s = result[n - 1] if n > 0 else ""
        next_s = result[n + 1] if n < len(result) - 1 else ""
        # Classify the neighbours themselves (not the digit group) so the
        # language of the surrounding text decides how the number is handled.
        before_s_type = classify(before_s)
        next_s_type = classify(next_s)

        if before_s == "":
            # Nothing to merge into yet: start an empty group to append to.
            new_result.append(before_s)
        if before_s_type == "hans" or next_s_type == "hans":
            ss = speak_digits(this_s)
        else:
            ss = this_s
        if before_s == "" or before_s_type == next_s_type:
            # Same language on both sides (or no left neighbour): keep the
            # following group attached and skip over it.
            ss += next_s
            n += 1
        new_result[-1] += ss
        n += 1

    return "\n".join(new_result)
if __name__ == '__main__':
    # Manual smoke run: print the output of "cut5" for one sample, then the
    # output of "mixed_cut" for a series of mixed-language samples.
    print(get_method("cut5")("你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。"))

    method = get_method("mixed_cut")
    mixed_samples = (
        "你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。",
        "你好,我是小明",
        "12345",
        "123不许动",
        "你好我是小明。我今年20岁了",
        "你好我是Maxwell, nice to meet you",
        "你好我是Maxwell。我今年20岁了",
        "你好,我是小明。こんにちは、シャオミンです。",
        "こんにちは、シャオミンです。 今年で20周年",
        "こんにちは、シャオミンです。 今年で20周年 nice to meet you",
        "こんにちは、シャオミンです。nice to meet you",
        "Hello, I am Maxwell. 20 years old中文名叫小明",
    )
    for text in mixed_samples:
        print(method(text) + "\n===\n")

View File

@ -67,6 +67,7 @@ cut_method = {
i18n("按中文句号。切"): "cut3",
i18n("按英文句号.切"): "cut4",
i18n("按标点符号切"): "cut5",
i18n("语言混合切字"): "mixed_cut",
}
# Build the TTS configuration from the inference YAML file.
# NOTE(review): the path is relative — presumably resolved against the
# repository root at launch; confirm against how this script is started.
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")