mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-08 07:49:59 +08:00
增加混合语音分切方式,解决无法混合推理的问题,并支持分词优化让并行推理加快推理速度
This commit is contained in:
parent
ed75ecdd6d
commit
67d6243229
@ -146,9 +146,123 @@ def cut5(inp):
|
|||||||
opt = "\n".join(mergeitems)
|
opt = "\n".join(mergeitems)
|
||||||
return opt
|
return opt
|
||||||
|
|
||||||
|
def _digit_run_to_chinese(digits):
    """Read one run of ASCII digits aloud as a Chinese numeral.

    Args:
        digits: Non-empty string consisting only of the characters 0-9.

    Returns:
        The Chinese reading, e.g. "12345" -> "一万二千三百四十五".
        Runs with a leading zero (such as "007") and runs longer than
        16 digits are read digit by digit instead of as a quantity.
    """
    numerals = '零一二三四五六七八九'
    small_units = ['', '十', '百', '千']
    large_units = ['', '万', '亿', '万亿']

    has_leading_zero = len(digits) > 1 and digits[0] == '0'
    if has_leading_zero or len(digits) > 4 * len(large_units):
        return ''.join(numerals[int(d)] for d in digits)

    value = int(digits)
    if value == 0:
        return numerals[0]

    # Split into 4-digit sections, least significant first.
    sections = []
    while value:
        value, section = divmod(value, 10000)
        sections.append(section)

    text = ''
    for index in range(len(sections) - 1, -1, -1):
        section = sections[index]
        if section == 0:
            continue
        # One 零 bridges skipped places between sections: either the
        # section above was entirely zero, or this section lacks a 千 digit.
        if text and (section < 1000 or sections[index + 1] == 0):
            text += '零'

        part = ''
        gap = False
        for power in (3, 2, 1, 0):
            digit = section // 10 ** power % 10
            if digit == 0:
                # Only interior zeros matter; leading zeros of the section
                # are handled above and trailing zeros are silent.
                gap = bool(part)
                continue
            if gap:
                part += '零'
                gap = False
            part += numerals[digit] + small_units[power]
        text += part + large_units[index]
    return text


def num_to_chinese(num):
    """Convert the digits in *num* to their Chinese reading.

    Args:
        num: An int or a string. Every maximal run of ASCII digits is
            converted; any other character (punctuation, spaces, ...) is
            kept unchanged in place.

    Returns:
        The converted string, e.g. "20" -> "二十", "101" -> "一百零一",
        "123," -> "一百二十三,", "0" -> "零".
    """
    pieces = []
    run = ''
    for char in str(num):
        if char in '0123456789':
            run += char
            continue
        if run:
            pieces.append(_digit_run_to_chinese(run))
            run = ''
        pieces.append(char)
    if run:
        pieces.append(_digit_run_to_chinese(run))
    return ''.join(pieces)
|
||||||
|
|
||||||
|
# Mixed-language cutting: split into groups of roughly 10 characters so that
# more, shorter texts are available for batched parallel inference.
@register_method("mixed_cut")
def mixed_cut(inp):
    """Split mixed Japanese/Chinese/English/digit text into segments.

    The text is first grouped into runs of the same character class; a run
    is also closed at an unclassified character (punctuation etc.) once it
    is longer than 10 characters. Digit segments are then merged into the
    preceding segment, and read as Chinese numerals when a neighbouring
    segment is Chinese.

    Args:
        inp: The text to split.

    Returns:
        The segments joined with "\n". Empty segments are never emitted.
    """
    # Character classes, tested in this order. Kana is tested before the
    # Chinese range so that kanji inside Japanese text can be merged into
    # the neighbouring Japanese group by the caller's short-text merging.
    char_classes = (
        ("jps", re.compile('[\u3040-\u30FF\uFF66-\uFF9D]')),
        ("hans", re.compile('[\u4e00-\u9fa5]')),
        ("alpha", re.compile('[a-zA-Z]')),
        ("digit", re.compile('[0-9]')),
    )
    digit_run = re.compile('[0-9]+')

    def classify(text):
        # First class with any matching character wins; "" is "unknow".
        for label, pattern in char_classes:
            if pattern.search(text) is not None:
                return label
        return "unknow"

    # Normalise ellipsis-like marks to a full stop before splitting.
    for token in ("...", "~", "——", "……"):
        inp = inp.replace(token, "。")

    # Pass 1: group consecutive characters of the same class.
    segments = []
    current = ""
    current_type = ""
    for char in inp:
        if char == " ":
            # Spaces never start a new segment.
            current += char
            continue
        char_type = classify(char)
        if char_type != "unknow":
            if current and current_type and char_type != current_type:
                segments.append(current)
                current = ""
            current += char
            current_type = char_type
        else:
            current += char
            # Punctuation is the only place a long run may be cut.
            if len(current) > 10:
                segments.append(current)
                current = ""
    if current:
        segments.append(current)

    # Pass 2: merge digit segments into the previous segment, optionally
    # reading them as Chinese numerals.
    merged = []
    index = 0
    total = len(segments)
    while index < total:
        this_s = segments[index]
        if classify(this_s) != "digit":
            merged.append(this_s)
            index += 1
            continue

        before_s = segments[index - 1] if index > 0 else ""
        next_s = segments[index + 1] if index + 1 < total else ""
        before_s_type = classify(before_s)
        next_s_type = classify(next_s)

        if before_s_type == "hans" or next_s_type == "hans":
            # Convert only the digit runs; a digit segment may also carry
            # spaces or punctuation that must be kept as is.
            piece = digit_run.sub(
                lambda match: num_to_chinese(match.group()), this_s
            )
        else:
            piece = this_s

        # With no previous segment, or the same language on both sides,
        # the following segment is pulled in too so the sentence stays whole.
        if before_s == "" or before_s_type == next_s_type:
            piece += next_s
            index += 1

        if merged:
            merged[-1] += piece
        else:
            merged.append(piece)
        index += 1

    return "\n".join(merged)
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual smoke run: print the mixed_cut result for a set of sample
    # sentences covering Chinese, Japanese, English and digit mixtures.
    method = get_method("mixed_cut")
    samples = (
        "你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。",
        "你好,我是小明",
        "12345",
        "123,不许动",
        "你好,我是小明。我今年20岁了",
        "你好,我是Maxwell, nice to meet you",
        "你好,我是Maxwell。我今年20岁了",
        "你好,我是小明。こんにちは、シャオミンです。",
        "こんにちは、シャオミンです。 今年で20周年",
        "こんにちは、シャオミンです。 今年で20周年, nice to meet you",
        "こんにちは、シャオミンです。nice to meet you",
        "Hello, I am Maxwell. 20 years old,中文名叫小明",
    )
    for sample in samples:
        print(method(sample) + "\n===\n")
|
||||||
|
|
||||||
|
@ -67,6 +67,7 @@ cut_method = {
|
|||||||
i18n("按中文句号。切"): "cut3",
|
i18n("按中文句号。切"): "cut3",
|
||||||
i18n("按英文句号.切"): "cut4",
|
i18n("按英文句号.切"): "cut4",
|
||||||
i18n("按标点符号切"): "cut5",
|
i18n("按标点符号切"): "cut5",
|
||||||
|
i18n("语言混合切字"): "mixed_cut",
|
||||||
}
|
}
|
||||||
|
|
||||||
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user