mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-07 15:19:59 +08:00
增加混合语音分切方式,解决无法混合推理的问题,并支持分词优化让并行推理加快推理速度
This commit is contained in:
parent
ed75ecdd6d
commit
67d6243229
@ -146,9 +146,123 @@ def cut5(inp):
|
||||
opt = "\n".join(mergeitems)
|
||||
return opt
|
||||
|
||||
_CN_DIGITS = "零一二三四五六七八九"
# Units for the thousands, hundreds, tens and ones place of a 4-digit section.
_CN_SECTION_UNITS = ("千", "百", "十", "")


def _cn_below_ten_thousand(value):
    """Spell an integer in 0..9999 (0 gives ''), e.g. 1001 -> '一千零一'."""
    spoken = ""
    zero_pending = False
    for digit, unit in zip(f"{value:04d}", _CN_SECTION_UNITS):
        if digit == "0":
            # Only an interior run of zeros is spoken, and only once;
            # leading and trailing zeros stay silent.
            zero_pending = bool(spoken)
            continue
        if zero_pending:
            spoken += "零"
            zero_pending = False
        spoken += _CN_DIGITS[int(digit)] + unit
    return spoken


def _cn_below_hundred_million(value):
    """Spell an integer in 0..99999999 using the 万 section unit."""
    high, low = divmod(value, 10000)
    spoken = _cn_below_ten_thousand(high) + "万" if high else ""
    if low:
        # A missing thousands digit after 万 is read as a single 零.
        if high and low < 1000:
            spoken += "零"
        spoken += _cn_below_ten_thousand(low)
    return spoken


def _cn_cardinal(value):
    """Spell any positive integer, recursing on the part above 亿 (10**8)."""
    high, low = divmod(value, 10 ** 8)
    if not high:
        return _cn_below_hundred_million(low)
    spoken = _cn_cardinal(high) + "亿"
    if low:
        # A missing ten-millions digit after 亿 is read as a single 零.
        if low < 10 ** 7:
            spoken += "零"
        spoken += _cn_below_hundred_million(low)
    return spoken


def num_to_chinese(num):
    """Return the Chinese cardinal reading of a non-negative integer.

    Args:
        num: An ``int`` or a string of ASCII decimal digits. Leading zeros
            are ignored ("007" reads as 七).

    Returns:
        The spoken form, e.g. ``20 -> "二十"``, ``101 -> "一百零一"``,
        ``12345 -> "一万二千三百四十五"``. Zero is ``"零"`` and an empty
        string is returned unchanged as ``""``.

    Raises:
        ValueError: If ``num`` contains anything other than ASCII digits.
    """
    text = str(num)
    if not text:
        return ""
    if not (text.isascii() and text.isdigit()):
        raise ValueError(f"expected a non-negative decimal integer, got {num!r}")

    value = int(text)
    if value == 0:
        return "零"

    spoken = _cn_cardinal(value)
    # 10..19 (and 10万, 10亿, ...) are read "十…", not "一十…".
    if spoken.startswith("一十"):
        spoken = spoken[1:]
    return spoken
|
||||
|
||||
# Mixed-language splitting: cut the text into groups of roughly 10 characters so
# that more segments are available for batched parallel inference.
@register_method("mixed_cut")
def mixed_cut(inp):
    """Split mixed Chinese/Japanese/English/numeric text into segments.

    The text is first grouped into runs of one script (kana, Chinese
    characters, Latin letters, digits); punctuation and spaces stay attached
    to the run they follow, and a run is also closed at a punctuation mark
    once it is longer than 10 characters. Digit runs are then merged into the
    preceding segment and, when a neighbouring segment is Chinese, spelled
    out as Chinese numerals via ``num_to_chinese``.

    Args:
        inp: The text to split.

    Returns:
        The segments joined with newlines.
    """
    # Checked in priority order, so a segment is "digit" only if it holds no
    # kana, Chinese characters or letters. Kanji inside Japanese text is
    # classified as "hans" here; per the original author's note, merging it
    # back into the preceding Japanese group is left to the downstream
    # short-segment merge.
    char_classes = (
        ("jps", re.compile('[\u3040-\u30FF\uFF66-\uFF9D]')),
        ("hans", re.compile('[\u4e00-\u9fa5]')),
        ("alpha", re.compile('[a-zA-Z]')),
        ("digit", re.compile('[0-9]')),
    )
    digit_run = re.compile('[0-9]+')

    def text_type(text):
        """Return the highest-priority script found in ``text``, else "unknow"."""
        for name, pattern in char_classes:
            if pattern.search(text):
                return name
        return "unknow"

    def digits_to_chinese(match):
        """Spell one digit run in Chinese, keeping the raw digits on failure."""
        try:
            return num_to_chinese(match.group())
        except (IndexError, KeyError, ValueError):
            # Best effort: an unreadable number must not abort the whole split.
            return match.group()

    # Ellipses, tildes and long dashes are treated as sentence ends.
    for marker in ("...", "~", "——", "……"):
        inp = inp.replace(marker, "。")

    # Pass 1: group consecutive characters of the same script.
    segments = []
    current = ""
    current_type = ""
    for char in inp:
        if char == " ":
            current += char
            continue
        kind = text_type(char)
        script_changed = kind != "unknow" and current_type and kind != current_type
        # Never emit an empty or whitespace-only segment: leftover spaces are
        # carried into the next segment instead.
        if script_changed and current.strip():
            segments.append(current)
            current = ""
        current += char
        if kind != "unknow":
            current_type = kind
        elif len(current) > 10:
            # Punctuation after more than 10 characters closes the group.
            segments.append(current)
            current = ""
    if current.strip():
        segments.append(current)

    # Pass 2: fold digit segments into their neighbours.
    merged = []
    idx = 0
    while idx < len(segments):
        this_s = segments[idx]
        if text_type(this_s) != "digit":
            merged.append(this_s)
            idx += 1
            continue

        has_next = idx + 1 < len(segments)
        before_type = text_type(segments[idx - 1]) if idx > 0 else ""
        next_type = text_type(segments[idx + 1]) if has_next else ""

        if "hans" in (before_type, next_type):
            # Convert only the digit runs so attached punctuation/spaces survive.
            piece = digit_run.sub(digits_to_chinese, this_s)
        else:
            piece = this_s

        # A leading number, or a number between two segments of the same
        # script, also absorbs the following segment.
        if has_next and (idx == 0 or before_type == next_type):
            piece += segments[idx + 1]
            idx += 1

        if merged:
            merged[-1] += piece
        else:
            merged.append(piece)
        idx += 1

    return "\n".join(merged)
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the output of the punctuation splitter once,
    # then run the mixed-language splitter over a set of sample sentences.
    method = get_method("cut5")
    print(method("你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。"))

    mixed_samples = (
        "你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。",
        "你好,我是小明",
        "12345",
        "123,不许动",
        "你好,我是小明。我今年20岁了",
        "你好,我是Maxwell, nice to meet you",
        "你好,我是Maxwell。我今年20岁了",
        "你好,我是小明。こんにちは、シャオミンです。",
        "こんにちは、シャオミンです。 今年で20周年",
        "こんにちは、シャオミンです。 今年で20周年, nice to meet you",
        "こんにちは、シャオミンです。nice to meet you",
        "Hello, I am Maxwell. 20 years old,中文名叫小明",
    )
    method = get_method("mixed_cut")
    for sample in mixed_samples:
        # Each result is followed by a separator so the cases are easy to tell apart.
        print(method(sample) + "\n===\n")
|
||||
|
||||
|
@ -67,6 +67,7 @@ cut_method = {
|
||||
i18n("按中文句号。切"): "cut3",
|
||||
i18n("按英文句号.切"): "cut4",
|
||||
i18n("按标点符号切"): "cut5",
|
||||
i18n("语言混合切字"): "mixed_cut",
|
||||
}
|
||||
|
||||
# NOTE(review): presumably builds the inference configuration from this YAML
# file; confirm against the TTS_Config constructor.
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
||||
|
Loading…
x
Reference in New Issue
Block a user