增加混合语言切分方式，解决无法混合推理的问题，并支持分词优化，通过并行推理加快推理速度

2026-06-05 22:08:15 +08:00 · 2024-03-26 16:40:58 +08:00 · 2024-03-26 16:40:58 +08:00 · 67d6243229
commit 67d6243229
parent ed75ecdd6d
2 changed files with 117 additions and 2 deletions
--- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
+++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@ -146,9 +146,123 @@ def cut5(inp):
    opt = "\n".join(mergeitems)
    return opt

+def num_to_chinese(num):
+    """Return the Mandarin reading of a non-negative integer.
+
+    Args:
+        num: An int, or a string made up only of ASCII digits.
+
+    Returns:
+        The number written with Chinese numerals, e.g. 20 -> "二十" and
+        "10001" -> "一万零一".  An empty string gives "".  Strings with a
+        leading zero ("007") or more than 16 digits are read digit by digit.
+
+    Raises:
+        ValueError: If num contains anything other than ASCII digits.
+    """
+    digit_names = '零一二三四五六七八九'
+    small_units = ('', '十', '百', '千')
+    big_units = ('', '万', '亿', '万亿')
+    num_str = str(num)
+    if not num_str:
+        return ''
+    if not set(num_str) <= set('0123456789'):
+        raise ValueError("num_to_chinese expects ASCII digits only, got %r" % (num,))
+    # Codes such as "007" and numbers beyond the unit table are spelled out
+    # one digit at a time; this branch also covers a plain "0".
+    if num_str[0] == '0' or len(num_str) > 4 * len(big_units):
+        return ''.join(digit_names[int(ch)] for ch in num_str)
+    # Left-pad to a multiple of four and split into groups of four digits,
+    # highest group first; each group takes one entry of big_units.
+    padded = num_str.zfill((len(num_str) + 3) // 4 * 4)
+    groups = [padded[i:i + 4] for i in range(0, len(padded), 4)]
+    parts = []
+    skipped_group = False
+    for index, group in enumerate(groups):
+        if group == '0000':
+            skipped_group = True
+            continue
+        section = ''
+        zero_gap = False
+        for pos, ch in enumerate(group):
+            if ch == '0':
+                # Zeros before the first spoken digit are ignored here;
+                # zeros after it are voiced once, and only if a digit follows.
+                zero_gap = bool(section)
+                continue
+            if zero_gap:
+                section += '零'
+                zero_gap = False
+            section += digit_names[int(ch)] + small_units[3 - pos]
+        # One "零" bridges an all-zero group or a missing thousands digit.
+        if parts and (skipped_group or group[0] == '0'):
+            section = '零' + section
+        skipped_group = False
+        parts.append(section + big_units[len(groups) - 1 - index])
+    text = ''.join(parts)
+    # A leading "一十" is conventionally read as "十" (10 -> 十, 15 -> 十五).
+    return text[1:] if text.startswith('一十') else text

+# Mixed-language splitting: cut the text into runs of a single script, in
+# groups of roughly ten characters, so that more segments are available for
+# batched parallel inference.
+@register_method("mixed_cut")
+def mixed_cut(inp):
+    """Split text into single-script segments, one per output line.
+
+    Each character is classified as kana ("jps"), Chinese ideograph
+    ("hans"), ASCII letter ("alpha"), ASCII digit ("digit") or unclassified
+    ("unknow").  A new segment starts whenever the script changes, and also
+    right after an unclassified character (punctuation) once the current
+    segment is longer than ten characters.  Spaces stay with the segment
+    being built.  Segments without any classified character are folded into
+    a neighbour.  Digit segments are attached to the segment before them
+    (and also to the one after when both neighbours share a script, or when
+    the digits open the text); if either neighbour is Chinese the digits are
+    spelled with Chinese numerals.
+
+    Args:
+        inp: Text to split.
+
+    Returns:
+        The segments joined with newline characters.
+    """
+    classifiers = (
+        # Kana only.  Kanji inside Japanese text fall into the "hans" range;
+        # per the original author's note this relies on a later short-segment
+        # merge step (not part of this function) to rejoin them with the
+        # preceding Japanese group.
+        ("jps", re.compile('[\u3040-\u30FF\uFF66-\uFF9D]')),
+        ("hans", re.compile('[\u4e00-\u9fa5]')),
+        ("alpha", re.compile('[a-zA-Z]')),
+        ("digit", re.compile('[0-9]')),
+    )
+    digit_run = re.compile('[0-9]+')
+
+    def kind_of(text):
+        """Return the highest-priority script present in text, else "unknow"."""
+        for kind, pattern in classifiers:
+            if pattern.search(text):
+                return kind
+        return "unknow"
+
+    def to_hans_numerals(text):
+        """Spell every digit run in text with Chinese numerals."""
+        # Only the digit runs are converted: a digit segment may also hold
+        # punctuation and spaces, which must pass through untouched.
+        return digit_run.sub(lambda m: num_to_chinese(m.group()), text)
+
+    # Ellipses, tildes and dashes are treated as sentence breaks.
+    for marker in ("...", "~", "——", "……"):
+        inp = inp.replace(marker, "。")
+
+    # Pass 1: group consecutive characters of the same script.
+    segments = []
+    buf = ""
+    last_kind = ""
+    for char in inp:
+        if char == " ":
+            buf += char
+            continue
+        kind = kind_of(char)
+        if kind == "unknow":
+            buf += char
+            # Punctuation is the only place a long run may be cut.
+            if len(buf) > 10:
+                segments.append(buf)
+                buf = ""
+            continue
+        if last_kind and kind != last_kind:
+            segments.append(buf)
+            buf = ""
+        buf += char
+        last_kind = kind
+    segments.append(buf)
+
+    # Pass 2: drop empty segments, fold segments without a classified
+    # character into a neighbour and join adjacent digit segments, so every
+    # remaining entry has a meaningful script for the neighbour checks below.
+    typed = []
+    lead = ""
+    for seg in segments:
+        if not seg:
+            continue
+        kind = kind_of(seg)
+        if kind == "unknow":
+            if typed:
+                typed[-1][0] += seg
+            else:
+                lead += seg
+        elif kind == "digit" and typed and typed[-1][1] == "digit":
+            typed[-1][0] += seg
+        else:
+            typed.append([lead + seg, kind])
+            lead = ""
+    if lead:
+        # The input held no classifiable character at all.
+        typed.append([lead, "unknow"])
+
+    # Pass 3: merge digit segments into their neighbours.
+    merged = []
+    i = 0
+    while i < len(typed):
+        text, kind = typed[i]
+        if kind != "digit":
+            merged.append(text)
+            i += 1
+            continue
+        prev_kind = typed[i - 1][1] if i > 0 else ""
+        has_next = i + 1 < len(typed)
+        next_text, next_kind = typed[i + 1] if has_next else ("", "")
+        if "hans" in (prev_kind, next_kind):
+            text = to_hans_numerals(text)
+        # Digits opening the text, or sitting between two segments of the
+        # same script, also absorb the following segment.
+        if has_next and (i == 0 or prev_kind == next_kind):
+            text += next_text
+            i += 1
+        if merged:
+            merged[-1] += text
+        else:
+            merged.append(text)
+        i += 1
+    return "\n".join(merged)

 if __name__ == '__main__':
-    method = get_method("cut5")
-    print(method("你好，我是小明。你好，我是小红。你好，我是小刚。你好，我是小张。"))
+    # Smoke test: print the mixed_cut result for Chinese, Japanese, English
+    # and digit mixtures, each followed by a "===" separator.
+    method = get_method("mixed_cut")
+    samples = (
+        "你好，我是小明。你好，我是小红。你好，我是小刚。你好，我是小张。",
+        "你好，我是小明",
+        "12345",
+        "123，不许动",
+        "你好，我是小明。我今年20岁了",
+        "你好，我是Maxwell, nice to meet you",
+        "你好，我是Maxwell。我今年20岁了",
+        "你好，我是小明。こんにちは、シャオミンです。",
+        "こんにちは、シャオミンです。 今年で20周年",
+        "こんにちは、シャオミンです。 今年で20周年， nice to meet you",
+        "こんにちは、シャオミンです。nice to meet you",
+        "Hello, I am Maxwell. 20 years old，中文名叫小明",
+    )
+    for sample in samples:
+        print(method(sample) + "\n===\n")
    
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -67,6 +67,7 @@ cut_method = {
    i18n("按中文句号。切"): "cut3",
    i18n("按英文句号.切"): "cut4",
    i18n("按标点符号切"): "cut5",
+    i18n("语言混合切字"): "mixed_cut",
 }

 tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")