增加混合语言切分方式，解决无法混合推理的问题，并支持分词优化，通过并行推理加快推理速度

2025-10-08 07:49:59 +08:00 · 2024-03-26 16:40:58 +08:00 · 2024-03-26 16:40:58 +08:00 · 67d6243229
commit 67d6243229
parent ed75ecdd6d
2 changed files with 117 additions and 2 deletions
--- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
+++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@ -146,9 +146,123 @@ def cut5(inp):
    opt = "\n".join(mergeitems)
    return opt
 def num_to_chinese(num):
    """Return the Chinese reading of a non-negative decimal number.

    Args:
        num: An int or a string made only of decimal digits.

    Returns:
        The number read with positional units, e.g. ``"12345"`` ->
        ``"一万二千三百四十五"`` and ``101`` -> ``"一百零一"``. Ten is read
        ``"一十"``. Input that starts with ``0`` (including ``"0"`` itself)
        or that is longer than 16 digits is read digit by digit, e.g.
        ``"007"`` -> ``"零零七"``. An empty string yields ``""``.

    Raises:
        ValueError: If any character is not a decimal digit.
    """
    digit_names = '零一二三四五六七八九'
    place_units = ('', '十', '百', '千')
    section_units = ('', '万', '亿', '万亿')

    # int() per character rejects signs, dots, spaces and punctuation
    # with ValueError before any text is produced.
    values = [int(ch) for ch in str(num)]
    if not values:
        return ''

    # Leading zeros carry no positional meaning, and numbers beyond the
    # largest section unit cannot be named, so spell those out digit by digit.
    if values[0] == 0 or len(values) > 4 * len(section_units):
        return ''.join(digit_names[value] for value in values)

    def read_section(section):
        # Read up to four digits (most significant first), without the
        # section unit; a run of interior zeros collapses to a single 零.
        text = ''
        gap = False
        for place, value in zip(range(len(section) - 1, -1, -1), section):
            if value == 0:
                gap = bool(text)
                continue
            if gap:
                text += '零'
                gap = False
            text += digit_names[value] + place_units[place]
        return text

    # Split into groups of four digits, most significant group first.
    head = len(values) % 4 or 4
    sections = [values[:head]]
    sections += [values[i:i + 4] for i in range(head, len(values), 4)]

    result = ''
    gap = False
    for rank, section in zip(range(len(sections) - 1, -1, -1), sections):
        text = read_section(section)
        if not text:
            # An all-zero section is silent but forces a 零 before the
            # next non-zero section.
            gap = True
            continue
        if gap or section[0] == 0:
            text = '零' + text
        gap = False
        result += text + section_units[rank]
    return result
 # 支持语言混合切，按照约10个字一组，拆分更多的文本支持batch并行推理
@register_method("mixed_cut")
def mixed_cut(inp):
    """Split mixed-language text into newline-separated segments.

    The text is first grouped into runs of one script (kana, Chinese
    characters, Latin letters, digits). Spaces and other characters stay
    attached to the current run, and a run is also closed when such an
    "other" character makes it longer than 10 characters. Digit runs are
    then appended to the preceding segment, and are read out as a Chinese
    number when the neighbouring segment is Chinese.

    Args:
        inp: The text to split.

    Returns:
        The segments joined with ``"\\n"``.
    """
    # Checked in this order. Chinese characters inside Japanese text are
    # classified as "hans"; per the original author's note this relies on a
    # later short-segment merge to fold them back into the Japanese group
    # (that merge is not part of this function).
    char_classes = (
        ("jps", re.compile('[\u3040-\u30FF\uFF66-\uFF9D]')),
        ("hans", re.compile('[\u4e00-\u9fa5]')),
        ("alpha", re.compile('[a-zA-Z]')),
        ("digit", re.compile('[0-9]')),
    )
    # Exactly one run of digits, optionally surrounded by non-digit characters.
    single_number = re.compile('([^0-9]*)([0-9]+)([^0-9]*)')

    def s_type(s):
        # Name of the first character class found in s, or "unknow".
        for name, pattern in char_classes:
            if pattern.search(s) is not None:
                return name
        return "unknow"

    def digits_to_chinese(segment):
        # Convert only the digit run: the segment may carry punctuation or
        # spaces that num_to_chinese cannot parse. Segments holding several
        # digit runs (e.g. "3.14") are left untouched.
        match = single_number.fullmatch(segment)
        if match is None:
            return segment
        lead, digits, tail = match.groups()
        try:
            spoken = num_to_chinese(digits)
        except (ValueError, IndexError, KeyError):
            # Best effort: keep the raw digits rather than abort the split.
            return segment
        return lead + spoken + tail

    # Ellipses, tildes and long dashes are treated as sentence ends.
    for ss in ("...", "~", "——", "……"):
        inp = inp.replace(ss, "。")

    # Pass 1: group consecutive characters of the same class.
    result = []
    last_s = ""
    last_c_type = ""
    for char in inp:
        if char == " ":
            last_s += char
            continue
        c_type = s_type(char)
        if c_type == "unknow":
            # Punctuation never starts a group, but it closes a long one.
            last_s += char
            if len(last_s) > 10:
                result.append(last_s)
                last_s = ""
            continue
        if last_c_type and c_type != last_c_type:
            result.append(last_s)
            last_s = ""
        last_s += char
        last_c_type = c_type
    result.append(last_s)

    # Pass 2: fold digit groups into the preceding segment, reading them as
    # Chinese numbers when a neighbour is Chinese.
    new_result = []
    n = 0
    while n < len(result):
        this_s = result[n]
        if s_type(this_s) != "digit":
            new_result.append(this_s)
            n += 1
            continue

        before_s = result[n - 1] if n > 0 else ""
        next_s = result[n + 1] if n < len(result) - 1 else ""
        # The neighbour types must come from the neighbours themselves,
        # not from the digit segment.
        before_s_type = s_type(before_s)
        next_s_type = s_type(next_s)

        if before_s == "":
            # Nothing to attach to yet: start an empty segment.
            new_result.append(before_s)
        if before_s_type == "hans" or next_s_type == "hans":
            ss = digits_to_chinese(this_s)
        else:
            ss = this_s
        if before_s == "" or before_s_type == next_s_type:
            # Same language on both sides: keep the whole phrase together.
            ss += next_s
            n += 1
        new_result[-1] += ss
        n += 1

    return "\n".join(new_result)
 if __name__ == '__main__':
    # Manual smoke test: run the selected segmentation method on sample
    # Chinese, Japanese, English and digit inputs and print each result
    # followed by a "===" separator line. get_method is defined elsewhere
    # in this file; presumably it returns the function registered under
    # the given name.
-    method = get_method("cut5")
+    method = get_method("mixed_cut")
-    print(method("你好，我是小明。你好，我是小红。你好，我是小刚。你好，我是小张。"))
+    print(method("你好，我是小明。你好，我是小红。你好，我是小刚。你好，我是小张。") + "\n===\n")
    print(method("你好，我是小明") + "\n===\n")
    print(method("12345") + "\n===\n")
    print(method("123，不许动") + "\n===\n")
    print(method("你好，我是小明。我今年20岁了") + "\n===\n")
    print(method("你好，我是Maxwell, nice to meet you") + "\n===\n")
    print(method("你好，我是Maxwell。我今年20岁了") + "\n===\n")
    print(method("你好，我是小明。こんにちは、シャオミンです。") + "\n===\n")
    print(method("こんにちは、シャオミンです。 今年で20周年") + "\n===\n")
    print(method("こんにちは、シャオミンです。 今年で20周年， nice to meet you") + "\n===\n")
    print(method("こんにちは、シャオミンです。nice to meet you") + "\n===\n")
    print(method("Hello, I am Maxwell. 20 years old，中文名叫小明") + "\n===\n")
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -67,6 +67,7 @@ cut_method = {
    i18n("按中文句号。切"): "cut3",
    i18n("按英文句号.切"): "cut4",
    i18n("按标点符号切"): "cut5",
    i18n("语言混合切字"): "mixed_cut",
 }
 # Build the TTS configuration from the YAML file at this relative path
 # (presumably resolved against the repository root — TODO confirm).
 tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")