Add more polyphonic and merge_erhua

2026-06-05 22:08:15 +08:00 · 2024-03-13 21:54:45 +08:00 · 2024-03-13 21:54:45 +08:00 · 401bf3a04b
commit 401bf3a04b
parent c31126f674
5 changed files with 45085 additions and 23 deletions
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@ -86,6 +86,54 @@ def _get_initials_finals(word):
    return initials, finals
 # Words ending in "儿" that `_merge_erhua` always processes, even when the
 # word's part-of-speech tag would otherwise make it return early.
 must_erhua = {
     "小院儿",
     "胡同儿",
     "范儿",
     "老汉儿",
     "撒欢儿",
     "寻老礼儿",
     "妥妥儿",
     "媳妇儿",
 }

 # Words ending in "儿" that `_merge_erhua` returns without re-toning the final
 # "儿". It tests both the whole word and its last two characters against this
 # set.
 not_erhua = {
     "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿",
     "我儿", "俺儿", "妻儿", "拐儿", "聋儿", "乞儿", "患儿", "幼儿",
     "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿", "流浪儿", "体弱儿",
     "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿",
     "侄儿", "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿",
     "马儿", "鸟儿", "猪儿", "猫儿", "狗儿", "少儿",
 }
 def _merge_erhua(initials: list[str],
                  finals: list[str],
                  word: str,
                  pos: str) -> tuple[list[str], list[str]]:
     """
     Apply erhua handling to the pinyin of a single word ending in "儿".

     The trailing "儿" keeps its own entry in the result; only the tone
     character of its final is replaced by the last character of the
     preceding final (e.g. ``["an4", "er2"]`` becomes ``["an4", "er4"]``).

     Args:
         initials: Initials of ``word``, expected to be one entry per
             character.
         finals: Finals of ``word``, expected to be one entry per character
             and to end in a tone digit. A trailing ``'er1'`` is rewritten
             to ``'er2'`` IN PLACE before any other decision is taken.
         word: The word the initials/finals belong to.
         pos: Part-of-speech tag of ``word``; the tags "a", "j" and "nr"
             suppress erhua unless the word is listed in ``must_erhua``.

     Returns:
         A ``(initials, finals)`` tuple. On the early-exit paths these are
         the input lists themselves; otherwise they are new lists.
     """
     last = len(finals) - 1
     # Bounds-check before indexing `word`: the finals may not line up with
     # the characters (see the length guard below), and indexing blindly
     # raised IndexError when there were more finals than characters.
     ends_with_er = 0 <= last < len(word) and word[last] == "儿"

     # Fix er1: a trailing "儿" transcribed as 'er1' is normalised to 'er2'.
     if ends_with_er and finals[last] == "er1":
         finals[last] = "er2"

     # Decide whether erhua applies at all: words in `must_erhua` always
     # continue; otherwise excluded words and excluded POS tags stop here.
     if word not in must_erhua and (word in not_erhua
                                    or pos in {"a", "j", "nr"}):
         return initials, finals

     # Cases such as "……" where finals and characters do not align:
     # return the input untouched.
     if len(finals) != len(word):
         return initials, finals

     new_initials = list(initials[:len(finals)])
     new_finals = list(finals)

     # Give the final "儿" the same tone as the previous syllable. Only the
     # last entry can change, and only if a non-empty previous final exists
     # to copy the tone from (an empty one used to raise IndexError).
     prev_final = new_finals[-2] if len(new_finals) >= 2 else ""
     if (ends_with_er
             and new_finals[-1] in {"er2", "er5"}
             and word[-2:] not in not_erhua
             and prev_final):
         new_finals[-1] = "er" + prev_final[-1]

     return new_initials, new_finals
 def _g2p(segments):
    phones_list = []
    word2ph = []
@ -104,6 +152,8 @@ def _g2p(segments):
                    continue
                sub_initials, sub_finals = _get_initials_finals(word)
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # 儿化
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                initials.append(sub_initials)
                finals.append(sub_finals)
                # assert len(sub_initials) == len(sub_finals) == len(word)
@ -139,6 +189,8 @@ def _g2p(segments):
                pre_word_length = now_word_length
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # 儿化
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                initials.append(sub_initials)
                finals.append(sub_finals)
--- a/GPT_SoVITS/text/g2pw/g2pw.py
+++ b/GPT_SoVITS/text/g2pw/g2pw.py
@ -13,6 +13,7 @@ from .onnx_api import G2PWOnnxConverter
 # Directory containing this module; the dictionary files are addressed
 # relative to it.
 current_file_path = os.path.dirname(__file__)
 # Paths of the pickle cache, the polyphonic dictionary and its fix-up file.
 CACHE_PATH, PP_DICT_PATH, PP_FIX_DICT_PATH = (
     os.path.join(current_file_path, _file_name)
     for _file_name in (
         "polyphonic.pickle",
         "polyphonic.rep",
         "polyphonic-fix.rep",
     )
 )
 class G2PWPinyin(Pinyin):
@ -133,6 +134,13 @@ def read_dict():
            value = eval(value_str.strip())
            polyphonic_dict[key.strip()] = value
            line = f.readline()
    # Overlay the entries of the fix-up dictionary; a key that is already
    # present in polyphonic_dict is overwritten.
    # The encoding is passed explicitly because open() otherwise decodes with
    # the locale's codec, which mis-reads non-ASCII keys on non-UTF-8 locales.
    # NOTE(review): assumes the .rep file is stored as UTF-8 — confirm.
    with open(PP_FIX_DICT_PATH, encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which would otherwise break the two-name unpack below.
            if not line.strip():
                continue
            # Split on the first ':' only, so a ':' inside the value part
            # cannot produce more than two fields.
            key, value_str = line.split(':', 1)
            # NOTE(review): eval() executes arbitrary expressions from the
            # file. Consider ast.literal_eval if this file can ever come
            # from an untrusted source.
            value = eval(value_str.strip())
            polyphonic_dict[key.strip()] = value
    return polyphonic_dict
--- a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep
--- a/GPT_SoVITS/text/g2pw/polyphonic.pickle
+++ b/GPT_SoVITS/text/g2pw/polyphonic.pickle
--- a/GPT_SoVITS/text/g2pw/polyphonic.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic.rep
@ -51,25 +51,3 @@
 郭晟: ['guo1', 'sheng4']
 中标: ['zhong4', 'biao1']
 抗住: ['kang2', 'zhu4']
 果脯: ['guo3', 'fu3']
 肉脯: ['rou4', 'fu3']
 不粘锅: ['bu4', 'zhan1', 'guo1']
 粘信封: ['zhan1', 'xin4', 'feng1']
 粘牙: ['zhan1', 'ya2']
 粘皮带骨: ['zhan1', 'pi2', 'dai4', 'gu3']
 粘贴: ['zhan1', 'tie1']
 粘连: ['zhan1', 'lian2']
 一扎: ['yī', 'zā']
 包扎: ['bāo', 'zā']
 安营扎寨: ['ān', 'yíng', 'zhā', 'zhài']
 屯扎: ['tún', 'zhā']
 巴尔扎克: ['bā', 'ěr', 'zhā', 'kè']
 扎染: ['zā', 'rǎn']
 扎根: ['zhā', 'gēn']
 扎根串连: ['zhā', 'gēn', 'chuàn', 'lián']
 扎破: ['zhá', 'pò']
 扎营: ['zhā', 'yíng']
 结扎: ['jie2', 'za1']
 绑扎: ['bang3', 'za1']
 长进: ['zhǎng', 'jìn']
 折本: ['shé', 'běn']