Add more polyphonic and merge_erhua

2025-10-08 16:00:01 +08:00 · 2024-03-13 21:54:45 +08:00 · 2024-03-13 21:54:45 +08:00 · 401bf3a04b
commit 401bf3a04b
parent c31126f674
5 changed files with 45085 additions and 23 deletions
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@ -86,6 +86,54 @@ def _get_initials_finals(word):
    return initials, finals


+must_erhua = {  # words that bypass the not_erhua / part-of-speech exemption check in _merge_erhua
+    "小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
+}
+not_erhua = {  # words ending in "儿" that are exempt from the erhua tone merge in _merge_erhua
+    "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
+    "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
+    "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
+    "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
+    "狗儿", "少儿"
+}
+def _merge_erhua(initials: list[str],
+                finals: list[str],
+                word: str,
+                pos: str) -> list[list[str]]:
+    """
+    Apply erhua to one word: retone a word-final "儿" after the preceding syllable and return (initials, finals).
+    """
+    # Rewrite a word-final "儿" read as er1 to er2; note this edits the caller's finals list in place.
+    for i, phn in enumerate(finals):
+        if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
+            finals[i] = 'er2'
+
+    # Exempt words (in not_erhua, or tagged with POS a / j / nr) are returned as-is unless listed in must_erhua.
+    if word not in must_erhua and (word in not_erhua or
+                                        pos in {"a", "j", "nr"}):
+        return initials, finals
+
+    # Return as-is when finals and characters do not line up one-to-one (e.g. "……").
+    if len(finals) != len(word):
+        return initials, finals
+
+    assert len(finals) == len(word)  # always true here: the guard above already returned on a mismatch
+
+    # Give the trailing "儿" the last character (presumably the tone digit) of the preceding final.
+    new_initials = []
+    new_finals = []
+    for i, phn in enumerate(finals):
+        if i == len(finals) - 1 and word[i] == "儿" and phn in {
+                "er2", "er5"
+        } and word[-2:] not in not_erhua and new_finals:
+            phn = "er" + new_finals[-1][-1]
+
+        new_initials.append(initials[i])
+        new_finals.append(phn)
+
+    return new_initials, new_finals  # NOTE(review): a 2-tuple is returned although the annotation says list[list[str]]
+
+
 def _g2p(segments):
    phones_list = []
    word2ph = []
@ -104,6 +152,8 @@ def _g2p(segments):
                    continue
                sub_initials, sub_finals = _get_initials_finals(word)
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                # 儿化
+                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                initials.append(sub_initials)
                finals.append(sub_finals)
                # assert len(sub_initials) == len(sub_finals) == len(word)
@ -139,6 +189,8 @@ def _g2p(segments):

                pre_word_length = now_word_length
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                # 儿化
+                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                initials.append(sub_initials)
                finals.append(sub_finals)

--- a/GPT_SoVITS/text/g2pw/g2pw.py
+++ b/GPT_SoVITS/text/g2pw/g2pw.py
@ -13,6 +13,7 @@ from .onnx_api import G2PWOnnxConverter
 current_file_path = os.path.dirname(__file__)
 CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle")
 PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep")
+PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")


 class G2PWPinyin(Pinyin):
@ -133,6 +134,13 @@ def read_dict():
            value = eval(value_str.strip())
            polyphonic_dict[key.strip()] = value
            line = f.readline()
+    with open(PP_FIX_DICT_PATH) as f:
+        line = f.readline()
+        while line:
+            key, value_str = line.split(':')
+            value = eval(value_str.strip())
+            polyphonic_dict[key.strip()] = value
+            line = f.readline()
    return polyphonic_dict


--- a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep
--- a/GPT_SoVITS/text/g2pw/polyphonic.pickle
+++ b/GPT_SoVITS/text/g2pw/polyphonic.pickle
--- a/GPT_SoVITS/text/g2pw/polyphonic.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic.rep
@ -51,25 +51,3 @@
 郭晟: ['guo1', 'sheng4']
 中标: ['zhong4', 'biao1']
 抗住: ['kang2', 'zhu4']
-果脯: ['guo3', 'fu3']
-肉脯: ['rou4', 'fu3']
-不粘锅: ['bu4', 'zhan1', 'guo1']
-粘信封: ['zhan1', 'xin4', 'feng1']
-粘牙: ['zhan1', 'ya2']
-粘皮带骨: ['zhan1', 'pi2' 'dai4' 'gu3']
-粘贴: ['zhan1', 'tie1']
-粘连: ['zhan1', 'lian2']
-一扎: ['yī', 'zā']
-包扎: ['bāo', 'zā']
-安营扎寨: ['ān', 'yíng', 'zhā', 'zhài']
-屯扎: ['tún', 'zhā']
-巴尔扎克: ['bā', 'ěr', 'zhā', 'kè']
-扎染: ['zā', 'rǎn']
-扎根: ['zhā', 'gēn']
-扎根串连: ['zhā', 'gēn', 'chuàn', 'lián']
-扎破: ['zhá', 'pò']
-扎营: ['zhā', 'yíng']
-结扎: ['jie2', 'za1']
-绑扎: ['bang3', 'za1']
-长进: ['zhǎng', 'jìn']
-折本: ['shé', 'běn']