Make the inference logic more reasonable

2026-06-05 22:08:15 +08:00 · 2024-03-09 17:00:53 +08:00 · 2024-03-09 17:00:53 +08:00 · 14ee0ae87a
commit 14ee0ae87a
parent 7def70d951
4 changed files with 66 additions and 30 deletions
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@ -4,6 +4,7 @@ import re
 import cn2an
 from pypinyin import lazy_pinyin, Style
 from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
 from text.symbols import punctuation
 from text.tone_sandhi import ToneSandhi
@ -23,7 +24,7 @@ is_g2pw_str = os.environ.get("is_g2pw", "True")
 is_g2pw = True if is_g2pw_str.lower() == 'true' else False
 if is_g2pw:
    print("当前使用g2pw进行拼音推理")
-    from text.g2pw import G2PWPinyin
+    from text.g2pw import G2PWPinyin, correct_pronunciation
    parent_directory = os.path.dirname(current_file_path)
    g2pw_model_dir = os.path.join(parent_directory,"pretrained_models","G2PWModel")
    g2pw_model_source = os.path.join(parent_directory,"pretrained_models","chinese-roberta-wwm-ext-large")
@ -72,16 +73,10 @@ def _get_initials_finals(word):
    initials = []
    finals = []
-    if not is_g2pw:
+    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
+    orig_finals = lazy_pinyin(
-        orig_finals = lazy_pinyin(
+        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
+    )
        )
    else:
        orig_initials = g2pw.lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
        orig_finals = g2pw.lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
        )
    for c, v in zip(orig_initials, orig_finals):
        initials.append(c)
@ -97,20 +92,58 @@ def _g2p(segments):
        # Replace all English words in the sentence
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
        initials = []
        finals = []
-        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
+
-        for word, pos in seg_cut:
+        if not is_g2pw:
-            if pos == "eng":
+            for word, pos in seg_cut:
-                continue
+                if pos == "eng":
-            sub_initials, sub_finals = _get_initials_finals(word)
+                    continue
-            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                sub_initials, sub_finals = _get_initials_finals(word)
-            initials.append(sub_initials)
+                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
-            finals.append(sub_finals)
+                initials.append(sub_initials)
-            # assert len(sub_initials) == len(sub_finals) == len(word)
+                finals.append(sub_finals)
-        initials = sum(initials, [])
+                # assert len(sub_initials) == len(sub_finals) == len(word)
-        finals = sum(finals, [])
+            initials = sum(initials, [])
-        #
+            finals = sum(finals, [])
            print("pypinyin结果",initials,finals)
        else:
            # g2pw采用整句推理
            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)
            pre_word_length = 0
            for word, pos in seg_cut:
                sub_initials = []
                sub_finals = []
                now_word_length = pre_word_length + len(word)
                if pos == 'eng':
                    pre_word_length = now_word_length
                    continue
                word_pinyins = pinyins[pre_word_length:now_word_length]
                # 多音字消歧
                word_pinyins = correct_pronunciation(word,word_pinyins)
                for pinyin in word_pinyins:
                    if pinyin[0].isalpha():
                        sub_initials.append(to_initials(pinyin))
                        sub_finals.append(to_finals_tone3(pinyin,neutral_tone_with_five=True))
                    else:
                        sub_initials.append(pinyin)
                        sub_finals.append(pinyin)
                pre_word_length = now_word_length
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                initials.append(sub_initials)
                finals.append(sub_finals)
            initials = sum(initials, [])
            finals = sum(finals, [])
            print("g2pw结果",initials,finals)
        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            # NOTE: post process for pypinyin outputs
--- a/GPT_SoVITS/text/g2pw/g2pw.py
+++ b/GPT_SoVITS/text/g2pw/g2pw.py
@ -69,12 +69,6 @@ class Converter(UltimateConverter):
    def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
        pinyins = []
        if han in pp_dict:
            phns = pp_dict[han]
            for ph in phns:
                pinyins.append([ph])
            return pinyins
        g2pw_pinyin = self._g2pw(han)
        if not g2pw_pinyin:  # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
@ -142,4 +136,11 @@ def read_dict():
    return polyphonic_dict
 def correct_pronunciation(word,word_pinyins):
    if word in pp_dict:
        word_pinyins = pp_dict[word]
    return word_pinyins
 pp_dict = get_dict()
--- a/GPT_SoVITS/text/g2pw/polyphonic.pickle
+++ b/GPT_SoVITS/text/g2pw/polyphonic.pickle
--- a/GPT_SoVITS/text/g2pw/polyphonic.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic.rep
@ -50,4 +50,6 @@
 陈威行: ['chen2', 'wei1', 'hang2']
 郭晟: ['guo1', 'sheng4']
 中标: ['zhong4', 'biao1']
-抗住: ['kang2', 'zhu4']
+抗住: ['kang2', 'zhu4']
 果脯: ['guo3', 'fu3']
 肉脯: ['rou4', 'fu3']