From 14ee0ae87a0523a1cd44bd6222fb47ec42c59f5c Mon Sep 17 00:00:00 2001
From: KamioRinn
Date: Sat, 9 Mar 2024 17:00:53 +0800
Subject: [PATCH] Make the inference logic more reasonable

---
 GPT_SoVITS/text/chinese.py             | 79 ++++++++++++++++++-------
 GPT_SoVITS/text/g2pw/g2pw.py           | 13 ++--
 GPT_SoVITS/text/g2pw/polyphonic.pickle | Bin 1498 -> 1546 bytes
 GPT_SoVITS/text/g2pw/polyphonic.rep    |  4 +-
 4 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py
index fc8c2cba..eb8a45b6 100644
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -4,6 +4,7 @@ import re
 
 import cn2an
 from pypinyin import lazy_pinyin, Style
+from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
 
 from text.symbols import punctuation
 from text.tone_sandhi import ToneSandhi
@@ -23,7 +24,7 @@ is_g2pw_str = os.environ.get("is_g2pw", "True")
 is_g2pw = True if is_g2pw_str.lower() == 'true' else False
 if is_g2pw:
     print("Currently using g2pw for pinyin inference")
-    from text.g2pw import G2PWPinyin
+    from text.g2pw import G2PWPinyin, correct_pronunciation
     parent_directory = os.path.dirname(current_file_path)
     g2pw_model_dir = os.path.join(parent_directory,"pretrained_models","G2PWModel")
     g2pw_model_source = os.path.join(parent_directory,"pretrained_models","chinese-roberta-wwm-ext-large")
@@ -72,16 +73,10 @@ def _get_initials_finals(word):
     initials = []
     finals = []
 
-    if not is_g2pw:
-        orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
-    else:
-        orig_initials = g2pw.lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = g2pw.lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
+    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
+    orig_finals = lazy_pinyin(
+        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
+    )
 
     for c, v in zip(orig_initials, orig_finals):
         initials.append(c)
@@ -97,20 +92,58 @@ def _g2p(segments):
         # Replace all English words in the sentence
         seg = re.sub("[a-zA-Z]+", "", seg)
         seg_cut = psg.lcut(seg)
+        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
         initials = []
         finals = []
-        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
-        for word, pos in seg_cut:
-            if pos == "eng":
-                continue
-            sub_initials, sub_finals = _get_initials_finals(word)
-            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
-            initials.append(sub_initials)
-            finals.append(sub_finals)
-            # assert len(sub_initials) == len(sub_finals) == len(word)
-        initials = sum(initials, [])
-        finals = sum(finals, [])
-        #
+
+        if not is_g2pw:
+            for word, pos in seg_cut:
+                if pos == "eng":
+                    continue
+                sub_initials, sub_finals = _get_initials_finals(word)
+                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                initials.append(sub_initials)
+                finals.append(sub_finals)
+                # assert len(sub_initials) == len(sub_finals) == len(word)
+            initials = sum(initials, [])
+            finals = sum(finals, [])
+            print("pypinyin result",initials,finals)
+        else:
+            # g2pw runs inference on the whole sentence
+            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)
+
+            pre_word_length = 0
+            for word, pos in seg_cut:
+                sub_initials = []
+                sub_finals = []
+                now_word_length = pre_word_length + len(word)
+
+                if pos == 'eng':
+                    pre_word_length = now_word_length
+                    continue
+
+                word_pinyins = pinyins[pre_word_length:now_word_length]
+
+                # polyphone disambiguation
+                word_pinyins = correct_pronunciation(word,word_pinyins)
+
+                for pinyin in word_pinyins:
+                    if pinyin[0].isalpha():
+                        sub_initials.append(to_initials(pinyin))
+                        sub_finals.append(to_finals_tone3(pinyin,neutral_tone_with_five=True))
+                    else:
+                        sub_initials.append(pinyin)
+                        sub_finals.append(pinyin)
+
+                pre_word_length = now_word_length
+                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                initials.append(sub_initials)
+                finals.append(sub_finals)
+
+            initials = sum(initials, [])
+            finals = sum(finals, [])
+            print("g2pw result",initials,finals)
+
         for c, v in zip(initials, finals):
             raw_pinyin = c + v
             # NOTE: post process for pypinyin outputs
diff --git a/GPT_SoVITS/text/g2pw/g2pw.py b/GPT_SoVITS/text/g2pw/g2pw.py
index ef6e3394..0e3e7d2d 100644
--- a/GPT_SoVITS/text/g2pw/g2pw.py
+++ b/GPT_SoVITS/text/g2pw/g2pw.py
@@ -69,12 +69,6 @@ class Converter(UltimateConverter):
     def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
         pinyins = []
 
-        if han in pp_dict:
-            phns = pp_dict[han]
-            for ph in phns:
-                pinyins.append([ph])
-            return pinyins
-
         g2pw_pinyin = self._g2pw(han)
 
         if not g2pw_pinyin:  # fall back to the original pypinyin logic for characters g2pw does not support
@@ -142,4 +136,11 @@ def read_dict():
     return polyphonic_dict
 
 
+def correct_pronunciation(word,word_pinyins):
+    if word in pp_dict:
+        word_pinyins = pp_dict[word]
+
+    return word_pinyins
+
+
 pp_dict = get_dict()
\ No newline at end of file
diff --git a/GPT_SoVITS/text/g2pw/polyphonic.pickle b/GPT_SoVITS/text/g2pw/polyphonic.pickle
index 98cba72f643b42e048b7ff32f3a551ab8d6566e5..e0ddfdc28e3652a8e73a5ebbdd753c8c72fc4811 100644
GIT binary patch
delta 64
zcmcb`-NnPwz%up!MwTnAMm=oL=FNH0vVKbJ6pbF1^wNCeDLu?-rN&cId)Qtyb;4ze
O@=HyoWL!-x)dK*Iq#Q~B

delta 16
XcmeC;xy8-Wz%upxMwTnAOr?4NF|h@k

diff --git a/GPT_SoVITS/text/g2pw/polyphonic.rep b/GPT_SoVITS/text/g2pw/polyphonic.rep
index cb4ef979..de1b65d5 100644
--- a/GPT_SoVITS/text/g2pw/polyphonic.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic.rep
@@ -50,4 +50,6 @@
 陈威行: ['chen2', 'wei1', 'hang2']
 郭晟: ['guo1', 'sheng4']
 中标: ['zhong4', 'biao1']
-抗住: ['kang2', 'zhu4']
\ No newline at end of file
+抗住: ['kang2', 'zhu4']
+果脯: ['guo3', 'fu3']
+肉脯: ['rou4', 'fu3']
\ No newline at end of file
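
Illustration of the reworked flow in chinese.py (not part of the patch): g2pw now produces pinyin for the whole sentence in one pass, the result is sliced back onto the psg.lcut segments by word length, and each word is checked against the polyphonic dictionary via correct_pronunciation before being split into initials and finals. The sketch below reproduces just that per-word slicing and override step; plain pypinyin stands in for the G2PW model, the segment list is hard-coded in place of psg.lcut, and a small local dict stands in for pp_dict, so the uncorrected pinyin may differ from real g2pw output (pypinyin's phrase dictionary may already read 果脯 correctly).

    # Stand-alone sketch, illustration only; assumes pypinyin is installed.
    from pypinyin import lazy_pinyin, Style
    from pypinyin.contrib.tone_convert import to_initials, to_finals_tone3

    pp_dict = {"果脯": ["guo3", "fu3"]}  # stand-in for the dict read from polyphonic.rep

    def correct_pronunciation(word, word_pinyins):
        # Same shape as the helper added to g2pw.py: override the whole word if known.
        return pp_dict.get(word, word_pinyins)

    seg = "我爱吃果脯。"
    seg_cut = [("我", "r"), ("爱", "v"), ("吃", "v"), ("果脯", "n"), ("。", "x")]  # pretend psg.lcut output
    # One pinyin pass over the whole sentence (the patch calls g2pw.lazy_pinyin here).
    pinyins = lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)

    initials, finals = [], []
    pre_word_length = 0
    for word, pos in seg_cut:
        now_word_length = pre_word_length + len(word)
        word_pinyins = correct_pronunciation(word, pinyins[pre_word_length:now_word_length])
        for pinyin in word_pinyins:
            if pinyin[0].isalpha():  # a real syllable: split into initial and final
                initials.append(to_initials(pinyin))
                finals.append(to_finals_tone3(pinyin, neutral_tone_with_five=True))
            else:                    # punctuation passes through unchanged
                initials.append(pinyin)
                finals.append(pinyin)
        pre_word_length = now_word_length

    # Expected shape: one initial and one final per character, punctuation kept verbatim.
    print(initials, finals)

Slicing a single sentence-level prediction by word length keeps g2pw's context-aware polyphone handling, while the per-word loop still lets tone_modifier.modified_tone and the manual overrides in polyphonic.rep apply word by word.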