From 14ee0ae87a0523a1cd44bd6222fb47ec42c59f5c Mon Sep 17 00:00:00 2001
From: KamioRinn
Date: Sat, 9 Mar 2024 17:00:53 +0800
Subject: [PATCH] Make the inference logic more reasonable

---
 GPT_SoVITS/text/chinese.py             | 79 ++++++++++++++++++-------
 GPT_SoVITS/text/g2pw/g2pw.py           | 13 ++--
 GPT_SoVITS/text/g2pw/polyphonic.pickle | Bin 1498 -> 1546 bytes
 GPT_SoVITS/text/g2pw/polyphonic.rep    |  4 +-
 4 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py
index fc8c2cba..eb8a45b6 100644
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -4,6 +4,7 @@ import re
 
 import cn2an
 from pypinyin import lazy_pinyin, Style
+from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
 
 from text.symbols import punctuation
 from text.tone_sandhi import ToneSandhi
@@ -23,7 +24,7 @@ is_g2pw_str = os.environ.get("is_g2pw", "True")
 is_g2pw = True if is_g2pw_str.lower() == 'true' else False
 if is_g2pw:
     print("Currently using g2pw for pinyin inference")
-    from text.g2pw import G2PWPinyin
+    from text.g2pw import G2PWPinyin, correct_pronunciation
     parent_directory = os.path.dirname(current_file_path)
     g2pw_model_dir = os.path.join(parent_directory,"pretrained_models","G2PWModel")
     g2pw_model_source = os.path.join(parent_directory,"pretrained_models","chinese-roberta-wwm-ext-large")
@@ -72,16 +73,10 @@ def _get_initials_finals(word):
     initials = []
     finals = []
 
-    if not is_g2pw:
-        orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
-    else:
-        orig_initials = g2pw.lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = g2pw.lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
+    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
+    orig_finals = lazy_pinyin(
+        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
+    )
 
     for c, v in zip(orig_initials, orig_finals):
         initials.append(c)
@@ -97,20 +92,58 @@ def _g2p(segments):
         # Replace all English words in the sentence
         seg = re.sub("[a-zA-Z]+", "", seg)
         seg_cut = psg.lcut(seg)
+        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
         initials = []
         finals = []
-        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
-        for word, pos in seg_cut:
-            if pos == "eng":
-                continue
-            sub_initials, sub_finals = _get_initials_finals(word)
-            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
-            initials.append(sub_initials)
-            finals.append(sub_finals)
-            # assert len(sub_initials) == len(sub_finals) == len(word)
-        initials = sum(initials, [])
-        finals = sum(finals, [])
-        #
+
+        if not is_g2pw:
+            for word, pos in seg_cut:
+                if pos == "eng":
+                    continue
+                sub_initials, sub_finals = _get_initials_finals(word)
+                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                initials.append(sub_initials)
+                finals.append(sub_finals)
+                # assert len(sub_initials) == len(sub_finals) == len(word)
+            initials = sum(initials, [])
+            finals = sum(finals, [])
+            print("pypinyin result",initials,finals)
+        else:
+            # g2pw runs inference on the whole sentence
+            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)
+
+            pre_word_length = 0
+            for word, pos in seg_cut:
+                sub_initials = []
+                sub_finals = []
+                now_word_length = pre_word_length + len(word)
+
+                if pos == 'eng':
+                    pre_word_length = now_word_length
+                    continue
+
+                word_pinyins = pinyins[pre_word_length:now_word_length]
+
+                # polyphone disambiguation
+                word_pinyins = correct_pronunciation(word,word_pinyins)
+
+                for pinyin in word_pinyins:
+                    if pinyin[0].isalpha():
+                        sub_initials.append(to_initials(pinyin))
+                        sub_finals.append(to_finals_tone3(pinyin,neutral_tone_with_five=True))
+                    else:
+                        sub_initials.append(pinyin)
+                        sub_finals.append(pinyin)
+
+                pre_word_length = now_word_length
+                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
+                initials.append(sub_initials)
+                finals.append(sub_finals)
+
+            initials = sum(initials, [])
+            finals = sum(finals, [])
+            print("g2pw result",initials,finals)
+
         for c, v in zip(initials, finals):
             raw_pinyin = c + v
             # NOTE: post process for pypinyin outputs
diff --git a/GPT_SoVITS/text/g2pw/g2pw.py b/GPT_SoVITS/text/g2pw/g2pw.py
index ef6e3394..0e3e7d2d 100644
--- a/GPT_SoVITS/text/g2pw/g2pw.py
+++ b/GPT_SoVITS/text/g2pw/g2pw.py
@@ -69,12 +69,6 @@ class Converter(UltimateConverter):
     def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
         pinyins = []
 
-        if han in pp_dict:
-            phns = pp_dict[han]
-            for ph in phns:
-                pinyins.append([ph])
-            return pinyins
-
         g2pw_pinyin = self._g2pw(han)
 
         if not g2pw_pinyin:  # fall back to the original pypinyin logic for characters g2pw does not support
@@ -142,4 +136,11 @@ def read_dict():
     return polyphonic_dict
 
 
+def correct_pronunciation(word,word_pinyins):
+    if word in pp_dict:
+        word_pinyins = pp_dict[word]
+
+    return word_pinyins
+
+
 pp_dict = get_dict()
\ No newline at end of file
diff --git a/GPT_SoVITS/text/g2pw/polyphonic.pickle b/GPT_SoVITS/text/g2pw/polyphonic.pickle
index 98cba72f643b42e048b7ff32f3a551ab8d6566e5..e0ddfdc28e3652a8e73a5ebbdd753c8c72fc4811 100644
GIT binary patch
delta 64
zcmcb`-NnPwz%up!MwTnAMm=oL=FNH0vVKbJ6pbF1^wNCeDLu?-rN&cId)Qtyb;4ze
O@=HyoWL!-x)dK*Iq#Q~B

delta 16
XcmeC;xy8-Wz%upxMwTnAOr?4NF|h@k

diff --git a/GPT_SoVITS/text/g2pw/polyphonic.rep b/GPT_SoVITS/text/g2pw/polyphonic.rep
index cb4ef979..de1b65d5 100644
--- a/GPT_SoVITS/text/g2pw/polyphonic.rep
+++ b/GPT_SoVITS/text/g2pw/polyphonic.rep
@@ -50,4 +50,6 @@
 陈威行: ['chen2', 'wei1', 'hang2']
 郭晟: ['guo1', 'sheng4']
 中标: ['zhong4', 'biao1']
-抗住: ['kang2', 'zhu4']
\ No newline at end of file
+抗住: ['kang2', 'zhu4']
+果脯: ['guo3', 'fu3']
+肉脯: ['rou4', 'fu3']
\ No newline at end of file
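
Illustration of the reworked flow in chinese.py (not part of the patch): g2pw now produces pinyin for the whole sentence in one pass, the result is sliced back onto the psg.lcut segments by word length, and each word is checked against the polyphonic dictionary via correct_pronunciation before being split into initials and finals. The sketch below reproduces just that per-word slicing and override step; plain pypinyin stands in for the G2PW model, the segment list is hard-coded in place of psg.lcut, and a small local dict stands in for pp_dict, so the uncorrected pinyin may differ from real g2pw output (pypinyin's phrase dictionary may already read 果脯 correctly).

    # Stand-alone sketch, illustration only; assumes pypinyin is installed.
    from pypinyin import lazy_pinyin, Style
    from pypinyin.contrib.tone_convert import to_initials, to_finals_tone3

    pp_dict = {"果脯": ["guo3", "fu3"]}  # stand-in for the dict read from polyphonic.rep

    def correct_pronunciation(word, word_pinyins):
        # Same shape as the helper added to g2pw.py: override the whole word if known.
        return pp_dict.get(word, word_pinyins)

    seg = "我爱吃果脯。"
    seg_cut = [("我", "r"), ("爱", "v"), ("吃", "v"), ("果脯", "n"), ("。", "x")]  # pretend psg.lcut output
    # One pinyin pass over the whole sentence (the patch calls g2pw.lazy_pinyin here).
    pinyins = lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)

    initials, finals = [], []
    pre_word_length = 0
    for word, pos in seg_cut:
        now_word_length = pre_word_length + len(word)
        word_pinyins = correct_pronunciation(word, pinyins[pre_word_length:now_word_length])
        for pinyin in word_pinyins:
            if pinyin[0].isalpha():  # a real syllable: split into initial and final
                initials.append(to_initials(pinyin))
                finals.append(to_finals_tone3(pinyin, neutral_tone_with_five=True))
            else:                    # punctuation passes through unchanged
                initials.append(pinyin)
                finals.append(pinyin)
        pre_word_length = now_word_length

    # Expected shape: one initial and one final per character, punctuation kept verbatim.
    print(initials, finals)

Slicing a single sentence-level prediction by word length keeps g2pw's context-aware polyphone handling, while the per-word loop still lets tone_modifier.modified_tone and the manual overrides in polyphonic.rep apply word by word.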