diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py
index d8dce13..aa0cf28 100644
--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@@ -4,6 +4,7 @@ import re
 import cn2an
 from pypinyin import lazy_pinyin, Style
+from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
 from text.symbols import punctuation
 from text.tone_sandhi import ToneSandhi
@@ -24,7 +25,7 @@ import jieba_fast.posseg as psg
 is_g2pw = True#True if is_g2pw_str.lower() == 'true' else False
 if is_g2pw:
     print("当前使用g2pw进行拼音推理")
-    from text.g2pw import G2PWPinyin
+    from text.g2pw import G2PWPinyin, correct_pronunciation
     parent_directory = os.path.dirname(current_file_path)
     g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",v_to_u=False, neutral_tone_with_five=True)
@@ -42,6 +43,8 @@ rep_map = {
    "$": ".",
    "/": ",",
    "—": "-",
+    "~": "…",
+    "～": "…",
 }
 
 tone_modifier = ToneSandhi()
@@ -60,26 +63,6 @@ def replace_punctuation(text):
     return replaced_text
 
 
-def replace_punctuation_with_en(text):
-    text = text.replace("嗯", "恩").replace("呣", "母")
-    pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
-
-    replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
-
-    replaced_text = re.sub(
-        r"[^\u4e00-\u9fa5A-Za-z" + "".join(punctuation) + r"]+", "", replaced_text
-    )
-
-    return replaced_text
-
-
-def replace_consecutive_punctuation(text):
-    punctuations = ''.join(re.escape(p) for p in punctuation)
-    pattern = f'([{punctuations}])([{punctuations}])+'
-    result = re.sub(pattern, r'\1', text)
-    return result
-
-
 def g2p(text):
     pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
     sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
@@ -91,16 +74,10 @@ def _get_initials_finals(word):
     initials = []
     finals = []
 
-    if not is_g2pw:
-        orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
-    else:
-        orig_initials = g2pw.lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        orig_finals = g2pw.lazy_pinyin(
-            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
-        )
+    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
+    orig_finals = lazy_pinyin(
+        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
+    )
 
     for c, v in zip(orig_initials, orig_finals):
         initials.append(c)
@@ -108,6 +85,54 @@
     return initials, finals
 
 
+must_erhua = {
+    "小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
+}
+not_erhua = {
+    "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
+    "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
+    "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
+    "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
+    "狗儿", "少儿"
+}
+def _merge_erhua(initials: list[str],
+                 finals: list[str],
+                 word: str,
+                 pos: str) -> tuple[list[str], list[str]]:
+    """
+    Apply erhua (儿化): merge a trailing 儿 into the preceding syllable's tone.
+ """ + # fix er1 + for i, phn in enumerate(finals): + if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1': + finals[i] = 'er2' + + # 发音 + if word not in must_erhua and (word in not_erhua or + pos in {"a", "j", "nr"}): + return initials, finals + + # "……" 等情况直接返回 + if len(finals) != len(word): + return initials, finals + + assert len(finals) == len(word) + + # 与前一个字发同音 + new_initials = [] + new_finals = [] + for i, phn in enumerate(finals): + if i == len(finals) - 1 and word[i] == "儿" and phn in { + "er2", "er5" + } and word[-2:] not in not_erhua and new_finals: + phn = "er" + new_finals[-1][-1] + + new_initials.append(initials[i]) + new_finals.append(phn) + + return new_initials, new_finals + + def _g2p(segments): phones_list = [] word2ph = [] @@ -116,20 +141,62 @@ def _g2p(segments): # Replace all English words in the sentence seg = re.sub("[a-zA-Z]+", "", seg) seg_cut = psg.lcut(seg) + seg_cut = tone_modifier.pre_merge_for_modify(seg_cut) initials = [] finals = [] - seg_cut = tone_modifier.pre_merge_for_modify(seg_cut) - for word, pos in seg_cut: - if pos == "eng": - continue - sub_initials, sub_finals = _get_initials_finals(word) - sub_finals = tone_modifier.modified_tone(word, pos, sub_finals) - initials.append(sub_initials) - finals.append(sub_finals) - # assert len(sub_initials) == len(sub_finals) == len(word) - initials = sum(initials, []) - finals = sum(finals, []) - # + + if not is_g2pw: + for word, pos in seg_cut: + if pos == "eng": + continue + sub_initials, sub_finals = _get_initials_finals(word) + sub_finals = tone_modifier.modified_tone(word, pos, sub_finals) + # 儿化 + sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos) + initials.append(sub_initials) + finals.append(sub_finals) + # assert len(sub_initials) == len(sub_finals) == len(word) + initials = sum(initials, []) + finals = sum(finals, []) + print("pypinyin结果",initials,finals) + else: + # g2pw采用整句推理 + pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3) + + pre_word_length = 0 + for word, pos in seg_cut: + sub_initials = [] + sub_finals = [] + now_word_length = pre_word_length + len(word) + + if pos == 'eng': + pre_word_length = now_word_length + continue + + word_pinyins = pinyins[pre_word_length:now_word_length] + + # 多音字消歧 + word_pinyins = correct_pronunciation(word,word_pinyins) + + for pinyin in word_pinyins: + if pinyin[0].isalpha(): + sub_initials.append(to_initials(pinyin)) + sub_finals.append(to_finals_tone3(pinyin,neutral_tone_with_five=True)) + else: + sub_initials.append(pinyin) + sub_finals.append(pinyin) + + pre_word_length = now_word_length + sub_finals = tone_modifier.modified_tone(word, pos, sub_finals) + # 儿化 + sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos) + initials.append(sub_initials) + finals.append(sub_finals) + + initials = sum(initials, []) + finals = sum(finals, []) + print("g2pw结果",initials,finals) + for c, v in zip(initials, finals): raw_pinyin = c + v # NOTE: post process for pypinyin outputs @@ -184,6 +251,24 @@ def _g2p(segments): return phones_list, word2ph +def replace_punctuation_with_en(text): + text = text.replace("嗯", "恩").replace("呣", "母") + pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys())) + + replaced_text = pattern.sub(lambda x: rep_map[x.group()], text) + + replaced_text = re.sub( + r"[^\u4e00-\u9fa5A-Za-z" + "".join(punctuation) + r"]+", "", replaced_text + ) + + return replaced_text + +def replace_consecutive_punctuation(text): + punctuations = ''.join(re.escape(p) for 
+    pattern = f'([{punctuations}])([{punctuations}])+'
+    result = re.sub(pattern, r'\1', text)
+    return result
+
 def text_normalize(text):
     # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
     tx = TextNormalizer()
@@ -196,7 +281,6 @@
     dest_text = replace_consecutive_punctuation(dest_text)
     return dest_text
 
-
 # 不排除英文的文本格式化
 def mix_text_normalize(text):
     # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
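
Note on the g2pw branch in _g2p above: a minimal standalone sketch (not part of the patch; it uses only to_initials and to_finals_tone3 from pypinyin.contrib.tone_convert, the same helpers imported at the top of the diff, and the sample syllables are illustrative) of how whole-sentence Style.TONE3 output is split back into per-syllable initials and finals:

    # Split TONE3 pinyin (e.g. "zhong1") into an initial ("zh") and a tone3
    # final ("ong1"); non-alphabetic tokens such as punctuation pass through
    # unchanged, mirroring the word_pinyins loop in the patch.
    from pypinyin.contrib.tone_convert import to_initials, to_finals_tone3

    for pinyin in ["zhong1", "guo2", "er5", ","]:
        if pinyin[0].isalpha():
            print(to_initials(pinyin), to_finals_tone3(pinyin, neutral_tone_with_five=True))
        else:
            print(pinyin, pinyin)
    # -> "zh ong1", "g uo2", " er5" (er has an empty initial), ", ,"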