From 6ec3a66d53c4fe0d72a7e342e30fa02961756005 Mon Sep 17 00:00:00 2001
From: KamioRinn <63162909+KamioRinn@users.noreply.github.com>
Date: Sat, 3 Aug 2024 11:19:52 +0800
Subject: [PATCH] support yue and ko inference

support yue and ko inference
---
 GPT_SoVITS/inference_webui.py | 38 ++++++++++++++++++++++-------------
 GPT_SoVITS/text/cantonese.py  | 11 +++++++---
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index c72acd8..1578732 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -205,11 +205,16 @@ def get_spepc(hps, filename):
 
 dict_language = {
     i18n("中文"): "all_zh",#全部按中文识别
+    i18n("粤语"): "all_yue",#全部按中文识别
     i18n("英文"): "en",#全部按英文识别#######不变
     i18n("日文"): "all_ja",#全部按日文识别
+    i18n("韩文"): "all_ko",#全部按韩文识别
     i18n("中英混合"): "zh",#按中英混合识别####不变
+    i18n("粤英混合"): "yue",#按粤英混合识别####不变
     i18n("日英混合"): "ja",#按日英混合识别####不变
+    i18n("韩英混合"): "ko",#按韩英混合识别####不变
     i18n("多语种混合"): "auto",#多语种启动切分识别语种
+    i18n("多语种混合(粤语)"): "auto_yue",#多语种启动切分识别语种
 }
 
 
@@ -242,13 +247,13 @@ def get_first(text):
 
 from text import chinese
 def get_phones_and_bert(text,language):
-    if language in {"en","all_zh","all_ja"}:
+    if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
         language = language.replace("all_","")
         if language == "en":
             LangSegment.setfilters(["en"])
             formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
         else:
-            # 因无法区别中日文汉字,以用户输入为准
+            # 因无法区别中日韩文汉字,以用户输入为准
             formattext = text
         while "  " in formattext:
             formattext = formattext.replace("  ", " ")
@@ -259,32 +264,37 @@ def get_phones_and_bert(text,language):
                 return get_phones_and_bert(formattext,"zh")
             else:
                 phones, word2ph, norm_text = clean_text_inf(formattext, language)
-
-            bert = get_bert_feature(norm_text, word2ph).to(device)
+                bert = get_bert_feature(norm_text, word2ph).to(device)
+        elif language == "yue" and re.search(r'[A-Za-z]', formattext):
+            formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
+            formattext = chinese.text_normalize(formattext)
+            return get_phones_and_bert(formattext,"yue")
         else:
             phones, word2ph, norm_text = clean_text_inf(formattext, language)
             bert = torch.zeros(
                 (1024, len(phones)),
                 dtype=torch.float16 if is_half == True else torch.float32,
             ).to(device)
-    elif language in {"zh", "ja","auto"}:
+    elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
         textlist=[]
         langlist=[]
         LangSegment.setfilters(["zh","ja","en","ko"])
         if language == "auto":
             for tmp in LangSegment.getTexts(text):
-                if tmp["lang"] == "ko":
-                    langlist.append("zh")
-                    textlist.append(tmp["text"])
-                else:
-                    langlist.append(tmp["lang"])
-                    textlist.append(tmp["text"])
+                langlist.append(tmp["lang"])
+                textlist.append(tmp["text"])
+        elif language == "auto_yue":
+            for tmp in LangSegment.getTexts(text):
+                if tmp["lang"] == "zh":
+                    tmp["lang"] = "yue"
+                langlist.append(tmp["lang"])
+                textlist.append(tmp["text"])
         else:
             for tmp in LangSegment.getTexts(text):
                 if tmp["lang"] == "en":
                     langlist.append(tmp["lang"])
                 else:
-                    # 因无法区别中日文汉字,以用户输入为准
+                    # 因无法区别中日韩文汉字,以用户输入为准
                     langlist.append(language)
                 textlist.append(tmp["text"])
         print(textlist)
@@ -605,14 +615,14 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                 gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"))
             prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="")
             prompt_language = gr.Dropdown(
-                label=i18n("参考音频的语种"), choices=[i18n("中文"), i18n("英文"), i18n("日文"), i18n("中英混合"), i18n("日英混合"), i18n("多语种混合")], value=i18n("中文")
+                label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
             )
         gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
         with gr.Row():
             with gr.Column():
                 text = gr.Textbox(label=i18n("需要合成的文本"), value="")
                 text_language = gr.Dropdown(
-                    label=i18n("需要合成的语种"), choices=[i18n("中文"), i18n("英文"), i18n("日文"), i18n("中英混合"), i18n("日英混合"), i18n("多语种混合")], value=i18n("中文")
+                    label=i18n("需要合成的语种"), choices=list(dict_language.keys()), value=i18n("中文")
                 )
                 how_to_cut = gr.Radio(
                     label=i18n("怎么切"),
diff --git a/GPT_SoVITS/text/cantonese.py b/GPT_SoVITS/text/cantonese.py
index 915be38..b31dbd5 100644
--- a/GPT_SoVITS/text/cantonese.py
+++ b/GPT_SoVITS/text/cantonese.py
@@ -6,6 +6,7 @@ import cn2an
 from pyjyutping import jyutping
 
 from text.symbols import punctuation
+from text.zh_normalization.text_normlization import TextNormalizer
 
 normalizer = lambda x: cn2an.transform(x, "an2cn")
 
@@ -106,9 +107,13 @@ def replace_punctuation(text):
 
 
 def text_normalize(text):
-    text = normalizer(text)
-    text = replace_punctuation(text)
-    return text
+    tx = TextNormalizer()
+    sentences = tx.normalize(text)
+    dest_text = ""
+    for sentence in sentences:
+        dest_text += replace_punctuation(sentence)
+    return dest_text
+
 punctuation_set=set(punctuation)
 
 def jyuping_to_initials_finals_tones(jyuping_syllables):
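
Note on the inference_webui.py change: in the new "auto_yue" mode, segments that LangSegment detects as "zh" are relabeled "yue" so they go through the Cantonese front end, while other detected languages keep their tags (and plain "auto" no longer folds "ko" into "zh"). A minimal sketch of that routing, separate from the patch itself; route_auto_yue and the segment list below are hypothetical illustrations shaped like LangSegment.getTexts() output:

    def route_auto_yue(segments):
        # Mirrors the new "auto_yue" branch of get_phones_and_bert():
        # Chinese-detected segments are relabeled as Cantonese, everything
        # else keeps the tag LangSegment assigned.
        textlist, langlist = [], []
        for tmp in segments:
            lang = "yue" if tmp["lang"] == "zh" else tmp["lang"]
            langlist.append(lang)
            textlist.append(tmp["text"])
        return textlist, langlist

    # Hypothetical mixed-language segmentation result:
    segments = [{"lang": "zh", "text": "你好"}, {"lang": "en", "text": "hello"}]
    print(route_auto_yue(segments))  # (['你好', 'hello'], ['yue', 'en'])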
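Note on the cantonese.py change: text_normalize() now delegates to the zh_normalization TextNormalizer already used by the Mandarin front end; its normalize() returns a list of normalized sentences, and each sentence is then passed through replace_punctuation() before being concatenated. A usage sketch assuming it runs inside the GPT_SoVITS package so these imports resolve; normalize_cantonese is an illustrative name, not part of the patch:

    from text.zh_normalization.text_normlization import TextNormalizer
    from text.cantonese import replace_punctuation

    def normalize_cantonese(text: str) -> str:
        # Same flow as the patched text_normalize(): sentence-level
        # normalization first, punctuation mapping second.
        tx = TextNormalizer()
        sentences = tx.normalize(text)  # list of normalized sentence strings
        return "".join(replace_punctuation(s) for s in sentences)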