From dd63352168a2ce07f0beeeb23f6f7881c2f2eedf Mon Sep 17 00:00:00 2001
From: KakaruHayate <97896816+KakaruHayate@users.noreply.github.com>
Date: Sat, 3 Feb 2024 15:41:30 +0800
Subject: [PATCH] =?UTF-8?q?=E5=9C=A8=E6=8E=A8=E7=90=86=E9=A1=B5=E9=9D=A2?=
 =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=9C=A8=E7=BA=BF=E5=BD=95=E5=88=B6=E5=8F=82?=
 =?UTF-8?q?=E8=80=83=E9=9F=B3=E9=A2=91=E5=8A=9F=E8=83=BD=E7=94=A8=E4=BA=8E?=
 =?UTF-8?q?ZeroShot?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (this section is ignored when the patch is applied):

* Subject, decoded: "Add online recording of reference audio to the
  inference page, for ZeroShot".
* The patch text was restored to one diff line per physical line. Leading
  whitespace was rebuilt from the Python block structure -- TODO confirm the
  context-line indentation against the target file before applying (or apply
  with whitespace-insensitive matching).
* NOTE(review): record_check_btn raises a bare Exception to stop the event
  chain while the button text is not '录音'.
* NOTE(review): the labels and messages added here are plain string literals,
  whereas the neighbouring UI strings in the context lines go through i18n().
* NOTE(review): record_click_js locates the recorder button via the element
  id "audio", which must stay in sync with elem_id='audio' on
  record_audio_box.

 GPT_SoVITS/inference_webui.py | 45 ++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 1868a12..971131b 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -199,6 +199,34 @@ def get_spepc(hps, filename):
     return spec
 
 
+# record part code from https://github.com/gradio-app/gradio/issues/5425
+def record_click_js():
+    return """function audioRecord() {
+    var xPathRes = document.evaluate ('//*[@id="audio"]//button', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
+    xPathRes.singleNodeValue.click();}"""
+
+
+def record_action(btn):
+    """Changes button text on click"""
+    if btn == '录音': return '停止'
+    else: return '录音'
+
+
+def record_check_btn(btn):
+    """Checks for correct button text before invoking transcribe()"""
+    if btn != '录音': raise Exception('录音中...')
+
+
+def record_transcribe():
+    return '录音成功'
+
+
+def record_copy_audio(audio):
+    inp_ref = audio
+
+    return inp_ref
+
+
 dict_language = {
     i18n("中文"): "all_zh",#全部按中文识别
     i18n("英文"): "en",#全部按英文识别#######不变
@@ -209,7 +237,7 @@ dict_language = {
 }
 
 
-def splite_en_inf(sentence, language):
+def split_en_inf(sentence, language):
     pattern = re.compile(r'[a-zA-Z ]+')
     textlist = []
     langlist = []
@@ -265,7 +293,7 @@ def get_bert_inf(phones, word2ph, norm_text, language):
 
 def nonen_clean_text_inf(text, language):
     if(language!="auto"):
-        textlist, langlist = splite_en_inf(text, language)
+        textlist, langlist = split_en_inf(text, language)
     else:
         textlist=[]
         langlist=[]
@@ -294,7 +322,7 @@ def nonen_clean_text_inf(text, language):
 
 def nonen_get_bert_inf(text, language):
     if(language!="auto"):
-        textlist, langlist = splite_en_inf(text, language)
+        textlist, langlist = split_en_inf(text, language)
     else:
         textlist=[]
         langlist=[]
@@ -578,9 +606,20 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
             refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
             SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown], [])
             GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
+        gr.Markdown(value="直接录制参考音频(ZeroShot)")
+        with gr.Row():
+            record_msg = gr.Textbox(label="音频录制状态")
+            record_audio_box = gr.Audio(label="录制的音频", source="microphone", type="filepath", elem_id='audio')
+            record_audio_btn = gr.Button('录音', variant="primary")
+            record_audio_btn.click(fn=record_action, inputs=record_audio_btn, outputs=record_audio_btn).\
+                then(fn=lambda: None, _js=record_click_js()).\
+                then(fn=record_check_btn, inputs=record_audio_btn).\
+                success(fn=record_transcribe, outputs=record_msg)
+            import_button = gr.Button("加载录制音频", variant="primary")
         gr.Markdown(value=i18n("*请上传并填写参考信息"))
         with gr.Row():
             inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath")
+            import_button.click(record_copy_audio, [record_audio_box], [inp_ref])
             prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="")
             prompt_language = gr.Dropdown(
                 label=i18n("参考音频的语种"), choices=[i18n("中文"), i18n("英文"), i18n("日文")], value=i18n("中文")