From 9c4f3bb0b481a4211e4a1b9bc1209d8a7c3133d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=82=A6?=
Date: Sun, 28 Jan 2024 15:56:38 +0800
Subject: [PATCH] Update inference_webui.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

添加引导音频转写功能
---
 GPT_SoVITS/inference_webui.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index f725c5cd..5d2f98f9 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -1,4 +1,5 @@
 import os,re,logging
+from faster_whisper import WhisperModel
 logging.getLogger("markdown_it").setLevel(logging.ERROR)
 logging.getLogger("urllib3").setLevel(logging.ERROR)
 logging.getLogger("httpcore").setLevel(logging.ERROR)
@@ -186,6 +187,54 @@ dict_language={
 }
 
 
+# Audio transcription
+
+# faster-whisper models already loaded, keyed by (model_name, device). Reusing
+# them avoids re-reading the weights on every button click, at the cost of
+# keeping the model resident for the life of the process.
+_whisper_models = {}
+
+
+def _load_whisper(model_name="small"):
+    """Return the cached WhisperModel for ``model_name`` on the global ``device``."""
+    key = (model_name, device)
+    if key not in _whisper_models:
+        on_gpu = device == "cuda"
+        _whisper_models[key] = WhisperModel(
+            model_name,
+            device="cuda" if on_gpu else "cpu",
+            compute_type="float16" if on_gpu else "int8",
+            download_root="./model_from_whisper",
+            local_files_only=False,
+        )
+    return _whisper_models[key]
+
+
+def get_whisper(audio_path):
+    """Transcribe the reference audio to text with faster-whisper.
+
+    Args:
+        audio_path: File path supplied by the reference-audio component;
+            falsy when nothing has been uploaded.
+
+    Returns:
+        The left-stripped text of every segment, joined with ",".
+
+    Raises:
+        gr.Error: If no reference audio has been uploaded.
+    """
+    if not audio_path:
+        # Show a readable message in the UI rather than a traceback raised
+        # from inside transcribe().
+        raise gr.Error(i18n("请上传参考音频"))
+
+    segments, info = _load_whisper().transcribe(audio_path, beam_size=5)
+    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
+
+    # transcribe() returns the segments lazily; join() drives the decoding.
+    return ",".join(segment.text.lstrip() for segment in segments)
+
+
 def splite_en_inf(sentence, language):
     pattern = re.compile(r'[a-zA-Z. ]+')
     textlist = []
@@ -495,10 +544,12 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         gr.Markdown(value=i18n("*请上传并填写参考信息"))
         with gr.Row():
             inp_ref = gr.Audio(label=i18n("请上传参考音频"), type="filepath")
+            whisper_button = gr.Button(i18n("faster_whisper转写音频内容到文本"))
             prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="")
             prompt_language = gr.Dropdown(
                 label=i18n("参考音频的语种"),choices=[i18n("中文"),i18n("英文"),i18n("日文")],value=i18n("中文")
             )
+            whisper_button.click(get_whisper,[inp_ref],[prompt_text])
         gr.Markdown(value=i18n("*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。"))
         with gr.Row():
             text = gr.Textbox(label=i18n("需要合成的文本"), value="")