From 64cdc9727730bb02e6e674f1d86d9e992fadfca0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=82=A6?=
Date: Sun, 28 Jan 2024 15:50:52 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE=E9=A2=84?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E7=8E=AF=E8=8A=82=E7=9A=84=E9=9F=B3=E9=A2=91?=
 =?UTF-8?q?=E9=99=8D=E5=99=AA=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=8F=90=E9=AB=98?=
 =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E6=80=A7=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

添加数据预处理环节的音频降噪功能,提高模型性能
---
 webui.py | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/webui.py b/webui.py
index 885aecbf..de3b1edb 100644
--- a/webui.py
+++ b/webui.py
@@ -5,6 +5,8 @@ import json,yaml,warnings,torch
 import platform
 import psutil
 import signal
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
 
 warnings.filterwarnings("ignore")
 torch.manual_seed(233333)
@@ -210,6 +212,39 @@ def close_asr():
         p_asr=None
     return "已终止ASR进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
 
+# Audio noise suppression (modelscope acoustic-noise-suppression pipeline).
+# The pipeline object is built lazily and then reused by every later call.
+ans_pipeline=None
+
+def reset_tts_wav(audio):
+    """Denoise an uploaded audio file and return the path of the cleaned copy.
+
+    Args:
+        audio: Path of the input audio file, as supplied by the Gradio
+            ``gr.Audio(type="filepath")`` component; ``None`` when nothing
+            has been uploaded yet.
+
+    Returns:
+        ``"./output_ins.wav"`` after the denoised audio has been written
+        there, or ``audio`` unchanged when no input was provided.
+    """
+    global ans_pipeline
+    if not audio:
+        # No upload yet: hand the empty value back rather than passing None
+        # to the pipeline.
+        return audio
+    if ans_pipeline is None:
+        # Built on first use only, so repeated clicks do not construct a new
+        # pipeline each time.
+        ans_pipeline = pipeline(
+            Tasks.acoustic_noise_suppression,
+            model='damo/speech_frcrn_ans_cirm_16k')
+    # NOTE(review): fixed output path, so each run overwrites the previous
+    # result; concurrent users would collide here.
+    output_path = './output_ins.wav'
+    ans_pipeline(audio, output_path=output_path)
+    return output_path
+
 p_train_SoVITS=None
 def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D):
     global p_train_SoVITS
@@ -634,7 +669,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
             gr.Markdown(value=i18n("0b-语音切分工具"))
             with gr.Row():
                 with gr.Row():
-                    slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="")
+                    #slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="")
+                    slice_inp_path = gr.Audio(label=i18n("请上传克隆对象音频"), type="filepath")
+                    reset_inp_button = gr.Button(i18n("音频降噪"), variant="primary")
                     slice_opt_root=gr.Textbox(label=i18n("切分后的子音频的输出根目录"),value="output/slicer_opt")
                     threshold=gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"),value="-34")
                     min_length=gr.Textbox(label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4000")
@@ -648,6 +685,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                     alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
                     n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
                     slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息"))
+                    reset_inp_button.click(reset_tts_wav,[slice_inp_path],[slice_inp_path])
             gr.Markdown(value=i18n("0c-中文批量离线ASR工具"))
             with gr.Row():
                 open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True)