From eef5b11b47efb92b574accbea2ff152f7643bad0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=82=A6?= <zcxey2911@hotmail.com>
Date: Fri, 26 Jan 2024 14:05:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E9=9F=B3=E9=A2=91=E9=99=8D?=
 =?UTF-8?q?=E5=99=AA=E5=8A=9F=E8=83=BD=EF=BC=8C=E5=8E=BB=E9=99=A4=E7=94=B5?=
 =?UTF-8?q?=E9=9F=B3=E5=92=8C=E6=9D=82=E9=9F=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

添加音频降噪功能，去除电音和杂音
---
 GPT_SoVITS/inference_webui.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index bb571833..71f2b6b5 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -5,6 +5,9 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
 logging.getLogger("httpx").setLevel(logging.ERROR)
 logging.getLogger("asyncio").setLevel(logging.ERROR)
 import pdb
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import soundfile as sf
 
 gpt_path = os.environ.get(
     "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
@@ -56,6 +59,20 @@ if is_half == True:
 else:
     bert_model = bert_model.to(device)
 
+# Audio denoising: post-process synthesized speech to remove noise and electronic artifacts
+
+def reset_tts_wav(audio):
+    """Denoise synthesized audio with the FRCRN model; return the WAV path, or None.
+
+    ``audio`` is the ``(sample_rate, samples)`` pair from the output widget, or None if empty.
+    """
+    if audio is None:  # button clicked before any speech was synthesized
+        return None
+    sf.write('./output.wav', audio[1], audio[0], 'PCM_24')
+    ans = pipeline(Tasks.acoustic_noise_suppression, model='damo/speech_frcrn_ans_cirm_16k')
+    ans('./output.wav', output_path='./output.wav')  # denoises the file in place
+    return "./output.wav"
+
 def get_bert_feature(text, word2ph):
     with torch.no_grad():
         inputs = tokenizer(text, return_tensors="pt")
@@ -396,12 +413,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
             )
             inference_button = gr.Button(i18n("合成语音"), variant="primary")
             output = gr.Audio(label=i18n("输出的语音"))
+            reset_button = gr.Button(i18n("推理音频降噪"))
+            reset_output = gr.Audio(label=i18n("降噪后的音频"))
         inference_button.click(
             get_tts_wav,
             [inp_ref, prompt_text, prompt_language, text, text_language],
             [output],
         )
 
+        # Audio denoising: pass the synthesized output through reset_tts_wav into reset_output
+        reset_button.click(
+            reset_tts_wav,
+            [output],
+            [reset_output],
+        )
+
         gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好，所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
         with gr.Row():
             text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")