添加音频降噪功能，去除电音和杂音

2026-01-07 10:51:20 +08:00 · 2024-01-26 14:05:19 +08:00 · 2024-01-26 14:05:19 +08:00 · eef5b11b47
commit eef5b11b47
parent 813cf96e50
1 changed files with 26 additions and 0 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -5,6 +5,9 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
 logging.getLogger("httpx").setLevel(logging.ERROR)
 logging.getLogger("asyncio").setLevel(logging.ERROR)
 import pdb
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import soundfile as sf

 gpt_path = os.environ.get(
    "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
@ -56,6 +59,20 @@ if is_half == True:
 else:
    bert_model = bert_model.to(device)

+# Audio denoising
+
+def reset_tts_wav(audio):
+    """Denoise synthesized audio and return the path of the cleaned WAV file.
+
+    Args:
+        audio: ``(sample_rate, samples)`` pair as delivered by the Gradio
+            Audio component wired to this callback, or ``None`` when that
+            component holds no audio yet.
+
+    Returns:
+        ``"./output.wav"`` once it has been overwritten with the denoised
+        audio, or ``None`` when there was no audio to process.
+    """
+    # An empty Audio component is passed in as None (e.g. the button was
+    # clicked before any synthesis); indexing it would raise TypeError.
+    if audio is None:
+        return None
+
+    sample_rate, samples = audio[0], audio[1]
+    wav_path = './output.wav'
+    sf.write(wav_path, samples, sample_rate, 'PCM_24')
+
+    # NOTE(review): the model id ends in "16k"; confirm the pipeline handles
+    # the sample rate produced by the synthesizer.
+    ans = pipeline(
+        Tasks.acoustic_noise_suppression,
+        model='damo/speech_frcrn_ans_cirm_16k')
+    # Input and output share one path: the noisy file is replaced in place.
+    ans(wav_path, output_path=wav_path)
+
+    return wav_path
+
 def get_bert_feature(text, word2ph):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
@ -396,12 +413,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
            )
            inference_button = gr.Button(i18n("合成语音"), variant="primary")
            output = gr.Audio(label=i18n("输出的语音"))
+            reset_button = gr.Button(i18n("推理音频降噪"))
+            reset_output = gr.Audio(label=i18n("降噪后的音频"))
        inference_button.click(
            get_tts_wav,
            [inp_ref, prompt_text, prompt_language, text, text_language],
            [output],
        )

+        # Denoise wiring: pass the synthesized audio to reset_tts_wav and show the cleaned result
+        reset_button.click(
+            reset_tts_wav,
+            [output],
+            [reset_output],
+        )
+
        gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好，所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
        with gr.Row():
            text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")