添加音频降噪功能，去除电音和杂音

2026-01-09 11:57:01 +08:00 · 2024-01-26 14:05:19 +08:00 · 2024-01-26 14:05:19 +08:00 · eef5b11b47
commit eef5b11b47
parent 813cf96e50
1 changed files with 26 additions and 0 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -5,6 +5,9 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
 logging.getLogger("httpx").setLevel(logging.ERROR)
 logging.getLogger("asyncio").setLevel(logging.ERROR)
 import pdb
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 import soundfile as sf
 gpt_path = os.environ.get(
    "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
@ -56,6 +59,20 @@ if is_half == True:
 else:
    bert_model = bert_model.to(device)
 # Audio denoising: suppress electronic artifacts and background noise in synthesized speech.
 def reset_tts_wav(audio):
    """Run the synthesized audio through the acoustic noise suppression model.

    Args:
        audio: A ``(sample_rate, samples)`` pair, indexed as ``audio[0]`` and
            ``audio[1]``, or ``None`` when no audio has been produced yet.
            NOTE(review): presumably this is the value Gradio passes for the
            ``output`` audio component -- confirm against the UI wiring.

    Returns:
        Path of a WAV file holding the denoised audio, or ``None`` when
        ``audio`` is ``None``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Nothing to denoise (e.g. the button was clicked before synthesis);
    # bail out instead of failing on ``audio[1]``.
    if audio is None:
        return None
    sample_rate, samples = audio[0], audio[1]

    # Build the pipeline once and reuse it: constructing it on every call
    # repeats the model setup for each click.
    ans = getattr(reset_tts_wav, "_ans_pipeline", None)
    if ans is None:
        ans = pipeline(
            Tasks.acoustic_noise_suppression,
            model='damo/speech_frcrn_ans_cirm_16k')
        reset_tts_wav._ans_pipeline = ans

    # Use per-call temporary files instead of a fixed './output.wav' so that
    # overlapping requests cannot overwrite each other's data.
    noisy_fd, noisy_path = tempfile.mkstemp(suffix=".wav")
    os.close(noisy_fd)
    clean_fd, clean_path = tempfile.mkstemp(suffix=".wav")
    os.close(clean_fd)
    try:
        sf.write(noisy_path, samples, sample_rate, subtype='PCM_24')
        ans(noisy_path, output_path=clean_path)
    except Exception:
        # Do not leave an empty result file behind when denoising fails.
        os.remove(clean_path)
        raise
    finally:
        # The intermediate input file is never needed after this call.
        os.remove(noisy_path)
    # The result file is intentionally kept: the caller reads it afterwards.
    return clean_path
 def get_bert_feature(text, word2ph):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
@ -396,12 +413,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
            )
            inference_button = gr.Button(i18n("合成语音"), variant="primary")
            output = gr.Audio(label=i18n("输出的语音"))
            reset_button = gr.Button(i18n("推理音频降噪"))
            reset_output = gr.Audio(label=i18n("降噪后的音频"))
        inference_button.click(
            get_tts_wav,
            [inp_ref, prompt_text, prompt_language, text, text_language],
            [output],
        )
        # 音频降噪逻辑
        reset_button.click(
            reset_tts_wav,
            [output],
            [reset_output],
        )
        gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好，所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
        with gr.Row():
            text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")