添加音频降噪功能,去除电音和杂音

添加音频降噪功能,去除电音和杂音
This commit is contained in:
刘悦 2024-01-26 14:05:19 +08:00 committed by GitHub
parent 813cf96e50
commit eef5b11b47
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -5,6 +5,9 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
import pdb
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import soundfile as sf
gpt_path = os.environ.get(
"gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
@ -56,6 +59,20 @@ if is_half == True:
else:
bert_model = bert_model.to(device)
# 音频降噪
def reset_tts_wav(audio):
    """Denoise synthesized audio and return the path of the cleaned WAV file.

    Args:
        audio: A ``(sample_rate, samples)`` pair as indexed below
            (``audio[0]`` is passed to soundfile as the sample rate and
            ``audio[1]`` as the sample data), or ``None`` when there is no
            audio to process.

    Returns:
        The path of the denoised WAV file, or ``None`` when ``audio`` is
        ``None``.
    """
    # Nothing to denoise (e.g. the button was pressed before any audio was
    # produced); indexing None below would raise TypeError.
    if audio is None:
        return None

    # NOTE(review): this fixed path is shared by every call, so overlapping
    # requests overwrite each other's file.
    wav_path = './output.wav'
    sf.write(wav_path, audio[1], audio[0], 'PCM_24')

    # Build the noise-suppression pipeline once and reuse it; constructing
    # it on every call reloads the model each time.
    ans = getattr(reset_tts_wav, "_ans_pipeline", None)
    if ans is None:
        ans = pipeline(
            Tasks.acoustic_noise_suppression,
            model='damo/speech_frcrn_ans_cirm_16k')
        reset_tts_wav._ans_pipeline = ans

    # The denoised result is written back over the input file.
    ans(wav_path, output_path=wav_path)
    return "./output.wav"
def get_bert_feature(text, word2ph):
with torch.no_grad():
inputs = tokenizer(text, return_tensors="pt")
@ -396,12 +413,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
)
# Button that starts speech synthesis, and the audio component that shows
# the synthesized result.
inference_button = gr.Button(i18n("合成语音"), variant="primary")
output = gr.Audio(label=i18n("输出的语音"))
# Button that starts denoising of the synthesized audio, and a separate
# audio component that shows the denoised result.
reset_button = gr.Button(i18n("推理音频降噪"))
reset_output = gr.Audio(label=i18n("降噪后的音频"))
# On click, call get_tts_wav with the values of the listed input components
# and display its result in `output`.
inference_button.click(
get_tts_wav,
[inp_ref, prompt_text, prompt_language, text, text_language],
[output],
)
# Audio denoising: on click, pass the current value of `output` to
# reset_tts_wav and display its result in `reset_output`.
reset_button.click(
reset_tts_wav,
[output],
[reset_output],
)
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
with gr.Row():
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")