mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-07 15:19:59 +08:00
添加音频降噪功能,去除电音和杂音
添加音频降噪功能,去除电音和杂音
This commit is contained in:
parent
813cf96e50
commit
eef5b11b47
@ -5,6 +5,9 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)
|
|||||||
logging.getLogger("httpx").setLevel(logging.ERROR)
|
logging.getLogger("httpx").setLevel(logging.ERROR)
|
||||||
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
||||||
import pdb
|
import pdb
|
||||||
|
from modelscope.pipelines import pipeline
|
||||||
|
from modelscope.utils.constant import Tasks
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
gpt_path = os.environ.get(
|
gpt_path = os.environ.get(
|
||||||
"gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
|
"gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
|
||||||
@ -56,6 +59,20 @@ if is_half == True:
|
|||||||
else:
|
else:
|
||||||
bert_model = bert_model.to(device)
|
bert_model = bert_model.to(device)
|
||||||
|
|
||||||
|
# 音频降噪
|
||||||
|
|
||||||
|
# Lazily-built noise-suppression pipeline, shared by every call so the model
# is loaded once instead of on each button click.
_ANS_PIPELINE = None


def _get_ans_pipeline():
    """Return the shared noise-suppression pipeline, building it on first use."""
    global _ANS_PIPELINE
    if _ANS_PIPELINE is None:
        _ANS_PIPELINE = pipeline(
            Tasks.acoustic_noise_suppression,
            model='damo/speech_frcrn_ans_cirm_16k')
    return _ANS_PIPELINE


def reset_tts_wav(audio):
    """Denoise a synthesized clip and return the path of the cleaned WAV file.

    Args:
        audio: Indexable pair where ``audio[0]`` is the sample rate and
            ``audio[1]`` is the sample data, or ``None`` when there is no
            clip yet. Presumably the value delivered by the Gradio audio
            component this function is wired to -- TODO confirm.

    Returns:
        ``"./output.wav"`` after the clip has been written there and
        processed in place by the noise-suppression pipeline, or ``None``
        when ``audio`` is ``None``.
    """
    # Nothing to denoise (e.g. the button was pressed before any synthesis):
    # return an empty result instead of failing on ``audio[1]``.
    if audio is None:
        return None

    wav_path = './output.wav'

    # The pipeline is fed a file path, so dump the samples to disk first.
    sf.write(wav_path, audio[1], audio[0], 'PCM_24')

    # Input and output are deliberately the same file, as in the original.
    # NOTE(review): the path is fixed, so concurrent requests share one file.
    denoise = _get_ans_pipeline()
    denoise(wav_path, output_path=wav_path)

    return wav_path
|
||||||
|
|
||||||
def get_bert_feature(text, word2ph):
|
def get_bert_feature(text, word2ph):
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
inputs = tokenizer(text, return_tensors="pt")
|
inputs = tokenizer(text, return_tensors="pt")
|
||||||
@ -396,12 +413,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
)
|
)
|
||||||
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
||||||
output = gr.Audio(label=i18n("输出的语音"))
|
output = gr.Audio(label=i18n("输出的语音"))
|
||||||
|
reset_button = gr.Button(i18n("推理音频降噪"))
|
||||||
|
reset_output = gr.Audio(label=i18n("降噪后的音频"))
|
||||||
inference_button.click(
|
inference_button.click(
|
||||||
get_tts_wav,
|
get_tts_wav,
|
||||||
[inp_ref, prompt_text, prompt_language, text, text_language],
|
[inp_ref, prompt_text, prompt_language, text, text_language],
|
||||||
[output],
|
[output],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 音频降噪逻辑
|
||||||
|
reset_button.click(
|
||||||
|
reset_tts_wav,
|
||||||
|
[output],
|
||||||
|
[reset_output],
|
||||||
|
)
|
||||||
|
|
||||||
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")
|
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"),value="")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user