mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-09 00:10:00 +08:00
Update inference_webui.py
增加音频增强逻辑
This commit is contained in:
parent
a3b108bfe6
commit
e0c0410f58
@ -17,6 +17,11 @@ logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
|||||||
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||||
import pdb
|
import pdb
|
||||||
import torch
|
import torch
|
||||||
|
from resemble_enhance.enhancer.inference import denoise, enhance
|
||||||
|
import torchaudio
|
||||||
|
import gc
|
||||||
|
import librosa
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
if os.path.exists("./gweight.txt"):
|
if os.path.exists("./gweight.txt"):
|
||||||
with open("./gweight.txt", 'r', encoding="utf-8") as file:
|
with open("./gweight.txt", 'r', encoding="utf-8") as file:
|
||||||
@ -83,6 +88,31 @@ if is_half == True:
|
|||||||
else:
|
else:
|
||||||
bert_model = bert_model.to(device)
|
bert_model = bert_model.to(device)
|
||||||
|
|
||||||
|
def clear_gpu_cash():
    """Run a garbage-collection pass, then empty the CUDA cache if CUDA is available."""
    # Collect first so that unreachable objects are dropped before the
    # CUDA cache is emptied.
    gc.collect()
    cuda_ready = torch.cuda.is_available()
    if not cuda_ready:
        return
    torch.cuda.empty_cache()
|
||||||
|
|
||||||
|
def _fn(path, solver="Midpoint", nfe=64, tau=0.5,chunk_seconds=10,chunks_overlap=0.5, denoising=True):
|
||||||
|
if path is None:
|
||||||
|
return None, None
|
||||||
|
print(path)
|
||||||
|
sf.write('./output.wav', path[1], path[0], 'PCM_24')
|
||||||
|
|
||||||
|
solver = solver.lower()
|
||||||
|
nfe = int(nfe)
|
||||||
|
lambd = 0.9 if denoising else 0.1
|
||||||
|
|
||||||
|
dwav, sr = torchaudio.load('./output.wav')
|
||||||
|
dwav = dwav.mean(dim=0)
|
||||||
|
|
||||||
|
wav2, new_sr = enhance(dwav = dwav, sr = sr, device = device, nfe=nfe,chunk_seconds=chunk_seconds,chunks_overlap=chunks_overlap, solver=solver, lambd=lambd, tau=tau)
|
||||||
|
|
||||||
|
wav2 = wav2.cpu().numpy()
|
||||||
|
|
||||||
|
clear_gpu_cash()
|
||||||
|
return (new_sr, wav2)
|
||||||
|
|
||||||
def get_bert_feature(text, word2ph):
|
def get_bert_feature(text, word2ph):
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
@ -590,6 +620,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
||||||
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
||||||
output = gr.Audio(label=i18n("输出的语音"))
|
output = gr.Audio(label=i18n("输出的语音"))
|
||||||
|
up_button = gr.Button(i18n("音频降噪增强"), variant="primary")
|
||||||
|
|
||||||
inference_button.click(
|
inference_button.click(
|
||||||
get_tts_wav,
|
get_tts_wav,
|
||||||
@ -597,6 +628,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
[output],
|
[output],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
up_button.click(_fn, [output], [output])
|
||||||
|
|
||||||
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")
|
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user