From f4f76ea431866a304acf52ecc373d8bc32722237 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Mon, 22 Jul 2024 18:23:33 +0800
Subject: [PATCH] support speed adjustment

support speed adjustment
---
 GPT_SoVITS/inference_webui.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index cc79817..7f37b77 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -312,7 +312,7 @@ def merge_short_text_in_array(texts, threshold):
             result[len(result) - 1] += text
     return result
 
-def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False):
+def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False,speed=1):
     if prompt_text is None or len(prompt_text) == 0:
         ref_free = True
     t0 = ttime()
@@ -421,7 +421,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
         # audio = vq_model.decode(pred_semantic, all_phoneme_ids, refer).detach().cpu().numpy()[0, 0]
         audio = (
             vq_model.decode(
-                pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer
+                pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer,speed=speed
             )
             .detach()
             .cpu()
@@ -623,15 +623,17 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         )
 
         with gr.Row():
             gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低):"))
-            top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
+            top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=10,interactive=True)
             top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
             temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
+        with gr.Row():
+            speed = gr.Slider(minimum=0.5,maximum=2,step=0.05,label=i18n("speed"),value=1,interactive=True)
         inference_button = gr.Button(i18n("合成语音"), variant="primary")
         output = gr.Audio(label=i18n("输出的语音"))
 
         inference_button.click(
             get_tts_wav,
-            [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free],
+            [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed],
             [output],
         )