From 3a86edeff9cfba69c326e2ea91986e6ba094f98d Mon Sep 17 00:00:00 2001 From: XL Date: Thu, 3 Jul 2025 11:06:56 +0800 Subject: [PATCH] tts api --- GPT_SoVITS/configs/tts_infer.yaml | 26 +++++++++++++++++++++----- GPT_SoVITS/inference_webui_api.py | 16 +++++++++++++--- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml index 20c41a20..1ae466c9 100644 --- a/GPT_SoVITS/configs/tts_infer.yaml +++ b/GPT_SoVITS/configs/tts_infer.yaml @@ -1,11 +1,11 @@ custom: bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cuda - is_half: true - t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt - version: v2 - vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth + device: cpu + is_half: false + t2s_weights_path: GPT_weights_v2ProPlus/111-e15.ckpt + version: v2Pro + vits_weights_path: SoVITS_weights_v2ProPlus/111_e8_s136.pth v1: bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base @@ -22,6 +22,22 @@ v2: t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt version: v2 vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth +v2Pro: + bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large + cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base + device: cpu + is_half: false + t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt + version: v2Pro + vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2Pro.pth +v2ProPlus: + bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large + cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base + device: cpu + is_half: false + t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt + version: v2ProPlus + vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2ProPlus.pth v3: bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base diff --git a/GPT_SoVITS/inference_webui_api.py b/GPT_SoVITS/inference_webui_api.py index bd6a56ea..5dbab69d 100644 --- a/GPT_SoVITS/inference_webui_api.py +++ b/GPT_SoVITS/inference_webui_api.py @@ -18,6 +18,8 @@ import io import traceback import wave import torch +import numpy as np +from fastapi.responses import StreamingResponse now_dir = os.getcwd() sys.path.append(now_dir) @@ -52,8 +54,6 @@ from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config from tools.assets import css, js, top_html from tools.i18n.i18n import I18nAuto, scan_language_list -import numpy as np -from fastapi.responses import StreamingResponse language = os.environ.get("language", "Auto") language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language @@ -161,6 +161,7 @@ def inference( sample_steps, super_sampling, ): + seed = -1 if keep_random else seed actual_seed = seed if seed not in [-1, "", None] else random.randint(0, 2**32 - 1) inputs = { @@ -186,13 +187,20 @@ def inference( "super_sampling": super_sampling, } + logging.info( f"inference_button请求耗时: {inputs}" ) - try: + + start_time = time.time() + for item in tts_pipeline.run(inputs): yield item, actual_seed + + logging.info( + f"TTS请求耗时: {time.time() - start_time:.3f}s | 文本: {text}" + ) except NO_PROMPT_ERROR: gr.Warning(i18n("V3不支持无参考文本模式,请填写参考文本!")) @@ -429,6 +437,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css inference_button = gr.Button(i18n("合成语音"), variant="primary") stop_infer = gr.Button(i18n("终止合成"), variant="primary") + inference_button.click( inference, [ @@ -509,6 +518,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css cut_text.click(to_cut, [text_inp, _how_to_cut], [text_opt]) gr.Markdown(value=i18n("后续将支持转音素、手工修改音素、语音合成分步执行。")) + from fastapi import FastAPI, UploadFile, File, Form from fastapi.responses import FileResponse import tempfile