diff --git a/.gitignore b/.gitignore index d280e459..a277b2ac 100644 --- a/.gitignore +++ b/.gitignore @@ -193,3 +193,8 @@ cython_debug/ # PyPI configuration file .pypirc +/.vs +/GPT_SoVITS/configs/tts_infer.yaml +/GPT_SoVITS/configs/infer_settings.json +/last_selected_preset.json +/last_selected_models.json diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml deleted file mode 100644 index f31061cc..00000000 --- a/GPT_SoVITS/configs/tts_infer.yaml +++ /dev/null @@ -1,56 +0,0 @@ -custom: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cuda - is_half: true - t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt - version: v2 - vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth -v1: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt - version: v1 - vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth -v2: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt - version: v2 - vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth -v2Pro: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt - version: v2Pro - vits_weights_path: 
GPT_SoVITS/pretrained_models/v2Pro/s2Gv2Pro.pth -v2ProPlus: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt - version: v2ProPlus - vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2ProPlus.pth -v3: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt - version: v3 - vits_weights_path: GPT_SoVITS/pretrained_models/s2Gv3.pth -v4: - bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large - cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base - device: cpu - is_half: false - t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt - version: v4 - vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py index 92d145b3..53f6bf69 100644 --- a/GPT_SoVITS/inference_webui_fast.py +++ b/GPT_SoVITS/inference_webui_fast.py @@ -1,18 +1,26 @@ +# -*- coding: utf-8 -*- """ -按中英混合识别 -按日英混合识别 -多语种启动切分识别语种 -全部按中文识别 -全部按英文识别 -全部按日文识别 +GPT-SoVITS WebUI 精简版 +保留功能:模型持久化、参考音频持久化、推理参数持久化、记住最后选中预设 +核心优化:抽离持久化逻辑到 persistence_tools.py,主文件大幅精简,结构清晰 """ import psutil import os +import sys +import json +import yaml +import random +import re +import shutil +from pathlib import Path +import torch +import gradio as gr + +# 设置进程优先级(仅Windows有效) def set_high_priority(): - """把当前 Python 进程设为 HIGH_PRIORITY_CLASS""" if os.name != "nt": - return # 仅 Windows 有效 + return p = psutil.Process(os.getpid()) try: p.nice(psutil.HIGH_PRIORITY_CLASS) @@ -20,19 +28,23 @@ def set_high_priority(): except psutil.AccessDenied: print("权限不足,无法修改优先级(请用管理员运行)") set_high_priority() -import json -import logging 
-import os -import random -import re -import sys -import torch +# ===================== 导入自定义持久化工具类(核心精简关键) ===================== +from persistence_tools import ( + init_last_selected_models, read_last_selected_models, write_last_selected_models, + read_last_selected_preset, write_last_selected_preset, clear_last_selected_preset, + load_ref_presets, get_preset_by_name, save_ref_preset_core, delete_ref_preset_core, + load_infer_settings, save_infer_settings_core, restore_default_infer_settings_core, + REF_AUDIO_DIR +) +# ===================== 原有核心依赖导入 ===================== now_dir = os.getcwd() sys.path.append(now_dir) sys.path.append("%s/GPT_SoVITS" % (now_dir)) +# 屏蔽无关日志 +import logging logging.getLogger("markdown_it").setLevel(logging.ERROR) logging.getLogger("urllib3").setLevel(logging.ERROR) logging.getLogger("httpcore").setLevel(logging.ERROR) @@ -41,11 +53,9 @@ logging.getLogger("asyncio").setLevel(logging.ERROR) logging.getLogger("charset_normalizer").setLevel(logging.ERROR) logging.getLogger("torchaudio._extension").setLevel(logging.ERROR) - -infer_ttswebui = os.environ.get("infer_ttswebui", 9872) -infer_ttswebui = int(infer_ttswebui) -is_share = os.environ.get("is_share", "False") -is_share = eval(is_share) +# 配置参数初始化 +infer_ttswebui = int(os.environ.get("infer_ttswebui", 9872)) +is_share = eval(os.environ.get("is_share", "False")) if "_CUDA_VISIBLE_DEVICES" in os.environ: os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] @@ -56,53 +66,53 @@ cnhubert_base_path = os.environ.get("cnhubert_base_path", None) bert_path = os.environ.get("bert_path", None) version = model_version = os.environ.get("version", "v2") -import gradio as gr -from TTS_infer_pack.text_segmentation_method import get_method -from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config +# 标记是否直接打开推理页 +is_direct_launch = (gpt_path is None) and (sovits_path is None) -from tools.assets import css, js, top_html +# 多语言配置 from tools.i18n.i18n import I18nAuto, 
scan_language_list - language = os.environ.get("language", "Auto") language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language i18n = I18nAuto(language=language) +# TTS 推理核心 +from TTS_infer_pack.text_segmentation_method import get_method +from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config -# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 +# 样式与配置工具 +from tools.assets import css, js, top_html +from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path +from process_ckpt import get_sovits_version_from_path_fast -if torch.cuda.is_available(): - device = "cuda" -# elif torch.backends.mps.is_available(): -# device = "mps" -else: - device = "cpu" - -# is_half = False -# device = "cpu" +# ===================== 全局变量初始化 ===================== +# 设备配置 +device = "cuda" if torch.cuda.is_available() else "cpu" +# 语种字典 dict_language_v1 = { - i18n("中文"): "all_zh", # 全部按中文识别 - i18n("英文"): "en", # 全部按英文识别#######不变 - i18n("日文"): "all_ja", # 全部按日文识别 - i18n("中英混合"): "zh", # 按中英混合识别####不变 - i18n("日英混合"): "ja", # 按日英混合识别####不变 - i18n("多语种混合"): "auto", # 多语种启动切分识别语种 + i18n("中文"): "all_zh", + i18n("英文"): "en", + i18n("日文"): "all_ja", + i18n("中英混合"): "zh", + i18n("日英混合"): "ja", + i18n("多语种混合"): "auto", } dict_language_v2 = { - i18n("中文"): "all_zh", # 全部按中文识别 - i18n("英文"): "en", # 全部按英文识别#######不变 - i18n("日文"): "all_ja", # 全部按日文识别 - i18n("粤语"): "all_yue", # 全部按中文识别 - i18n("韩文"): "all_ko", # 全部按韩文识别 - i18n("中英混合"): "zh", # 按中英混合识别####不变 - i18n("日英混合"): "ja", # 按日英混合识别####不变 - i18n("粤英混合"): "yue", # 按粤英混合识别####不变 - i18n("韩英混合"): "ko", # 按韩英混合识别####不变 - i18n("多语种混合"): "auto", # 多语种启动切分识别语种 - i18n("多语种混合(粤语)"): "auto_yue", # 多语种启动切分识别语种 + i18n("中文"): "all_zh", + i18n("英文"): "en", + i18n("日文"): "all_ja", + i18n("粤语"): "all_yue", + i18n("韩文"): "all_ko", + i18n("中英混合"): "zh", + i18n("日英混合"): "ja", + i18n("粤英混合"): "yue", + i18n("韩英混合"): "ko", + i18n("多语种混合"): "auto", + i18n("多语种混合(粤语)"): "auto_yue", } dict_language = 
dict_language_v1 if version == "v1" else dict_language_v2 +# 文本切分方法 cut_method = { i18n("不切"): "cut0", i18n("凑四句一切"): "cut1", @@ -112,41 +122,113 @@ cut_method = { i18n("按标点符号切"): "cut5", } -from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path +# V3/V4 标记 +v3v4set = {"v3", "v4"} +# 模型列表初始化 SoVITS_names, GPT_names = get_weights_names() from config import pretrained_sovits_name - path_sovits_v3 = pretrained_sovits_name["v3"] path_sovits_v4 = pretrained_sovits_name["v4"] is_exist_s2gv3 = os.path.exists(path_sovits_v3) is_exist_s2gv4 = os.path.exists(path_sovits_v4) +# TTS 配置与管道初始化 tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml") tts_config.device = device tts_config.is_half = is_half -# tts_config.version = version tts_config.update_version(version) -if gpt_path is not None: - if "!" in gpt_path or "!" in gpt_path: - gpt_path = name2gpt_path[gpt_path] +if gpt_path is not None and "!" not in gpt_path and "!" not in gpt_path: tts_config.t2s_weights_path = gpt_path -if sovits_path is not None: - if "!" in sovits_path or "!" in sovits_path: - sovits_path = name2sovits_path[sovits_path] +if sovits_path is not None and "!" not in sovits_path and "!" 
not in sovits_path: tts_config.vits_weights_path = sovits_path if cnhubert_base_path is not None: tts_config.cnhuhbert_base_path = cnhubert_base_path if bert_path is not None: tts_config.bert_base_path = bert_path -print(tts_config) tts_pipeline = TTS(tts_config) gpt_path = tts_config.t2s_weights_path sovits_path = tts_config.vits_weights_path version = tts_config.version +# 参考预设全局变量 +ref_presets = load_ref_presets() +preset_names = [p["name"] for p in ref_presets] if ref_presets else [] +# ===================== 精简版辅助函数 ===================== +def custom_sort_key(s): + """自定义排序键(数字自然排序)""" + parts = re.split("(\d+)", s) + return [int(part) if part.isdigit() else part for part in parts] + +def init_ui_preset_config(): + """初始化UI配置(优先加载最后选中的预设)""" + global ref_presets, preset_names + ref_presets = load_ref_presets() + preset_names = [p["name"] for p in ref_presets] if ref_presets else [] + is_interactive = bool(preset_names) + + # 优先读取最后选中的预设 + last_selected = read_last_selected_preset() + default_selected = last_selected if (last_selected and last_selected in preset_names) else (preset_names[0] if preset_names else None) + default_preset = get_preset_by_name(default_selected) + + return ( + gr.update(choices=preset_names, value=default_selected, interactive=is_interactive), + default_preset["name"], + default_preset["ref_audio_path"], + default_preset["prompt_text"], + default_preset["prompt_language"] + ) + +def update_popup_text(preset_name, is_delete): + """更新弹窗提示文本""" + preset_name = preset_name.strip() + if is_delete: + return gr.update(value=i18n(f"确定要删除配置「{preset_name}」吗?删除后无法恢复!")) + else: + return gr.update(value=i18n(f"配置「{preset_name}」已存在,确定要覆盖吗?覆盖后无法恢复!")) + +def reset_confirm_result(): + """重置确认结果为False""" + return False + +def save_ref_preset_wrapper(preset_name, ref_audio_path, prompt_text, prompt_language, confirm_override=False): + """保存预设包装器(适配Gradio输出)""" + msg, success, new_preset_names = save_ref_preset_core(preset_name, ref_audio_path, 
prompt_text, prompt_language, confirm_override) + style = gr.update(elem_classes=["config-error-border"]) if not success else gr.update(elem_classes=["config-default-border"]) + dropdown_update = gr.update(choices=new_preset_names, value=preset_name if success and preset_name in new_preset_names else (new_preset_names[0] if new_preset_names else None), interactive=bool(new_preset_names)) + return msg, style, dropdown_update + +def delete_ref_preset_wrapper(preset_name): + """删除预设包装器(适配Gradio输出)""" + msg, new_preset_names, new_selected = delete_ref_preset_core(preset_name) + dropdown_update = gr.update(choices=new_preset_names, value=new_selected, interactive=bool(new_preset_names)) + new_preset = get_preset_by_name(new_selected) + + return ( + msg, + dropdown_update, + new_preset["name"], + new_preset["ref_audio_path"], + new_preset["prompt_text"], + new_preset["prompt_language"] + ) + +def on_preset_selected(preset_name): + """预设切换回调(记录最后选中)""" + if not preset_name or not ref_presets: + return "", None, "", i18n("中文") + preset = get_preset_by_name(preset_name) + audio_path = preset["ref_audio_path"] + + # 记录最后选中的预设 + write_last_selected_preset(preset_name) + + return preset["name"], audio_path, preset["prompt_text"], preset["prompt_language"] + +# ===================== 推理核心函数 ===================== def inference( text, text_lang, @@ -170,8 +252,11 @@ def inference( sample_steps, super_sampling, ): + """语音合成推理核心""" seed = -1 if keep_random else seed actual_seed = seed if seed not in [-1, "", None] else random.randint(0, 2**32 - 1) + ref_audio_path = ref_audio_path if ref_audio_path else "" + inputs = { "text": text, "text_lang": dict_language[text_lang], @@ -194,89 +279,64 @@ def inference( "sample_steps": int(sample_steps), "super_sampling": super_sampling, } + try: for item in tts_pipeline.run(inputs): yield item, actual_seed except NO_PROMPT_ERROR: - gr.Warning(i18n("V3不支持无参考文本模式,请填写参考文本!")) + return i18n("V3不支持无参考文本模式,请填写参考文本!") - -def custom_sort_key(s): - # 
使用正则表达式提取字符串中的数字部分和非数字部分 - parts = re.split("(\d+)", s) - # 将数字部分转换为整数,非数字部分保持不变 - parts = [int(part) if part.isdigit() else part for part in parts] - return parts - - -if os.path.exists("./weight.json"): - pass -else: - with open("./weight.json", "w", encoding="utf-8") as file: - json.dump({"GPT": {}, "SoVITS": {}}, file) - -with open("./weight.json", "r", encoding="utf-8") as file: - weight_data = file.read() - weight_data = json.loads(weight_data) - gpt_path = os.environ.get("gpt_path", weight_data.get("GPT", {}).get(version, GPT_names[-1])) - sovits_path = os.environ.get("sovits_path", weight_data.get("SoVITS", {}).get(version, SoVITS_names[0])) - if isinstance(gpt_path, list): - gpt_path = gpt_path[0] - if isinstance(sovits_path, list): - sovits_path = sovits_path[0] - -from process_ckpt import get_sovits_version_from_path_fast - -v3v4set = {"v3", "v4"} - - -def change_sovits_weights(sovits_path, prompt_language=None, text_language=None): +# ===================== 模型切换函数(保留预设记忆) ===================== +def change_sovits_weights(sovits_path, current_gpt_selected, prompt_language=None, text_language=None): + """切换SoVITS模型(保留最后选中预设)""" if "!" in sovits_path or "!" 
in sovits_path: sovits_path = name2sovits_path[sovits_path] - global version, model_version, dict_language, if_lora_v3 + + global version, model_version, dict_language, if_lora_v3, ref_presets, preset_names version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path) - # print(sovits_path,version, model_version, if_lora_v3) is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4 path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4 - if if_lora_v3 == True and is_exist == False: + + # 底模缺失校验 + if if_lora_v3 and not is_exist: info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重") - gr.Warning(info) - raise FileExistsError(info) + return i18n(info) + + # 更新语种字典与预设 dict_language = dict_language_v1 if version == "v1" else dict_language_v2 - if prompt_language is not None and text_language is not None: - if prompt_language in list(dict_language.keys()): - prompt_text_update, prompt_language_update = ( - {"__type__": "update"}, - {"__type__": "update", "value": prompt_language}, - ) - else: - prompt_text_update = {"__type__": "update", "value": ""} - prompt_language_update = {"__type__": "update", "value": i18n("中文")} - if text_language in list(dict_language.keys()): - text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language} - else: - text_update = {"__type__": "update", "value": ""} - text_language_update = {"__type__": "update", "value": i18n("中文")} - if model_version in v3v4set: - visible_sample_steps = True - visible_inp_refs = False - else: - visible_sample_steps = False - visible_inp_refs = True - yield ( - {"__type__": "update", "choices": list(dict_language.keys())}, - {"__type__": "update", "choices": list(dict_language.keys())}, - prompt_text_update, - prompt_language_update, - text_update, - text_language_update, - {"__type__": "update", "interactive": visible_sample_steps, "value": 32}, - {"__type__": "update", "visible": 
visible_inp_refs}, - {"__type__": "update", "interactive": True if model_version not in v3v4set else False}, - {"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False}, - ) - - tts_pipeline.init_vits_weights(sovits_path) + ref_presets = load_ref_presets() + preset_names = [p["name"] for p in ref_presets] if ref_presets else [] + + # 恢复最后选中的预设 + last_selected = read_last_selected_preset() + default_selected = last_selected if (last_selected and last_selected in preset_names) else (preset_names[0] if preset_names else None) + + # 更新模型配置文件 + if is_direct_launch: + valid_gpt_path = current_gpt_selected if (current_gpt_selected in GPT_names and os.path.exists(current_gpt_selected)) else GPT_names[-1] + write_last_selected_models(valid_gpt_path, sovits_path, version) + + # 初始化返回值 + if prompt_language is None or text_language is None: + return + + # 语种兼容性校验 + prompt_text_update, prompt_language_update = {"__type__": "update"}, {"__type__": "update", "value": prompt_language} + if prompt_language not in list(dict_language.keys()): + prompt_text_update = {"__type__": "update", "value": ""} + prompt_language_update = {"__type__": "update", "value": i18n("中文")} + + text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language} + if text_language not in list(dict_language.keys()): + text_update = {"__type__": "update", "value": ""} + text_language_update = {"__type__": "update", "value": i18n("中文")} + + # V3/V4 特殊配置 + visible_sample_steps = model_version in v3v4set + visible_inp_refs = not visible_sample_steps + ref_text_free_interactive = model_version not in v3v4set + + # 加载中状态 yield ( {"__type__": "update", "choices": list(dict_language.keys())}, {"__type__": "update", "choices": list(dict_language.keys())}, @@ -286,34 +346,176 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None) text_language_update, {"__type__": "update", "interactive": visible_sample_steps, "value": 32}, 
{"__type__": "update", "visible": visible_inp_refs}, - {"__type__": "update", "interactive": True if model_version not in v3v4set else False}, - {"__type__": "update", "value": i18n("合成语音"), "interactive": True}, + {"__type__": "update", "interactive": ref_text_free_interactive}, + {"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False}, + gr.update(choices=preset_names, value=default_selected, interactive=bool(preset_names)), ) - with open("./weight.json") as f: - data = f.read() - data = json.loads(data) - data["SoVITS"][version] = sovits_path + + # 加载模型权重 + tts_pipeline.init_vits_weights(sovits_path) + + # 加载完成状态 + yield ( + {"__type__": "update", "choices": list(dict_language.keys())}, + {"__type__": "update", "choices": list(dict_language.keys())}, + prompt_text_update, + prompt_language_update, + text_update, + text_language_update, + {"__type__": "update", "interactive": visible_sample_steps, "value": 32}, + {"__type__": "update", "visible": visible_inp_refs}, + {"__type__": "update", "interactive": ref_text_free_interactive}, + {"__type__": "update", "value": i18n("合成语音"), "interactive": True}, + gr.update(choices=preset_names, value=default_selected, interactive=bool(preset_names)), + ) + + # 更新 weight.json + with open("./weight.json", "r") as f: + data = json.loads(f.read()) + data["SoVITS"][version] = sovits_path with open("./weight.json", "w") as f: - f.write(json.dumps(data)) - + json.dump(data, f) def change_gpt_weights(gpt_path): + """切换GPT模型""" if "!" in gpt_path or "!" 
in gpt_path: gpt_path = name2gpt_path[gpt_path] tts_pipeline.init_t2s_weights(gpt_path) + + if is_direct_launch: + current_sovits_path = sovits_path if 'sovits_path' in globals() else SoVITS_names[0] + current_version = version if 'version' in globals() else "v2" + write_last_selected_models(gpt_path, current_sovits_path, current_version) - -with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app: - gr.HTML( - top_html.format( - i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") - + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") - ), - elem_classes="markdown", +# ===================== 推理参数持久化包装函数 ===================== +def init_infer_settings(): + """初始化推理参数""" + settings = load_infer_settings() + return ( + settings["batch_size"], + settings["sample_steps"], + settings["fragment_interval"], + settings["speed_factor"], + settings["top_k"], + settings["top_p"], + settings["temperature"], + settings["repetition_penalty"], + settings["how_to_cut"], + settings["super_sampling"], + settings["parallel_infer"], + settings["split_bucket"], + settings["seed"], + settings["keep_random"] ) +def save_infer_settings_wrapper(batch_size, sample_steps, fragment_interval, speed_factor, + top_k, top_p, temperature, repetition_penalty, how_to_cut, + super_sampling, parallel_infer, split_bucket, seed, keep_random): + """保存推理参数包装器""" + settings = { + "batch_size": batch_size, + "sample_steps": sample_steps, + "fragment_interval": fragment_interval, + "speed_factor": speed_factor, + "top_k": top_k, + "top_p": top_p, + "temperature": temperature, + "repetition_penalty": repetition_penalty, + "how_to_cut": how_to_cut, + "super_sampling": super_sampling, + "parallel_infer": parallel_infer, + "split_bucket": split_bucket, + "seed": -1 if keep_random else seed, + "keep_random": keep_random + } + return save_infer_settings_core(settings) + +# ===================== Gradio UI 构建(精简版) ===================== +custom_css = """ +/* 保存失败红边框样式(强制覆盖默认样式) */ 
+.config-error-border { + border: 2px solid #ff3b30 !important; + border-radius: 4px !important; + padding: 6px !important; +} +/* 保存成功/默认边框样式(还原原有样式) */ +.config-default-border { + border: 1px solid #e5e7eb !important; + border-radius: 4px !important; + padding: 6px !important; +} +/* 模拟弹窗样式:紧凑居中+自动换行+窄宽度 */ +.simulated-popup { + position: fixed; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + background: white; + border: 1px solid #e5e7eb; + border-radius: 8px; + box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1); + padding: 15px; + z-index: 9999; + min-width: 300px; + max-width: 350px; + text-align: center; +} +/* 弹窗文本自动换行 */ +.simulated-popup .markdown-text { + word-wrap: break-word; + line-height: 1.5; + color: #333; +} +/* 弹窗遮罩层 */ +.popup-mask { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.4); + z-index: 9998; +} +/* 按钮区域:紧凑间距+适配宽度 */ +.popup-buttons { + margin-top: 15px; + display: flex; + justify-content: center; + gap: 10px; +} +/* 按钮样式优化:适配弹窗宽度 */ +.popup-buttons button { + min-width: 80px; + padding: 6px 12px; +} +.btn-group-spacing { + display: flex !important; + flex-direction: row !important; + gap: 10px !important; + padding:5px 10px 5px 10px; + align-items: center !important; + justify-content: flex-start !important; +} +.btn-group-spacing button{ + border-radius:5px; +} +""" + +with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css + custom_css) as app: + # 状态变量 + current_preset = gr.State("") + confirm_flag = gr.State(False) + infer_confirm_flag = gr.State(False) + infer_restore_flag = gr.State(False) + + # 顶部HTML + gr.HTML(top_html.format( + i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 
详见根目录LICENSE.") + ), elem_classes="markdown") + + # 模型切换区域 with gr.Column(): - # with gr.Group(): gr.Markdown(value=i18n("模型切换")) with gr.Row(): GPT_dropdown = gr.Dropdown( @@ -331,193 +533,428 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + # 核心功能区域 with gr.Row(): with gr.Column(): - gr.Markdown(value=i18n("*请上传并填写参考信息")) + gr.Markdown(value=i18n("*请上传并填写参考信息(支持多组预设配置切换/删除,主参考音频为必填项,自动持久化不丢失)")) + + # 预设选择与名称输入 with gr.Row(): - inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath") + preset_dropdown = gr.Dropdown( + label=i18n("选择预设参考配置"), + choices=preset_names, + value=preset_names[0] if preset_names else None, + interactive=bool(preset_names), + ) + preset_name_input = gr.Textbox( + label=i18n("当前配置名称(保存时使用)"), + value="", + placeholder=i18n("填写配置名称,点击保存按钮即可持久化"), + lines=1, + scale=1 + ) + + # 配置提示框 + save_config_msg = gr.Textbox( + label=i18n("配置操作提示"), + value="", + lines=1, + interactive=False, + elem_classes=["config-default-border"] + ) + + # 保存/删除按钮 + with gr.Row(): + save_ref_config_btn = gr.Button(i18n("保存当前参考为选中配置"), variant="primary") + delete_ref_config_btn = gr.Button(i18n("删除当前选中配置"), variant="primary") + + # 音频与文本输入 + with gr.Row(): + inp_ref = gr.Audio( + label=i18n("主参考音频(3~10秒)【必填,自动持久化】"), + type="filepath", + value=None + ) inp_refs = gr.File( - label=i18n("辅参考音频(可选多个,或不选)"), + label=i18n("辅参考音频(可选多个)"), file_count="multiple", visible=True if model_version != "v3" else False, ) - prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2) + + prompt_text = gr.Textbox( + label=i18n("主参考音频的文本【可选】"), + value="", + lines=2 + ) + with gr.Row(): prompt_language = gr.Dropdown( - label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文") + label=i18n("主参考音频的语种"), + choices=list(dict_language.keys()), + 
value=i18n("中文") ) - with gr.Column(): - ref_text_free = gr.Checkbox( - label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), - value=False, - interactive=True if model_version != "v3" else False, - show_label=True, - ) - gr.Markdown( - i18n("使用无参考文本模式时建议使用微调的GPT") - + "
" - + i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。") - ) + ref_text_free = gr.Checkbox( + label=i18n("开启无参考文本模式"), + value=False, + interactive=True if model_version != "v3" else False, + show_label=True, + ) + + # 模拟弹窗(删除/覆盖) + delete_mask = gr.Column(visible=False, elem_classes=["popup-mask"]) + with gr.Column(visible=False, elem_classes=["simulated-popup"]) as delete_popup: + delete_text = gr.Markdown(value="") + with gr.Row(elem_classes=["popup-buttons"]): + delete_confirm_btn = gr.Button(i18n("确认删除"), variant="primary") + delete_cancel_btn = gr.Button(i18n("取消"), variant="primary") + + override_mask = gr.Column(visible=False, elem_classes=["popup-mask"]) + with gr.Column(visible=False, elem_classes=["simulated-popup"]) as override_popup: + override_text = gr.Markdown(value="") + with gr.Row(elem_classes=["popup-buttons"]): + override_confirm_btn = gr.Button(i18n("确认覆盖"), variant="primary") + override_cancel_btn = gr.Button(i18n("取消"), variant="primary") + + # 事件绑定:预设切换 + preset_dropdown.change( + fn=on_preset_selected, + inputs=[preset_dropdown], + outputs=[preset_name_input, inp_ref, prompt_text, prompt_language] + ) + + # 事件绑定:保存预设 + save_ref_config_btn.click( + fn=lambda pname: (pname.strip(), any(p["name"].strip() == pname.strip() for p in ref_presets)), + inputs=[preset_name_input], + outputs=[current_preset, confirm_flag] + ).then( + fn=lambda exists, pname: ( + gr.update(visible=exists), + gr.update(visible=exists), + update_popup_text(pname, False) + ), + inputs=[confirm_flag, preset_name_input], + outputs=[override_mask, override_popup, override_text] + ).then( + fn=save_ref_preset_wrapper, + inputs=[preset_name_input, inp_ref, prompt_text, prompt_language, confirm_flag], + outputs=[save_config_msg, save_config_msg, preset_dropdown] + ) + + # 事件绑定:覆盖确认 + override_confirm_btn.click( + fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)), + inputs=[], + outputs=[confirm_flag, override_mask, override_popup] + ).then( + 
fn=save_ref_preset_wrapper, + inputs=[preset_name_input, inp_ref, prompt_text, prompt_language, confirm_flag], + outputs=[save_config_msg, save_config_msg, preset_dropdown] + ).then( + fn=reset_confirm_result, + inputs=[], + outputs=[confirm_flag] + ) + + # 事件绑定:覆盖取消 + override_cancel_btn.click( + fn=lambda: (False, gr.update(visible=False), gr.update(visible=False), i18n("覆盖操作已取消")), + inputs=[], + outputs=[confirm_flag, override_mask, override_popup, save_config_msg] + ) + + # 事件绑定:删除预设 + delete_ref_config_btn.click( + fn=lambda pname: (pname, gr.update(visible=True), gr.update(visible=True), update_popup_text(pname, True)), + inputs=[preset_dropdown], + outputs=[current_preset, delete_mask, delete_popup, delete_text] + ) + + # 事件绑定:删除确认 + delete_confirm_btn.click( + fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)), + inputs=[], + outputs=[confirm_flag, delete_mask, delete_popup] + ).then( + fn=delete_ref_preset_wrapper, + inputs=[preset_dropdown], + outputs=[save_config_msg, preset_dropdown, preset_name_input, inp_ref, prompt_text, prompt_language] + ).then( + fn=reset_confirm_result, + inputs=[], + outputs=[confirm_flag] + ) + + # 事件绑定:删除取消 + delete_cancel_btn.click( + fn=lambda pname: (False, gr.update(visible=False), gr.update(visible=False), i18n("删除操作已取消")), + inputs=[preset_dropdown], + outputs=[confirm_flag, delete_mask, delete_popup, save_config_msg] + ) with gr.Column(): gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式")) + + # 操作教程提示 + gr.HTML(''' +
+

GPT-SoVITS 新增功能说明

+
+ 1.默认模型:进入推理页面时,自动加载最后一次使用的模型。 +
+
+ 2.参考音频:自动保存到 GPT_SoVITS/ref_audios/,多组预设可切换,重启不丢失。 +
+
+ 3.推理参数:可保存自定义参数,恢复默认值,无需每次重新调整。 +
+
+ ''') + + # 目标文本输入 text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=20, max_lines=20) text_language = gr.Dropdown( - label=i18n("需要合成的文本的语种"), choices=list(dict_language.keys()), value=i18n("中文") + label=i18n("需要合成的文本的语种"), + choices=list(dict_language.keys()), + value=i18n("中文") ) + # 推理设置区域 with gr.Group(): gr.Markdown(value=i18n("推理设置")) with gr.Row(): with gr.Column(): with gr.Row(): - batch_size = gr.Slider( - minimum=1, maximum=200, step=1, label=i18n("batch_size"), value=20, interactive=True - ) - sample_steps = gr.Radio( - label=i18n("采样步数(仅对V3/4生效)"), value=32, choices=[4, 8, 16, 32, 64, 128], visible=True - ) + batch_size = gr.Slider(minimum=1, maximum=200, step=1, label=i18n("batch_size"), value=20, interactive=True) + sample_steps = gr.Radio(label=i18n("采样步数(仅V3/4生效)"), value=32, choices=[4, 8, 16, 32, 64, 128], visible=True) with gr.Row(): - fragment_interval = gr.Slider( - minimum=0.01, maximum=1, step=0.01, label=i18n("分段间隔(秒)"), value=0.3, interactive=True - ) - speed_factor = gr.Slider( - minimum=0.6, maximum=1.65, step=0.05, label="语速", value=1.0, interactive=True - ) + fragment_interval = gr.Slider(minimum=0.01, maximum=1, step=0.01, label=i18n("分段间隔(秒)"), value=0.2, interactive=True) + speed_factor = gr.Slider(minimum=0.6, maximum=1.65, step=0.05, label="语速", value=1.0, interactive=True) with gr.Row(): - top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=15, interactive=True) + top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=5, interactive=True) top_p = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n("top_p"), value=1, interactive=True) with gr.Row(): - temperature = gr.Slider( - minimum=0, maximum=1, step=0.05, label=i18n("temperature"), value=1, interactive=True - ) - repetition_penalty = gr.Slider( - minimum=0, maximum=2, step=0.05, label=i18n("重复惩罚"), value=1.35, interactive=True - ) + temperature = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n("temperature"), 
value=1, interactive=True) + repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.05, label=i18n("重复惩罚"), value=1.35, interactive=True) with gr.Column(): with gr.Row(): how_to_cut = gr.Dropdown( label=i18n("怎么切"), - choices=[ - i18n("不切"), - i18n("凑四句一切"), - i18n("凑50字一切"), - i18n("按中文句号。切"), - i18n("按英文句号.切"), - i18n("按标点符号切"), - ], + choices=list(cut_method.keys()), value=i18n("凑四句一切"), interactive=True, scale=1, ) - super_sampling = gr.Checkbox( - label=i18n("音频超采样(仅对V3生效))"), value=False, interactive=True, show_label=True - ) + super_sampling = gr.Checkbox(label=i18n("音频超采样(仅V3生效)"), value=False, interactive=True, show_label=True) with gr.Row(): parallel_infer = gr.Checkbox(label=i18n("并行推理"), value=True, interactive=True, show_label=True) - split_bucket = gr.Checkbox( - label=i18n("数据分桶(并行推理时会降低一点计算量)"), - value=True, - interactive=True, - show_label=True, - ) + split_bucket = gr.Checkbox(label=i18n("数据分桶"), value=True, interactive=True, show_label=True) with gr.Row(): seed = gr.Number(label=i18n("随机种子"), value=-1) keep_random = gr.Checkbox(label=i18n("保持随机"), value=True, interactive=True, show_label=True) output = gr.Audio(label=i18n("输出的语音")) - with gr.Row(): - inference_button = gr.Button(i18n("合成语音"), variant="primary") - stop_infer = gr.Button(i18n("终止合成"), variant="primary") + + # 推理操作按钮 + with gr.Row(): + # ========== 新增:两个按钮(保存推理设置、恢复默认设置) ========== + with gr.Row(elem_classes=["btn-group-spacing"]): + save_infer_settings_btn = gr.Button(i18n("保存推理设置"), variant="primary") + restore_default_settings_btn = gr.Button(i18n("恢复默认设置"), variant="primary") + with gr.Row(elem_classes=["btn-group-spacing"]): + inference_button = gr.Button(i18n("合成语音"), variant="primary") + stop_infer = gr.Button(i18n("终止合成"), variant="primary") + # ===================== 推理保存确认弹窗(原有,保持不变) ===================== + infer_save_mask = gr.Column(visible=False, elem_classes=["popup-mask"]) + with gr.Column(visible=False, elem_classes=["simulated-popup"]) as infer_save_popup: + 
infer_save_text = gr.Markdown(value=i18n("确定要保存当前推理设置吗?保存后将覆盖原有配置!")) + with gr.Row(elem_classes=["popup-buttons"]): + infer_save_confirm_btn = gr.Button(i18n("确认保存"), variant="primary") + infer_save_cancel_btn = gr.Button(i18n("取消"), variant="primary") + + # ===================== 【新增】:推理恢复默认确认弹窗 ===================== + infer_restore_mask = gr.Column(visible=False, elem_classes=["popup-mask"]) + with gr.Column(visible=False, elem_classes=["simulated-popup"]) as infer_restore_popup: + infer_restore_text = gr.Markdown(value=i18n("确定要恢复推理设置为默认值吗?所有自定义参数将被覆盖,且无法恢复!")) + with gr.Row(elem_classes=["popup-buttons"]): + infer_restore_confirm_btn = gr.Button(i18n("确认恢复"), variant="primary") + infer_restore_cancel_btn = gr.Button(i18n("取消"), variant="primary") + + # 推理保存专用提示框(原有,保持不变) + infer_save_msg = gr.Textbox( + label=i18n("推理设置操作提示"), + value="", + lines=1, + interactive=False, + elem_classes=["config-default-border"] + ) + + # 事件绑定:保存推理参数 + save_infer_settings_btn.click( + fn=lambda: (gr.update(visible=True), gr.update(visible=True)), + inputs=[], + outputs=[infer_save_mask, infer_save_popup] + ) + + infer_save_confirm_btn.click( + fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)), + inputs=[], + outputs=[infer_confirm_flag, infer_save_mask, infer_save_popup] + ).then( + fn=save_infer_settings_wrapper, + inputs=[batch_size, sample_steps, fragment_interval, speed_factor, + top_k, top_p, temperature, repetition_penalty, how_to_cut, + super_sampling, parallel_infer, split_bucket, seed, keep_random], + outputs=[infer_save_msg] + ).then( + fn=lambda: False, + inputs=[], + outputs=[infer_confirm_flag] + ) + + infer_save_cancel_btn.click( + fn=lambda: (False, gr.update(visible=False), gr.update(visible=False), i18n("保存操作已取消")), + inputs=[], + outputs=[infer_confirm_flag, infer_save_mask, infer_save_popup, infer_save_msg] + ) + + # 事件绑定:恢复默认推理参数 + restore_default_settings_btn.click( + fn=lambda: (gr.update(visible=True), gr.update(visible=True)), + 
inputs=[], + outputs=[infer_restore_mask, infer_restore_popup] + ) + + infer_restore_confirm_btn.click( + fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)), + inputs=[], + outputs=[infer_restore_flag, infer_restore_mask, infer_restore_popup] + ).then( + fn=restore_default_infer_settings_core, + inputs=[], + outputs=[batch_size, sample_steps, fragment_interval, speed_factor, + top_k, top_p, temperature, repetition_penalty, how_to_cut, + super_sampling, parallel_infer, split_bucket, seed, keep_random] + ).then( + fn=lambda: i18n("推理设置已恢复为默认值!"), + inputs=[], + outputs=[infer_save_msg] + ).then( + fn=lambda: False, + inputs=[], + outputs=[infer_restore_flag] + ) + + infer_restore_cancel_btn.click( + fn=lambda: (False, gr.update(visible=False), gr.update(visible=False), i18n("恢复操作已取消")), + inputs=[], + outputs=[infer_restore_flag, infer_restore_mask, infer_restore_popup, infer_save_msg] + ) + + # 事件绑定:合成语音 inference_button.click( inference, - [ - text, - text_language, - inp_ref, - inp_refs, - prompt_text, - prompt_language, - top_k, - top_p, - temperature, - how_to_cut, - batch_size, - speed_factor, - ref_text_free, - split_bucket, - fragment_interval, - seed, - keep_random, - parallel_infer, - repetition_penalty, - sample_steps, - super_sampling, - ], - [output, seed], + inputs=[text, text_language, inp_ref, inp_refs, prompt_text, prompt_language, + top_k, top_p, temperature, how_to_cut, batch_size, speed_factor, + ref_text_free, split_bucket, fragment_interval, seed, keep_random, + parallel_infer, repetition_penalty, sample_steps, super_sampling], + outputs=[output, seed] ) + + # 事件绑定:终止合成 stop_infer.click(tts_pipeline.stop, [], []) + + # 事件绑定:SoVITS模型切换 SoVITS_dropdown.change( change_sovits_weights, - [SoVITS_dropdown, prompt_language, text_language], - [ - prompt_language, - text_language, - prompt_text, - prompt_language, - text, - text_language, - sample_steps, - inp_refs, - ref_text_free, - inference_button, - ], - ) # + 
inputs=[SoVITS_dropdown, GPT_dropdown, prompt_language, text_language], + outputs=[prompt_language, text_language, prompt_text, prompt_language, + text, text_language, sample_steps, inp_refs, ref_text_free, + inference_button, preset_dropdown] + ) + + # 事件绑定:GPT模型切换 GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], []) + # 文本切分工具 with gr.Group(): - gr.Markdown( - value=i18n( - "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。" - ) - ) + gr.Markdown(value=i18n("文本切分工具(太长的文本建议先切分)")) with gr.Row(): - text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="", lines=4) - with gr.Column(): - _how_to_cut = gr.Radio( - label=i18n("怎么切"), - choices=[ - i18n("不切"), - i18n("凑四句一切"), - i18n("凑50字一切"), - i18n("按中文句号。切"), - i18n("按英文句号.切"), - i18n("按标点符号切"), - ], - value=i18n("凑四句一切"), - interactive=True, - ) - cut_text = gr.Button(i18n("切分"), variant="primary") + text_inp = gr.Textbox(label=i18n("切分前文本"), value="", lines=4) + _how_to_cut = gr.Radio(label=i18n("怎么切"), choices=list(cut_method.keys()), value=i18n("凑四句一切"), interactive=True) + cut_text = gr.Button(i18n("切分"), variant="primary") + text_opt = gr.Textbox(label=i18n("切分后文本"), value="", lines=4) def to_cut(text_inp, how_to_cut): - if len(text_inp.strip()) == 0 or text_inp == []: + if not text_inp.strip(): return "" method = get_method(cut_method[how_to_cut]) return method(text_inp) - text_opt = gr.Textbox(label=i18n("切分后文本"), value="", lines=4) cut_text.click(to_cut, [text_inp, _how_to_cut], [text_opt]) - gr.Markdown(value=i18n("后续将支持转音素、手工修改音素、语音合成分步执行。")) + # 页面加载初始化 + app.load(fn=init_ui_preset_config, inputs=[], outputs=[preset_dropdown, preset_name_input, inp_ref, prompt_text, prompt_language]) + app.load(fn=init_infer_settings, inputs=[], outputs=[batch_size, sample_steps, fragment_interval, speed_factor, + top_k, top_p, temperature, repetition_penalty, how_to_cut, + super_sampling, parallel_infer, split_bucket, seed, keep_random]) + +# ===================== 应用入口 ===================== if __name__ == 
"__main__": - app.queue().launch( # concurrency_count=511, max_size=1022 + # 初始化 weight.json + if not os.path.exists("./weight.json"): + with open("./weight.json", "w", encoding="utf-8") as file: + json.dump({"GPT": {}, "SoVITS": {}}, file) + + # 直接打开推理页的模型配置初始化 + if is_direct_launch: + default_gpt_path = gpt_path + default_sovits_path = sovits_path + default_version = version + + last_selected_data = read_last_selected_models() + if last_selected_data is None: + init_last_selected_models(default_gpt_path, default_sovits_path, default_version) + else: + # 加载保存的模型配置 + saved_gpt = last_selected_data["gpt_model_path"] + saved_sovits = last_selected_data["sovits_model_path"] + saved_version = last_selected_data["version"] + + if saved_gpt in GPT_names and os.path.exists(saved_gpt): + gpt_path = saved_gpt + if saved_sovits in SoVITS_names and os.path.exists(saved_sovits): + sovits_path = saved_sovits + if saved_version in ["v1", "v2", "v3", "v4"]: + version = saved_version + + # 主动加载模型权重 + valid_gpt_path = gpt_path if (gpt_path in GPT_names and os.path.exists(gpt_path)) else GPT_names[-1] + if "!" in valid_gpt_path or "!" in valid_gpt_path: + valid_gpt_path = name2gpt_path[valid_gpt_path] + tts_pipeline.init_t2s_weights(valid_gpt_path) + + valid_sovits_path = sovits_path if (sovits_path in SoVITS_names and os.path.exists(sovits_path)) else SoVITS_names[0] + if "!" in valid_sovits_path or "!" 
in valid_sovits_path: + valid_sovits_path = name2sovits_path[valid_sovits_path] + tts_pipeline.init_vits_weights(valid_sovits_path) + + # 更新 weight.json + with open("./weight.json", "r", encoding="utf-8") as f: + weight_data = json.loads(f.read()) + weight_data["GPT"][version] = valid_gpt_path + weight_data["SoVITS"][version] = valid_sovits_path + with open("./weight.json", "w", encoding="utf-8") as f: + json.dump(weight_data, f, ensure_ascii=False, indent=4) + + # 启动Gradio应用 + app.queue().launch( server_name="0.0.0.0", inbrowser=True, share=is_share, server_port=infer_ttswebui, - # quiet=True, - ) + ) \ No newline at end of file diff --git a/GPT_SoVITS/persistence_tools.py b/GPT_SoVITS/persistence_tools.py new file mode 100644 index 00000000..ef1774a3 --- /dev/null +++ b/GPT_SoVITS/persistence_tools.py @@ -0,0 +1,425 @@ +# -*- coding: utf-8 -*- +""" +GPT-SoVITS 持久化工具类 +包含:模型配置、参考音频、推理参数 的持久化读写与管理 +抽离自主文件,减少主文件臃肿,方便后续维护 +""" +import json +import yaml +import hashlib +import os +import shutil +import random +from pathlib import Path + +# ===================== 全局配置(统一管理所有持久化文件路径) ===================== +# 模型持久化配置文件 +LAST_SELECTED_MODELS_JSON = Path("./last_selected_models.json") +# 参考预设最后选中配置文件 +LAST_SELECTED_PRESET_JSON = Path("./last_selected_preset.json") +# 参考音频持久化目录 +REF_AUDIO_DIR = Path("GPT_SoVITS/ref_audios") +# 参考预设配置文件 +REF_PRESETS_YAML = Path("GPT_SoVITS/configs/ref_audios_presets.yaml") +# 推理参数配置文件 +INFER_SETTINGS_JSON = Path("GPT_SoVITS/configs/infer_settings.json") + +# 参考音频配置常量 +MAX_FILENAME_LENGTH = 40 +INVALID_FILE_CHARS = set(r'\/:*?"<>|') + +# 默认推理参数 +DEFAULT_INFER_SETTINGS = { + "batch_size": 20, + "sample_steps": 32, + "fragment_interval": 0.2, + "speed_factor": 1.0, + "top_k": 5, + "top_p": 1.0, + "temperature": 1.0, + "repetition_penalty": 1.35, + "how_to_cut": "凑四句一切", + "super_sampling": False, + "parallel_infer": True, + "split_bucket": True, + "seed": -1, + "keep_random": True +} + +# ===================== 通用工具函数(抽离重复逻辑) 
# ===================== Shared helpers (deduplicated utility logic) =====================
def sanitize_filename(name):
    """Replace characters illegal in filenames with underscores; never returns empty."""
    if not name:
        return "unnamed_preset"
    return ''.join(c if c not in INVALID_FILE_CHARS else '_' for c in name)

def get_audio_md5(file_path, chunk_size=4096):
    """Return the first 8 hex chars of the file's MD5, used to tell audio contents apart.

    Always returns a string: "invalid_file" when the path does not exist,
    and a random "err_NNNNNNNN" token when hashing itself fails.
    """
    if not os.path.exists(file_path):
        return "invalid_file"
    try:
        md5 = hashlib.md5()
        with open(file_path, 'rb') as f:
            while chunk := f.read(chunk_size):
                md5.update(chunk)
        return md5.hexdigest()[:8]
    except Exception as e:
        print(f"计算音频MD5失败:{e}")
        return f"err_{random.randint(10000000, 99999999)}"

def ensure_dir_exists(dir_path):
    """Create *dir_path* (a pathlib.Path, may be falsy) and its parents if missing."""
    if dir_path and not dir_path.exists():
        dir_path.mkdir(exist_ok=True, parents=True)

# ===================== 1. Model selection persistence (last_selected_models.json) =====================
def init_last_selected_models(gpt_default, sovits_default, current_version):
    """Create the model-selection config file with the given defaults; return the data."""
    ensure_dir_exists(LAST_SELECTED_MODELS_JSON.parent)
    init_data = {
        "gpt_model_path": gpt_default,
        "sovits_model_path": sovits_default,
        "version": current_version,
    }
    with open(LAST_SELECTED_MODELS_JSON, "w", encoding="utf-8") as f:
        json.dump(init_data, f, ensure_ascii=False, indent=4)
    print(f"首次生成模型配置文件:{LAST_SELECTED_MODELS_JSON}")
    return init_data

def read_last_selected_models():
    """Read the saved model selection; return None if missing, unreadable, or incomplete."""
    if not LAST_SELECTED_MODELS_JSON.exists():
        return None
    try:
        with open(LAST_SELECTED_MODELS_JSON, "r", encoding="utf-8") as f:
            data = json.load(f)
        # All three fields are required by the WebUI entry point.
        for field in ("gpt_model_path", "sovits_model_path", "version"):
            if field not in data:
                return None
        return data
    except Exception as e:
        print(f"读取模型配置失败:{e}")
        return None

def write_last_selected_models(gpt_path_new, sovits_path_new, current_version):
    """Persist the newly selected model paths, merging over any existing file."""
    ensure_dir_exists(LAST_SELECTED_MODELS_JSON.parent)
    try:
        data = read_last_selected_models() or {}
        data["gpt_model_path"] = gpt_path_new
        data["sovits_model_path"] = sovits_path_new
        data["version"] = current_version
        with open(LAST_SELECTED_MODELS_JSON, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
    except Exception as e:
        print(f"写入模型配置失败:{e}")

# ===================== 2. Reference-audio preset persistence =====================
# 2.1 Read / write / clear the "last selected preset" record
def read_last_selected_preset():
    """Return the name of the most recently selected preset, or None."""
    if not LAST_SELECTED_PRESET_JSON.exists():
        return None
    try:
        with open(LAST_SELECTED_PRESET_JSON, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data.get("last_selected_preset")
    except Exception as e:
        print(f"读取最后选中预设失败:{e}")
        return None

def write_last_selected_preset(preset_name):
    """Record the most recently selected preset name.

    Fix: mirrors the None-guard in get_preset_by_name — a None name (empty
    Gradio dropdown) is stored as "" instead of raising AttributeError.
    """
    ensure_dir_exists(LAST_SELECTED_PRESET_JSON.parent)
    try:
        name = (preset_name or "").strip()
        with open(LAST_SELECTED_PRESET_JSON, "w", encoding="utf-8") as f:
            json.dump({"last_selected_preset": name}, f, ensure_ascii=False, indent=4)
        print(f"已记录最后选中的预设:{name}")
    except Exception as e:
        print(f"写入最后选中预设失败:{e}")

def clear_last_selected_preset():
    """Blank out the last-selected-preset record (used after deleting that preset)."""
    if not LAST_SELECTED_PRESET_JSON.exists():
        return
    try:
        with open(LAST_SELECTED_PRESET_JSON, "w", encoding="utf-8") as f:
            json.dump({"last_selected_preset": ""}, f, ensure_ascii=False, indent=4)
    except Exception as e:
        print(f"清空最后选中预设失败:{e}")

# 2.2 Load / save / delete the preset collection
def load_ref_presets():
    """Load all reference presets from YAML, normalizing and validating each entry.

    Side effects: creates an empty config file on first run, nulls out audio
    paths that no longer exist, and garbage-collects unreferenced audio files.
    """
    ensure_dir_exists(REF_PRESETS_YAML.parent)

    # First run: create an empty list so later saves have a file to rewrite.
    if not REF_PRESETS_YAML.exists():
        with open(REF_PRESETS_YAML, "w", encoding="utf-8") as f:
            yaml.dump([], f, indent=4, allow_unicode=True)
        print(f"暂未检测到参考预设配置文件,已自动创建空文件:{REF_PRESETS_YAML}")
        return []

    try:
        with open(REF_PRESETS_YAML, "r", encoding="utf-8") as f:
            presets = yaml.load(f, Loader=yaml.FullLoader) or []

        # Legacy format: a single dict instead of a list of presets.
        if isinstance(presets, dict):
            presets = [{
                "name": "旧配置转换",
                "ref_audio_path": presets.get("ref_audio_path"),
                "prompt_text": presets.get("prompt_text", ""),
                "prompt_language": presets.get("prompt_language", "中文"),
            }]

        # Fill in missing fields and drop audio paths that vanished on disk.
        default_template = {"name": "", "ref_audio_path": None, "prompt_text": "", "prompt_language": "中文"}
        for preset in presets:
            for key, value in default_template.items():
                preset.setdefault(key, value)
            audio_path = preset["ref_audio_path"]
            if audio_path and not os.path.exists(str(audio_path)):
                preset["ref_audio_path"] = None

        clean_unreferenced_audios(presets)
        print(f"参考预设加载成功,共 {len(presets)} 组")
        return presets
    except Exception as e:
        print(f"参考预设加载失败:{e}")
        return []

def get_preset_by_name(preset_name, presets=None):
    """Return the preset dict matching *preset_name*, or an empty valid template.

    Guards against preset_name being None (empty dropdown) before .strip().
    """
    if preset_name is None:
        return {"name": "", "ref_audio_path": None, "prompt_text": "", "prompt_language": "中文"}

    if not presets:
        presets = load_ref_presets()

    wanted = preset_name.strip()
    for preset in presets:
        if preset["name"].strip() == wanted:
            return preset

    return {"name": "", "ref_audio_path": None, "prompt_text": "", "prompt_language": "中文"}

def save_ref_preset_core(preset_name, ref_audio_path, prompt_text, prompt_language, confirm_override=False):
    """Save or overwrite a preset.

    Returns (message, success, preset_names). When a same-named preset exists
    and confirm_override is False, nothing is written and the caller must
    re-invoke with confirm_override=True.
    """
    ensure_dir_exists(REF_AUDIO_DIR)
    presets = load_ref_presets()
    preset_name = preset_name.strip()
    names = [p["name"] for p in presets]

    # Input validation.
    if not ref_audio_path or not os.path.exists(str(ref_audio_path)):
        return "保存失败!请先上传有效的主参考音频文件。", False, names
    if not preset_name:
        return "保存失败!配置名称不能为空。", False, names

    # Copy the uploaded audio into the persistent directory.
    persistent_audio_path = get_persistent_audio_path(ref_audio_path, preset_name)
    if not persistent_audio_path:
        return "保存失败!音频文件持久化存储失败。", False, names

    # Locate an existing preset with the same name.
    preset_index = next(
        (idx for idx, p in enumerate(presets) if p["name"].strip() == preset_name), -1
    )
    if preset_index >= 0 and not confirm_override:
        return f"配置「{preset_name}」已存在,如需替换请确认覆盖!", False, names

    new_preset = {
        "name": preset_name,
        "ref_audio_path": persistent_audio_path,
        "prompt_text": prompt_text,
        "prompt_language": prompt_language,
    }

    is_new_preset = preset_index < 0
    if is_new_preset:
        presets.append(new_preset)
        tip = "新配置已新增!"
    else:
        presets[preset_index] = new_preset
        tip = "同名配置已覆盖!"

    try:
        with open(REF_PRESETS_YAML, "w", encoding="utf-8") as f:
            yaml.dump(presets, f, indent=4, allow_unicode=True)

        # A brand-new preset immediately becomes the remembered selection.
        if is_new_preset:
            write_last_selected_preset(preset_name)

        return f"配置保存成功!{tip}", True, [p["name"] for p in presets]
    except Exception as e:
        return f"保存失败:{str(e)}", False, [p["name"] for p in presets]

def delete_ref_preset_core(preset_name):
    """Delete a preset and its audio file.

    Returns (message, preset_names, newly_selected_name). Also clears the
    last-selected record when the deleted preset was the remembered one.
    """
    presets = load_ref_presets()
    preset_name = preset_name.strip()

    if not presets:
        return "暂无配置可删除!", [], None

    # Remember the audio path of the preset being removed.
    target_audio_path = next(
        (p.get("ref_audio_path") for p in presets if p["name"].strip() == preset_name), None
    )

    presets = [p for p in presets if p["name"].strip() != preset_name]

    try:
        with open(REF_PRESETS_YAML, "w", encoding="utf-8") as f:
            yaml.dump(presets, f, indent=4, allow_unicode=True)

        if target_audio_path and os.path.exists(target_audio_path):
            os.unlink(target_audio_path)
            print(f"同步删除配置对应音频:{target_audio_path}")

        if read_last_selected_preset() == preset_name:
            clear_last_selected_preset()

        preset_names = [p["name"] for p in presets]
        new_selected = preset_names[0] if preset_names else None
        tip = ("配置删除成功!已同步清理对应音频文件" if preset_names
               else "配置删除成功!已同步清理对应音频文件,当前无剩余配置")
        return tip, preset_names, new_selected
    except Exception as e:
        return f"删除失败:{str(e)}", [p["name"] for p in presets], preset_name

# 2.3 Reference-audio file management
def get_persistent_audio_path(src_audio_path, preset_name):
    """Copy uploaded audio into REF_AUDIO_DIR as "<preset>_<md5-8><ext>"; return the path.

    Fix: the previous cleanup glob "<name>_*" also matched audio belonging to
    any preset whose name starts with "<name>_" (preset "a" wiped "a_b"'s
    file). Stale-file matching is now anchored to exactly one hash segment.
    """
    import re  # local import: the module header does not import re

    if not src_audio_path or not os.path.exists(src_audio_path):
        return None

    safe_preset_name = sanitize_filename(preset_name)[:MAX_FILENAME_LENGTH]

    # Normalize the extension; anything unrecognized is stored as .wav.
    src_suffix = Path(src_audio_path).suffix.lower()
    if not src_suffix or src_suffix not in [".wav", ".mp3", ".flac", ".ogg", ".m4a"]:
        src_suffix = ".wav"

    audio_md5 = get_audio_md5(src_audio_path)
    dst_path = REF_AUDIO_DIR / f"{safe_preset_name}_{audio_md5}{src_suffix}"

    # Only delete files whose stem is this preset's name plus one hash token
    # (8 hex chars, or the "invalid_file"/"err_NNNNNNNN" fallback tokens).
    stale_stem = re.compile(
        re.escape(safe_preset_name) + r"_(?:[0-9a-f]{8}|invalid_file|err_\d{8})$"
    )
    for old_audio in REF_AUDIO_DIR.glob(f"{safe_preset_name}_*"):
        if old_audio.suffix.lower() in [".wav", ".mp3", ".flac", ".ogg", ".m4a"] \
                and stale_stem.fullmatch(old_audio.stem):
            try:
                old_audio.unlink()
            except Exception as e:
                print(f"清理旧音频失败:{e}")

    try:
        shutil.copy2(src_audio_path, dst_path)
        return str(dst_path)
    except Exception as e:
        print(f"音频持久化复制失败:{e}")
        return None

def clean_unreferenced_audios(presets):
    """Delete audio files in REF_AUDIO_DIR that no preset references anymore."""
    if not REF_AUDIO_DIR.exists():
        return

    # Absolute paths of every audio still referenced by some preset.
    referenced = set()
    for preset in presets:
        audio_path = preset.get("ref_audio_path")
        if audio_path and os.path.exists(audio_path):
            referenced.add(Path(audio_path).absolute())

    deleted_count = 0
    for audio_file in REF_AUDIO_DIR.glob("*"):
        if (audio_file.is_file()
                and audio_file.suffix.lower() in [".wav", ".mp3", ".flac", ".ogg", ".m4a"]
                and audio_file.absolute() not in referenced):
            try:
                audio_file.unlink()
                deleted_count += 1
            except Exception as e:
                print(f"清理冗余音频失败:{e}")

    if deleted_count > 0:
        print(f"清理冗余未引用音频 {deleted_count} 个")

# ===================== 3. Inference-settings persistence (infer_settings.json) =====================
def load_infer_settings():
    """Load inference parameters from INFER_SETTINGS_JSON, falling back to defaults.

    Fix: always return a fresh dict. Previously the module-level
    DEFAULT_INFER_SETTINGS dict itself was returned on the missing-file and
    error paths, so a caller mutating the result corrupted the defaults.
    """
    ensure_dir_exists(INFER_SETTINGS_JSON.parent)
    if not INFER_SETTINGS_JSON.exists():
        return dict(DEFAULT_INFER_SETTINGS)
    try:
        with open(INFER_SETTINGS_JSON, "r", encoding="utf-8") as f:
            saved = json.load(f)
        # Saved values override defaults; missing keys keep their default.
        return {**DEFAULT_INFER_SETTINGS, **saved}
    except Exception as e:
        print(f"加载推理参数失败,使用默认值:{e}")
        return dict(DEFAULT_INFER_SETTINGS)

def save_infer_settings_core(settings):
    """Persist *settings* to INFER_SETTINGS_JSON; return a user-facing message."""
    ensure_dir_exists(INFER_SETTINGS_JSON.parent)
    try:
        with open(INFER_SETTINGS_JSON, "w", encoding="utf-8") as f:
            json.dump(settings, f, indent=4, ensure_ascii=False)
        print(f"✅ 推理配置保存成功:{INFER_SETTINGS_JSON.absolute()}")
        return "推理设置保存成功!已覆盖原有配置文件。"
    except Exception as e:
        print(f"❌ 推理配置保存失败:{e}")
        return f"推理设置保存失败:{str(e)}"

def restore_default_infer_settings_core():
    """Rewrite the config file with defaults and return them as an ordered list.

    The order MUST match the Gradio `outputs=` list wired to the restore
    button in the WebUI (batch_size … keep_random).
    """
    ensure_dir_exists(INFER_SETTINGS_JSON.parent)
    try:
        with open(INFER_SETTINGS_JSON, "w", encoding="utf-8") as f:
            json.dump(DEFAULT_INFER_SETTINGS, f, indent=4, ensure_ascii=False)
        print(f"✅ 推理配置已恢复默认值:{INFER_SETTINGS_JSON.absolute()}")
    except Exception as e:
        print(f"❌ 推理配置恢复默认失败:{e}")

    ui_order = [
        "batch_size", "sample_steps", "fragment_interval", "speed_factor",
        "top_k", "top_p", "temperature", "repetition_penalty", "how_to_cut",
        "super_sampling", "parallel_infer", "split_bucket", "seed", "keep_random",
    ]
    return [DEFAULT_INFER_SETTINGS[key] for key in ui_order]
@echo off
:: 1. Switch the console code page to UTF-8 so Chinese output renders correctly (must come first).
chcp 65001 > nul

:: 2. Resolve the directory containing this .bat file and strip the trailing backslash.
set "SCRIPT_DIR=%~dp0"
set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"

:: 3. Change to the script's root directory (/d also switches drive).
cd /d "%SCRIPT_DIR%"

:: 4. Create a dedicated TEMP directory (mirrors the main launcher's setup).
if not exist "TEMP" md "TEMP"
set "TEMP=%SCRIPT_DIR%\TEMP"

:: 5. Core environment variables the inference script depends on.
set "version=v2Pro"
:: UI language.
set "language=zh_CN"
:: Enable half-precision inference (recommended on GPU; set to False for CPU).
set "is_half=True"
:: GPU index (adjust for multi-GPU setups; remove this line if no GPU is present).
set "_CUDA_VISIBLE_DEVICES=0"

:: 6. Prepend the bundled runtime directory to PATH so its python.exe is found first.
set "PATH=%SCRIPT_DIR%\runtime;%PATH%"

:: 7. Launch the parallel-inference WebUI, passing the Chinese language argument.
echo 正在启动GPT-SoVITS并行推理页面...
runtime\python.exe -I GPT_SoVITS/inference_webui_fast.py zh_CN

:: 8. Pause afterwards so any error output stays visible.
pause