diff --git a/.gitignore b/.gitignore
index d280e459..a277b2ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -193,3 +193,8 @@ cython_debug/
# PyPI configuration file
.pypirc
+/.vs
+/GPT_SoVITS/configs/tts_infer.yaml
+/GPT_SoVITS/configs/infer_settings.json
+/last_selected_preset.json
+/last_selected_models.json
diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py
index 486f85a3..ac905f4b 100644
--- a/GPT_SoVITS/AR/models/t2s_model.py
+++ b/GPT_SoVITS/AR/models/t2s_model.py
@@ -707,12 +707,11 @@ class Text2SemanticDecoder(nn.Module):
if idx == 0:
attn_mask = F.pad(attn_mask[:, :, -1].unsqueeze(-2), (0, 1), value=False)
- logits = logits[:, :-1]
else:
attn_mask = F.pad(attn_mask, (0, 1), value=False)
if idx < 11: ###至少预测出10个token不然不给停止(0.4s)
- logits[:, -1] = float("-inf")
+ logits = logits[:, :-1]
samples = sample(
logits, y, top_k=top_k, top_p=top_p, repetition_penalty=repetition_penalty, temperature=temperature
diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml
deleted file mode 100644
index f31061cc..00000000
--- a/GPT_SoVITS/configs/tts_infer.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-custom:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cuda
- is_half: true
- t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
- version: v2
- vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
-v1:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
- version: v1
- vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth
-v2:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
- version: v2
- vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
-v2Pro:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
- version: v2Pro
- vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2Pro.pth
-v2ProPlus:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
- version: v2ProPlus
- vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2ProPlus.pth
-v3:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
- version: v3
- vits_weights_path: GPT_SoVITS/pretrained_models/s2Gv3.pth
-v4:
- bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
- cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
- device: cpu
- is_half: false
- t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
- version: v4
- vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth
diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py
index 92d145b3..53f6bf69 100644
--- a/GPT_SoVITS/inference_webui_fast.py
+++ b/GPT_SoVITS/inference_webui_fast.py
@@ -1,18 +1,26 @@
+# -*- coding: utf-8 -*-
"""
-按中英混合识别
-按日英混合识别
-多语种启动切分识别语种
-全部按中文识别
-全部按英文识别
-全部按日文识别
+GPT-SoVITS WebUI 精简版
+保留功能:模型持久化、参考音频持久化、推理参数持久化、记住最后选中预设
+核心优化:抽离持久化逻辑到 persistence_tools.py,主文件大幅精简,结构清晰
"""
import psutil
import os
+import sys
+import json
+import yaml
+import random
+import re
+import shutil
+from pathlib import Path
+import torch
+import gradio as gr
+
+# 设置进程优先级(仅Windows有效)
def set_high_priority():
- """把当前 Python 进程设为 HIGH_PRIORITY_CLASS"""
if os.name != "nt":
- return # 仅 Windows 有效
+ return
p = psutil.Process(os.getpid())
try:
p.nice(psutil.HIGH_PRIORITY_CLASS)
@@ -20,19 +28,23 @@ def set_high_priority():
except psutil.AccessDenied:
print("权限不足,无法修改优先级(请用管理员运行)")
set_high_priority()
-import json
-import logging
-import os
-import random
-import re
-import sys
-import torch
+# ===================== 导入自定义持久化工具类(核心精简关键) =====================
+from persistence_tools import (
+ init_last_selected_models, read_last_selected_models, write_last_selected_models,
+ read_last_selected_preset, write_last_selected_preset, clear_last_selected_preset,
+ load_ref_presets, get_preset_by_name, save_ref_preset_core, delete_ref_preset_core,
+ load_infer_settings, save_infer_settings_core, restore_default_infer_settings_core,
+ REF_AUDIO_DIR
+)
+# ===================== 原有核心依赖导入 =====================
now_dir = os.getcwd()
sys.path.append(now_dir)
sys.path.append("%s/GPT_SoVITS" % (now_dir))
+# 屏蔽无关日志
+import logging
logging.getLogger("markdown_it").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR)
@@ -41,11 +53,9 @@ logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
-
-infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
-infer_ttswebui = int(infer_ttswebui)
-is_share = os.environ.get("is_share", "False")
-is_share = eval(is_share)
+# 配置参数初始化
+infer_ttswebui = int(os.environ.get("infer_ttswebui", 9872))
+is_share = eval(os.environ.get("is_share", "False"))
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
@@ -56,53 +66,53 @@ cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
bert_path = os.environ.get("bert_path", None)
version = model_version = os.environ.get("version", "v2")
-import gradio as gr
-from TTS_infer_pack.text_segmentation_method import get_method
-from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config
+# 标记是否直接打开推理页
+is_direct_launch = (gpt_path is None) and (sovits_path is None)
-from tools.assets import css, js, top_html
+# 多语言配置
from tools.i18n.i18n import I18nAuto, scan_language_list
-
language = os.environ.get("language", "Auto")
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
i18n = I18nAuto(language=language)
+# TTS 推理核心
+from TTS_infer_pack.text_segmentation_method import get_method
+from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config
-# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
+# 样式与配置工具
+from tools.assets import css, js, top_html
+from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path
+from process_ckpt import get_sovits_version_from_path_fast
-if torch.cuda.is_available():
- device = "cuda"
-# elif torch.backends.mps.is_available():
-# device = "mps"
-else:
- device = "cpu"
-
-# is_half = False
-# device = "cpu"
+# ===================== 全局变量初始化 =====================
+# 设备配置
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# 语种字典
dict_language_v1 = {
- i18n("中文"): "all_zh", # 全部按中文识别
- i18n("英文"): "en", # 全部按英文识别#######不变
- i18n("日文"): "all_ja", # 全部按日文识别
- i18n("中英混合"): "zh", # 按中英混合识别####不变
- i18n("日英混合"): "ja", # 按日英混合识别####不变
- i18n("多语种混合"): "auto", # 多语种启动切分识别语种
+ i18n("中文"): "all_zh",
+ i18n("英文"): "en",
+ i18n("日文"): "all_ja",
+ i18n("中英混合"): "zh",
+ i18n("日英混合"): "ja",
+ i18n("多语种混合"): "auto",
}
dict_language_v2 = {
- i18n("中文"): "all_zh", # 全部按中文识别
- i18n("英文"): "en", # 全部按英文识别#######不变
- i18n("日文"): "all_ja", # 全部按日文识别
- i18n("粤语"): "all_yue", # 全部按中文识别
- i18n("韩文"): "all_ko", # 全部按韩文识别
- i18n("中英混合"): "zh", # 按中英混合识别####不变
- i18n("日英混合"): "ja", # 按日英混合识别####不变
- i18n("粤英混合"): "yue", # 按粤英混合识别####不变
- i18n("韩英混合"): "ko", # 按韩英混合识别####不变
- i18n("多语种混合"): "auto", # 多语种启动切分识别语种
- i18n("多语种混合(粤语)"): "auto_yue", # 多语种启动切分识别语种
+ i18n("中文"): "all_zh",
+ i18n("英文"): "en",
+ i18n("日文"): "all_ja",
+ i18n("粤语"): "all_yue",
+ i18n("韩文"): "all_ko",
+ i18n("中英混合"): "zh",
+ i18n("日英混合"): "ja",
+ i18n("粤英混合"): "yue",
+ i18n("韩英混合"): "ko",
+ i18n("多语种混合"): "auto",
+ i18n("多语种混合(粤语)"): "auto_yue",
}
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
+# 文本切分方法
cut_method = {
i18n("不切"): "cut0",
i18n("凑四句一切"): "cut1",
@@ -112,41 +122,113 @@ cut_method = {
i18n("按标点符号切"): "cut5",
}
-from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path
+# V3/V4 标记
+v3v4set = {"v3", "v4"}
+# 模型列表初始化
SoVITS_names, GPT_names = get_weights_names()
from config import pretrained_sovits_name
-
path_sovits_v3 = pretrained_sovits_name["v3"]
path_sovits_v4 = pretrained_sovits_name["v4"]
is_exist_s2gv3 = os.path.exists(path_sovits_v3)
is_exist_s2gv4 = os.path.exists(path_sovits_v4)
+# TTS 配置与管道初始化
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
tts_config.device = device
tts_config.is_half = is_half
-# tts_config.version = version
tts_config.update_version(version)
-if gpt_path is not None:
-    if "!" in gpt_path or "!" in gpt_path:
- gpt_path = name2gpt_path[gpt_path]
+if gpt_path is not None and "!" not in gpt_path and "!" not in gpt_path:
tts_config.t2s_weights_path = gpt_path
-if sovits_path is not None:
-    if "!" in sovits_path or "!" in sovits_path:
- sovits_path = name2sovits_path[sovits_path]
+if sovits_path is not None and "!" not in sovits_path and "!" not in sovits_path:
tts_config.vits_weights_path = sovits_path
if cnhubert_base_path is not None:
tts_config.cnhuhbert_base_path = cnhubert_base_path
if bert_path is not None:
tts_config.bert_base_path = bert_path
-print(tts_config)
tts_pipeline = TTS(tts_config)
gpt_path = tts_config.t2s_weights_path
sovits_path = tts_config.vits_weights_path
version = tts_config.version
+# 参考预设全局变量
+ref_presets = load_ref_presets()
+preset_names = [p["name"] for p in ref_presets] if ref_presets else []
+# ===================== 精简版辅助函数 =====================
+def custom_sort_key(s):
+ """自定义排序键(数字自然排序)"""
+ parts = re.split("(\d+)", s)
+ return [int(part) if part.isdigit() else part for part in parts]
+
+def init_ui_preset_config():
+ """初始化UI配置(优先加载最后选中的预设)"""
+ global ref_presets, preset_names
+ ref_presets = load_ref_presets()
+ preset_names = [p["name"] for p in ref_presets] if ref_presets else []
+ is_interactive = bool(preset_names)
+
+ # 优先读取最后选中的预设
+ last_selected = read_last_selected_preset()
+ default_selected = last_selected if (last_selected and last_selected in preset_names) else (preset_names[0] if preset_names else None)
+ default_preset = get_preset_by_name(default_selected)
+
+ return (
+ gr.update(choices=preset_names, value=default_selected, interactive=is_interactive),
+ default_preset["name"],
+ default_preset["ref_audio_path"],
+ default_preset["prompt_text"],
+ default_preset["prompt_language"]
+ )
+
+def update_popup_text(preset_name, is_delete):
+ """更新弹窗提示文本"""
+ preset_name = preset_name.strip()
+ if is_delete:
+ return gr.update(value=i18n(f"确定要删除配置「{preset_name}」吗?删除后无法恢复!"))
+ else:
+ return gr.update(value=i18n(f"配置「{preset_name}」已存在,确定要覆盖吗?覆盖后无法恢复!"))
+
+def reset_confirm_result():
+ """重置确认结果为False"""
+ return False
+
+def save_ref_preset_wrapper(preset_name, ref_audio_path, prompt_text, prompt_language, confirm_override=False):
+ """保存预设包装器(适配Gradio输出)"""
+ msg, success, new_preset_names = save_ref_preset_core(preset_name, ref_audio_path, prompt_text, prompt_language, confirm_override)
+ style = gr.update(elem_classes=["config-error-border"]) if not success else gr.update(elem_classes=["config-default-border"])
+ dropdown_update = gr.update(choices=new_preset_names, value=preset_name if success and preset_name in new_preset_names else (new_preset_names[0] if new_preset_names else None), interactive=bool(new_preset_names))
+ return msg, style, dropdown_update
+
+def delete_ref_preset_wrapper(preset_name):
+ """删除预设包装器(适配Gradio输出)"""
+ msg, new_preset_names, new_selected = delete_ref_preset_core(preset_name)
+ dropdown_update = gr.update(choices=new_preset_names, value=new_selected, interactive=bool(new_preset_names))
+ new_preset = get_preset_by_name(new_selected)
+
+ return (
+ msg,
+ dropdown_update,
+ new_preset["name"],
+ new_preset["ref_audio_path"],
+ new_preset["prompt_text"],
+ new_preset["prompt_language"]
+ )
+
+def on_preset_selected(preset_name):
+ """预设切换回调(记录最后选中)"""
+ if not preset_name or not ref_presets:
+ return "", None, "", i18n("中文")
+ preset = get_preset_by_name(preset_name)
+ audio_path = preset["ref_audio_path"]
+
+ # 记录最后选中的预设
+ write_last_selected_preset(preset_name)
+
+ return preset["name"], audio_path, preset["prompt_text"], preset["prompt_language"]
+
+# ===================== 推理核心函数 =====================
def inference(
text,
text_lang,
@@ -170,8 +252,11 @@ def inference(
sample_steps,
super_sampling,
):
+ """语音合成推理核心"""
seed = -1 if keep_random else seed
actual_seed = seed if seed not in [-1, "", None] else random.randint(0, 2**32 - 1)
+ ref_audio_path = ref_audio_path if ref_audio_path else ""
+
inputs = {
"text": text,
"text_lang": dict_language[text_lang],
@@ -194,89 +279,64 @@ def inference(
"sample_steps": int(sample_steps),
"super_sampling": super_sampling,
}
+
try:
for item in tts_pipeline.run(inputs):
yield item, actual_seed
except NO_PROMPT_ERROR:
- gr.Warning(i18n("V3不支持无参考文本模式,请填写参考文本!"))
+ return i18n("V3不支持无参考文本模式,请填写参考文本!")
-
-def custom_sort_key(s):
- # 使用正则表达式提取字符串中的数字部分和非数字部分
- parts = re.split("(\d+)", s)
- # 将数字部分转换为整数,非数字部分保持不变
- parts = [int(part) if part.isdigit() else part for part in parts]
- return parts
-
-
-if os.path.exists("./weight.json"):
- pass
-else:
- with open("./weight.json", "w", encoding="utf-8") as file:
- json.dump({"GPT": {}, "SoVITS": {}}, file)
-
-with open("./weight.json", "r", encoding="utf-8") as file:
- weight_data = file.read()
- weight_data = json.loads(weight_data)
- gpt_path = os.environ.get("gpt_path", weight_data.get("GPT", {}).get(version, GPT_names[-1]))
- sovits_path = os.environ.get("sovits_path", weight_data.get("SoVITS", {}).get(version, SoVITS_names[0]))
- if isinstance(gpt_path, list):
- gpt_path = gpt_path[0]
- if isinstance(sovits_path, list):
- sovits_path = sovits_path[0]
-
-from process_ckpt import get_sovits_version_from_path_fast
-
-v3v4set = {"v3", "v4"}
-
-
-def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
+# ===================== 模型切换函数(保留预设记忆) =====================
+def change_sovits_weights(sovits_path, current_gpt_selected, prompt_language=None, text_language=None):
+ """切换SoVITS模型(保留最后选中预设)"""
    if "!" in sovits_path or "!" in sovits_path:
sovits_path = name2sovits_path[sovits_path]
- global version, model_version, dict_language, if_lora_v3
+
+ global version, model_version, dict_language, if_lora_v3, ref_presets, preset_names
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
- # print(sovits_path,version, model_version, if_lora_v3)
is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
- if if_lora_v3 == True and is_exist == False:
+
+ # 底模缺失校验
+ if if_lora_v3 and not is_exist:
info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重")
- gr.Warning(info)
- raise FileExistsError(info)
+ return i18n(info)
+
+ # 更新语种字典与预设
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
- if prompt_language is not None and text_language is not None:
- if prompt_language in list(dict_language.keys()):
- prompt_text_update, prompt_language_update = (
- {"__type__": "update"},
- {"__type__": "update", "value": prompt_language},
- )
- else:
- prompt_text_update = {"__type__": "update", "value": ""}
- prompt_language_update = {"__type__": "update", "value": i18n("中文")}
- if text_language in list(dict_language.keys()):
- text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
- else:
- text_update = {"__type__": "update", "value": ""}
- text_language_update = {"__type__": "update", "value": i18n("中文")}
- if model_version in v3v4set:
- visible_sample_steps = True
- visible_inp_refs = False
- else:
- visible_sample_steps = False
- visible_inp_refs = True
- yield (
- {"__type__": "update", "choices": list(dict_language.keys())},
- {"__type__": "update", "choices": list(dict_language.keys())},
- prompt_text_update,
- prompt_language_update,
- text_update,
- text_language_update,
- {"__type__": "update", "interactive": visible_sample_steps, "value": 32},
- {"__type__": "update", "visible": visible_inp_refs},
- {"__type__": "update", "interactive": True if model_version not in v3v4set else False},
- {"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
- )
-
- tts_pipeline.init_vits_weights(sovits_path)
+ ref_presets = load_ref_presets()
+ preset_names = [p["name"] for p in ref_presets] if ref_presets else []
+
+ # 恢复最后选中的预设
+ last_selected = read_last_selected_preset()
+ default_selected = last_selected if (last_selected and last_selected in preset_names) else (preset_names[0] if preset_names else None)
+
+ # 更新模型配置文件
+ if is_direct_launch:
+ valid_gpt_path = current_gpt_selected if (current_gpt_selected in GPT_names and os.path.exists(current_gpt_selected)) else GPT_names[-1]
+ write_last_selected_models(valid_gpt_path, sovits_path, version)
+
+ # 初始化返回值
+ if prompt_language is None or text_language is None:
+ return
+
+ # 语种兼容性校验
+ prompt_text_update, prompt_language_update = {"__type__": "update"}, {"__type__": "update", "value": prompt_language}
+ if prompt_language not in list(dict_language.keys()):
+ prompt_text_update = {"__type__": "update", "value": ""}
+ prompt_language_update = {"__type__": "update", "value": i18n("中文")}
+
+ text_update, text_language_update = {"__type__": "update"}, {"__type__": "update", "value": text_language}
+ if text_language not in list(dict_language.keys()):
+ text_update = {"__type__": "update", "value": ""}
+ text_language_update = {"__type__": "update", "value": i18n("中文")}
+
+ # V3/V4 特殊配置
+ visible_sample_steps = model_version in v3v4set
+ visible_inp_refs = not visible_sample_steps
+ ref_text_free_interactive = model_version not in v3v4set
+
+ # 加载中状态
yield (
{"__type__": "update", "choices": list(dict_language.keys())},
{"__type__": "update", "choices": list(dict_language.keys())},
@@ -286,34 +346,176 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
text_language_update,
{"__type__": "update", "interactive": visible_sample_steps, "value": 32},
{"__type__": "update", "visible": visible_inp_refs},
- {"__type__": "update", "interactive": True if model_version not in v3v4set else False},
- {"__type__": "update", "value": i18n("合成语音"), "interactive": True},
+ {"__type__": "update", "interactive": ref_text_free_interactive},
+ {"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
+ gr.update(choices=preset_names, value=default_selected, interactive=bool(preset_names)),
)
- with open("./weight.json") as f:
- data = f.read()
- data = json.loads(data)
- data["SoVITS"][version] = sovits_path
+
+ # 加载模型权重
+ tts_pipeline.init_vits_weights(sovits_path)
+
+ # 加载完成状态
+ yield (
+ {"__type__": "update", "choices": list(dict_language.keys())},
+ {"__type__": "update", "choices": list(dict_language.keys())},
+ prompt_text_update,
+ prompt_language_update,
+ text_update,
+ text_language_update,
+ {"__type__": "update", "interactive": visible_sample_steps, "value": 32},
+ {"__type__": "update", "visible": visible_inp_refs},
+ {"__type__": "update", "interactive": ref_text_free_interactive},
+ {"__type__": "update", "value": i18n("合成语音"), "interactive": True},
+ gr.update(choices=preset_names, value=default_selected, interactive=bool(preset_names)),
+ )
+
+ # 更新 weight.json
+ with open("./weight.json", "r") as f:
+ data = json.loads(f.read())
+ data["SoVITS"][version] = sovits_path
with open("./weight.json", "w") as f:
- f.write(json.dumps(data))
-
+ json.dump(data, f)
def change_gpt_weights(gpt_path):
+ """切换GPT模型"""
    if "!" in gpt_path or "!" in gpt_path:
gpt_path = name2gpt_path[gpt_path]
tts_pipeline.init_t2s_weights(gpt_path)
+
+ if is_direct_launch:
+ current_sovits_path = sovits_path if 'sovits_path' in globals() else SoVITS_names[0]
+ current_version = version if 'version' in globals() else "v2"
+ write_last_selected_models(gpt_path, current_sovits_path, current_version)
-
-with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app:
- gr.HTML(
- top_html.format(
- i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
- + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
- ),
- elem_classes="markdown",
+# ===================== 推理参数持久化包装函数 =====================
+def init_infer_settings():
+ """初始化推理参数"""
+ settings = load_infer_settings()
+ return (
+ settings["batch_size"],
+ settings["sample_steps"],
+ settings["fragment_interval"],
+ settings["speed_factor"],
+ settings["top_k"],
+ settings["top_p"],
+ settings["temperature"],
+ settings["repetition_penalty"],
+ settings["how_to_cut"],
+ settings["super_sampling"],
+ settings["parallel_infer"],
+ settings["split_bucket"],
+ settings["seed"],
+ settings["keep_random"]
)
+def save_infer_settings_wrapper(batch_size, sample_steps, fragment_interval, speed_factor,
+ top_k, top_p, temperature, repetition_penalty, how_to_cut,
+ super_sampling, parallel_infer, split_bucket, seed, keep_random):
+ """保存推理参数包装器"""
+ settings = {
+ "batch_size": batch_size,
+ "sample_steps": sample_steps,
+ "fragment_interval": fragment_interval,
+ "speed_factor": speed_factor,
+ "top_k": top_k,
+ "top_p": top_p,
+ "temperature": temperature,
+ "repetition_penalty": repetition_penalty,
+ "how_to_cut": how_to_cut,
+ "super_sampling": super_sampling,
+ "parallel_infer": parallel_infer,
+ "split_bucket": split_bucket,
+ "seed": -1 if keep_random else seed,
+ "keep_random": keep_random
+ }
+ return save_infer_settings_core(settings)
+
+# ===================== Gradio UI 构建(精简版) =====================
+custom_css = """
+/* 保存失败红边框样式(强制覆盖默认样式) */
+.config-error-border {
+ border: 2px solid #ff3b30 !important;
+ border-radius: 4px !important;
+ padding: 6px !important;
+}
+/* 保存成功/默认边框样式(还原原有样式) */
+.config-default-border {
+ border: 1px solid #e5e7eb !important;
+ border-radius: 4px !important;
+ padding: 6px !important;
+}
+/* 模拟弹窗样式:紧凑居中+自动换行+窄宽度 */
+.simulated-popup {
+ position: fixed;
+ top: 50%;
+ left: 50%;
+ transform: translate(-50%, -50%);
+ background: white;
+ border: 1px solid #e5e7eb;
+ border-radius: 8px;
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
+ padding: 15px;
+ z-index: 9999;
+ min-width: 300px;
+ max-width: 350px;
+ text-align: center;
+}
+/* 弹窗文本自动换行 */
+.simulated-popup .markdown-text {
+ word-wrap: break-word;
+ line-height: 1.5;
+ color: #333;
+}
+/* 弹窗遮罩层 */
+.popup-mask {
+ position: fixed;
+ top: 0;
+ left: 0;
+ width: 100%;
+ height: 100%;
+ background: rgba(0, 0, 0, 0.4);
+ z-index: 9998;
+}
+/* 按钮区域:紧凑间距+适配宽度 */
+.popup-buttons {
+ margin-top: 15px;
+ display: flex;
+ justify-content: center;
+ gap: 10px;
+}
+/* 按钮样式优化:适配弹窗宽度 */
+.popup-buttons button {
+ min-width: 80px;
+ padding: 6px 12px;
+}
+.btn-group-spacing {
+ display: flex !important;
+ flex-direction: row !important;
+ gap: 10px !important;
+ padding:5px 10px 5px 10px;
+ align-items: center !important;
+ justify-content: flex-start !important;
+}
+.btn-group-spacing button{
+ border-radius:5px;
+}
+"""
+
+with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css + custom_css) as app:
+ # 状态变量
+ current_preset = gr.State("")
+ confirm_flag = gr.State(False)
+ infer_confirm_flag = gr.State(False)
+ infer_restore_flag = gr.State(False)
+
+ # 顶部HTML
+ gr.HTML(top_html.format(
+ i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
+ + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
+ ), elem_classes="markdown")
+
+ # 模型切换区域
with gr.Column():
- # with gr.Group():
gr.Markdown(value=i18n("模型切换"))
with gr.Row():
GPT_dropdown = gr.Dropdown(
@@ -331,193 +533,428 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
+ # 核心功能区域
with gr.Row():
with gr.Column():
- gr.Markdown(value=i18n("*请上传并填写参考信息"))
+ gr.Markdown(value=i18n("*请上传并填写参考信息(支持多组预设配置切换/删除,主参考音频为必填项,自动持久化不丢失)"))
+
+ # 预设选择与名称输入
with gr.Row():
- inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
+ preset_dropdown = gr.Dropdown(
+ label=i18n("选择预设参考配置"),
+ choices=preset_names,
+ value=preset_names[0] if preset_names else None,
+ interactive=bool(preset_names),
+ )
+ preset_name_input = gr.Textbox(
+ label=i18n("当前配置名称(保存时使用)"),
+ value="",
+ placeholder=i18n("填写配置名称,点击保存按钮即可持久化"),
+ lines=1,
+ scale=1
+ )
+
+ # 配置提示框
+ save_config_msg = gr.Textbox(
+ label=i18n("配置操作提示"),
+ value="",
+ lines=1,
+ interactive=False,
+ elem_classes=["config-default-border"]
+ )
+
+ # 保存/删除按钮
+ with gr.Row():
+ save_ref_config_btn = gr.Button(i18n("保存当前参考为选中配置"), variant="primary")
+ delete_ref_config_btn = gr.Button(i18n("删除当前选中配置"), variant="primary")
+
+ # 音频与文本输入
+ with gr.Row():
+ inp_ref = gr.Audio(
+ label=i18n("主参考音频(3~10秒)【必填,自动持久化】"),
+ type="filepath",
+ value=None
+ )
inp_refs = gr.File(
- label=i18n("辅参考音频(可选多个,或不选)"),
+ label=i18n("辅参考音频(可选多个)"),
file_count="multiple",
visible=True if model_version != "v3" else False,
)
- prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
+
+ prompt_text = gr.Textbox(
+ label=i18n("主参考音频的文本【可选】"),
+ value="",
+ lines=2
+ )
+
with gr.Row():
prompt_language = gr.Dropdown(
- label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
+ label=i18n("主参考音频的语种"),
+ choices=list(dict_language.keys()),
+ value=i18n("中文")
)
- with gr.Column():
- ref_text_free = gr.Checkbox(
- label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"),
- value=False,
- interactive=True if model_version != "v3" else False,
- show_label=True,
- )
- gr.Markdown(
- i18n("使用无参考文本模式时建议使用微调的GPT")
- + "
"
- + i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。")
- )
+ ref_text_free = gr.Checkbox(
+ label=i18n("开启无参考文本模式"),
+ value=False,
+ interactive=True if model_version != "v3" else False,
+ show_label=True,
+ )
+
+ # 模拟弹窗(删除/覆盖)
+ delete_mask = gr.Column(visible=False, elem_classes=["popup-mask"])
+ with gr.Column(visible=False, elem_classes=["simulated-popup"]) as delete_popup:
+ delete_text = gr.Markdown(value="")
+ with gr.Row(elem_classes=["popup-buttons"]):
+ delete_confirm_btn = gr.Button(i18n("确认删除"), variant="primary")
+ delete_cancel_btn = gr.Button(i18n("取消"), variant="primary")
+
+ override_mask = gr.Column(visible=False, elem_classes=["popup-mask"])
+ with gr.Column(visible=False, elem_classes=["simulated-popup"]) as override_popup:
+ override_text = gr.Markdown(value="")
+ with gr.Row(elem_classes=["popup-buttons"]):
+ override_confirm_btn = gr.Button(i18n("确认覆盖"), variant="primary")
+ override_cancel_btn = gr.Button(i18n("取消"), variant="primary")
+
+ # 事件绑定:预设切换
+ preset_dropdown.change(
+ fn=on_preset_selected,
+ inputs=[preset_dropdown],
+ outputs=[preset_name_input, inp_ref, prompt_text, prompt_language]
+ )
+
+ # 事件绑定:保存预设
+ save_ref_config_btn.click(
+ fn=lambda pname: (pname.strip(), any(p["name"].strip() == pname.strip() for p in ref_presets)),
+ inputs=[preset_name_input],
+ outputs=[current_preset, confirm_flag]
+ ).then(
+ fn=lambda exists, pname: (
+ gr.update(visible=exists),
+ gr.update(visible=exists),
+ update_popup_text(pname, False)
+ ),
+ inputs=[confirm_flag, preset_name_input],
+ outputs=[override_mask, override_popup, override_text]
+ ).then(
+ fn=save_ref_preset_wrapper,
+ inputs=[preset_name_input, inp_ref, prompt_text, prompt_language, confirm_flag],
+ outputs=[save_config_msg, save_config_msg, preset_dropdown]
+ )
+
+ # 事件绑定:覆盖确认
+ override_confirm_btn.click(
+ fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)),
+ inputs=[],
+ outputs=[confirm_flag, override_mask, override_popup]
+ ).then(
+ fn=save_ref_preset_wrapper,
+ inputs=[preset_name_input, inp_ref, prompt_text, prompt_language, confirm_flag],
+ outputs=[save_config_msg, save_config_msg, preset_dropdown]
+ ).then(
+ fn=reset_confirm_result,
+ inputs=[],
+ outputs=[confirm_flag]
+ )
+
+ # 事件绑定:覆盖取消
+ override_cancel_btn.click(
+ fn=lambda: (False, gr.update(visible=False), gr.update(visible=False), i18n("覆盖操作已取消")),
+ inputs=[],
+ outputs=[confirm_flag, override_mask, override_popup, save_config_msg]
+ )
+
+ # 事件绑定:删除预设
+ delete_ref_config_btn.click(
+ fn=lambda pname: (pname, gr.update(visible=True), gr.update(visible=True), update_popup_text(pname, True)),
+ inputs=[preset_dropdown],
+ outputs=[current_preset, delete_mask, delete_popup, delete_text]
+ )
+
+ # 事件绑定:删除确认
+ delete_confirm_btn.click(
+ fn=lambda: (True, gr.update(visible=False), gr.update(visible=False)),
+ inputs=[],
+ outputs=[confirm_flag, delete_mask, delete_popup]
+ ).then(
+ fn=delete_ref_preset_wrapper,
+ inputs=[preset_dropdown],
+ outputs=[save_config_msg, preset_dropdown, preset_name_input, inp_ref, prompt_text, prompt_language]
+ ).then(
+ fn=reset_confirm_result,
+ inputs=[],
+ outputs=[confirm_flag]
+ )
+
+ # 事件绑定:删除取消
+ delete_cancel_btn.click(
+ fn=lambda pname: (False, gr.update(visible=False), gr.update(visible=False), i18n("删除操作已取消")),
+ inputs=[preset_dropdown],
+ outputs=[confirm_flag, delete_mask, delete_popup, save_config_msg]
+ )
with gr.Column():
gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
+
+ # 操作教程提示
+ gr.HTML('''
+