From f20f17c2c06c8cf2f99fcf66c922a881bbcc34a6 Mon Sep 17 00:00:00 2001 From: Karasukaigan <80465610+Karasukaigan@users.noreply.github.com> Date: Fri, 16 May 2025 17:58:56 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=80=9A=E8=BF=87Gradio?= =?UTF-8?q?=20API=E8=B0=83=E7=94=A8=E5=90=88=E6=88=90=E8=AF=AD=E9=9F=B3?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E6=97=B6=E5=87=BA=E7=8E=B0=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复通过Gradio API调用合成语音接口`/get_tts_wav`时出现参数类型错误的问题。 ## 报错信息 TypeError: unsupported operand type(s) for /: 'int' and 'str' ## 错误原因 `inference_webui.py`的`get_tts_wav`里并未对传入`sample_steps`的类型进行判断。而由于Gradio在自动生成接口文档时会将`gr.Radio`传入的值判定为字符串,因此如果有用户参考WebUI下面”通过 API 使用“里的说明调用`/get_tts_wav`时,则会因为文档错误导致传参类型错误,从而导致最终的报错。 ## 修复方式 通过在`get_tts_wav`开头部分添加对`sample_steps`格式的转换(统一转为int)来解决传参类型错误的问题。 --- GPT_SoVITS/inference_webui.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 4bee27cd..7bf93087 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -766,6 +766,12 @@ def get_tts_wav( if not ref_free: phones1, bert1, norm_text1 = get_phones_and_bert(prompt_text, prompt_language, version) + # 确保sample_steps为int + try: + sample_steps = int(sample_steps) + except (TypeError, ValueError): + sample_steps = 8 + for i_text, text in enumerate(texts): # 解决输入目标文本的空行导致报错的问题 if len(text.strip()) == 0: From 47426d18e7e2fc33d9d0f9872bf9b051197a3539 Mon Sep 17 00:00:00 2001 From: Karasukaigan <80465610+Karasukaigan@users.noreply.github.com> Date: Tue, 27 May 2025 22:22:15 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=BE=AE=E8=B0=83?= =?UTF-8?q?=E8=AE=AD=E7=BB=83WebUI=E7=AE=80=E5=8C=96=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 用户不再需要多次切换不同的选项卡页面来完成一次微调训练。现在微调训练的所有流程都在同一个页面里,按照从上往下的顺序排好,并且隐藏了非常用的设置项。 --- go-webui-simple-mode.bat | 2 + webui_simple.py | 2080 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 2082 insertions(+) create mode 100644 go-webui-simple-mode.bat create mode 100644 webui_simple.py diff --git a/go-webui-simple-mode.bat b/go-webui-simple-mode.bat new file mode 100644 index 00000000..a55a79bc --- /dev/null +++ b/go-webui-simple-mode.bat @@ -0,0 +1,2 @@ +runtime\python.exe -I webui_simple.py zh_CN +pause diff --git a/webui_simple.py b/webui_simple.py new file mode 100644 index 00000000..b9074be1 --- /dev/null +++ b/webui_simple.py @@ -0,0 +1,2080 @@ +import os +import sys + +if len(sys.argv) == 1: + sys.argv.append("v2") +version = "v1" if sys.argv[1] == "v1" else "v4" +os.environ["version"] = version +now_dir = os.getcwd() +sys.path.insert(0, now_dir) +import warnings + +warnings.filterwarnings("ignore") +import json +import platform +import re +import shutil +import signal + +import psutil +import torch +import yaml + +os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO" +torch.manual_seed(233333) +tmp = os.path.join(now_dir, "TEMP") +os.makedirs(tmp, exist_ok=True) +os.environ["TEMP"] = tmp +if os.path.exists(tmp): + for name in os.listdir(tmp): + if name == "jieba.cache": + continue + path = "%s/%s" % (tmp, name) + delete = os.remove if os.path.isfile(path) else shutil.rmtree + try: + delete(path) + except Exception as e: + print(str(e)) + pass +import site +import traceback + +site_packages_roots = [] +for path in site.getsitepackages(): + if "packages" in path: + site_packages_roots.append(path) +if site_packages_roots == []: + site_packages_roots = ["%s/runtime/Lib/site-packages" % now_dir] +# os.environ["OPENBLAS_NUM_THREADS"] = "4" +os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" +os.environ["all_proxy"] = "" +for site_packages_root in site_packages_roots: + if os.path.exists(site_packages_root): + try: + with open("%s/users.pth" % (site_packages_root), "w") as f: + f.write( + # "%s\n%s/runtime\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + "%s\n%s/GPT_SoVITS/BigVGAN\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + % (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir) + ) + break + except PermissionError: + traceback.print_exc() +import shutil +import subprocess +from subprocess import Popen + +from config import ( + exp_root, + infer_device, + is_half, + is_share, + python_exec, + webui_port_infer_tts, + webui_port_main, + webui_port_subfix, + webui_port_uvr5, +) +from tools import my_utils +from tools.i18n.i18n import I18nAuto, scan_language_list + +language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" +os.environ["language"] = language +i18n = I18nAuto(language=language) +from multiprocessing import cpu_count + +from tools.my_utils import check_details, check_for_existance + +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu +try: + import gradio.analytics as analytics + + analytics.version_check = lambda: None +except: + ... +import gradio as gr + +n_cpu = cpu_count() + +ngpu = torch.cuda.device_count() +gpu_infos = [] +mem = [] +if_gpu_ok = False + +# 判断是否有能用来训练和加速推理的N卡 +ok_gpu_keywords = { + "10", + "16", + "20", + "30", + "40", + "A2", + "A3", + "A4", + "P4", + "A50", + "500", + "A60", + "70", + "80", + "90", + "M4", + "T4", + "TITAN", + "L4", + "4060", + "H", + "600", + "506", + "507", + "508", + "509", +} +set_gpu_numbers = set() +if torch.cuda.is_available() or ngpu != 0: + for i in range(ngpu): + gpu_name = torch.cuda.get_device_name(i) + if any(value in gpu_name.upper() for value in ok_gpu_keywords): + # A10#A100#V100#A40#P40#M40#K80#A4500 + if_gpu_ok = True # 至少有一张能用的N卡 + gpu_infos.append("%s\t%s" % (i, gpu_name)) + set_gpu_numbers.add(i) + mem.append(int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)) +# # 判断是否支持mps加速 +# if torch.backends.mps.is_available(): +# if_gpu_ok = True +# gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) +# mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 + + +v3v4set={"v3","v4"} +def set_default(): + global \ + default_batch_size, \ + default_max_batch_size, \ + gpu_info, \ + default_sovits_epoch, \ + default_sovits_save_every_epoch, \ + max_sovits_epoch, \ + max_sovits_save_every_epoch, \ + default_batch_size_s1, \ + if_force_ckpt + if_force_ckpt = False + if if_gpu_ok and len(gpu_infos) > 0: + gpu_info = "\n".join(gpu_infos) + minmem = min(mem) + default_batch_size = minmem // 2 if version not in v3v4set else minmem // 8 + default_batch_size_s1 = minmem // 2 + else: + gpu_info = "%s\t%s" % ("0", "CPU") + gpu_infos.append("%s\t%s" % ("0", "CPU")) + set_gpu_numbers.add(0) + default_batch_size = default_batch_size_s1 = int(psutil.virtual_memory().total / 1024 / 1024 / 1024 / 4) + if version not in v3v4set: + default_sovits_epoch = 8 + default_sovits_save_every_epoch = 4 + max_sovits_epoch = 25 # 40 + max_sovits_save_every_epoch = 25 # 10 + else: + default_sovits_epoch = 2 + default_sovits_save_every_epoch = 1 + max_sovits_epoch = 20 # 40 # 3 + max_sovits_save_every_epoch = 10 # 10 # 3 + + default_batch_size = max(1, default_batch_size) + default_batch_size_s1 = max(1, default_batch_size_s1) + default_max_batch_size = default_batch_size * 3 + + +set_default() + +gpus = "-".join([i[0] for i in gpu_infos]) +default_gpu_numbers = str(sorted(list(set_gpu_numbers))[0]) + + +def fix_gpu_number(input): # 将越界的number强制改到界内 + try: + if int(input) not in set_gpu_numbers: + return default_gpu_numbers + except: + return input + return input + + +def fix_gpu_numbers(inputs): + output = [] + try: + for input in inputs.split(","): + output.append(str(fix_gpu_number(input))) + return ",".join(output) + except: + return inputs + + +pretrained_sovits_name = [ + "GPT_SoVITS/pretrained_models/s2G488k.pth", + "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", + "GPT_SoVITS/pretrained_models/s2Gv3.pth", + "GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth", +] +pretrained_gpt_name = [ + "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt", + "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", + "GPT_SoVITS/pretrained_models/s1v3.ckpt", + "GPT_SoVITS/pretrained_models/s1v3.ckpt", +] + +pretrained_model_list = ( + pretrained_sovits_name[int(version[-1]) - 1], + pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D"), + pretrained_gpt_name[int(version[-1]) - 1], + "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + "GPT_SoVITS/pretrained_models/chinese-hubert-base", +) + +_ = "" +for i in pretrained_model_list: + if "s2Dv3" not in i and os.path.exists(i) == False: + if "s2Dv4" in i: + continue + _ += f"\n {i}" +if os.path.exists(pretrained_sovits_name[3]) == False: + _ += f"\n {pretrained_sovits_name[3]}" +if _: + print("warning: ", i18n("以下模型不存在:") + _) + +_ = [[], []] +for i in range(4): + if os.path.exists(pretrained_gpt_name[i]): + _[0].append(pretrained_gpt_name[i]) + else: + _[0].append("") ##没有下pretrained模型的,说不定他们是想自己从零训底模呢 + if os.path.exists(pretrained_sovits_name[i]): + _[-1].append(pretrained_sovits_name[i]) + else: + _[-1].append("") +pretrained_gpt_name, pretrained_sovits_name = _ + +SoVITS_weight_root = ["SoVITS_weights", "SoVITS_weights_v2", "SoVITS_weights_v3", "SoVITS_weights_v4"] +GPT_weight_root = ["GPT_weights", "GPT_weights_v2", "GPT_weights_v3", "GPT_weights_v4"] +for root in SoVITS_weight_root + GPT_weight_root: + os.makedirs(root, exist_ok=True) + + +def get_weights_names(): + SoVITS_names = [name for name in pretrained_sovits_name if name != ""] + for path in SoVITS_weight_root: + for name in os.listdir(path): + if name.endswith(".pth"): + SoVITS_names.append("%s/%s" % (path, name)) + GPT_names = [name for name in pretrained_gpt_name if name != ""] + for path in GPT_weight_root: + for name in os.listdir(path): + if name.endswith(".ckpt"): + GPT_names.append("%s/%s" % (path, name)) + return SoVITS_names, GPT_names + + +SoVITS_names, GPT_names = get_weights_names() +for path in SoVITS_weight_root + GPT_weight_root: + os.makedirs(path, exist_ok=True) + + +def custom_sort_key(s): + # 使用正则表达式提取字符串中的数字部分和非数字部分 + parts = re.split("(\d+)", s) + # 将数字部分转换为整数,非数字部分保持不变 + parts = [int(part) if part.isdigit() else part for part in parts] + return parts + + +def change_choices(): + SoVITS_names, GPT_names = get_weights_names() + return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, { + "choices": sorted(GPT_names, key=custom_sort_key), + "__type__": "update", + } + + +p_label = None +p_uvr5 = None +p_asr = None +p_denoise = None +p_tts_inference = None + + +def kill_proc_tree(pid, including_parent=True): + try: + parent = psutil.Process(pid) + except psutil.NoSuchProcess: + # Process already terminated + return + + children = parent.children(recursive=True) + for child in children: + try: + os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + if including_parent: + try: + os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + + +system = platform.system() + + +def kill_process(pid, process_name=""): + if system == "Windows": + cmd = "taskkill /t /f /pid %s" % pid + # os.system(cmd) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + else: + kill_proc_tree(pid) + print(process_name + i18n("进程已终止")) + + +def process_info(process_name="", indicator=""): + if indicator == "opened": + return process_name + i18n("已开启") + elif indicator == "open": + return i18n("开启") + process_name + elif indicator == "closed": + return process_name + i18n("已关闭") + elif indicator == "close": + return i18n("关闭") + process_name + elif indicator == "running": + return process_name + i18n("运行中") + elif indicator == "occupy": + return process_name + i18n("占用中") + "," + i18n("需先终止才能开启下一次任务") + elif indicator == "finish": + return process_name + i18n("已完成") + elif indicator == "failed": + return process_name + i18n("失败") + elif indicator == "info": + return process_name + i18n("进程输出信息") + else: + return process_name + + +process_name_subfix = i18n("音频标注WebUI") + + +def change_label(path_list): + global p_label + if p_label is None: + check_for_existance([path_list]) + path_list = my_utils.clean_path(path_list) + cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % ( + python_exec, + path_list, + webui_port_subfix, + is_share, + ) + yield ( + process_info(process_name_subfix, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_label = Popen(cmd, shell=True) + else: + kill_process(p_label.pid, process_name_subfix) + p_label = None + yield ( + process_info(process_name_subfix, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_uvr5 = i18n("人声分离WebUI") + + +def change_uvr5(): + global p_uvr5 + if p_uvr5 is None: + cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s' % (python_exec, infer_device, is_half, webui_port_uvr5, is_share) + yield ( + process_info(process_name_uvr5, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_uvr5 = Popen(cmd, shell=True) + else: + kill_process(p_uvr5.pid, process_name_uvr5) + p_uvr5 = None + yield ( + process_info(process_name_uvr5, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_tts = i18n("TTS推理WebUI") + + +def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled): + global p_tts_inference + if batched_infer_enabled: + cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language) + else: + cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language) + # #####v3暂不支持加速推理 + # if version=="v3": + # cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) + if p_tts_inference is None: + os.environ["gpt_path"] = gpt_path if "/" in gpt_path else "%s/%s" % (GPT_weight_root, gpt_path) + os.environ["sovits_path"] = sovits_path if "/" in sovits_path else "%s/%s" % (SoVITS_weight_root, sovits_path) + os.environ["cnhubert_base_path"] = cnhubert_base_path + os.environ["bert_path"] = bert_path + os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_number(gpu_number) + os.environ["is_half"] = str(is_half) + os.environ["infer_ttswebui"] = str(webui_port_infer_tts) + os.environ["is_share"] = str(is_share) + yield ( + process_info(process_name_tts, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_tts_inference = Popen(cmd, shell=True) + else: + kill_process(p_tts_inference.pid, process_name_tts) + p_tts_inference = None + yield ( + process_info(process_name_tts, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +from tools.asr.config import asr_dict + +process_name_asr = i18n("语音识别") + + +def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision): + global p_asr + if p_asr is None: + asr_inp_dir = my_utils.clean_path(asr_inp_dir) + asr_opt_dir = my_utils.clean_path(asr_opt_dir) + check_for_existance([asr_inp_dir]) + cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}' + cmd += f' -i "{asr_inp_dir}"' + cmd += f' -o "{asr_opt_dir}"' + cmd += f" -s {asr_model_size}" + cmd += f" -l {asr_lang}" + cmd += f" -p {asr_precision}" + output_file_name = os.path.basename(asr_inp_dir) + output_folder = asr_opt_dir or "output/asr_opt" + output_file_path = os.path.abspath(f"{output_folder}/{output_file_name}.list") + yield ( + process_info(process_name_asr, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_asr = Popen(cmd, shell=True) + p_asr.wait() + p_asr = None + yield ( + process_info(process_name_asr, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": asr_inp_dir}, + ) + else: + yield ( + process_info(process_name_asr, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close_asr(): + global p_asr + if p_asr is not None: + kill_process(p_asr.pid, process_name_asr) + p_asr = None + return ( + process_info(process_name_asr, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_denoise = i18n("语音降噪") + + +def open_denoise(denoise_inp_dir, denoise_opt_dir): + global p_denoise + if p_denoise == None: + denoise_inp_dir = my_utils.clean_path(denoise_inp_dir) + denoise_opt_dir = my_utils.clean_path(denoise_opt_dir) + check_for_existance([denoise_inp_dir]) + cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s' % ( + python_exec, + denoise_inp_dir, + denoise_opt_dir, + "float16" if is_half == True else "float32", + ) + + yield ( + process_info(process_name_denoise, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_denoise = Popen(cmd, shell=True) + p_denoise.wait() + p_denoise = None + yield ( + process_info(process_name_denoise, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": denoise_opt_dir}, + {"__type__": "update", "value": denoise_opt_dir}, + ) + else: + yield ( + process_info(process_name_denoise, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close_denoise(): + global p_denoise + if p_denoise is not None: + kill_process(p_denoise.pid, process_name_denoise) + p_denoise = None + return ( + process_info(process_name_denoise, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +p_train_SoVITS = None +process_name_sovits = i18n("SoVITS训练") + +def open1Ba( + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, +): + global p_train_SoVITS + if p_train_SoVITS == None: + with open("GPT_SoVITS/configs/s2.json") as f: + data = f.read() + data = json.loads(data) + s2_dir = "%s/%s" % (exp_root, exp_name) + os.makedirs("%s/logs_s2_%s" % (s2_dir, version), exist_ok=True) + if check_for_existance([s2_dir], is_train=True): + check_details([s2_dir], is_train=True) + if is_half == False: + data["train"]["fp16_run"] = False + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["train"]["text_low_lr_rate"] = text_low_lr_rate + data["train"]["pretrained_s2G"] = pretrained_s2G + data["train"]["pretrained_s2D"] = pretrained_s2D + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["save_every_epoch"] = save_every_epoch + data["train"]["gpu_numbers"] = gpu_numbers1Ba + data["train"]["grad_ckpt"] = if_grad_ckpt + data["train"]["lora_rank"] = lora_rank + data["model"]["version"] = version + data["data"]["exp_dir"] = data["s2_ckpt_dir"] = s2_dir + data["save_weight_dir"] = SoVITS_weight_root[int(version[-1]) - 1] + data["name"] = exp_name + data["version"] = version + tmp_config_path = "%s/tmp_s2.json" % tmp + with open(tmp_config_path, "w") as f: + f.write(json.dumps(data)) + if version in ["v1", "v2"]: + cmd = '"%s" GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path) + else: + cmd = '"%s" GPT_SoVITS/s2_train_v3_lora.py --config "%s"' % (python_exec, tmp_config_path) + yield ( + process_info(process_name_sovits, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + ) + print(cmd) + p_train_SoVITS = Popen(cmd, shell=True) + p_train_SoVITS.wait() + p_train_SoVITS = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_sovits, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False},SoVITS_dropdown_update,GPT_dropdown_update + ) + else: + yield ( + process_info(process_name_sovits, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + ) + + +def close1Ba(): + global p_train_SoVITS + if p_train_SoVITS is not None: + kill_process(p_train_SoVITS.pid, process_name_sovits) + p_train_SoVITS = None + return ( + process_info(process_name_sovits, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +p_train_GPT = None +process_name_gpt = i18n("GPT训练") + + +def open1Bb( + batch_size, + total_epoch, + exp_name, + if_dpo, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers, + pretrained_s1, +): + global p_train_GPT + if p_train_GPT == None: + with open( + "GPT_SoVITS/configs/s1longer.yaml" if version == "v1" else "GPT_SoVITS/configs/s1longer-v2.yaml" + ) as f: + data = f.read() + data = yaml.load(data, Loader=yaml.FullLoader) + s1_dir = "%s/%s" % (exp_root, exp_name) + os.makedirs("%s/logs_s1" % (s1_dir), exist_ok=True) + if check_for_existance([s1_dir], is_train=True): + check_details([s1_dir], is_train=True) + if is_half == False: + data["train"]["precision"] = "32" + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["pretrained_s1"] = pretrained_s1 + data["train"]["save_every_n_epoch"] = save_every_epoch + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_dpo"] = if_dpo + data["train"]["half_weights_save_dir"] = GPT_weight_root[int(version[-1]) - 1] + data["train"]["exp_name"] = exp_name + data["train_semantic_path"] = "%s/6-name2semantic.tsv" % s1_dir + data["train_phoneme_path"] = "%s/2-name2text.txt" % s1_dir + data["output_dir"] = "%s/logs_s1_%s" % (s1_dir, version) + # data["version"]=version + + os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_numbers(gpu_numbers.replace("-", ",")) + os.environ["hz"] = "25hz" + tmp_config_path = "%s/tmp_s1.yaml" % tmp + with open(tmp_config_path, "w") as f: + f.write(yaml.dump(data, default_flow_style=False)) + # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) + cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path) + yield ( + process_info(process_name_gpt, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + ) + print(cmd) + p_train_GPT = Popen(cmd, shell=True) + p_train_GPT.wait() + p_train_GPT = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_gpt, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False},SoVITS_dropdown_update,GPT_dropdown_update + ) + else: + yield ( + process_info(process_name_gpt, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + ) + + +def close1Bb(): + global p_train_GPT + if p_train_GPT is not None: + kill_process(p_train_GPT.pid, process_name_gpt) + p_train_GPT = None + return ( + process_info(process_name_gpt, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps_slice = [] +process_name_slice = i18n("语音切分") + + +def open_slice(inp_list, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts): + if not inp_list or len(inp_list) == 0: + yield ( + i18n("未选择任何文件"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + return + global ps_slice + opt_root = my_utils.clean_path(opt_root) + os.makedirs(opt_root, exist_ok=True) + + for idx, inp in enumerate(inp_list): + inp = my_utils.clean_path(inp) + if not os.path.isfile(inp): + yield ( + f"{i18n('文件')} {inp} {i18n('不存在')}", + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + continue + cmd = '"%s" tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s' % ( + python_exec, + inp, + opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + "0", + "1", + ) + print(f"执行命令: {cmd}") + p = Popen(cmd, shell=True) + ps_slice.append(p) + + yield ( + process_info(process_name_slice, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + for p in ps_slice: + p.wait() + ps_slice = [] + + yield ( + process_info(process_name_slice, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + ) + + +def close_slice(): + global ps_slice + if ps_slice != []: + for p_slice in ps_slice: + try: + kill_process(p_slice.pid, process_name_slice) + except: + traceback.print_exc() + ps_slice = [] + return ( + process_info(process_name_slice, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1a = [] +process_name_1a = i18n("文本分词与特征提取") + + +def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir): + global ps1a + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1a == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + "is_half": str(is_half), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1a.append(p) + yield ( + process_info(process_name_1a, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1a: + p.wait() + opt = [] + for i_part in range(all_parts): + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + path_text = "%s/2-name2text.txt" % opt_dir + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1a = [] + if len("".join(opt)) > 0: + yield ( + process_info(process_name_1a, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1a(): + global ps1a + if ps1a != []: + for p1a in ps1a: + try: + kill_process(p1a.pid, process_name_1a) + except: + traceback.print_exc() + ps1a = [] + return ( + process_info(process_name_1a, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1b = [] +process_name_1b = i18n("语音自监督特征提取") + + +def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): + global ps1b + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1b == []: + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": "%s/%s" % (exp_root, exp_name), + "cnhubert_base_dir": ssl_pretrained_dir, + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + yield ( + process_info(process_name_1b, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1b: + p.wait() + ps1b = [] + yield ( + process_info(process_name_1b, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1b, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1b(): + global ps1b + if ps1b != []: + for p1b in ps1b: + try: + kill_process(p1b.pid, process_name_1b) + except: + traceback.print_exc() + ps1b = [] + return ( + process_info(process_name_1b, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1c = [] +process_name_1c = i18n("语义Token提取") + + +def open1c(inp_text, exp_name, gpu_numbers, pretrained_s2G_path): + global ps1c + inp_text = my_utils.clean_path(inp_text) + if check_for_existance([inp_text, ""], is_dataset_processing=True): + check_details([inp_text, ""], is_dataset_processing=True) + if ps1c == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": "GPT_SoVITS/configs/s2.json", + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1c.append(p) + yield ( + process_info(process_name_1c, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1c: + p.wait() + opt = ["item_name\tsemantic_audio"] + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1c = [] + yield ( + process_info(process_name_1c, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1c, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1c(): + global ps1c + if ps1c != []: + for p1c in ps1c: + try: + kill_process(p1c.pid, process_name_1c) + except: + traceback.print_exc() + ps1c = [] + return ( + process_info(process_name_1c, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1abc = [] +process_name_1abc = i18n("训练集格式化一键三连") + + +def open1abc( + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + ssl_pretrained_dir, + pretrained_s2G_path, +): + global ps1abc + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1abc == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + try: + #############################1a + path_text = "%s/2-name2text.txt" % opt_dir + if os.path.exists(path_text) == False or ( + os.path.exists(path_text) == True + and len(open(path_text, "r", encoding="utf8").read().strip("\n").split("\n")) < 2 + ): + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + "is_half": str(is_half), + } + gpu_names = gpu_numbers1a.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = [] + for i_part in range(all_parts): # txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part) + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + assert len("".join(opt)) > 0, process_info(process_name_1a, "failed") + yield ( + i18n("进度") + ": 1A-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + #############################1b + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "cnhubert_base_dir": ssl_pretrained_dir, + } + gpu_names = gpu_numbers1Ba.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + yield ( + i18n("进度") + ": 1A-Done, 1B-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + #############################1c + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + if os.path.exists(path_semantic) == False or ( + os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31 + ): + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": "GPT_SoVITS/configs/s2.json", + } + gpu_names = gpu_numbers1c.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = ["item_name\tsemantic_audio"] + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + yield ( + process_info(process_name_1abc, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + except: + traceback.print_exc() + close1abc() + yield ( + process_info(process_name_1abc, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1abc, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1abc(): + global ps1abc + if ps1abc != []: + for p1abc in ps1abc: + try: + kill_process(p1abc.pid, process_name_1abc) + except: + traceback.print_exc() + ps1abc = [] + return ( + process_info(process_name_1abc, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + +def switch_version(version_): + os.environ["version"] = version_ + global version + version = version_ + if pretrained_sovits_name[int(version[-1]) - 1] != "" and pretrained_gpt_name[int(version[-1]) - 1] != "": + ... + else: + gr.Warning(i18n("未下载模型") + ": " + version.upper()) + set_default() + print(f"{i18n('预训练SoVITS-G模型路径')}: {pretrained_sovits_name[int(version[-1]) - 1]}") + print(f"{i18n('预训练SoVITS-D模型路径')}: {pretrained_sovits_name[int(version[-1]) - 1].replace('s2G', 's2D')}") + print(f"{i18n('预训练GPT模型路径')}: {pretrained_gpt_name[int(version[-1]) - 1]}") + return ( + {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1]}, + {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D")}, + {"__type__": "update", "value": pretrained_gpt_name[int(version[-1]) - 1]}, + {"__type__": "update", "value": pretrained_gpt_name[int(version[-1]) - 1]}, + {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1]}, + {"__type__": "update", "value": default_batch_size, "maximum": default_max_batch_size}, + {"__type__": "update", "value": default_sovits_epoch, "maximum": max_sovits_epoch}, + {"__type__": "update", "value": default_sovits_save_every_epoch, "maximum": max_sovits_save_every_epoch}, + {"__type__": "update", "visible": True if version not in v3v4set else False}, + { + "__type__": "update", + "value": False if not if_force_ckpt else True, + "interactive": True if not if_force_ckpt else False, + }, + {"__type__": "update", "interactive": True, "value": False}, + {"__type__": "update", "visible": True if version in v3v4set else False}, + ) # {'__type__': 'update', "interactive": False if version in v3v4set else True, "value": False}, \ ####batch infer + + +if os.path.exists("GPT_SoVITS/text/G2PWModel"): + ... +else: + cmd = '"%s" GPT_SoVITS/download.py' % python_exec + p = Popen(cmd, shell=True) + p.wait() + + +def sync(text): + return {"__type__": "update", "value": text} + + +with gr.Blocks(title="GPT-SoVITS WebUI") as app: + with gr.Accordion("使用说明", open=False): + gr.Markdown( + value=""" +## 微调训练步骤(简化版) +0. 填写模型名。 +1. 选择一个或多个音频文件,**勾选`自动开启语音识别`**,然后点击`1.开启语音切分`。`2.开启语音识别`会被自动执行。 +2. **勾选`自动开启SoVITS训练`、`自动开启GPT训练`**,然后点击`4.开启训练集格式化一键三连`。`5.开启SoVITS训练`、`6.开启GPT训练`会被自动执行。 + +## 微调训练步骤 +0. 填写模型名。 +1. 选择一个或多个音频文件,然后点击`1.开启语音切分`。如果已事先完成切分,可直接在`语音切分文件夹路径`里输入对应的路径。 +2. 选择一个ASR模型,然后点击`2.开启语音识别`来生成标注文件。如果已事先准备了标注文件,可直接在`标注文件路径`里输入对应的`.list`文件路径。 +3. 如果需要对标注文件进行修改,可以点击`3.开启音频标注WebUI`,但这一步是非必须的。 +4. 点击`4.开启训练集格式化一键三连`,这一步是为了将数据集整理为特定格式,后续可在`logs`目录里查看。 +5. 根据自己显卡的情况调整batch_size,设置合适的训练轮数,然后点击`5.开启SoVITS训练`,可在终端查看训练进度。 +6. 同样需要调整batch_size,设置合适的训练轮数,然后点击`6.开启GPT训练`,可在终端查看训练进度。如果显存大于12G,且数据集质量好,建议开启DPO。 + +## 推理步骤 +0. 默认是`微调训练`页面,所以需要先切换到`推理`页面。 +1. 在列表里选择之前训练好的GPT模型和SoVITS模型。 +2. 点击`开启TTS推理WebUI`。 + +## 训练新的模型 +1. 切换到`其他工具`页面。 +2. 点击`清空输出目录`。 +3. 切换回`微调训练`页面,按照微调训练步骤从头开始。**记得换一个模型名。** + +## 免责声明 +本软件以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。 +如不认可该条款,则不能使用或引用软件包内任何代码和文件,详见根目录LICENSE。 + +## 快速跳转 +- 中文教程文档: https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e +- GitHub项目页面: https://github.com/RVC-Boss/GPT-SoVITS +""") + + with gr.Row(): + with gr.Row(): + exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True) + gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False) + version_checkbox = gr.Radio(label=i18n("版本"), value=version, choices=["v1", "v2", "v4"]) + with gr.Row(visible=False): + pretrained_s2G = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[int(version[-1]) - 1], + interactive=True, + lines=2, + max_lines=3, + scale=9, + ) + pretrained_s2D = gr.Textbox( + label=i18n("预训练SoVITS-D模型路径"), + value=pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D"), + interactive=True, + lines=2, + max_lines=3, + scale=9, + ) + pretrained_s1 = gr.Textbox( + label=i18n("预训练GPT模型路径"), + value=pretrained_gpt_name[int(version[-1]) - 1], + interactive=True, + lines=2, + max_lines=3, + scale=10, + ) + + with gr.Tabs(): + with gr.TabItem(i18n("微调训练")): + # 语音切分工具 + with gr.Row(): + slice_inp_path = gr.Files(label=i18n("选择一个或多个音频文件"), file_types=["audio"]) + slice_opt_root = gr.Textbox(label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt", visible=False) + with gr.Row(visible=False): + threshold = gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34") + min_length = gr.Textbox( + label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"), + value="4000", + ) + min_interval = gr.Textbox(label=i18n("min_interval:最短切割间隔"), value="300") + hop_size = gr.Textbox( + label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"), + value="10", + ) + max_sil_kept = gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"), value="500") + with gr.Row(visible=False): + _max = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("max:归一化后最大值多少"), + value=0.9, + interactive=True, + ) + alpha = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("alpha_mix:混多少比例归一化后音频进来"), + value=0.25, + interactive=True, + ) + with gr.Row(visible=False): + n_process = gr.Slider( + minimum=1, maximum=n_cpu, step=1, label=i18n("切割使用的进程数"), value=4, interactive=True + ) + with gr.Row(): + slicer_info = gr.Textbox(label=process_info(process_name_slice, "info")) + open_slicer_button = gr.Button( + value="1."+process_info(process_name_slice, "open"), variant="primary", visible=True + ) + close_slicer_button = gr.Button( + value="1."+process_info(process_name_slice, "close"), variant="primary", visible=False + ) + + # 语音降噪工具 + with gr.Row(visible=False): + with gr.Column(scale=3): + with gr.Row(): + denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="output/slicer_opt") + denoise_output_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/denoise_opt") + with gr.Row(): + denoise_info = gr.Textbox(label=process_info(process_name_denoise, "info")) + open_denoise_button = gr.Button( + value=process_info(process_name_denoise, "open"), variant="primary", visible=True + ) + close_denoise_button = gr.Button( + value=process_info(process_name_denoise, "close"), variant="primary", visible=False + ) + + # 语音识别工具 + with gr.Row(): + asr_inp_dir = gr.Textbox( + label=i18n("语音切分文件夹路径"), value="output/slicer_opt", interactive=True + ) + asr_opt_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True, visible=False) + if_auto_asr = gr.Checkbox( + label=i18n("自动开启语音识别"), + value=False, + interactive=True, + show_label=True, + ) + with gr.Row(): + asr_model = gr.Dropdown( + label=i18n("ASR 模型"), + choices=list(asr_dict.keys()), + interactive=True, + value="达摩 ASR (中文)", + ) + asr_size = gr.Dropdown( + label=i18n("ASR 模型尺寸"), choices=["large"], interactive=True, value="large" + ) + asr_lang = gr.Dropdown( + label=i18n("ASR 语言设置"), choices=["zh", "yue"], interactive=True, value="zh" + ) + asr_precision = gr.Dropdown( + label=i18n("数据类型精度"), choices=["float32"], interactive=True, value="float32" + ) + with gr.Row(): + asr_info = gr.Textbox(label=process_info(process_name_asr, "info")) + open_asr_button = gr.Button( + value="2."+process_info(process_name_asr, "open"), variant="primary", visible=True + ) + close_asr_button = gr.Button( + value="2."+process_info(process_name_asr, "close"), variant="primary", visible=False + ) + + def change_lang_choices(key): # 根据选择的模型修改可选的语言 + return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} + + def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 + return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} + + def change_precision_choices(key): # 根据选择的模型修改可选的语言 + if key == "Faster Whisper (多语种)": + if default_batch_size <= 4: + precision = "int8" + elif is_half: + precision = "float16" + else: + precision = "float32" + else: + precision = "float32" + return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} + + asr_model.change(change_lang_choices, [asr_model], [asr_lang]) + asr_model.change(change_size_choices, [asr_model], [asr_size]) + asr_model.change(change_precision_choices, [asr_model], [asr_precision]) + + # 语音文本校对标注工具 + path_list = gr.Textbox( + label=i18n("标注文件路径 (含文件后缀 *.list)"), + value="output/asr_opt/slicer_opt.list", + interactive=True, + ) + with gr.Row(): + label_info = gr.Textbox(label=process_info(process_name_subfix, "info")) + open_label = gr.Button(value="3."+process_info(process_name_subfix, "open"), variant="primary", visible=True) + close_label = gr.Button( + value="3."+process_info(process_name_subfix, "close"), variant="primary", visible=False + ) + open_label.click(change_label, [path_list], [label_info, open_label, close_label]) + close_label.click(change_label, [path_list], [label_info, open_label, close_label]) + + # 训练集格式化工具 + with gr.Row(visible=False): + with gr.Row(): + inp_text = gr.Textbox( + label=i18n("*文本标注文件"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "asr_opt", "slicer_opt.list"), + interactive=True, + scale=10, + ) + with gr.Row(): + inp_wav_dir = gr.Textbox( + label=i18n("*训练集音频文件目录"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "slicer_opt"), + interactive=True, + placeholder=i18n( + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" + ), + scale=10, + ) + with gr.Row(visible=False): + with gr.Row(): + gpu_numbers1a = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + bert_pretrained_dir = gr.Textbox( + label=i18n("预训练中文BERT模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + interactive=False, + lines=2, + ) + with gr.Row(): + button1a_open = gr.Button( + value=process_info(process_name_1a, "open"), variant="primary", visible=True + ) + button1a_close = gr.Button( + value=process_info(process_name_1a, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1a = gr.Textbox(label=process_info(process_name_1a, "info")) + with gr.Row(visible=False): + with gr.Row(): + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + cnhubert_base_dir = gr.Textbox( + label=i18n("预训练SSL模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-hubert-base", + interactive=False, + lines=2, + ) + with gr.Row(): + button1b_open = gr.Button( + value=process_info(process_name_1b, "open"), variant="primary", visible=True + ) + button1b_close = gr.Button( + value=process_info(process_name_1b, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1b = gr.Textbox(label=process_info(process_name_1b, "info")) + with gr.Row(visible=False): + with gr.Row(): + gpu_numbers1c = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + pretrained_s2G_ = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[int(version[-1]) - 1], + interactive=False, + lines=2, + ) + with gr.Row(): + button1c_open = gr.Button( + value=process_info(process_name_1c, "open"), variant="primary", visible=True + ) + button1c_close = gr.Button( + value=process_info(process_name_1c, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1c = gr.Textbox(label=process_info(process_name_1c, "info")) + with gr.Row(): + if_auto_sovits = gr.Checkbox( + label=i18n("自动开启SoVITS训练"), + value=False, + interactive=True, + show_label=True, + ) + if_auto_gpt = gr.Checkbox( + label=i18n("自动开启GPT训练"), + value=False, + interactive=True, + show_label=True, + ) + with gr.Row(): + info1abc = gr.Textbox(label=process_info(process_name_1abc, "info")) + button1abc_open = gr.Button( + value="4."+process_info(process_name_1abc, "open"), variant="primary", visible=True + ) + button1abc_close = gr.Button( + value="4."+process_info(process_name_1abc, "close"), variant="primary", visible=False + ) + + pretrained_s2G.change(sync, [pretrained_s2G], [pretrained_s2G_]) + + def conditional_open_asr(auto_asr, *args): + if auto_asr: + yield from open_asr(*args) + else: + yield [ + "跳过语音识别", + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + gr.update(), + gr.update(), + gr.update() + ] + def conditional_close_asr(auto_asr, *args): + if auto_asr: + yield from close_asr(*args) + open_slicer_button.click( + open_slice, + [ + slice_inp_path, + slice_opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + n_process, + ], + [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], + ).then( + conditional_open_asr, + [if_auto_asr, asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], + ) + close_slicer_button.click( + close_slice, + [], + [slicer_info, open_slicer_button, close_slicer_button] + ).then( + conditional_close_asr, + [if_auto_asr], + [asr_info, open_asr_button, close_asr_button] + ) + + open_denoise_button.click( + open_denoise, + [denoise_input_dir, denoise_output_dir], + [denoise_info, open_denoise_button, close_denoise_button, asr_inp_dir, inp_wav_dir], + ) + close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) + + open_asr_button.click( + open_asr, + [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], + ) + close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) + + button1a_open.click( + open1a, + [inp_text, inp_wav_dir, exp_name, gpu_numbers1a, bert_pretrained_dir], + [info1a, button1a_open, button1a_close], + ) + button1a_close.click(close1a, [], [info1a, button1a_open, button1a_close]) + button1b_open.click( + open1b, + [inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], + [info1b, button1b_open, button1b_close], + ) + button1b_close.click(close1b, [], [info1b, button1b_open, button1b_close]) + button1c_open.click( + open1c, [inp_text, exp_name, gpu_numbers1c, pretrained_s2G], [info1c, button1c_open, button1c_close] + ) + button1c_close.click(close1c, [], [info1c, button1c_open, button1c_close]) + def conditional_open1Ba(auto_sovits, *args): + if auto_sovits: + yield from open1Ba(*args) + else: + yield [ + "跳过SoVITS训练", + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + gr.update(), + gr.update() + ] + def conditional_close1Ba(auto_sovits, *args): + if auto_sovits: + yield from close1Ba(*args) + def conditional_open1Bb(auto_gpt, *args): + if auto_gpt: + yield from open1Bb(*args) + else: + yield [ + "跳过GPT训练", + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + gr.update(), + gr.update() + ] + def conditional_close1Bb(auto_gpt, *args): + if auto_gpt: + yield from close1Bb(*args) + + # SoVITS 训练 + with gr.Column(): + with gr.Row(): + batch_size = gr.Slider( + minimum=1, + maximum=default_max_batch_size, + step=1, + label=i18n("每张显卡的batch_size"), + value=3, + interactive=True, + ) + total_epoch = gr.Slider( + minimum=1, + maximum=100, + step=1, + label=i18n("总训练轮数total_epoch,不建议太高"), + value=20, + interactive=True, + ) + save_every_epoch = gr.Slider( + minimum=1, + maximum=max_sovits_save_every_epoch, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + with gr.Row(): + text_low_lr_rate = gr.Slider( + minimum=0.2, + maximum=0.6, + step=0.05, + label=i18n("文本模块学习率权重"), + value=0.4, + visible=True if version not in v3v4set else False, + ) # v3v4 not need + lora_rank = gr.Radio( + label=i18n("LoRA秩"), + value="32", + choices=["16", "32", "64", "128"], + visible=True if version in v3v4set else False, + ) # v1v2 not need + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True + ) + with gr.Column(visible=False): + if_save_latest = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + if_grad_ckpt = gr.Checkbox( + label="v3是否开启梯度检查点节省显存占用", + value=False, + interactive=True if version in v3v4set else False, + show_label=True, + visible=False, + ) # 只有V3s2可以用 + with gr.Row(): + info1Ba = gr.Textbox(label=process_info(process_name_sovits, "info")) + button1Ba_open = gr.Button( + value="5."+process_info(process_name_sovits, "open"), variant="primary", visible=True + ) + button1Ba_close = gr.Button( + value="5."+process_info(process_name_sovits, "close"), variant="primary", visible=False + ) + + # GPT 训练 + with gr.Column(): + with gr.Row(): + batch_size1Bb = gr.Slider( + minimum=1, + maximum=40, + step=1, + label=i18n("每张显卡的batch_size"), + value=3, + interactive=True, + ) + total_epoch1Bb = gr.Slider( + minimum=2, + maximum=100, + step=1, + label=i18n("总训练轮数total_epoch"), + value=15, + interactive=True, + ) + save_every_epoch1Bb = gr.Slider( + minimum=1, + maximum=50, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + with gr.Row(): + if_dpo = gr.Checkbox( + label=i18n("是否开启DPO训练选项 (实验性)"), + value=False, + interactive=True, + show_label=True, + ) + gpu_numbers1Bb = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True + ) + with gr.Column(visible=False): + if_save_latest1Bb = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights1Bb = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + with gr.Row(): + info1Bb = gr.Textbox(label=process_info(process_name_gpt, "info")) + button1Bb_open = gr.Button( + value="6."+process_info(process_name_gpt, "open"), variant="primary", visible=True + ) + button1Bb_close = gr.Button( + value="6."+process_info(process_name_gpt, "close"), variant="primary", visible=False + ) + + button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]).then( + conditional_close1Bb, + [if_auto_gpt], [info1Bb, button1Bb_open, button1Bb_close] + ) + button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) + + with gr.TabItem(i18n("推理")): + with gr.Row(): + GPT_dropdown = gr.Dropdown( + label=i18n("GPT模型列表"), + choices=sorted(GPT_names, key=custom_sort_key), + value="GPT_SoVITS/pretrained_models/s1v3.ckpt", + interactive=True, + ) + SoVITS_dropdown = gr.Dropdown( + label=i18n("SoVITS模型列表"), + choices=sorted(SoVITS_names, key=custom_sort_key), + value="GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth", + interactive=True, + ) + refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") + refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + with gr.Row(): + gpu_number_1C = gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True) + batched_infer_enabled = gr.Checkbox( + label=i18n("启用并行推理版本"), value=False, interactive=True, show_label=True + ) + with gr.Row(): + tts_info = gr.Textbox(label=process_info(process_name_tts, "info")) + open_tts = gr.Button( + value=process_info(process_name_tts, "open"), variant="primary", visible=True + ) + close_tts = gr.Button( + value=process_info(process_name_tts, "close"), variant="primary", visible=False + ) + open_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + close_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + button1Ba_open.click( + open1Ba, + [ + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, + ], + [info1Ba, button1Ba_open, button1Ba_close,SoVITS_dropdown,GPT_dropdown], + ).then( + conditional_open1Bb, + [ + if_auto_gpt, + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], + ) + button1Bb_open.click( + open1Bb, + [ + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], + ) + version_checkbox.change( + switch_version, + [version_checkbox], + [ + pretrained_s2G, + pretrained_s2D, + pretrained_s1, + GPT_dropdown, + SoVITS_dropdown, + batch_size, + total_epoch, + save_every_epoch, + text_low_lr_rate, + if_grad_ckpt, + batched_infer_enabled, + lora_rank, + ], + ) + + button1abc_open.click( + open1abc, + [ + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + cnhubert_base_dir, + pretrained_s2G, + ], + [info1abc, button1abc_open, button1abc_close], + ).then( + conditional_open1Ba, + [ + if_auto_sovits, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, + ], + [info1Ba, button1Ba_open, button1Ba_close,SoVITS_dropdown,GPT_dropdown], + ).then( + conditional_open1Bb, + [ + if_auto_gpt, + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], + ) + button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]).then( + conditional_close1Ba, + [if_auto_sovits], [info1Ba, button1Ba_open, button1Ba_close] + ).then( + conditional_close1Bb, + [if_auto_gpt], [info1Bb, button1Bb_open, button1Bb_close] + ) + + with gr.TabItem(i18n("其他工具")): + # UVR5人声伴奏分离&去混响去延迟工具 + with gr.Row(): + uvr5_info = gr.Textbox(label=process_info(process_name_uvr5, "info")) + open_uvr5 = gr.Button(value=process_info(process_name_uvr5, "open"), variant="primary", visible=True) + close_uvr5 = gr.Button(value=process_info(process_name_uvr5, "close"), variant="primary", visible=False) + open_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) + close_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) + + # 新增:清空输出目录功能 + with gr.Row(): + clear_output_info = gr.Textbox(label=i18n("清空输出目录状态"), value="", interactive=False) + clear_output_button = gr.Button(value=i18n("清空输出目录"), variant="stop") + def clear_output_directory(): + output_dir = "./output" + if os.path.exists(output_dir): + try: + shutil.rmtree(output_dir) # 删除整个目录 + return i18n("输出目录已成功清空") + except Exception as e: + return f"{i18n('清空输出目录时出错')}: {str(e)}" + else: + return i18n("输出目录不存在,无需清空") + clear_output_button.click(fn=clear_output_directory, inputs=[], outputs=[clear_output_info]) + + app.queue().launch( # concurrency_count=511, max_size=1022 + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=webui_port_main, + # quiet=True, + ) From b666becb197e3b515f69dec682a5bf82c09bd802 Mon Sep 17 00:00:00 2001 From: Karasukaigan <80465610+Karasukaigan@users.noreply.github.com> Date: Tue, 27 May 2025 22:41:20 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E6=92=A4=E5=9B=9E=E5=AF=B9=E4=BA=8Einferen?= =?UTF-8?q?ce=5Fwebui=E7=9A=84=E6=94=B9=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPT_SoVITS/inference_webui.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 4e706da8..46820145 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -781,12 +781,6 @@ def get_tts_wav( if not ref_free: phones1, bert1, norm_text1 = get_phones_and_bert(prompt_text, prompt_language, version) - # 确保sample_steps为int - try: - sample_steps = int(sample_steps) - except (TypeError, ValueError): - sample_steps = 8 - for i_text, text in enumerate(texts): # 解决输入目标文本的空行导致报错的问题 if len(text.strip()) == 0: From 2fb92e74a28af9f4704ac068ac0a10a10cf08200 Mon Sep 17 00:00:00 2001 From: Karasukaigan <80465610+Karasukaigan@users.noreply.github.com> Date: Fri, 6 Jun 2025 01:20:45 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0WebUI=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E7=89=88=E4=BB=A5=E6=94=AF=E6=8C=81V2Pro?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新了webui_simple.py以支持V2Pro系列模型。 --- webui_simple.py | 953 ++++++++++++++++++++++++------------------------ 1 file changed, 478 insertions(+), 475 deletions(-) diff --git a/webui_simple.py b/webui_simple.py index b9074be1..1a5e7b63 100644 --- a/webui_simple.py +++ b/webui_simple.py @@ -1,10 +1,7 @@ import os import sys -if len(sys.argv) == 1: - sys.argv.append("v2") -version = "v1" if sys.argv[1] == "v1" else "v4" -os.environ["version"] = version +os.environ["version"] = version = "v2Pro" now_dir = os.getcwd() sys.path.insert(0, now_dir) import warnings @@ -12,7 +9,6 @@ import warnings warnings.filterwarnings("ignore") import json import platform -import re import shutil import signal @@ -64,18 +60,7 @@ import shutil import subprocess from subprocess import Popen -from config import ( - exp_root, - infer_device, - is_half, - is_share, - python_exec, - webui_port_infer_tts, - webui_port_main, - webui_port_subfix, - webui_port_uvr5, -) -from tools import my_utils +from tools.assets import css, js, top_html from tools.i18n.i18n import I18nAuto, scan_language_list language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" @@ -83,6 +68,22 @@ os.environ["language"] = language i18n = I18nAuto(language=language) from multiprocessing import cpu_count +from config import ( + GPU_INDEX, + GPU_INFOS, + IS_GPU, + exp_root, + infer_device, + is_half, + is_share, + memset, + python_exec, + webui_port_infer_tts, + webui_port_main, + webui_port_subfix, + webui_port_uvr5, +) +from tools import my_utils from tools.my_utils import check_details, check_for_existance # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu @@ -96,58 +97,14 @@ import gradio as gr n_cpu = cpu_count() -ngpu = torch.cuda.device_count() -gpu_infos = [] -mem = [] -if_gpu_ok = False +set_gpu_numbers = GPU_INDEX +gpu_infos = GPU_INFOS +mem = memset +is_gpu_ok = IS_GPU -# 判断是否有能用来训练和加速推理的N卡 -ok_gpu_keywords = { - "10", - "16", - "20", - "30", - "40", - "A2", - "A3", - "A4", - "P4", - "A50", - "500", - "A60", - "70", - "80", - "90", - "M4", - "T4", - "TITAN", - "L4", - "4060", - "H", - "600", - "506", - "507", - "508", - "509", -} -set_gpu_numbers = set() -if torch.cuda.is_available() or ngpu != 0: - for i in range(ngpu): - gpu_name = torch.cuda.get_device_name(i) - if any(value in gpu_name.upper() for value in ok_gpu_keywords): - # A10#A100#V100#A40#P40#M40#K80#A4500 - if_gpu_ok = True # 至少有一张能用的N卡 - gpu_infos.append("%s\t%s" % (i, gpu_name)) - set_gpu_numbers.add(i) - mem.append(int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)) -# # 判断是否支持mps加速 -# if torch.backends.mps.is_available(): -# if_gpu_ok = True -# gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) -# mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 +v3v4set = {"v3", "v4"} -v3v4set={"v3","v4"} def set_default(): global \ default_batch_size, \ @@ -160,15 +117,12 @@ def set_default(): default_batch_size_s1, \ if_force_ckpt if_force_ckpt = False - if if_gpu_ok and len(gpu_infos) > 0: - gpu_info = "\n".join(gpu_infos) + gpu_info = "\n".join(gpu_infos) + if is_gpu_ok: minmem = min(mem) default_batch_size = minmem // 2 if version not in v3v4set else minmem // 8 default_batch_size_s1 = minmem // 2 else: - gpu_info = "%s\t%s" % ("0", "CPU") - gpu_infos.append("%s\t%s" % ("0", "CPU")) - set_gpu_numbers.add(0) default_batch_size = default_batch_size_s1 = int(psutil.virtual_memory().total / 1024 / 1024 / 1024 / 4) if version not in v3v4set: default_sovits_epoch = 8 @@ -178,7 +132,7 @@ def set_default(): else: default_sovits_epoch = 2 default_sovits_save_every_epoch = 1 - max_sovits_epoch = 20 # 40 # 3 + max_sovits_epoch = 16 # 40 # 3 #训太多=作死 max_sovits_save_every_epoch = 10 # 10 # 3 default_batch_size = max(1, default_batch_size) @@ -188,8 +142,8 @@ def set_default(): set_default() -gpus = "-".join([i[0] for i in gpu_infos]) -default_gpu_numbers = str(sorted(list(set_gpu_numbers))[0]) +gpus = "-".join(map(str, GPU_INDEX)) +default_gpu_numbers = infer_device.index def fix_gpu_number(input): # 将越界的number强制改到界内 @@ -211,90 +165,45 @@ def fix_gpu_numbers(inputs): return inputs -pretrained_sovits_name = [ - "GPT_SoVITS/pretrained_models/s2G488k.pth", - "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", - "GPT_SoVITS/pretrained_models/s2Gv3.pth", - "GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth", -] -pretrained_gpt_name = [ - "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt", - "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", - "GPT_SoVITS/pretrained_models/s1v3.ckpt", - "GPT_SoVITS/pretrained_models/s1v3.ckpt", -] +from config import pretrained_gpt_name, pretrained_sovits_name -pretrained_model_list = ( - pretrained_sovits_name[int(version[-1]) - 1], - pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D"), - pretrained_gpt_name[int(version[-1]) - 1], - "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", - "GPT_SoVITS/pretrained_models/chinese-hubert-base", + +def check_pretrained_is_exist(version): + pretrained_model_list = ( + pretrained_sovits_name[version], + pretrained_sovits_name[version].replace("s2G", "s2D"), + pretrained_gpt_name[version], + "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + "GPT_SoVITS/pretrained_models/chinese-hubert-base", + ) + _ = "" + for i in pretrained_model_list: + if "s2Dv3" not in i and "s2Dv4" not in i and os.path.exists(i) == False: + _ += f"\n {i}" + if _: + print("warning: ", i18n("以下模型不存在:") + _) + + +check_pretrained_is_exist(version) +for key in pretrained_sovits_name.keys(): + if os.path.exists(pretrained_sovits_name[key]) == False: + pretrained_sovits_name[key] = "" +for key in pretrained_gpt_name.keys(): + if os.path.exists(pretrained_gpt_name[key]) == False: + pretrained_gpt_name[key] = "" + +from config import ( + GPT_weight_root, + GPT_weight_version2root, + SoVITS_weight_root, + SoVITS_weight_version2root, + change_choices, + get_weights_names, ) -_ = "" -for i in pretrained_model_list: - if "s2Dv3" not in i and os.path.exists(i) == False: - if "s2Dv4" in i: - continue - _ += f"\n {i}" -if os.path.exists(pretrained_sovits_name[3]) == False: - _ += f"\n {pretrained_sovits_name[3]}" -if _: - print("warning: ", i18n("以下模型不存在:") + _) - -_ = [[], []] -for i in range(4): - if os.path.exists(pretrained_gpt_name[i]): - _[0].append(pretrained_gpt_name[i]) - else: - _[0].append("") ##没有下pretrained模型的,说不定他们是想自己从零训底模呢 - if os.path.exists(pretrained_sovits_name[i]): - _[-1].append(pretrained_sovits_name[i]) - else: - _[-1].append("") -pretrained_gpt_name, pretrained_sovits_name = _ - -SoVITS_weight_root = ["SoVITS_weights", "SoVITS_weights_v2", "SoVITS_weights_v3", "SoVITS_weights_v4"] -GPT_weight_root = ["GPT_weights", "GPT_weights_v2", "GPT_weights_v3", "GPT_weights_v4"] for root in SoVITS_weight_root + GPT_weight_root: os.makedirs(root, exist_ok=True) - - -def get_weights_names(): - SoVITS_names = [name for name in pretrained_sovits_name if name != ""] - for path in SoVITS_weight_root: - for name in os.listdir(path): - if name.endswith(".pth"): - SoVITS_names.append("%s/%s" % (path, name)) - GPT_names = [name for name in pretrained_gpt_name if name != ""] - for path in GPT_weight_root: - for name in os.listdir(path): - if name.endswith(".ckpt"): - GPT_names.append("%s/%s" % (path, name)) - return SoVITS_names, GPT_names - - SoVITS_names, GPT_names = get_weights_names() -for path in SoVITS_weight_root + GPT_weight_root: - os.makedirs(path, exist_ok=True) - - -def custom_sort_key(s): - # 使用正则表达式提取字符串中的数字部分和非数字部分 - parts = re.split("(\d+)", s) - # 将数字部分转换为整数,非数字部分保持不变 - parts = [int(part) if part.isdigit() else part for part in parts] - return parts - - -def change_choices(): - SoVITS_names, GPT_names = get_weights_names() - return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, { - "choices": sorted(GPT_names, key=custom_sort_key), - "__type__": "update", - } - p_label = None p_uvr5 = None @@ -367,7 +276,7 @@ def change_label(path_list): if p_label is None: check_for_existance([path_list]) path_list = my_utils.clean_path(path_list) - cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % ( + cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % ( python_exec, path_list, webui_port_subfix, @@ -396,7 +305,13 @@ process_name_uvr5 = i18n("人声分离WebUI") def change_uvr5(): global p_uvr5 if p_uvr5 is None: - cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s' % (python_exec, infer_device, is_half, webui_port_uvr5, is_share) + cmd = '"%s" -s tools/uvr5/webui.py "%s" %s %s %s' % ( + python_exec, + infer_device, + is_half, + webui_port_uvr5, + is_share, + ) yield ( process_info(process_name_uvr5, "opened"), {"__type__": "update", "visible": False}, @@ -420,15 +335,15 @@ process_name_tts = i18n("TTS推理WebUI") def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled): global p_tts_inference if batched_infer_enabled: - cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language) + cmd = '"%s" -s GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language) else: - cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language) + cmd = '"%s" -s GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language) # #####v3暂不支持加速推理 # if version=="v3": # cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) if p_tts_inference is None: - os.environ["gpt_path"] = gpt_path if "/" in gpt_path else "%s/%s" % (GPT_weight_root, gpt_path) - os.environ["sovits_path"] = sovits_path if "/" in sovits_path else "%s/%s" % (SoVITS_weight_root, sovits_path) + os.environ["gpt_path"] = gpt_path + os.environ["sovits_path"] = sovits_path os.environ["cnhubert_base_path"] = cnhubert_base_path os.environ["bert_path"] = bert_path os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_number(gpu_number) @@ -463,7 +378,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_ asr_inp_dir = my_utils.clean_path(asr_inp_dir) asr_opt_dir = my_utils.clean_path(asr_opt_dir) check_for_existance([asr_inp_dir]) - cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}' + cmd = f'"{python_exec}" -s tools/asr/{asr_dict[asr_model]["path"]}' cmd += f' -i "{asr_inp_dir}"' cmd += f' -o "{asr_opt_dir}"' cmd += f" -s {asr_model_size}" @@ -524,7 +439,7 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir): denoise_inp_dir = my_utils.clean_path(denoise_inp_dir) denoise_opt_dir = my_utils.clean_path(denoise_opt_dir) check_for_existance([denoise_inp_dir]) - cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s' % ( + cmd = '"%s" -s tools/cmd-denoise.py -i "%s" -o "%s" -p %s' % ( python_exec, denoise_inp_dir, denoise_opt_dir, @@ -574,7 +489,9 @@ def close_denoise(): p_train_SoVITS = None process_name_sovits = i18n("SoVITS训练") + def open1Ba( + version, batch_size, total_epoch, exp_name, @@ -588,9 +505,15 @@ def open1Ba( if_grad_ckpt, lora_rank, ): - global p_train_SoVITS + global p_train_SoVITS, auto_gpt_stop_flag + auto_gpt_stop_flag = False if p_train_SoVITS == None: - with open("GPT_SoVITS/configs/s2.json") as f: + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + with open(config_file) as f: data = f.read() data = json.loads(data) s2_dir = "%s/%s" % (exp_root, exp_name) @@ -613,20 +536,22 @@ def open1Ba( data["train"]["lora_rank"] = lora_rank data["model"]["version"] = version data["data"]["exp_dir"] = data["s2_ckpt_dir"] = s2_dir - data["save_weight_dir"] = SoVITS_weight_root[int(version[-1]) - 1] + data["save_weight_dir"] = SoVITS_weight_version2root[version] data["name"] = exp_name data["version"] = version tmp_config_path = "%s/tmp_s2.json" % tmp with open(tmp_config_path, "w") as f: f.write(json.dumps(data)) - if version in ["v1", "v2"]: - cmd = '"%s" GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path) + if version in ["v1", "v2", "v2Pro", "v2ProPlus"]: + cmd = '"%s" -s GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path) else: - cmd = '"%s" GPT_SoVITS/s2_train_v3_lora.py --config "%s"' % (python_exec, tmp_config_path) + cmd = '"%s" -s GPT_SoVITS/s2_train_v3_lora.py --config "%s"' % (python_exec, tmp_config_path) yield ( process_info(process_name_sovits, "opened"), {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, ) print(cmd) p_train_SoVITS = Popen(cmd, shell=True) @@ -636,18 +561,23 @@ def open1Ba( yield ( process_info(process_name_sovits, "finish"), {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False},SoVITS_dropdown_update,GPT_dropdown_update + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, ) else: yield ( process_info(process_name_sovits, "occupy"), {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, ) def close1Ba(): - global p_train_SoVITS + global p_train_SoVITS, auto_gpt_stop_flag + auto_gpt_stop_flag = True if p_train_SoVITS is not None: kill_process(p_train_SoVITS.pid, process_name_sovits) p_train_SoVITS = None @@ -694,7 +624,7 @@ def open1Bb( data["train"]["if_save_every_weights"] = if_save_every_weights data["train"]["if_save_latest"] = if_save_latest data["train"]["if_dpo"] = if_dpo - data["train"]["half_weights_save_dir"] = GPT_weight_root[int(version[-1]) - 1] + data["train"]["half_weights_save_dir"] = GPT_weight_version2root[version] data["train"]["exp_name"] = exp_name data["train_semantic_path"] = "%s/6-name2semantic.tsv" % s1_dir data["train_phoneme_path"] = "%s/2-name2text.txt" % s1_dir @@ -707,11 +637,13 @@ def open1Bb( with open(tmp_config_path, "w") as f: f.write(yaml.dump(data, default_flow_style=False)) # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) - cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path) + cmd = '"%s" -s GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path) yield ( process_info(process_name_gpt, "opened"), {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, ) print(cmd) p_train_GPT = Popen(cmd, shell=True) @@ -721,13 +653,17 @@ def open1Bb( yield ( process_info(process_name_gpt, "finish"), {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False},SoVITS_dropdown_update,GPT_dropdown_update + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, ) else: yield ( process_info(process_name_gpt, "occupy"), {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True},{"__type__": "update"},{"__type__": "update"} + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, ) @@ -747,6 +683,7 @@ ps_slice = [] process_name_slice = i18n("语音切分") +auto_asr_stop_flag = True def open_slice(inp_list, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts): if not inp_list or len(inp_list) == 0: yield ( @@ -758,7 +695,8 @@ def open_slice(inp_list, opt_root, threshold, min_length, min_interval, hop_size {"__type__": "update"}, ) return - global ps_slice + global ps_slice, auto_asr_stop_flag + auto_asr_stop_flag = False opt_root = my_utils.clean_path(opt_root) os.makedirs(opt_root, exist_ok=True) @@ -816,7 +754,8 @@ def open_slice(inp_list, opt_root, threshold, min_length, min_interval, hop_size def close_slice(): - global ps_slice + global ps_slice, auto_asr_stop_flag + auto_asr_stop_flag = True if ps_slice != []: for p_slice in ps_slice: try: @@ -862,7 +801,7 @@ def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir): } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1a.append(p) @@ -919,11 +858,12 @@ def close1a(): ) +sv_path = "GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt" ps1b = [] process_name_1b = i18n("语音自监督特征提取") -def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): +def open1b(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): global ps1b inp_text = my_utils.clean_path(inp_text) inp_wav_dir = my_utils.clean_path(inp_wav_dir) @@ -936,6 +876,7 @@ def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): "exp_name": exp_name, "opt_dir": "%s/%s" % (exp_root, exp_name), "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, "is_half": str(is_half), } gpu_names = gpu_numbers.split("-") @@ -949,7 +890,7 @@ def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1b.append(p) @@ -961,6 +902,23 @@ def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): for p in ps1b: p.wait() ps1b = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + for p in ps1b: + p.wait() + ps1b = [] yield ( process_info(process_name_1b, "finish"), {"__type__": "update", "visible": True}, @@ -994,19 +952,24 @@ ps1c = [] process_name_1c = i18n("语义Token提取") -def open1c(inp_text, exp_name, gpu_numbers, pretrained_s2G_path): +def open1c(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, pretrained_s2G_path): global ps1c inp_text = my_utils.clean_path(inp_text) - if check_for_existance([inp_text, ""], is_dataset_processing=True): - check_details([inp_text, ""], is_dataset_processing=True) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) if ps1c == []: opt_dir = "%s/%s" % (exp_root, exp_name) + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) config = { "inp_text": inp_text, "exp_name": exp_name, "opt_dir": opt_dir, "pretrained_s2G": pretrained_s2G_path, - "s2config_path": "GPT_SoVITS/configs/s2.json", + "s2config_path": config_file, "is_half": str(is_half), } gpu_names = gpu_numbers.split("-") @@ -1020,7 +983,7 @@ def open1c(inp_text, exp_name, gpu_numbers, pretrained_s2G_path): } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1c.append(p) @@ -1073,8 +1036,10 @@ def close1c(): ps1abc = [] process_name_1abc = i18n("训练集格式化一键三连") - +auto_sovits_stop_flag = True +auto_gpt_stop_flag = True def open1abc( + version, inp_text, inp_wav_dir, exp_name, @@ -1085,7 +1050,9 @@ def open1abc( ssl_pretrained_dir, pretrained_s2G_path, ): - global ps1abc + global ps1abc, auto_sovits_stop_flag, auto_gpt_stop_flag + auto_sovits_stop_flag = False + auto_gpt_stop_flag = False inp_text = my_utils.clean_path(inp_text) inp_wav_dir = my_utils.clean_path(inp_wav_dir) if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): @@ -1118,7 +1085,7 @@ def open1abc( } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1abc.append(p) @@ -1152,6 +1119,7 @@ def open1abc( "exp_name": exp_name, "opt_dir": opt_dir, "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, } gpu_names = gpu_numbers1Ba.split("-") all_parts = len(gpu_names) @@ -1164,7 +1132,7 @@ def open1abc( } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1abc.append(p) @@ -1175,23 +1143,45 @@ def open1abc( ) for p in ps1abc: p.wait() + ps1abc = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + for p in ps1abc: + p.wait() + ps1abc = [] yield ( i18n("进度") + ": 1A-Done, 1B-Done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, ) - ps1abc = [] #############################1c path_semantic = "%s/6-name2semantic.tsv" % opt_dir if os.path.exists(path_semantic) == False or ( os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31 ): + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) config = { "inp_text": inp_text, "exp_name": exp_name, "opt_dir": opt_dir, "pretrained_s2G": pretrained_s2G_path, - "s2config_path": "GPT_SoVITS/configs/s2.json", + "s2config_path": config_file, } gpu_names = gpu_numbers1c.split("-") all_parts = len(gpu_names) @@ -1204,7 +1194,7 @@ def open1abc( } ) os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec print(cmd) p = Popen(cmd, shell=True) ps1abc.append(p) @@ -1252,7 +1242,9 @@ def open1abc( def close1abc(): - global ps1abc + global ps1abc, auto_sovits_stop_flag, auto_gpt_stop_flag + auto_sovits_stop_flag = True + auto_gpt_stop_flag = True if ps1abc != []: for p1abc in ps1abc: try: @@ -1266,24 +1258,22 @@ def close1abc(): {"__type__": "update", "visible": False}, ) + def switch_version(version_): os.environ["version"] = version_ global version version = version_ - if pretrained_sovits_name[int(version[-1]) - 1] != "" and pretrained_gpt_name[int(version[-1]) - 1] != "": + if pretrained_sovits_name[version] != "" and pretrained_gpt_name[version] != "": ... else: gr.Warning(i18n("未下载模型") + ": " + version.upper()) set_default() - print(f"{i18n('预训练SoVITS-G模型路径')}: {pretrained_sovits_name[int(version[-1]) - 1]}") - print(f"{i18n('预训练SoVITS-D模型路径')}: {pretrained_sovits_name[int(version[-1]) - 1].replace('s2G', 's2D')}") - print(f"{i18n('预训练GPT模型路径')}: {pretrained_gpt_name[int(version[-1]) - 1]}") return ( - {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1]}, - {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D")}, - {"__type__": "update", "value": pretrained_gpt_name[int(version[-1]) - 1]}, - {"__type__": "update", "value": pretrained_gpt_name[int(version[-1]) - 1]}, - {"__type__": "update", "value": pretrained_sovits_name[int(version[-1]) - 1]}, + {"__type__": "update", "value": pretrained_sovits_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version].replace("s2G", "s2D")}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version]}, {"__type__": "update", "value": default_batch_size, "maximum": default_max_batch_size}, {"__type__": "update", "value": default_sovits_epoch, "maximum": max_sovits_epoch}, {"__type__": "update", "value": default_sovits_save_every_epoch, "maximum": max_sovits_save_every_epoch}, @@ -1301,7 +1291,7 @@ def switch_version(version_): if os.path.exists("GPT_SoVITS/text/G2PWModel"): ... else: - cmd = '"%s" GPT_SoVITS/download.py' % python_exec + cmd = '"%s" -s GPT_SoVITS/download.py' % python_exec p = Popen(cmd, shell=True) p.wait() @@ -1310,7 +1300,15 @@ def sync(text): return {"__type__": "update", "value": text} -with gr.Blocks(title="GPT-SoVITS WebUI") as app: +with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app: + gr.HTML( + top_html.format( + i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + ), + elem_classes="markdown", + ) + with gr.Accordion("使用说明", open=False): gr.Markdown( value=""" @@ -1336,46 +1334,54 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ## 训练新的模型 1. 切换到`其他工具`页面。 2. 点击`清空输出目录`。 -3. 切换回`微调训练`页面,按照微调训练步骤从头开始。**记得换一个模型名。** - -## 免责声明 -本软件以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。 -如不认可该条款,则不能使用或引用软件包内任何代码和文件,详见根目录LICENSE。 - -## 快速跳转 -- 中文教程文档: https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e -- GitHub项目页面: https://github.com/RVC-Boss/GPT-SoVITS -""") - +3. 切换回`微调训练`页面,按照微调训练步骤从头开始。**记得换一个模型名。**""") + with gr.Row(): - with gr.Row(): - exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True) - gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False) - version_checkbox = gr.Radio(label=i18n("版本"), value=version, choices=["v1", "v2", "v4"]) - with gr.Row(visible=False): + with gr.Row(equal_height=True): + exp_name = gr.Textbox( + label=i18n("*实验/模型名"), + value="xxx", + interactive=True, + scale=3, + ) + gpu_info_box = gr.Textbox( + label=i18n("显卡信息"), + value=gpu_info, + visible=True, + interactive=False, + scale=5, + ) + version_checkbox = gr.Radio( + label=i18n("训练模型的版本"), + value=version, + choices=["v1", "v2", "v4", "v2Pro", "v2ProPlus"], + scale=5, + ) + with gr.Row(visible=False): + with gr.Row(equal_height=True): + pretrained_s1 = gr.Textbox( + label=i18n("预训练GPT模型路径"), + value=pretrained_gpt_name[version], + interactive=True, + lines=1, + max_lines=1, + scale=3, + ) pretrained_s2G = gr.Textbox( label=i18n("预训练SoVITS-G模型路径"), - value=pretrained_sovits_name[int(version[-1]) - 1], + value=pretrained_sovits_name[version], interactive=True, - lines=2, - max_lines=3, - scale=9, + lines=1, + max_lines=1, + scale=5, ) pretrained_s2D = gr.Textbox( label=i18n("预训练SoVITS-D模型路径"), - value=pretrained_sovits_name[int(version[-1]) - 1].replace("s2G", "s2D"), + value=pretrained_sovits_name[version].replace("s2G", "s2D"), interactive=True, - lines=2, - max_lines=3, - scale=9, - ) - pretrained_s1 = gr.Textbox( - label=i18n("预训练GPT模型路径"), - value=pretrained_gpt_name[int(version[-1]) - 1], - interactive=True, - lines=2, - max_lines=3, - scale=10, + lines=1, + max_lines=1, + scale=5, ) with gr.Tabs(): @@ -1383,9 +1389,13 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: # 语音切分工具 with gr.Row(): slice_inp_path = gr.Files(label=i18n("选择一个或多个音频文件"), file_types=["audio"]) - slice_opt_root = gr.Textbox(label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt", visible=False) + slice_opt_root = gr.Textbox( + label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt", visible=False + ) with gr.Row(visible=False): - threshold = gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34") + threshold = gr.Textbox( + label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34" + ) min_length = gr.Textbox( label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"), value="4000", @@ -1415,7 +1425,12 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) with gr.Row(visible=False): n_process = gr.Slider( - minimum=1, maximum=n_cpu, step=1, label=i18n("切割使用的进程数"), value=4, interactive=True + minimum=1, + maximum=n_cpu, + step=1, + label=i18n("切割使用的进程数"), + value=4, + interactive=True, ) with gr.Row(): slicer_info = gr.Textbox(label=process_info(process_name_slice, "info")) @@ -1427,10 +1442,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) # 语音降噪工具 + # gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(不稳定,先别用,可能劣化模型效果!)")) with gr.Row(visible=False): with gr.Column(scale=3): with gr.Row(): - denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="output/slicer_opt") + denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="") denoise_output_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/denoise_opt") with gr.Row(): denoise_info = gr.Textbox(label=process_info(process_name_denoise, "info")) @@ -1446,7 +1462,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: asr_inp_dir = gr.Textbox( label=i18n("语音切分文件夹路径"), value="output/slicer_opt", interactive=True ) - asr_opt_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True, visible=False) + asr_opt_dir = gr.Textbox( + label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True, visible=False + ) if_auto_asr = gr.Checkbox( label=i18n("自动开启语音识别"), value=False, @@ -1478,27 +1496,27 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: value="2."+process_info(process_name_asr, "close"), variant="primary", visible=False ) - def change_lang_choices(key): # 根据选择的模型修改可选的语言 - return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} + def change_lang_choices(key): # 根据选择的模型修改可选的语言 + return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} - def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 - return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} + def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 + return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} - def change_precision_choices(key): # 根据选择的模型修改可选的语言 - if key == "Faster Whisper (多语种)": - if default_batch_size <= 4: - precision = "int8" - elif is_half: - precision = "float16" - else: - precision = "float32" + def change_precision_choices(key): # 根据选择的模型修改可选的语言 + if key == "Faster Whisper (多语种)": + if default_batch_size <= 4: + precision = "int8" + elif is_half: + precision = "float16" else: precision = "float32" - return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} + else: + precision = "float32" + return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} - asr_model.change(change_lang_choices, [asr_model], [asr_lang]) - asr_model.change(change_size_choices, [asr_model], [asr_size]) - asr_model.change(change_precision_choices, [asr_model], [asr_precision]) + asr_model.change(change_lang_choices, [asr_model], [asr_lang]) + asr_model.change(change_size_choices, [asr_model], [asr_size]) + asr_model.change(change_precision_choices, [asr_model], [asr_precision]) # 语音文本校对标注工具 path_list = gr.Textbox( @@ -1508,101 +1526,152 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) with gr.Row(): label_info = gr.Textbox(label=process_info(process_name_subfix, "info")) - open_label = gr.Button(value="3."+process_info(process_name_subfix, "open"), variant="primary", visible=True) + open_label = gr.Button( + value="3."+process_info(process_name_subfix, "open"), variant="primary", visible=True + ) close_label = gr.Button( value="3."+process_info(process_name_subfix, "close"), variant="primary", visible=False ) open_label.click(change_label, [path_list], [label_info, open_label, close_label]) close_label.click(change_label, [path_list], [label_info, open_label, close_label]) + with gr.Accordion(label=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹"), visible=False): + with gr.Row(): + with gr.Row(): + inp_text = gr.Textbox( + label=i18n("*文本标注文件"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "asr_opt", "slicer_opt.list"), + interactive=True, + scale=10, + ) + with gr.Row(): + inp_wav_dir = gr.Textbox( + label=i18n("*训练集音频文件目录"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "slicer_opt"), + interactive=True, + placeholder=i18n( + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" + ), + scale=10, + ) + + def conditional_open_asr(auto_asr, *args): + global auto_asr_stop_flag + if auto_asr and not auto_asr_stop_flag: + yield from open_asr(*args) + else: + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update(), gr.update()] + open_slicer_button.click( + open_slice, + [ + slice_inp_path, + slice_opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + n_process, + ], + [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], + ).then( + conditional_open_asr, + [if_auto_asr, asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir] + ) + close_slicer_button.click(close_slice, [], [slicer_info, open_slicer_button, close_slicer_button]) + + open_denoise_button.click( + open_denoise, + [denoise_input_dir, denoise_output_dir], + [denoise_info, open_denoise_button, close_denoise_button, asr_inp_dir, inp_wav_dir], + ) + close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) + + open_asr_button.click( + open_asr, + [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], + ) + close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) + # 训练集格式化工具 - with gr.Row(visible=False): + with gr.Accordion(label="1Aa-" + process_name_1a, visible=False): with gr.Row(): - inp_text = gr.Textbox( - label=i18n("*文本标注文件"), - value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "asr_opt", "slicer_opt.list"), - interactive=True, - scale=10, - ) + with gr.Row(): + gpu_numbers1a = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + bert_pretrained_dir = gr.Textbox( + label=i18n("预训练中文BERT模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + interactive=False, + lines=2, + ) + with gr.Row(): + button1a_open = gr.Button( + value=process_info(process_name_1a, "open"), variant="primary", visible=True + ) + button1a_close = gr.Button( + value=process_info(process_name_1a, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1a = gr.Textbox(label=process_info(process_name_1a, "info")) + + with gr.Accordion(label="1Ab-" + process_name_1b, visible=False): with gr.Row(): - inp_wav_dir = gr.Textbox( - label=i18n("*训练集音频文件目录"), - value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "slicer_opt"), - interactive=True, - placeholder=i18n( - "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" - ), - scale=10, - ) - with gr.Row(visible=False): + with gr.Row(): + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + cnhubert_base_dir = gr.Textbox( + label=i18n("预训练SSL模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-hubert-base", + interactive=False, + lines=2, + ) + with gr.Row(): + button1b_open = gr.Button( + value=process_info(process_name_1b, "open"), variant="primary", visible=True + ) + button1b_close = gr.Button( + value=process_info(process_name_1b, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1b = gr.Textbox(label=process_info(process_name_1b, "info")) + + with gr.Accordion(label="1Ac-" + process_name_1c, visible=False): with gr.Row(): - gpu_numbers1a = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - bert_pretrained_dir = gr.Textbox( - label=i18n("预训练中文BERT模型路径"), - value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", - interactive=False, - lines=2, - ) - with gr.Row(): - button1a_open = gr.Button( - value=process_info(process_name_1a, "open"), variant="primary", visible=True - ) - button1a_close = gr.Button( - value=process_info(process_name_1a, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1a = gr.Textbox(label=process_info(process_name_1a, "info")) - with gr.Row(visible=False): - with gr.Row(): - gpu_numbers1Ba = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - cnhubert_base_dir = gr.Textbox( - label=i18n("预训练SSL模型路径"), - value="GPT_SoVITS/pretrained_models/chinese-hubert-base", - interactive=False, - lines=2, - ) - with gr.Row(): - button1b_open = gr.Button( - value=process_info(process_name_1b, "open"), variant="primary", visible=True - ) - button1b_close = gr.Button( - value=process_info(process_name_1b, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1b = gr.Textbox(label=process_info(process_name_1b, "info")) - with gr.Row(visible=False): - with gr.Row(): - gpu_numbers1c = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - pretrained_s2G_ = gr.Textbox( - label=i18n("预训练SoVITS-G模型路径"), - value=pretrained_sovits_name[int(version[-1]) - 1], - interactive=False, - lines=2, - ) - with gr.Row(): - button1c_open = gr.Button( - value=process_info(process_name_1c, "open"), variant="primary", visible=True - ) - button1c_close = gr.Button( - value=process_info(process_name_1c, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1c = gr.Textbox(label=process_info(process_name_1c, "info")) + with gr.Row(): + gpu_numbers1c = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + pretrained_s2G_ = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[version], + interactive=False, + lines=2, + ) + with gr.Row(): + button1c_open = gr.Button( + value=process_info(process_name_1c, "open"), variant="primary", visible=True + ) + button1c_close = gr.Button( + value=process_info(process_name_1c, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1c = gr.Textbox(label=process_info(process_name_1c, "info")) with gr.Row(): if_auto_sovits = gr.Checkbox( label=i18n("自动开启SoVITS训练"), @@ -1624,68 +1693,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: button1abc_close = gr.Button( value="4."+process_info(process_name_1abc, "close"), variant="primary", visible=False ) - + pretrained_s2G.change(sync, [pretrained_s2G], [pretrained_s2G_]) - def conditional_open_asr(auto_asr, *args): - if auto_asr: - yield from open_asr(*args) - else: - yield [ - "跳过语音识别", - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - gr.update(), - gr.update(), - gr.update() - ] - def conditional_close_asr(auto_asr, *args): - if auto_asr: - yield from close_asr(*args) - open_slicer_button.click( - open_slice, - [ - slice_inp_path, - slice_opt_root, - threshold, - min_length, - min_interval, - hop_size, - max_sil_kept, - _max, - alpha, - n_process, - ], - [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], - ).then( - conditional_open_asr, - [if_auto_asr, asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], - [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], - ) - close_slicer_button.click( - close_slice, - [], - [slicer_info, open_slicer_button, close_slicer_button] - ).then( - conditional_close_asr, - [if_auto_asr], - [asr_info, open_asr_button, close_asr_button] - ) - - open_denoise_button.click( - open_denoise, - [denoise_input_dir, denoise_output_dir], - [denoise_info, open_denoise_button, close_denoise_button, asr_inp_dir, inp_wav_dir], - ) - close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) - - open_asr_button.click( - open_asr, - [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], - [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], - ) - close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) - button1a_open.click( open1a, [inp_text, inp_wav_dir, exp_name, gpu_numbers1a, bert_pretrained_dir], @@ -1694,44 +1704,31 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: button1a_close.click(close1a, [], [info1a, button1a_open, button1a_close]) button1b_open.click( open1b, - [inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], [info1b, button1b_open, button1b_close], ) button1b_close.click(close1b, [], [info1b, button1b_open, button1b_close]) button1c_open.click( - open1c, [inp_text, exp_name, gpu_numbers1c, pretrained_s2G], [info1c, button1c_open, button1c_close] + open1c, + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1c, pretrained_s2G], + [info1c, button1c_open, button1c_close], ) button1c_close.click(close1c, [], [info1c, button1c_open, button1c_close]) + def conditional_open1Ba(auto_sovits, *args): - if auto_sovits: + global auto_sovits_stop_flag + if auto_sovits and not auto_sovits_stop_flag: yield from open1Ba(*args) else: - yield [ - "跳过SoVITS训练", - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - gr.update(), - gr.update() - ] - def conditional_close1Ba(auto_sovits, *args): - if auto_sovits: - yield from close1Ba(*args) + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update()] def conditional_open1Bb(auto_gpt, *args): - if auto_gpt: + global auto_gpt_stop_flag + if auto_gpt and not auto_gpt_stop_flag: yield from open1Bb(*args) else: - yield [ - "跳过GPT训练", - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - gr.update(), - gr.update() - ] - def conditional_close1Bb(auto_gpt, *args): - if auto_gpt: - yield from close1Bb(*args) - - # SoVITS 训练 + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update()] + + # SoVITS训练 with gr.Column(): with gr.Row(): batch_size = gr.Slider( @@ -1752,7 +1749,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) save_every_epoch = gr.Slider( minimum=1, - maximum=max_sovits_save_every_epoch, + maximum=100, step=1, label=i18n("保存频率save_every_epoch"), value=5, @@ -1774,7 +1771,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: visible=True if version in v3v4set else False, ) # v1v2 not need gpu_numbers1Ba = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s" % (gpus), + interactive=True, ) with gr.Column(visible=False): if_save_latest = gr.Checkbox( @@ -1805,7 +1804,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: value="5."+process_info(process_name_sovits, "close"), variant="primary", visible=False ) - # GPT 训练 + # GPT训练 with gr.Column(): with gr.Row(): batch_size1Bb = gr.Slider( @@ -1826,7 +1825,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) save_every_epoch1Bb = gr.Slider( minimum=1, - maximum=50, + maximum=100, step=1, label=i18n("保存频率save_every_epoch"), value=5, @@ -1834,27 +1833,30 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) with gr.Row(): if_dpo = gr.Checkbox( - label=i18n("是否开启DPO训练选项 (实验性)"), + label=i18n("是否开启DPO训练选项(实验性)"), value=False, interactive=True, show_label=True, ) gpu_numbers1Bb = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s" % (gpus), + interactive=True, ) with gr.Column(visible=False): - if_save_latest1Bb = gr.Checkbox( - label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), - value=True, - interactive=True, - show_label=True, - ) - if_save_every_weights1Bb = gr.Checkbox( - label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), - value=True, - interactive=True, - show_label=True, - ) + with gr.Column(): + if_save_latest1Bb = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights1Bb = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) with gr.Row(): info1Bb = gr.Textbox(label=process_info(process_name_gpt, "info")) button1Bb_open = gr.Button( @@ -1862,43 +1864,39 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ) button1Bb_close = gr.Button( value="6."+process_info(process_name_gpt, "close"), variant="primary", visible=False - ) - - button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]).then( - conditional_close1Bb, - [if_auto_gpt], [info1Bb, button1Bb_open, button1Bb_close] - ) - button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) + ) with gr.TabItem(i18n("推理")): with gr.Row(): GPT_dropdown = gr.Dropdown( label=i18n("GPT模型列表"), - choices=sorted(GPT_names, key=custom_sort_key), - value="GPT_SoVITS/pretrained_models/s1v3.ckpt", + choices=GPT_names, + value=GPT_names[-1], interactive=True, ) SoVITS_dropdown = gr.Dropdown( label=i18n("SoVITS模型列表"), - choices=sorted(SoVITS_names, key=custom_sort_key), - value="GPT_SoVITS/pretrained_models/gsv-v4-pretrained/s2Gv4.pth", + choices=SoVITS_names, + value=SoVITS_names[0], interactive=True, ) refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") - refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) with gr.Row(): - gpu_number_1C = gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True) + gpu_number_1C = gr.Textbox( + label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True + ) batched_infer_enabled = gr.Checkbox( label=i18n("启用并行推理版本"), value=False, interactive=True, show_label=True ) with gr.Row(): - tts_info = gr.Textbox(label=process_info(process_name_tts, "info")) + tts_info = gr.Textbox(label=process_info(process_name_tts, "info"), scale=2) open_tts = gr.Button( value=process_info(process_name_tts, "open"), variant="primary", visible=True ) close_tts = gr.Button( value=process_info(process_name_tts, "close"), variant="primary", visible=False - ) + ) open_tts.click( change_tts_inference, [ @@ -1923,9 +1921,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: ], [tts_info, open_tts, close_tts], ) + button1Ba_open.click( open1Ba, [ + version_checkbox, batch_size, total_epoch, exp_name, @@ -1939,7 +1939,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: if_grad_ckpt, lora_rank, ], - [info1Ba, button1Ba_open, button1Ba_close,SoVITS_dropdown,GPT_dropdown], + [info1Ba, button1Ba_open, button1Ba_close, SoVITS_dropdown, GPT_dropdown], ).then( conditional_open1Bb, [ @@ -1954,7 +1954,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: gpu_numbers1Bb, pretrained_s1, ], - [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], ) button1Bb_open.click( open1Bb, @@ -1969,7 +1969,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: gpu_numbers1Bb, pretrained_s1, ], - [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], ) version_checkbox.change( switch_version, @@ -1993,6 +1993,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: button1abc_open.click( open1abc, [ + version_checkbox, inp_text, inp_wav_dir, exp_name, @@ -2008,6 +2009,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: conditional_open1Ba, [ if_auto_sovits, + version_checkbox, batch_size, total_epoch, exp_name, @@ -2036,26 +2038,27 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: gpu_numbers1Bb, pretrained_s1, ], - [info1Bb, button1Bb_open, button1Bb_close,SoVITS_dropdown,GPT_dropdown], - ) - button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]).then( - conditional_close1Ba, - [if_auto_sovits], [info1Ba, button1Ba_open, button1Ba_close] - ).then( - conditional_close1Bb, - [if_auto_gpt], [info1Bb, button1Bb_open, button1Bb_close] + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], ) + button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]) + button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]) + button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) + with gr.TabItem(i18n("其他工具")): # UVR5人声伴奏分离&去混响去延迟工具 with gr.Row(): uvr5_info = gr.Textbox(label=process_info(process_name_uvr5, "info")) - open_uvr5 = gr.Button(value=process_info(process_name_uvr5, "open"), variant="primary", visible=True) - close_uvr5 = gr.Button(value=process_info(process_name_uvr5, "close"), variant="primary", visible=False) + open_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "open"), variant="primary", visible=True + ) + close_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "close"), variant="primary", visible=False + ) open_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) close_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) - # 新增:清空输出目录功能 + # 清空输出目录 with gr.Row(): clear_output_info = gr.Textbox(label=i18n("清空输出目录状态"), value="", interactive=False) clear_output_button = gr.Button(value=i18n("清空输出目录"), variant="stop")