diff --git a/go-webui-simple-mode.bat b/go-webui-simple-mode.bat new file mode 100644 index 00000000..a55a79bc --- /dev/null +++ b/go-webui-simple-mode.bat @@ -0,0 +1,2 @@ +runtime\python.exe -I webui_simple.py zh_CN +pause diff --git a/webui_simple.py b/webui_simple.py new file mode 100644 index 00000000..1a5e7b63 --- /dev/null +++ b/webui_simple.py @@ -0,0 +1,2083 @@ +import os +import sys + +os.environ["version"] = version = "v2Pro" +now_dir = os.getcwd() +sys.path.insert(0, now_dir) +import warnings + +warnings.filterwarnings("ignore") +import json +import platform +import shutil +import signal + +import psutil +import torch +import yaml + +os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO" +torch.manual_seed(233333) +tmp = os.path.join(now_dir, "TEMP") +os.makedirs(tmp, exist_ok=True) +os.environ["TEMP"] = tmp +if os.path.exists(tmp): + for name in os.listdir(tmp): + if name == "jieba.cache": + continue + path = "%s/%s" % (tmp, name) + delete = os.remove if os.path.isfile(path) else shutil.rmtree + try: + delete(path) + except Exception as e: + print(str(e)) + pass +import site +import traceback + +site_packages_roots = [] +for path in site.getsitepackages(): + if "packages" in path: + site_packages_roots.append(path) +if site_packages_roots == []: + site_packages_roots = ["%s/runtime/Lib/site-packages" % now_dir] +# os.environ["OPENBLAS_NUM_THREADS"] = "4" +os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" +os.environ["all_proxy"] = "" +for site_packages_root in site_packages_roots: + if os.path.exists(site_packages_root): + try: + with open("%s/users.pth" % (site_packages_root), "w") as f: + f.write( + # "%s\n%s/runtime\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + "%s\n%s/GPT_SoVITS/BigVGAN\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + % (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir) + ) + break + except PermissionError: + traceback.print_exc() +import shutil +import subprocess +from subprocess import Popen + +from tools.assets import css, js, top_html +from tools.i18n.i18n import I18nAuto, scan_language_list + +language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" +os.environ["language"] = language +i18n = I18nAuto(language=language) +from multiprocessing import cpu_count + +from config import ( + GPU_INDEX, + GPU_INFOS, + IS_GPU, + exp_root, + infer_device, + is_half, + is_share, + memset, + python_exec, + webui_port_infer_tts, + webui_port_main, + webui_port_subfix, + webui_port_uvr5, +) +from tools import my_utils +from tools.my_utils import check_details, check_for_existance + +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu +try: + import gradio.analytics as analytics + + analytics.version_check = lambda: None +except: + ... +import gradio as gr + +n_cpu = cpu_count() + +set_gpu_numbers = GPU_INDEX +gpu_infos = GPU_INFOS +mem = memset +is_gpu_ok = IS_GPU + +v3v4set = {"v3", "v4"} + + +def set_default(): + global \ + default_batch_size, \ + default_max_batch_size, \ + gpu_info, \ + default_sovits_epoch, \ + default_sovits_save_every_epoch, \ + max_sovits_epoch, \ + max_sovits_save_every_epoch, \ + default_batch_size_s1, \ + if_force_ckpt + if_force_ckpt = False + gpu_info = "\n".join(gpu_infos) + if is_gpu_ok: + minmem = min(mem) + default_batch_size = minmem // 2 if version not in v3v4set else minmem // 8 + default_batch_size_s1 = minmem // 2 + else: + default_batch_size = default_batch_size_s1 = int(psutil.virtual_memory().total / 1024 / 1024 / 1024 / 4) + if version not in v3v4set: + default_sovits_epoch = 8 + default_sovits_save_every_epoch = 4 + max_sovits_epoch = 25 # 40 + max_sovits_save_every_epoch = 25 # 10 + else: + default_sovits_epoch = 2 + default_sovits_save_every_epoch = 1 + max_sovits_epoch = 16 # 40 # 3 #训太多=作死 + max_sovits_save_every_epoch = 10 # 10 # 3 + + default_batch_size = max(1, default_batch_size) + default_batch_size_s1 = max(1, default_batch_size_s1) + default_max_batch_size = default_batch_size * 3 + + +set_default() + +gpus = "-".join(map(str, GPU_INDEX)) +default_gpu_numbers = infer_device.index + + +def fix_gpu_number(input): # 将越界的number强制改到界内 + try: + if int(input) not in set_gpu_numbers: + return default_gpu_numbers + except: + return input + return input + + +def fix_gpu_numbers(inputs): + output = [] + try: + for input in inputs.split(","): + output.append(str(fix_gpu_number(input))) + return ",".join(output) + except: + return inputs + + +from config import pretrained_gpt_name, pretrained_sovits_name + + +def check_pretrained_is_exist(version): + pretrained_model_list = ( + pretrained_sovits_name[version], + pretrained_sovits_name[version].replace("s2G", "s2D"), + pretrained_gpt_name[version], + "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + "GPT_SoVITS/pretrained_models/chinese-hubert-base", + ) + _ = "" + for i in pretrained_model_list: + if "s2Dv3" not in i and "s2Dv4" not in i and os.path.exists(i) == False: + _ += f"\n {i}" + if _: + print("warning: ", i18n("以下模型不存在:") + _) + + +check_pretrained_is_exist(version) +for key in pretrained_sovits_name.keys(): + if os.path.exists(pretrained_sovits_name[key]) == False: + pretrained_sovits_name[key] = "" +for key in pretrained_gpt_name.keys(): + if os.path.exists(pretrained_gpt_name[key]) == False: + pretrained_gpt_name[key] = "" + +from config import ( + GPT_weight_root, + GPT_weight_version2root, + SoVITS_weight_root, + SoVITS_weight_version2root, + change_choices, + get_weights_names, +) + +for root in SoVITS_weight_root + GPT_weight_root: + os.makedirs(root, exist_ok=True) +SoVITS_names, GPT_names = get_weights_names() + +p_label = None +p_uvr5 = None +p_asr = None +p_denoise = None +p_tts_inference = None + + +def kill_proc_tree(pid, including_parent=True): + try: + parent = psutil.Process(pid) + except psutil.NoSuchProcess: + # Process already terminated + return + + children = parent.children(recursive=True) + for child in children: + try: + os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + if including_parent: + try: + os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + + +system = platform.system() + + +def kill_process(pid, process_name=""): + if system == "Windows": + cmd = "taskkill /t /f /pid %s" % pid + # os.system(cmd) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + else: + kill_proc_tree(pid) + print(process_name + i18n("进程已终止")) + + +def process_info(process_name="", indicator=""): + if indicator == "opened": + return process_name + i18n("已开启") + elif indicator == "open": + return i18n("开启") + process_name + elif indicator == "closed": + return process_name + i18n("已关闭") + elif indicator == "close": + return i18n("关闭") + process_name + elif indicator == "running": + return process_name + i18n("运行中") + elif indicator == "occupy": + return process_name + i18n("占用中") + "," + i18n("需先终止才能开启下一次任务") + elif indicator == "finish": + return process_name + i18n("已完成") + elif indicator == "failed": + return process_name + i18n("失败") + elif indicator == "info": + return process_name + i18n("进程输出信息") + else: + return process_name + + +process_name_subfix = i18n("音频标注WebUI") + + +def change_label(path_list): + global p_label + if p_label is None: + check_for_existance([path_list]) + path_list = my_utils.clean_path(path_list) + cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % ( + python_exec, + path_list, + webui_port_subfix, + is_share, + ) + yield ( + process_info(process_name_subfix, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_label = Popen(cmd, shell=True) + else: + kill_process(p_label.pid, process_name_subfix) + p_label = None + yield ( + process_info(process_name_subfix, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_uvr5 = i18n("人声分离WebUI") + + +def change_uvr5(): + global p_uvr5 + if p_uvr5 is None: + cmd = '"%s" -s tools/uvr5/webui.py "%s" %s %s %s' % ( + python_exec, + infer_device, + is_half, + webui_port_uvr5, + is_share, + ) + yield ( + process_info(process_name_uvr5, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_uvr5 = Popen(cmd, shell=True) + else: + kill_process(p_uvr5.pid, process_name_uvr5) + p_uvr5 = None + yield ( + process_info(process_name_uvr5, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_tts = i18n("TTS推理WebUI") + + +def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled): + global p_tts_inference + if batched_infer_enabled: + cmd = '"%s" -s GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language) + else: + cmd = '"%s" -s GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language) + # #####v3暂不支持加速推理 + # if version=="v3": + # cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) + if p_tts_inference is None: + os.environ["gpt_path"] = gpt_path + os.environ["sovits_path"] = sovits_path + os.environ["cnhubert_base_path"] = cnhubert_base_path + os.environ["bert_path"] = bert_path + os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_number(gpu_number) + os.environ["is_half"] = str(is_half) + os.environ["infer_ttswebui"] = str(webui_port_infer_tts) + os.environ["is_share"] = str(is_share) + yield ( + process_info(process_name_tts, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + print(cmd) + p_tts_inference = Popen(cmd, shell=True) + else: + kill_process(p_tts_inference.pid, process_name_tts) + p_tts_inference = None + yield ( + process_info(process_name_tts, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +from tools.asr.config import asr_dict + +process_name_asr = i18n("语音识别") + + +def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision): + global p_asr + if p_asr is None: + asr_inp_dir = my_utils.clean_path(asr_inp_dir) + asr_opt_dir = my_utils.clean_path(asr_opt_dir) + check_for_existance([asr_inp_dir]) + cmd = f'"{python_exec}" -s tools/asr/{asr_dict[asr_model]["path"]}' + cmd += f' -i "{asr_inp_dir}"' + cmd += f' -o "{asr_opt_dir}"' + cmd += f" -s {asr_model_size}" + cmd += f" -l {asr_lang}" + cmd += f" -p {asr_precision}" + output_file_name = os.path.basename(asr_inp_dir) + output_folder = asr_opt_dir or "output/asr_opt" + output_file_path = os.path.abspath(f"{output_folder}/{output_file_name}.list") + yield ( + process_info(process_name_asr, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_asr = Popen(cmd, shell=True) + p_asr.wait() + p_asr = None + yield ( + process_info(process_name_asr, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": asr_inp_dir}, + ) + else: + yield ( + process_info(process_name_asr, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close_asr(): + global p_asr + if p_asr is not None: + kill_process(p_asr.pid, process_name_asr) + p_asr = None + return ( + process_info(process_name_asr, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +process_name_denoise = i18n("语音降噪") + + +def open_denoise(denoise_inp_dir, denoise_opt_dir): + global p_denoise + if p_denoise == None: + denoise_inp_dir = my_utils.clean_path(denoise_inp_dir) + denoise_opt_dir = my_utils.clean_path(denoise_opt_dir) + check_for_existance([denoise_inp_dir]) + cmd = '"%s" -s tools/cmd-denoise.py -i "%s" -o "%s" -p %s' % ( + python_exec, + denoise_inp_dir, + denoise_opt_dir, + "float16" if is_half == True else "float32", + ) + + yield ( + process_info(process_name_denoise, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_denoise = Popen(cmd, shell=True) + p_denoise.wait() + p_denoise = None + yield ( + process_info(process_name_denoise, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": denoise_opt_dir}, + {"__type__": "update", "value": denoise_opt_dir}, + ) + else: + yield ( + process_info(process_name_denoise, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close_denoise(): + global p_denoise + if p_denoise is not None: + kill_process(p_denoise.pid, process_name_denoise) + p_denoise = None + return ( + process_info(process_name_denoise, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +p_train_SoVITS = None +process_name_sovits = i18n("SoVITS训练") + + +def open1Ba( + version, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, +): + global p_train_SoVITS, auto_gpt_stop_flag + auto_gpt_stop_flag = False + if p_train_SoVITS == None: + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + with open(config_file) as f: + data = f.read() + data = json.loads(data) + s2_dir = "%s/%s" % (exp_root, exp_name) + os.makedirs("%s/logs_s2_%s" % (s2_dir, version), exist_ok=True) + if check_for_existance([s2_dir], is_train=True): + check_details([s2_dir], is_train=True) + if is_half == False: + data["train"]["fp16_run"] = False + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["train"]["text_low_lr_rate"] = text_low_lr_rate + data["train"]["pretrained_s2G"] = pretrained_s2G + data["train"]["pretrained_s2D"] = pretrained_s2D + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["save_every_epoch"] = save_every_epoch + data["train"]["gpu_numbers"] = gpu_numbers1Ba + data["train"]["grad_ckpt"] = if_grad_ckpt + data["train"]["lora_rank"] = lora_rank + data["model"]["version"] = version + data["data"]["exp_dir"] = data["s2_ckpt_dir"] = s2_dir + data["save_weight_dir"] = SoVITS_weight_version2root[version] + data["name"] = exp_name + data["version"] = version + tmp_config_path = "%s/tmp_s2.json" % tmp + with open(tmp_config_path, "w") as f: + f.write(json.dumps(data)) + if version in ["v1", "v2", "v2Pro", "v2ProPlus"]: + cmd = '"%s" -s GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path) + else: + cmd = '"%s" -s GPT_SoVITS/s2_train_v3_lora.py --config "%s"' % (python_exec, tmp_config_path) + yield ( + process_info(process_name_sovits, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_train_SoVITS = Popen(cmd, shell=True) + p_train_SoVITS.wait() + p_train_SoVITS = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_sovits, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, + ) + else: + yield ( + process_info(process_name_sovits, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close1Ba(): + global p_train_SoVITS, auto_gpt_stop_flag + auto_gpt_stop_flag = True + if p_train_SoVITS is not None: + kill_process(p_train_SoVITS.pid, process_name_sovits) + p_train_SoVITS = None + return ( + process_info(process_name_sovits, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +p_train_GPT = None +process_name_gpt = i18n("GPT训练") + + +def open1Bb( + batch_size, + total_epoch, + exp_name, + if_dpo, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers, + pretrained_s1, +): + global p_train_GPT + if p_train_GPT == None: + with open( + "GPT_SoVITS/configs/s1longer.yaml" if version == "v1" else "GPT_SoVITS/configs/s1longer-v2.yaml" + ) as f: + data = f.read() + data = yaml.load(data, Loader=yaml.FullLoader) + s1_dir = "%s/%s" % (exp_root, exp_name) + os.makedirs("%s/logs_s1" % (s1_dir), exist_ok=True) + if check_for_existance([s1_dir], is_train=True): + check_details([s1_dir], is_train=True) + if is_half == False: + data["train"]["precision"] = "32" + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["pretrained_s1"] = pretrained_s1 + data["train"]["save_every_n_epoch"] = save_every_epoch + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_dpo"] = if_dpo + data["train"]["half_weights_save_dir"] = GPT_weight_version2root[version] + data["train"]["exp_name"] = exp_name + data["train_semantic_path"] = "%s/6-name2semantic.tsv" % s1_dir + data["train_phoneme_path"] = "%s/2-name2text.txt" % s1_dir + data["output_dir"] = "%s/logs_s1_%s" % (s1_dir, version) + # data["version"]=version + + os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_numbers(gpu_numbers.replace("-", ",")) + os.environ["hz"] = "25hz" + tmp_config_path = "%s/tmp_s1.yaml" % tmp + with open(tmp_config_path, "w") as f: + f.write(yaml.dump(data, default_flow_style=False)) + # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) + cmd = '"%s" -s GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path) + yield ( + process_info(process_name_gpt, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_train_GPT = Popen(cmd, shell=True) + p_train_GPT.wait() + p_train_GPT = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_gpt, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, + ) + else: + yield ( + process_info(process_name_gpt, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close1Bb(): + global p_train_GPT + if p_train_GPT is not None: + kill_process(p_train_GPT.pid, process_name_gpt) + p_train_GPT = None + return ( + process_info(process_name_gpt, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps_slice = [] +process_name_slice = i18n("语音切分") + + +auto_asr_stop_flag = True +def open_slice(inp_list, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts): + if not inp_list or len(inp_list) == 0: + yield ( + i18n("未选择任何文件"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + return + global ps_slice, auto_asr_stop_flag + auto_asr_stop_flag = False + opt_root = my_utils.clean_path(opt_root) + os.makedirs(opt_root, exist_ok=True) + + for idx, inp in enumerate(inp_list): + inp = my_utils.clean_path(inp) + if not os.path.isfile(inp): + yield ( + f"{i18n('文件')} {inp} {i18n('不存在')}", + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + continue + cmd = '"%s" tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s' % ( + python_exec, + inp, + opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + "0", + "1", + ) + print(f"执行命令: {cmd}") + p = Popen(cmd, shell=True) + ps_slice.append(p) + + yield ( + process_info(process_name_slice, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + for p in ps_slice: + p.wait() + ps_slice = [] + + yield ( + process_info(process_name_slice, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + ) + + +def close_slice(): + global ps_slice, auto_asr_stop_flag + auto_asr_stop_flag = True + if ps_slice != []: + for p_slice in ps_slice: + try: + kill_process(p_slice.pid, process_name_slice) + except: + traceback.print_exc() + ps_slice = [] + return ( + process_info(process_name_slice, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1a = [] +process_name_1a = i18n("文本分词与特征提取") + + +def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir): + global ps1a + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1a == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + "is_half": str(is_half), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1a.append(p) + yield ( + process_info(process_name_1a, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1a: + p.wait() + opt = [] + for i_part in range(all_parts): + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + path_text = "%s/2-name2text.txt" % opt_dir + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1a = [] + if len("".join(opt)) > 0: + yield ( + process_info(process_name_1a, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1a(): + global ps1a + if ps1a != []: + for p1a in ps1a: + try: + kill_process(p1a.pid, process_name_1a) + except: + traceback.print_exc() + ps1a = [] + return ( + process_info(process_name_1a, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +sv_path = "GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt" +ps1b = [] +process_name_1b = i18n("语音自监督特征提取") + + +def open1b(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): + global ps1b + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1b == []: + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": "%s/%s" % (exp_root, exp_name), + "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + yield ( + process_info(process_name_1b, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1b: + p.wait() + ps1b = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + for p in ps1b: + p.wait() + ps1b = [] + yield ( + process_info(process_name_1b, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1b, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1b(): + global ps1b + if ps1b != []: + for p1b in ps1b: + try: + kill_process(p1b.pid, process_name_1b) + except: + traceback.print_exc() + ps1b = [] + return ( + process_info(process_name_1b, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1c = [] +process_name_1c = i18n("语义Token提取") + + +def open1c(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, pretrained_s2G_path): + global ps1c + inp_text = my_utils.clean_path(inp_text) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1c == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": config_file, + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1c.append(p) + yield ( + process_info(process_name_1c, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1c: + p.wait() + opt = ["item_name\tsemantic_audio"] + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1c = [] + yield ( + process_info(process_name_1c, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1c, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1c(): + global ps1c + if ps1c != []: + for p1c in ps1c: + try: + kill_process(p1c.pid, process_name_1c) + except: + traceback.print_exc() + ps1c = [] + return ( + process_info(process_name_1c, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1abc = [] +process_name_1abc = i18n("训练集格式化一键三连") + +auto_sovits_stop_flag = True +auto_gpt_stop_flag = True +def open1abc( + version, + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + ssl_pretrained_dir, + pretrained_s2G_path, +): + global ps1abc, auto_sovits_stop_flag, auto_gpt_stop_flag + auto_sovits_stop_flag = False + auto_gpt_stop_flag = False + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + if ps1abc == []: + opt_dir = "%s/%s" % (exp_root, exp_name) + try: + #############################1a + path_text = "%s/2-name2text.txt" % opt_dir + if os.path.exists(path_text) == False or ( + os.path.exists(path_text) == True + and len(open(path_text, "r", encoding="utf8").read().strip("\n").split("\n")) < 2 + ): + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + "is_half": str(is_half), + } + gpu_names = gpu_numbers1a.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = [] + for i_part in range(all_parts): # txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part) + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + assert len("".join(opt)) > 0, process_info(process_name_1a, "failed") + yield ( + i18n("进度") + ": 1A-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + #############################1b + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, + } + gpu_names = gpu_numbers1Ba.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + ps1abc = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + for p in ps1abc: + p.wait() + ps1abc = [] + yield ( + i18n("进度") + ": 1A-Done, 1B-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + #############################1c + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + if os.path.exists(path_semantic) == False or ( + os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31 + ): + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": config_file, + } + gpu_names = gpu_numbers1c.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = ["item_name\tsemantic_audio"] + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + yield ( + process_info(process_name_1abc, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + except: + traceback.print_exc() + close1abc() + yield ( + process_info(process_name_1abc, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1abc, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1abc(): + global ps1abc, auto_sovits_stop_flag, auto_gpt_stop_flag + auto_sovits_stop_flag = True + auto_gpt_stop_flag = True + if ps1abc != []: + for p1abc in ps1abc: + try: + kill_process(p1abc.pid, process_name_1abc) + except: + traceback.print_exc() + ps1abc = [] + return ( + process_info(process_name_1abc, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +def switch_version(version_): + os.environ["version"] = version_ + global version + version = version_ + if pretrained_sovits_name[version] != "" and pretrained_gpt_name[version] != "": + ... + else: + gr.Warning(i18n("未下载模型") + ": " + version.upper()) + set_default() + return ( + {"__type__": "update", "value": pretrained_sovits_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version].replace("s2G", "s2D")}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version]}, + {"__type__": "update", "value": default_batch_size, "maximum": default_max_batch_size}, + {"__type__": "update", "value": default_sovits_epoch, "maximum": max_sovits_epoch}, + {"__type__": "update", "value": default_sovits_save_every_epoch, "maximum": max_sovits_save_every_epoch}, + {"__type__": "update", "visible": True if version not in v3v4set else False}, + { + "__type__": "update", + "value": False if not if_force_ckpt else True, + "interactive": True if not if_force_ckpt else False, + }, + {"__type__": "update", "interactive": True, "value": False}, + {"__type__": "update", "visible": True if version in v3v4set else False}, + ) # {'__type__': 'update', "interactive": False if version in v3v4set else True, "value": False}, \ ####batch infer + + +if os.path.exists("GPT_SoVITS/text/G2PWModel"): + ... +else: + cmd = '"%s" -s GPT_SoVITS/download.py' % python_exec + p = Popen(cmd, shell=True) + p.wait() + + +def sync(text): + return {"__type__": "update", "value": text} + + +with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app: + gr.HTML( + top_html.format( + i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + ), + elem_classes="markdown", + ) + + with gr.Accordion("使用说明", open=False): + gr.Markdown( + value=""" +## 微调训练步骤(简化版) +0. 填写模型名。 +1. 选择一个或多个音频文件,**勾选`自动开启语音识别`**,然后点击`1.开启语音切分`。`2.开启语音识别`会被自动执行。 +2. **勾选`自动开启SoVITS训练`、`自动开启GPT训练`**,然后点击`4.开启训练集格式化一键三连`。`5.开启SoVITS训练`、`6.开启GPT训练`会被自动执行。 + +## 微调训练步骤 +0. 填写模型名。 +1. 选择一个或多个音频文件,然后点击`1.开启语音切分`。如果已事先完成切分,可直接在`语音切分文件夹路径`里输入对应的路径。 +2. 选择一个ASR模型,然后点击`2.开启语音识别`来生成标注文件。如果已事先准备了标注文件,可直接在`标注文件路径`里输入对应的`.list`文件路径。 +3. 如果需要对标注文件进行修改,可以点击`3.开启音频标注WebUI`,但这一步是非必须的。 +4. 点击`4.开启训练集格式化一键三连`,这一步是为了将数据集整理为特定格式,后续可在`logs`目录里查看。 +5. 根据自己显卡的情况调整batch_size,设置合适的训练轮数,然后点击`5.开启SoVITS训练`,可在终端查看训练进度。 +6. 同样需要调整batch_size,设置合适的训练轮数,然后点击`6.开启GPT训练`,可在终端查看训练进度。如果显存大于12G,且数据集质量好,建议开启DPO。 + +## 推理步骤 +0. 默认是`微调训练`页面,所以需要先切换到`推理`页面。 +1. 在列表里选择之前训练好的GPT模型和SoVITS模型。 +2. 点击`开启TTS推理WebUI`。 + +## 训练新的模型 +1. 切换到`其他工具`页面。 +2. 点击`清空输出目录`。 +3. 切换回`微调训练`页面,按照微调训练步骤从头开始。**记得换一个模型名。**""") + + with gr.Row(): + with gr.Row(equal_height=True): + exp_name = gr.Textbox( + label=i18n("*实验/模型名"), + value="xxx", + interactive=True, + scale=3, + ) + gpu_info_box = gr.Textbox( + label=i18n("显卡信息"), + value=gpu_info, + visible=True, + interactive=False, + scale=5, + ) + version_checkbox = gr.Radio( + label=i18n("训练模型的版本"), + value=version, + choices=["v1", "v2", "v4", "v2Pro", "v2ProPlus"], + scale=5, + ) + with gr.Row(visible=False): + with gr.Row(equal_height=True): + pretrained_s1 = gr.Textbox( + label=i18n("预训练GPT模型路径"), + value=pretrained_gpt_name[version], + interactive=True, + lines=1, + max_lines=1, + scale=3, + ) + pretrained_s2G = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[version], + interactive=True, + lines=1, + max_lines=1, + scale=5, + ) + pretrained_s2D = gr.Textbox( + label=i18n("预训练SoVITS-D模型路径"), + value=pretrained_sovits_name[version].replace("s2G", "s2D"), + interactive=True, + lines=1, + max_lines=1, + scale=5, + ) + + with gr.Tabs(): + with gr.TabItem(i18n("微调训练")): + # 语音切分工具 + with gr.Row(): + slice_inp_path = gr.Files(label=i18n("选择一个或多个音频文件"), file_types=["audio"]) + slice_opt_root = gr.Textbox( + label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt", visible=False + ) + with gr.Row(visible=False): + threshold = gr.Textbox( + label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34" + ) + min_length = gr.Textbox( + label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"), + value="4000", + ) + min_interval = gr.Textbox(label=i18n("min_interval:最短切割间隔"), value="300") + hop_size = gr.Textbox( + label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"), + value="10", + ) + max_sil_kept = gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"), value="500") + with gr.Row(visible=False): + _max = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("max:归一化后最大值多少"), + value=0.9, + interactive=True, + ) + alpha = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("alpha_mix:混多少比例归一化后音频进来"), + value=0.25, + interactive=True, + ) + with gr.Row(visible=False): + n_process = gr.Slider( + minimum=1, + maximum=n_cpu, + step=1, + label=i18n("切割使用的进程数"), + value=4, + interactive=True, + ) + with gr.Row(): + slicer_info = gr.Textbox(label=process_info(process_name_slice, "info")) + open_slicer_button = gr.Button( + value="1."+process_info(process_name_slice, "open"), variant="primary", visible=True + ) + close_slicer_button = gr.Button( + value="1."+process_info(process_name_slice, "close"), variant="primary", visible=False + ) + + # 语音降噪工具 + # gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(不稳定,先别用,可能劣化模型效果!)")) + with gr.Row(visible=False): + with gr.Column(scale=3): + with gr.Row(): + denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="") + denoise_output_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/denoise_opt") + with gr.Row(): + denoise_info = gr.Textbox(label=process_info(process_name_denoise, "info")) + open_denoise_button = gr.Button( + value=process_info(process_name_denoise, "open"), variant="primary", visible=True + ) + close_denoise_button = gr.Button( + value=process_info(process_name_denoise, "close"), variant="primary", visible=False + ) + + # 语音识别工具 + with gr.Row(): + asr_inp_dir = gr.Textbox( + label=i18n("语音切分文件夹路径"), value="output/slicer_opt", interactive=True + ) + asr_opt_dir = gr.Textbox( + label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True, visible=False + ) + if_auto_asr = gr.Checkbox( + label=i18n("自动开启语音识别"), + value=False, + interactive=True, + show_label=True, + ) + with gr.Row(): + asr_model = gr.Dropdown( + label=i18n("ASR 模型"), + choices=list(asr_dict.keys()), + interactive=True, + value="达摩 ASR (中文)", + ) + asr_size = gr.Dropdown( + label=i18n("ASR 模型尺寸"), choices=["large"], interactive=True, value="large" + ) + asr_lang = gr.Dropdown( + label=i18n("ASR 语言设置"), choices=["zh", "yue"], interactive=True, value="zh" + ) + asr_precision = gr.Dropdown( + label=i18n("数据类型精度"), choices=["float32"], interactive=True, value="float32" + ) + with gr.Row(): + asr_info = gr.Textbox(label=process_info(process_name_asr, "info")) + open_asr_button = gr.Button( + value="2."+process_info(process_name_asr, "open"), variant="primary", visible=True + ) + close_asr_button = gr.Button( + value="2."+process_info(process_name_asr, "close"), variant="primary", visible=False + ) + + def change_lang_choices(key): # 根据选择的模型修改可选的语言 + return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} + + def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 + return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} + + def change_precision_choices(key): # 根据选择的模型修改可选的语言 + if key == "Faster Whisper (多语种)": + if default_batch_size <= 4: + precision = "int8" + elif is_half: + precision = "float16" + else: + precision = "float32" + else: + precision = "float32" + return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} + + asr_model.change(change_lang_choices, [asr_model], [asr_lang]) + asr_model.change(change_size_choices, [asr_model], [asr_size]) + asr_model.change(change_precision_choices, [asr_model], [asr_precision]) + + # 语音文本校对标注工具 + path_list = gr.Textbox( + label=i18n("标注文件路径 (含文件后缀 *.list)"), + value="output/asr_opt/slicer_opt.list", + interactive=True, + ) + with gr.Row(): + label_info = gr.Textbox(label=process_info(process_name_subfix, "info")) + open_label = gr.Button( + value="3."+process_info(process_name_subfix, "open"), variant="primary", visible=True + ) + close_label = gr.Button( + value="3."+process_info(process_name_subfix, "close"), variant="primary", visible=False + ) + open_label.click(change_label, [path_list], [label_info, open_label, close_label]) + close_label.click(change_label, [path_list], [label_info, open_label, close_label]) + + with gr.Accordion(label=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹"), visible=False): + with gr.Row(): + with gr.Row(): + inp_text = gr.Textbox( + label=i18n("*文本标注文件"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "asr_opt", "slicer_opt.list"), + interactive=True, + scale=10, + ) + with gr.Row(): + inp_wav_dir = gr.Textbox( + label=i18n("*训练集音频文件目录"), + value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "output", "slicer_opt"), + interactive=True, + placeholder=i18n( + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" + ), + scale=10, + ) + + def conditional_open_asr(auto_asr, *args): + global auto_asr_stop_flag + if auto_asr and not auto_asr_stop_flag: + yield from open_asr(*args) + else: + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update(), gr.update()] + open_slicer_button.click( + open_slice, + [ + slice_inp_path, + slice_opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + n_process, + ], + [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], + ).then( + conditional_open_asr, + [if_auto_asr, asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir] + ) + close_slicer_button.click(close_slice, [], [slicer_info, open_slicer_button, close_slicer_button]) + + open_denoise_button.click( + open_denoise, + [denoise_input_dir, denoise_output_dir], + [denoise_info, open_denoise_button, close_denoise_button, asr_inp_dir, inp_wav_dir], + ) + close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) + + open_asr_button.click( + open_asr, + [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], + ) + close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) + + # 训练集格式化工具 + with gr.Accordion(label="1Aa-" + process_name_1a, visible=False): + with gr.Row(): + with gr.Row(): + gpu_numbers1a = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + bert_pretrained_dir = gr.Textbox( + label=i18n("预训练中文BERT模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + interactive=False, + lines=2, + ) + with gr.Row(): + button1a_open = gr.Button( + value=process_info(process_name_1a, "open"), variant="primary", visible=True + ) + button1a_close = gr.Button( + value=process_info(process_name_1a, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1a = gr.Textbox(label=process_info(process_name_1a, "info")) + + with gr.Accordion(label="1Ab-" + process_name_1b, visible=False): + with gr.Row(): + with gr.Row(): + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + cnhubert_base_dir = gr.Textbox( + label=i18n("预训练SSL模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-hubert-base", + interactive=False, + lines=2, + ) + with gr.Row(): + button1b_open = gr.Button( + value=process_info(process_name_1b, "open"), variant="primary", visible=True + ) + button1b_close = gr.Button( + value=process_info(process_name_1b, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1b = gr.Textbox(label=process_info(process_name_1b, "info")) + + with gr.Accordion(label="1Ac-" + process_name_1c, visible=False): + with gr.Row(): + with gr.Row(): + gpu_numbers1c = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s-%s" % (gpus, gpus), + interactive=True, + ) + with gr.Row(): + pretrained_s2G_ = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[version], + interactive=False, + lines=2, + ) + with gr.Row(): + button1c_open = gr.Button( + value=process_info(process_name_1c, "open"), variant="primary", visible=True + ) + button1c_close = gr.Button( + value=process_info(process_name_1c, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1c = gr.Textbox(label=process_info(process_name_1c, "info")) + with gr.Row(): + if_auto_sovits = gr.Checkbox( + label=i18n("自动开启SoVITS训练"), + value=False, + interactive=True, + show_label=True, + ) + if_auto_gpt = gr.Checkbox( + label=i18n("自动开启GPT训练"), + value=False, + interactive=True, + show_label=True, + ) + with gr.Row(): + info1abc = gr.Textbox(label=process_info(process_name_1abc, "info")) + button1abc_open = gr.Button( + value="4."+process_info(process_name_1abc, "open"), variant="primary", visible=True + ) + button1abc_close = gr.Button( + value="4."+process_info(process_name_1abc, "close"), variant="primary", visible=False + ) + + pretrained_s2G.change(sync, [pretrained_s2G], [pretrained_s2G_]) + + button1a_open.click( + open1a, + [inp_text, inp_wav_dir, exp_name, gpu_numbers1a, bert_pretrained_dir], + [info1a, button1a_open, button1a_close], + ) + button1a_close.click(close1a, [], [info1a, button1a_open, button1a_close]) + button1b_open.click( + open1b, + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], + [info1b, button1b_open, button1b_close], + ) + button1b_close.click(close1b, [], [info1b, button1b_open, button1b_close]) + button1c_open.click( + open1c, + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1c, pretrained_s2G], + [info1c, button1c_open, button1c_close], + ) + button1c_close.click(close1c, [], [info1c, button1c_open, button1c_close]) + + def conditional_open1Ba(auto_sovits, *args): + global auto_sovits_stop_flag + if auto_sovits and not auto_sovits_stop_flag: + yield from open1Ba(*args) + else: + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update()] + def conditional_open1Bb(auto_gpt, *args): + global auto_gpt_stop_flag + if auto_gpt and not auto_gpt_stop_flag: + yield from open1Bb(*args) + else: + yield ["", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, gr.update(), gr.update()] + + # SoVITS训练 + with gr.Column(): + with gr.Row(): + batch_size = gr.Slider( + minimum=1, + maximum=default_max_batch_size, + step=1, + label=i18n("每张显卡的batch_size"), + value=3, + interactive=True, + ) + total_epoch = gr.Slider( + minimum=1, + maximum=100, + step=1, + label=i18n("总训练轮数total_epoch,不建议太高"), + value=20, + interactive=True, + ) + save_every_epoch = gr.Slider( + minimum=1, + maximum=100, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + with gr.Row(): + text_low_lr_rate = gr.Slider( + minimum=0.2, + maximum=0.6, + step=0.05, + label=i18n("文本模块学习率权重"), + value=0.4, + visible=True if version not in v3v4set else False, + ) # v3v4 not need + lora_rank = gr.Radio( + label=i18n("LoRA秩"), + value="32", + choices=["16", "32", "64", "128"], + visible=True if version in v3v4set else False, + ) # v1v2 not need + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s" % (gpus), + interactive=True, + ) + with gr.Column(visible=False): + if_save_latest = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + if_grad_ckpt = gr.Checkbox( + label="v3是否开启梯度检查点节省显存占用", + value=False, + interactive=True if version in v3v4set else False, + show_label=True, + visible=False, + ) # 只有V3s2可以用 + with gr.Row(): + info1Ba = gr.Textbox(label=process_info(process_name_sovits, "info")) + button1Ba_open = gr.Button( + value="5."+process_info(process_name_sovits, "open"), variant="primary", visible=True + ) + button1Ba_close = gr.Button( + value="5."+process_info(process_name_sovits, "close"), variant="primary", visible=False + ) + + # GPT训练 + with gr.Column(): + with gr.Row(): + batch_size1Bb = gr.Slider( + minimum=1, + maximum=40, + step=1, + label=i18n("每张显卡的batch_size"), + value=3, + interactive=True, + ) + total_epoch1Bb = gr.Slider( + minimum=2, + maximum=100, + step=1, + label=i18n("总训练轮数total_epoch"), + value=15, + interactive=True, + ) + save_every_epoch1Bb = gr.Slider( + minimum=1, + maximum=100, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + with gr.Row(): + if_dpo = gr.Checkbox( + label=i18n("是否开启DPO训练选项(实验性)"), + value=False, + interactive=True, + show_label=True, + ) + gpu_numbers1Bb = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value="%s" % (gpus), + interactive=True, + ) + with gr.Column(visible=False): + with gr.Column(): + if_save_latest1Bb = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights1Bb = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + with gr.Row(): + info1Bb = gr.Textbox(label=process_info(process_name_gpt, "info")) + button1Bb_open = gr.Button( + value="6."+process_info(process_name_gpt, "open"), variant="primary", visible=True + ) + button1Bb_close = gr.Button( + value="6."+process_info(process_name_gpt, "close"), variant="primary", visible=False + ) + + with gr.TabItem(i18n("推理")): + with gr.Row(): + GPT_dropdown = gr.Dropdown( + label=i18n("GPT模型列表"), + choices=GPT_names, + value=GPT_names[-1], + interactive=True, + ) + SoVITS_dropdown = gr.Dropdown( + label=i18n("SoVITS模型列表"), + choices=SoVITS_names, + value=SoVITS_names[0], + interactive=True, + ) + refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") + refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + with gr.Row(): + gpu_number_1C = gr.Textbox( + label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True + ) + batched_infer_enabled = gr.Checkbox( + label=i18n("启用并行推理版本"), value=False, interactive=True, show_label=True + ) + with gr.Row(): + tts_info = gr.Textbox(label=process_info(process_name_tts, "info"), scale=2) + open_tts = gr.Button( + value=process_info(process_name_tts, "open"), variant="primary", visible=True + ) + close_tts = gr.Button( + value=process_info(process_name_tts, "close"), variant="primary", visible=False + ) + open_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + close_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + + button1Ba_open.click( + open1Ba, + [ + version_checkbox, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, + ], + [info1Ba, button1Ba_open, button1Ba_close, SoVITS_dropdown, GPT_dropdown], + ).then( + conditional_open1Bb, + [ + if_auto_gpt, + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], + ) + button1Bb_open.click( + open1Bb, + [ + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], + ) + version_checkbox.change( + switch_version, + [version_checkbox], + [ + pretrained_s2G, + pretrained_s2D, + pretrained_s1, + GPT_dropdown, + SoVITS_dropdown, + batch_size, + total_epoch, + save_every_epoch, + text_low_lr_rate, + if_grad_ckpt, + batched_infer_enabled, + lora_rank, + ], + ) + + button1abc_open.click( + open1abc, + [ + version_checkbox, + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + cnhubert_base_dir, + pretrained_s2G, + ], + [info1abc, button1abc_open, button1abc_close], + ).then( + conditional_open1Ba, + [ + if_auto_sovits, + version_checkbox, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, + ], + [info1Ba, button1Ba_open, button1Ba_close,SoVITS_dropdown,GPT_dropdown], + ).then( + conditional_open1Bb, + [ + if_auto_gpt, + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], + ) + + button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]) + button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]) + button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) + + with gr.TabItem(i18n("其他工具")): + # UVR5人声伴奏分离&去混响去延迟工具 + with gr.Row(): + uvr5_info = gr.Textbox(label=process_info(process_name_uvr5, "info")) + open_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "open"), variant="primary", visible=True + ) + close_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "close"), variant="primary", visible=False + ) + open_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) + close_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) + + # 清空输出目录 + with gr.Row(): + clear_output_info = gr.Textbox(label=i18n("清空输出目录状态"), value="", interactive=False) + clear_output_button = gr.Button(value=i18n("清空输出目录"), variant="stop") + def clear_output_directory(): + output_dir = "./output" + if os.path.exists(output_dir): + try: + shutil.rmtree(output_dir) # 删除整个目录 + return i18n("输出目录已成功清空") + except Exception as e: + return f"{i18n('清空输出目录时出错')}: {str(e)}" + else: + return i18n("输出目录不存在,无需清空") + clear_output_button.click(fn=clear_output_directory, inputs=[], outputs=[clear_output_info]) + + app.queue().launch( # concurrency_count=511, max_size=1022 + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=webui_port_main, + # quiet=True, + )