diff --git a/GPT_SoVITS/AR/data/dataset.py b/GPT_SoVITS/AR/data/dataset.py
index 402483d9..57450b2e 100644
--- a/GPT_SoVITS/AR/data/dataset.py
+++ b/GPT_SoVITS/AR/data/dataset.py
@@ -61,11 +61,7 @@ class Text2SemanticDataset(Dataset):
         )
         # get dict
         self.path2 = phoneme_path  # "%s/2-name2text.txt"%exp_dir#phoneme_path
-        self.path3 = "%s/3-bert" % (
-            os.path.dirname(
-                phoneme_path,
-            )
-        )  # "%s/3-bert"%exp_dir#bert_dir
+        self.path3 = f"{os.path.dirname(phoneme_path)}/3-bert"  # "%s/3-bert"%exp_dir#bert_dir
         self.path6 = semantic_path  # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
         assert os.path.exists(self.path2)
         assert os.path.exists(self.path6)
@@ -219,7 +215,7 @@ class Text2SemanticDataset(Dataset):
         semantic_ids_len = len(semantic_ids)

         flag = 0
-        path_bert = "%s/%s.pt" % (self.path3, item_name)
+        path_bert = f"{self.path3}/{item_name}.pt"
         if os.path.exists(path_bert) == True:
             bert_feature = torch.load(path_bert, map_location="cpu")
         else:
diff --git a/GPT_SoVITS/AR/utils/io.py b/GPT_SoVITS/AR/utils/io.py
index a6475cb6..2721ff90 100644
--- a/GPT_SoVITS/AR/utils/io.py
+++ b/GPT_SoVITS/AR/utils/io.py
@@ -26,5 +26,5 @@ def write_args(args, path):
     args_file.write(str(sys.argv))
     args_file.write("\n==> args:\n")
     for k, v in sorted(args_dict.items()):
-        args_file.write(" %s: %s\n" % (str(k), str(v)))
+        args_file.write(f" {str(k)}: {str(v)}\n")
     args_file.close()
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index 0c1d2484..1233b9c2 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -41,7 +41,7 @@ resample_transform_dict = {}

 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{str(device)}"
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)
@@ -489,7 +489,7 @@ class TTS:
         path_sovits = self.configs.default_configs[model_version]["vits_weights_path"]

         if if_lora_v3 == True and os.path.exists(path_sovits) == False:
-            info = path_sovits + i18n("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
+            info = path_sovits + i18n(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重")
             raise FileExistsError(info)

         # dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
@@ -608,7 +608,7 @@ class TTS:
                 self.empty_cache()

             self.vocoder = BigVGAN.from_pretrained(
-                "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+                f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
                 use_cuda_kernel=False,
             )  # if True, RuntimeError: Ninja is required to load C++ extensions
             # remove weight norm in the model and set to eval mode
@@ -641,7 +641,7 @@ class TTS:
             )
             self.vocoder.remove_weight_norm()
             state_dict_g = torch.load(
-                "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+                f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
                 map_location="cpu",
                 weights_only=False,
             )
diff --git a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
index fda70a49..5e480bf9 100644
--- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
+++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@@ -143,7 +143,7 @@ def cut2(inp):
 @register_method("cut3")
 def cut3(inp):
     inp = inp.strip("\n")
-    opts = ["%s" % item for item in inp.strip("。").split("。")]
+    opts = [f"{item}" for item in inp.strip("。").split("。")]
     opts = [item for item in opts if not set(item).issubset(punctuation)]
     return "\n".join(opts)

diff --git a/GPT_SoVITS/eres2net/kaldi.py b/GPT_SoVITS/eres2net/kaldi.py
index a80e5e6b..2313444c 100644
--- a/GPT_SoVITS/eres2net/kaldi.py
+++ b/GPT_SoVITS/eres2net/kaldi.py
@@ -625,18 +625,7 @@ def fbank(
     # size (num_mel_bins, padded_window_size // 2)
     # print(num_mel_bins, padded_window_size, sample_frequency, low_freq, high_freq, vtln_low, vtln_high, vtln_warp)
-    cache_key = "%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % (
-        num_mel_bins,
-        padded_window_size,
-        sample_frequency,
-        low_freq,
-        high_freq,
-        vtln_low,
-        vtln_high,
-        vtln_warp,
-        device,
-        dtype,
-    )
+    cache_key = f"{num_mel_bins}-{padded_window_size}-{sample_frequency}-{low_freq}-{high_freq}-{vtln_low}-{vtln_high}-{vtln_warp}-{device}-{dtype}"
     if cache_key not in cache:
         mel_energies = get_mel_banks(
             num_mel_bins,
diff --git a/GPT_SoVITS/export_torch_script_v3v4.py b/GPT_SoVITS/export_torch_script_v3v4.py
index b0e4dba5..f53ae144 100644
--- a/GPT_SoVITS/export_torch_script_v3v4.py
+++ b/GPT_SoVITS/export_torch_script_v3v4.py
@@ -505,7 +505,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan

     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode
@@ -533,7 +533,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), map_location="cpu"
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu"
     )
     print("loading vocoder", hifigan_model.load_state_dict(state_dict_g))
     if is_half == True:
@@ -1042,7 +1042,7 @@ def test_export(

     wav_gen = wav_gen[:, :, :wav_gen_length]
     audio = wav_gen[0][0].cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

     sr = 24000
     soundfile.write(output, (audio * 32768).astype(np.int16), sr)
@@ -1115,7 +1115,7 @@ def test_export(
     wav_gen = torch.cat([wav_gen, zero_wav_torch], 0)

     audio = wav_gen.cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

     soundfile.write(output, (audio * 32768).astype(np.int16), out_sr)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index a361ed58..42d03020 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -180,6 +180,8 @@ def get_bert_feature(text, word2ph):
     for i in range(len(word2ph)):
         repeat_feature = res[i].repeat(word2ph[i], 1)
         phone_level_feature.append(repeat_feature)
+    if len(phone_level_feature) == 0:
+        return torch.empty((res.shape[1], 0), dtype=res.dtype, device=res.device)
     phone_level_feature = torch.cat(phone_level_feature, dim=0)

     return phone_level_feature.T
@@ -235,7 +237,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
     if if_lora_v3 == True and is_exist == False:
-        info =
path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重") + info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 LoRA 权重") gr.Warning(info) raise FileExistsError(info) dict_language = dict_language_v1 if version == "v1" else dict_language_v2 @@ -320,7 +322,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None) vq_model = vq_model.to(device) vq_model.eval() if if_lora_v3 == False: - print("loading sovits_%s" % model_version, vq_model.load_state_dict(dict_s2["weight"], strict=False)) + print(f"loading sovits_{model_version}", vq_model.load_state_dict(dict_s2["weight"], strict=False)) else: path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4 print( @@ -335,7 +337,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None) init_lora_weights=True, ) vq_model.cfm = get_peft_model(vq_model.cfm, lora_config) - print("loading sovits_%s_lora%s" % (model_version, lora_rank)) + print(f"loading sovits_{model_version}_lora{lora_rank}") vq_model.load_state_dict(dict_s2["weight"], strict=False) vq_model.cfm = vq_model.cfm.merge_and_unload() # torch.save(vq_model.state_dict(),"merge_win.pth") @@ -442,7 +444,7 @@ def init_bigvgan(): from BigVGAN import bigvgan bigvgan_model = bigvgan.BigVGAN.from_pretrained( - "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,), + f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x", use_cuda_kernel=False, ) # if True, RuntimeError: Ninja is required to load C++ extensions # remove weight norm in the model and set to eval mode @@ -472,7 +474,7 @@ def init_hifigan(): hifigan_model.eval() hifigan_model.remove_weight_norm() state_dict_g = torch.load( - "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), + f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu", weights_only=False, ) @@ -508,7 +510,7 @@ resample_transform_dict = {} def resample(audio_tensor, sr0, sr1, device): global resample_transform_dict - key = "%s-%s-%s" % (sr0, sr1, str(device)) + key = f"{sr0}-{sr1}-{str(device)}" if key not in resample_transform_dict: resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device) return resample_transform_dict[key](audio_tensor) @@ -1062,7 +1064,7 @@ def cut2(inp): def cut3(inp): inp = inp.strip("\n") - opts = ["%s" % item for item in inp.strip("。").split("。")] + opts = [f"{item}" for item in inp.strip("。").split("。")] opts = [item for item in opts if not set(item).issubset(punctuation)] return "\n".join(opts) diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py index 51a120f1..605bbeed 100644 --- a/GPT_SoVITS/inference_webui_fast.py +++ b/GPT_SoVITS/inference_webui_fast.py @@ -31,7 +31,7 @@ import torch now_dir = os.getcwd() sys.path.append(now_dir) -sys.path.append("%s/GPT_SoVITS" % (now_dir)) +sys.path.append(f"{now_dir}/GPT_SoVITS") logging.getLogger("markdown_it").setLevel(logging.ERROR) logging.getLogger("urllib3").setLevel(logging.ERROR) @@ -239,7 +239,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None) is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4 path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4 if if_lora_v3 == True and is_exist == False: - info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重") + info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 
LoRA 权重") gr.Warning(info) raise FileExistsError(info) dict_language = dict_language_v1 if version == "v1" else dict_language_v2 diff --git a/GPT_SoVITS/module/data_utils.py b/GPT_SoVITS/module/data_utils.py index 46eff5fb..ec437580 100644 --- a/GPT_SoVITS/module/data_utils.py +++ b/GPT_SoVITS/module/data_utils.py @@ -23,15 +23,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset): def __init__(self, hparams, version=None, val=False): exp_dir = hparams.exp_dir - self.path2 = "%s/2-name2text.txt" % exp_dir - self.path4 = "%s/4-cnhubert" % exp_dir - self.path5 = "%s/5-wav32k" % exp_dir + self.path2 = f"{exp_dir}/2-name2text.txt" + self.path4 = f"{exp_dir}/4-cnhubert" + self.path5 = f"{exp_dir}/5-wav32k" assert os.path.exists(self.path2) assert os.path.exists(self.path4) assert os.path.exists(self.path5) self.is_v2Pro = version in {"v2Pro", "v2ProPlus"} if self.is_v2Pro: - self.path7 = "%s/7-sv_cn" % exp_dir + self.path7 = f"{exp_dir}/7-sv_cn" assert os.path.exists(self.path7) names4 = set([name[:-3] for name in list(os.listdir(self.path4))]) # 去除.pt后缀 names5 = set(os.listdir(self.path5)) @@ -85,7 +85,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset): skipped_phone += 1 continue - size = os.path.getsize("%s/%s" % (self.path5, audiopath)) + size = os.path.getsize(f"{self.path5}/{audiopath}") duration = size / self.sampling_rate / 2 if duration == 0: @@ -110,9 +110,9 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset): audiopath, phoneme_ids = audiopath_sid_text text = torch.FloatTensor(phoneme_ids) try: - spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath)) + spec, wav = self.get_audio(f"{self.path5}/{audiopath}") with torch.no_grad(): - ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu") + ssl = torch.load(f"{self.path4}/{audiopath}.pt", map_location="cpu") if ssl.shape[-1] != spec.shape[-1]: typee = ssl.dtype ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee) diff --git a/GPT_SoVITS/module/mel_processing.py b/GPT_SoVITS/module/mel_processing.py index 62c7b40e..334b5cf3 100644 --- a/GPT_SoVITS/module/mel_processing.py +++ b/GPT_SoVITS/module/mel_processing.py @@ -46,7 +46,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False) global hann_window dtype_device = str(y.dtype) + "_" + str(y.device) # wnsize_dtype_device = str(win_size) + '_' + dtype_device - key = "%s-%s-%s-%s-%s" % (dtype_device, n_fft, sampling_rate, hop_size, win_size) + key = f"{dtype_device}-{n_fft}-{sampling_rate}-{hop_size}-{win_size}" # if wnsize_dtype_device not in hann_window: if key not in hann_window: # hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) @@ -78,7 +78,7 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): global mel_basis dtype_device = str(spec.dtype) + "_" + str(spec.device) # fmax_dtype_device = str(fmax) + '_' + dtype_device - key = "%s-%s-%s-%s-%s-%s" % (dtype_device, n_fft, num_mels, sampling_rate, fmin, fmax) + key = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{fmin}-{fmax}" # if fmax_dtype_device not in mel_basis: if key not in mel_basis: mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) @@ -99,16 +99,7 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, global mel_basis, hann_window dtype_device = str(y.dtype) + "_" + str(y.device) # fmax_dtype_device = str(fmax) + '_' + dtype_device - fmax_dtype_device = "%s-%s-%s-%s-%s-%s-%s-%s" % ( - dtype_device, - 
n_fft, - num_mels, - sampling_rate, - hop_size, - win_size, - fmin, - fmax, - ) + fmax_dtype_device = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{hop_size}-{win_size}-{fmin}-{fmax}" # wnsize_dtype_device = str(win_size) + '_' + dtype_device wnsize_dtype_device = fmax_dtype_device if fmax_dtype_device not in mel_basis: diff --git a/GPT_SoVITS/process_ckpt.py b/GPT_SoVITS/process_ckpt.py index 20db9b19..ab00d271 100644 --- a/GPT_SoVITS/process_ckpt.py +++ b/GPT_SoVITS/process_ckpt.py @@ -12,9 +12,9 @@ i18n = I18nAuto() def my_save(fea, path): #####fix issue: torch.save doesn't support chinese path dir = os.path.dirname(path) name = os.path.basename(path) - tmp_path = "%s.pth" % (ttime()) + tmp_path = f"{ttime()}.pth" torch.save(fea, tmp_path) - shutil.move(tmp_path, "%s/%s" % (dir, name)) + shutil.move(tmp_path, f"{dir}/{name}") from io import BytesIO @@ -47,14 +47,14 @@ def savee(ckpt, name, epoch, steps, hps, model_version=None, lora_rank=None): continue opt["weight"][key] = ckpt[key].half() opt["config"] = hps - opt["info"] = "%sepoch_%siteration" % (epoch, steps) + opt["info"] = f"{epoch}epoch_{steps}iteration" if lora_rank: opt["lora_rank"] = lora_rank - my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version) + my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version) elif model_version != None and "Pro" in model_version: - my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version) + my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version) else: - my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name)) + my_save(opt, f"{hps.save_weight_dir}/{name}.pth") return "Success." except: return traceback.format_exc() diff --git a/GPT_SoVITS/utils.py b/GPT_SoVITS/utils.py index 08e18384..b61ac19d 100644 --- a/GPT_SoVITS/utils.py +++ b/GPT_SoVITS/utils.py @@ -44,7 +44,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False ) except: traceback.print_exc() - print("error, %s is not in the checkpoint" % k) # shape不对也会,比如text_embedding当cleaner修改时 + print(f"error, {k} is not in the checkpoint") # shape不对也会,比如text_embedding当cleaner修改时 new_state_dict[k] = v if hasattr(model, "module"): model.module.load_state_dict(new_state_dict) @@ -67,9 +67,9 @@ from time import time as ttime def my_save(fea, path): #####fix issue: torch.save doesn't support chinese path dir = os.path.dirname(path) name = os.path.basename(path) - tmp_path = "%s.pth" % (ttime()) + tmp_path = f"{ttime()}.pth" torch.save(fea, tmp_path) - shutil.move(tmp_path, "%s/%s" % (dir, name)) + shutil.move(tmp_path, f"{dir}/{name}") def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path): diff --git a/api.py b/api.py index cc0896a2..1df6b297 100644 --- a/api.py +++ b/api.py @@ -147,7 +147,7 @@ import sys now_dir = os.getcwd() sys.path.append(now_dir) -sys.path.append("%s/GPT_SoVITS" % (now_dir)) +sys.path.append(f"{now_dir}/GPT_SoVITS") import signal from text.LangSegmenter import LangSegmenter @@ -239,7 +239,7 @@ def init_bigvgan(): from BigVGAN import bigvgan bigvgan_model = bigvgan.BigVGAN.from_pretrained( - "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,), + f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x", use_cuda_kernel=False, ) # if True, RuntimeError: Ninja is required to load C++ extensions # remove weight norm in the model and set to eval mode @@ -268,7 +268,7 @@ def init_hifigan(): hifigan_model.eval() hifigan_model.remove_weight_norm() 
state_dict_g = torch.load( - "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), + f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu", weights_only=False, ) @@ -292,7 +292,7 @@ resample_transform_dict = {} def resample(audio_tensor, sr0, sr1, device): global resample_transform_dict - key = "%s-%s-%s" % (sr0, sr1, str(device)) + key = f"{sr0}-{sr1}-{str(device)}" if key not in resample_transform_dict: resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device) return resample_transform_dict[key](audio_tensor) @@ -391,7 +391,7 @@ def get_sovits_weights(sovits_path): path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4 if if_lora_v3 == True and is_exist == False: - logger.info("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version) + logger.info(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重") dict_s2 = load_sovits_new(sovits_path) hps = dict_s2["config"] diff --git a/api_v2.py b/api_v2.py index 5947df53..4c104901 100644 --- a/api_v2.py +++ b/api_v2.py @@ -105,7 +105,7 @@ from typing import Generator now_dir = os.getcwd() sys.path.append(now_dir) -sys.path.append("%s/GPT_SoVITS" % (now_dir)) +sys.path.append(f"{now_dir}/GPT_SoVITS") import argparse import subprocess diff --git a/config.py b/config.py index fdc11c0a..09ae866d 100644 --- a/config.py +++ b/config.py @@ -93,7 +93,7 @@ def get_weights_names(): continue for name in os.listdir(path): if name.endswith(".pth"): - SoVITS_names.append("%s/%s" % (path, name)) + SoVITS_names.append(f"{path}/{name}") if not SoVITS_names: SoVITS_names = [""] GPT_names = [] @@ -105,7 +105,7 @@ def get_weights_names(): continue for name in os.listdir(path): if name.endswith(".ckpt"): - GPT_names.append("%s/%s" % (path, name)) + GPT_names.append(f"{path}/{name}") SoVITS_names = sorted(SoVITS_names, key=custom_sort_key) GPT_names = sorted(GPT_names, key=custom_sort_key) if not GPT_names: diff --git a/tools/audio_sr.py b/tools/audio_sr.py index 58df6d20..4a49f6b1 100644 --- a/tools/audio_sr.py +++ b/tools/audio_sr.py @@ -16,7 +16,7 @@ from models.model import APNet_BWE_Model class AP_BWE: def __init__(self, device, DictToAttrRecursive, checkpoint_file=None): if checkpoint_file == None: - checkpoint_file = "%s/24kto48k/g_24kto48k.zip" % (AP_BWE_main_dir_path) + checkpoint_file = f"{AP_BWE_main_dir_path}/24kto48k/g_24kto48k.zip" if os.path.exists(checkpoint_file) == False: raise FileNotFoundError config_file = os.path.join(os.path.split(checkpoint_file)[0], "config.json") diff --git a/tools/cmd-denoise.py b/tools/cmd-denoise.py index bbf68476..e9cd5f7e 100644 --- a/tools/cmd-denoise.py +++ b/tools/cmd-denoise.py @@ -17,7 +17,7 @@ def execute_denoise(input_folder, output_folder): # print(list(os.listdir(input_folder).sort())) for name in tqdm(os.listdir(input_folder)): try: - ans("%s/%s" % (input_folder, name), output_path="%s/%s" % (output_folder, name)) + ans(f"{input_folder}/{name}", output_path=f"{output_folder}/{name}") except: traceback.print_exc() diff --git a/tools/my_utils.py b/tools/my_utils.py index 04f1a98a..2717e445 100644 --- a/tools/my_utils.py +++ b/tools/my_utils.py @@ -103,7 +103,7 @@ def check_details(path_list=None, is_train=False, is_dataset_processing=False): wav_name = clean_path(wav_name) if audio_path != "" and audio_path != None: wav_name = os.path.basename(wav_name) - wav_path = "%s/%s" % (audio_path, wav_name) + wav_path = f"{audio_path}/{wav_name}" else: wav_path = wav_name if os.path.exists(wav_path): diff --git a/tools/slice_audio.py 
b/tools/slice_audio.py index 66fafa93..1e0eb782 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -40,7 +40,7 @@ def slice(inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_ chunk /= tmp_max chunk = (chunk / tmp_max * (_max * alpha)) + (1 - alpha) * chunk wavfile.write( - "%s/%s_%010d_%010d.wav" % (opt_root, name, start, end), + f"{opt_root}/{name}_{start:010d}_{end:010d}.wav", 32000, # chunk.astype(np.float32), (chunk * 32767).astype(np.int16), diff --git a/tools/slicer2.py b/tools/slicer2.py index 8d80f1b4..512ac44a 100644 --- a/tools/slicer2.py +++ b/tools/slicer2.py @@ -219,7 +219,7 @@ def main(): soundfile.write( os.path.join( out, - "%s_%d.wav" % (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i), + f"{os.path.basename(args.audio).rsplit('.', maxsplit=1)[0]}_{i}.wav", ), chunk, sr, diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py index f5f8d3f6..dbc1be3f 100644 --- a/tools/uvr5/webui.py +++ b/tools/uvr5/webui.py @@ -92,19 +92,16 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format need_reformat = 1 traceback.print_exc() if need_reformat == 1: - tmp_path = "%s/%s.reformatted.wav" % ( - os.path.join(os.environ["TEMP"]), - os.path.basename(inp_path), - ) + tmp_path = f"{os.path.join(os.environ['TEMP'])}/{os.path.basename(inp_path)}.reformatted.wav" os.system(f'ffmpeg -i "{inp_path}" -vn -acodec pcm_s16le -ac 2 -ar 44100 "{tmp_path}" -y') inp_path = tmp_path try: if done == 0: pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal, format0, is_hp3) - infos.append("%s->Success" % (os.path.basename(inp_path))) + infos.append(f"{os.path.basename(inp_path)}->Success") yield "\n".join(infos) except: - infos.append("%s->%s" % (os.path.basename(inp_path), traceback.format_exc())) + infos.append(f"{os.path.basename(inp_path)}->{traceback.format_exc()}") yield "\n".join(infos) except: infos.append(traceback.format_exc()) diff --git a/webui.py b/webui.py index 9a6aae5f..6711385b 100644 --- a/webui.py +++ b/webui.py @@ -1,1982 +1,2045 @@ -import os -import sys - -os.environ["version"] = version = "v2Pro" -now_dir = os.getcwd() -sys.path.insert(0, now_dir) -import warnings - -warnings.filterwarnings("ignore") -import json -import platform -import shutil -import signal - -import psutil -import torch -import yaml - -os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO" -torch.manual_seed(233333) -tmp = os.path.join(now_dir, "TEMP") -os.makedirs(tmp, exist_ok=True) -os.environ["TEMP"] = tmp -if os.path.exists(tmp): - for name in os.listdir(tmp): - if name == "jieba.cache": - continue - path = "%s/%s" % (tmp, name) - delete = os.remove if os.path.isfile(path) else shutil.rmtree - try: - delete(path) - except Exception as e: - print(str(e)) - pass -import site -import traceback - -site_packages_roots = [] -for path in site.getsitepackages(): - if "packages" in path: - site_packages_roots.append(path) -if site_packages_roots == []: - site_packages_roots = ["%s/runtime/Lib/site-packages" % now_dir] -# os.environ["OPENBLAS_NUM_THREADS"] = "4" -os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" -os.environ["all_proxy"] = "" -for site_packages_root in site_packages_roots: - if os.path.exists(site_packages_root): - try: - with open("%s/users.pth" % (site_packages_root), "w") as f: - f.write( - # "%s\n%s/runtime\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" - "%s\n%s/GPT_SoVITS/BigVGAN\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" - % (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir) - ) - break - except 
PermissionError: - traceback.print_exc() -import shutil -import subprocess -from subprocess import Popen - -from tools.assets import css, js, top_html -from tools.i18n.i18n import I18nAuto, scan_language_list - -language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" -os.environ["language"] = language -i18n = I18nAuto(language=language) -from multiprocessing import cpu_count - -from config import ( - GPU_INDEX, - GPU_INFOS, - IS_GPU, - exp_root, - infer_device, - is_half, - is_share, - memset, - python_exec, - webui_port_infer_tts, - webui_port_main, - webui_port_subfix, - webui_port_uvr5, -) -from tools import my_utils -from tools.my_utils import check_details, check_for_existance - -os.environ["HF_ENDPOINT"] = "https://hf-mirror.com" -os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" - -# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu -import gradio as gr - -n_cpu = cpu_count() - -set_gpu_numbers = GPU_INDEX -gpu_infos = GPU_INFOS -mem = memset -is_gpu_ok = IS_GPU - -v3v4set = {"v3", "v4"} - - -def set_default(): - global \ - default_batch_size, \ - default_max_batch_size, \ - gpu_info, \ - default_sovits_epoch, \ - default_sovits_save_every_epoch, \ - max_sovits_epoch, \ - max_sovits_save_every_epoch, \ - default_batch_size_s1, \ - if_force_ckpt - if_force_ckpt = False - gpu_info = "\n".join(gpu_infos) - if is_gpu_ok: - minmem = min(mem) - default_batch_size = minmem // 2 if version not in v3v4set else minmem // 8 - default_batch_size_s1 = minmem // 2 - else: - default_batch_size = default_batch_size_s1 = int(psutil.virtual_memory().total / 1024 / 1024 / 1024 / 4) - if version not in v3v4set: - default_sovits_epoch = 8 - default_sovits_save_every_epoch = 4 - max_sovits_epoch = 25 # 40 - max_sovits_save_every_epoch = 25 # 10 - else: - default_sovits_epoch = 2 - default_sovits_save_every_epoch = 1 - max_sovits_epoch = 16 # 40 # 3 #训太多=作死 - max_sovits_save_every_epoch = 10 # 10 # 3 - - default_batch_size = max(1, default_batch_size) - default_batch_size_s1 = max(1, default_batch_size_s1) - default_max_batch_size = default_batch_size * 3 - - -set_default() - -gpus = "-".join(map(str, GPU_INDEX)) -default_gpu_numbers = infer_device.index - - -def fix_gpu_number(input): # 将越界的number强制改到界内 - try: - if int(input) not in set_gpu_numbers: - return default_gpu_numbers - except: - return input - return input - - -def fix_gpu_numbers(inputs): - output = [] - try: - for input in inputs.split(","): - output.append(str(fix_gpu_number(input))) - return ",".join(output) - except: - return inputs - - -from config import pretrained_gpt_name, pretrained_sovits_name - - -def check_pretrained_is_exist(version): - pretrained_model_list = ( - pretrained_sovits_name[version], - pretrained_sovits_name[version].replace("s2G", "s2D"), - pretrained_gpt_name[version], - "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", - "GPT_SoVITS/pretrained_models/chinese-hubert-base", - ) - _ = "" - for i in pretrained_model_list: - if "s2Dv3" not in i and "s2Dv4" not in i and os.path.exists(i) == False: - _ += f"\n {i}" - if _: - print("warning: ", i18n("以下模型不存在:") + _) - - -check_pretrained_is_exist(version) -for key in pretrained_sovits_name.keys(): - if os.path.exists(pretrained_sovits_name[key]) == False: - pretrained_sovits_name[key] = "" -for key in pretrained_gpt_name.keys(): - if os.path.exists(pretrained_gpt_name[key]) == False: - pretrained_gpt_name[key] = "" - -from config import ( - GPT_weight_root, - GPT_weight_version2root, - SoVITS_weight_root, - 
SoVITS_weight_version2root, - change_choices, - get_weights_names, -) - -for root in SoVITS_weight_root + GPT_weight_root: - os.makedirs(root, exist_ok=True) -SoVITS_names, GPT_names = get_weights_names() - -p_label = None -p_uvr5 = None -p_asr = None -p_denoise = None -p_tts_inference = None - - -def kill_proc_tree(pid, including_parent=True): - try: - parent = psutil.Process(pid) - except psutil.NoSuchProcess: - # Process already terminated - return - - children = parent.children(recursive=True) - for child in children: - try: - os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL - except OSError: - pass - if including_parent: - try: - os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL - except OSError: - pass - - -system = platform.system() - - -def kill_process(pid, process_name=""): - if system == "Windows": - cmd = "taskkill /t /f /pid %s" % pid - # os.system(cmd) - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - else: - kill_proc_tree(pid) - print(process_name + i18n("进程已终止")) - - -def process_info(process_name="", indicator=""): - if indicator == "opened": - return process_name + i18n("已开启") - elif indicator == "open": - return i18n("开启") + process_name - elif indicator == "closed": - return process_name + i18n("已关闭") - elif indicator == "close": - return i18n("关闭") + process_name - elif indicator == "running": - return process_name + i18n("运行中") - elif indicator == "occupy": - return process_name + i18n("占用中") + "," + i18n("需先终止才能开启下一次任务") - elif indicator == "finish": - return process_name + i18n("已完成") - elif indicator == "failed": - return process_name + i18n("失败") - elif indicator == "info": - return process_name + i18n("进程输出信息") - else: - return process_name - - -process_name_subfix = i18n("音频标注WebUI") - - -def change_label(path_list): - global p_label - if p_label is None: - check_for_existance([path_list]) - path_list = my_utils.clean_path(path_list) - cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % ( - python_exec, - path_list, - webui_port_subfix, - is_share, - ) - yield ( - process_info(process_name_subfix, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - print(cmd) - p_label = Popen(cmd, shell=True) - else: - kill_process(p_label.pid, process_name_subfix) - p_label = None - yield ( - process_info(process_name_subfix, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -process_name_uvr5 = i18n("人声分离WebUI") - - -def change_uvr5(): - global p_uvr5 - if p_uvr5 is None: - cmd = '"%s" -s tools/uvr5/webui.py "%s" %s %s %s' % ( - python_exec, - infer_device, - is_half, - webui_port_uvr5, - is_share, - ) - yield ( - process_info(process_name_uvr5, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - print(cmd) - p_uvr5 = Popen(cmd, shell=True) - else: - kill_process(p_uvr5.pid, process_name_uvr5) - p_uvr5 = None - yield ( - process_info(process_name_uvr5, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -process_name_tts = i18n("TTS推理WebUI") - - -def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled): - global p_tts_inference - if batched_infer_enabled: - cmd = '"%s" -s GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language) - else: - cmd = '"%s" -s GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language) 
- # #####v3暂不支持加速推理 - # if version=="v3": - # cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) - if p_tts_inference is None: - os.environ["gpt_path"] = gpt_path - os.environ["sovits_path"] = sovits_path - os.environ["cnhubert_base_path"] = cnhubert_base_path - os.environ["bert_path"] = bert_path - os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_number(gpu_number) - os.environ["is_half"] = str(is_half) - os.environ["infer_ttswebui"] = str(webui_port_infer_tts) - os.environ["is_share"] = str(is_share) - yield ( - process_info(process_name_tts, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - print(cmd) - p_tts_inference = Popen(cmd, shell=True) - else: - kill_process(p_tts_inference.pid, process_name_tts) - p_tts_inference = None - yield ( - process_info(process_name_tts, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -from tools.asr.config import asr_dict - -process_name_asr = i18n("语音识别") - - -def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision): - global p_asr - if p_asr is None: - asr_inp_dir = my_utils.clean_path(asr_inp_dir) - asr_opt_dir = my_utils.clean_path(asr_opt_dir) - check_for_existance([asr_inp_dir]) - cmd = f'"{python_exec}" -s tools/asr/{asr_dict[asr_model]["path"]}' - cmd += f' -i "{asr_inp_dir}"' - cmd += f' -o "{asr_opt_dir}"' - cmd += f" -s {asr_model_size}" - cmd += f" -l {asr_lang}" - cmd += f" -p {asr_precision}" - output_file_name = os.path.basename(asr_inp_dir) - output_folder = asr_opt_dir or "output/asr_opt" - output_file_path = os.path.abspath(f"{output_folder}/{output_file_name}.list") - yield ( - process_info(process_name_asr, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - print(cmd) - p_asr = Popen(cmd, shell=True) - p_asr.wait() - p_asr = None - yield ( - process_info(process_name_asr, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - {"__type__": "update", "value": output_file_path}, - {"__type__": "update", "value": output_file_path}, - {"__type__": "update", "value": asr_inp_dir}, - ) - else: - yield ( - process_info(process_name_asr, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - - -def close_asr(): - global p_asr - if p_asr is not None: - kill_process(p_asr.pid, process_name_asr) - p_asr = None - return ( - process_info(process_name_asr, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -process_name_denoise = i18n("语音降噪") - - -def open_denoise(denoise_inp_dir, denoise_opt_dir): - global p_denoise - if p_denoise == None: - denoise_inp_dir = my_utils.clean_path(denoise_inp_dir) - denoise_opt_dir = my_utils.clean_path(denoise_opt_dir) - check_for_existance([denoise_inp_dir]) - cmd = '"%s" -s tools/cmd-denoise.py -i "%s" -o "%s" -p %s' % ( - python_exec, - denoise_inp_dir, - denoise_opt_dir, - "float16" if is_half == True else "float32", - ) - - yield ( - process_info(process_name_denoise, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - print(cmd) - p_denoise = Popen(cmd, shell=True) - p_denoise.wait() - 
p_denoise = None - yield ( - process_info(process_name_denoise, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - {"__type__": "update", "value": denoise_opt_dir}, - {"__type__": "update", "value": denoise_opt_dir}, - ) - else: - yield ( - process_info(process_name_denoise, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - - -def close_denoise(): - global p_denoise - if p_denoise is not None: - kill_process(p_denoise.pid, process_name_denoise) - p_denoise = None - return ( - process_info(process_name_denoise, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -p_train_SoVITS = None -process_name_sovits = i18n("SoVITS训练") - - -def open1Ba( - version, - batch_size, - total_epoch, - exp_name, - text_low_lr_rate, - if_save_latest, - if_save_every_weights, - save_every_epoch, - gpu_numbers1Ba, - pretrained_s2G, - pretrained_s2D, - if_grad_ckpt, - lora_rank, -): - global p_train_SoVITS - if p_train_SoVITS == None: - exp_name = exp_name.rstrip(" ") - config_file = ( - "GPT_SoVITS/configs/s2.json" - if version not in {"v2Pro", "v2ProPlus"} - else f"GPT_SoVITS/configs/s2{version}.json" - ) - with open(config_file) as f: - data = f.read() - data = json.loads(data) - s2_dir = "%s/%s" % (exp_root, exp_name) - os.makedirs("%s/logs_s2_%s" % (s2_dir, version), exist_ok=True) - if check_for_existance([s2_dir], is_train=True): - check_details([s2_dir], is_train=True) - if is_half == False: - data["train"]["fp16_run"] = False - batch_size = max(1, batch_size // 2) - data["train"]["batch_size"] = batch_size - data["train"]["epochs"] = total_epoch - data["train"]["text_low_lr_rate"] = text_low_lr_rate - data["train"]["pretrained_s2G"] = pretrained_s2G - data["train"]["pretrained_s2D"] = pretrained_s2D - data["train"]["if_save_latest"] = if_save_latest - data["train"]["if_save_every_weights"] = if_save_every_weights - data["train"]["save_every_epoch"] = save_every_epoch - data["train"]["gpu_numbers"] = gpu_numbers1Ba - data["train"]["grad_ckpt"] = if_grad_ckpt - data["train"]["lora_rank"] = lora_rank - data["model"]["version"] = version - data["data"]["exp_dir"] = data["s2_ckpt_dir"] = s2_dir - data["save_weight_dir"] = SoVITS_weight_version2root[version] - data["name"] = exp_name - data["version"] = version - tmp_config_path = "%s/tmp_s2.json" % tmp - with open(tmp_config_path, "w") as f: - f.write(json.dumps(data)) - if version in ["v1", "v2", "v2Pro", "v2ProPlus"]: - cmd = '"%s" -s GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path) - else: - cmd = '"%s" -s GPT_SoVITS/s2_train_v3_lora.py --config "%s"' % (python_exec, tmp_config_path) - yield ( - process_info(process_name_sovits, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - print(cmd) - p_train_SoVITS = Popen(cmd, shell=True) - p_train_SoVITS.wait() - p_train_SoVITS = None - SoVITS_dropdown_update, GPT_dropdown_update = change_choices() - yield ( - process_info(process_name_sovits, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - SoVITS_dropdown_update, - GPT_dropdown_update, - ) - else: - yield ( - process_info(process_name_sovits, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": 
"update"}, - ) - - -def close1Ba(): - global p_train_SoVITS - if p_train_SoVITS is not None: - kill_process(p_train_SoVITS.pid, process_name_sovits) - p_train_SoVITS = None - return ( - process_info(process_name_sovits, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -p_train_GPT = None -process_name_gpt = i18n("GPT训练") - - -def open1Bb( - batch_size, - total_epoch, - exp_name, - if_dpo, - if_save_latest, - if_save_every_weights, - save_every_epoch, - gpu_numbers, - pretrained_s1, -): - global p_train_GPT - if p_train_GPT == None: - exp_name = exp_name.rstrip(" ") - with open( - "GPT_SoVITS/configs/s1longer.yaml" if version == "v1" else "GPT_SoVITS/configs/s1longer-v2.yaml" - ) as f: - data = f.read() - data = yaml.load(data, Loader=yaml.FullLoader) - s1_dir = "%s/%s" % (exp_root, exp_name) - os.makedirs("%s/logs_s1" % (s1_dir), exist_ok=True) - if check_for_existance([s1_dir], is_train=True): - check_details([s1_dir], is_train=True) - if is_half == False: - data["train"]["precision"] = "32" - batch_size = max(1, batch_size // 2) - data["train"]["batch_size"] = batch_size - data["train"]["epochs"] = total_epoch - data["pretrained_s1"] = pretrained_s1 - data["train"]["save_every_n_epoch"] = save_every_epoch - data["train"]["if_save_every_weights"] = if_save_every_weights - data["train"]["if_save_latest"] = if_save_latest - data["train"]["if_dpo"] = if_dpo - data["train"]["half_weights_save_dir"] = GPT_weight_version2root[version] - data["train"]["exp_name"] = exp_name - data["train_semantic_path"] = "%s/6-name2semantic.tsv" % s1_dir - data["train_phoneme_path"] = "%s/2-name2text.txt" % s1_dir - data["output_dir"] = "%s/logs_s1_%s" % (s1_dir, version) - # data["version"]=version - - os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_numbers(gpu_numbers.replace("-", ",")) - os.environ["hz"] = "25hz" - tmp_config_path = "%s/tmp_s1.yaml" % tmp - with open(tmp_config_path, "w") as f: - f.write(yaml.dump(data, default_flow_style=False)) - # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) - cmd = '"%s" -s GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path) - yield ( - process_info(process_name_gpt, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - print(cmd) - p_train_GPT = Popen(cmd, shell=True) - p_train_GPT.wait() - p_train_GPT = None - SoVITS_dropdown_update, GPT_dropdown_update = change_choices() - yield ( - process_info(process_name_gpt, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - SoVITS_dropdown_update, - GPT_dropdown_update, - ) - else: - yield ( - process_info(process_name_gpt, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - - -def close1Bb(): - global p_train_GPT - if p_train_GPT is not None: - kill_process(p_train_GPT.pid, process_name_gpt) - p_train_GPT = None - return ( - process_info(process_name_gpt, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -ps_slice = [] -process_name_slice = i18n("语音切分") - - -def open_slice(inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts): - 
global ps_slice - inp = my_utils.clean_path(inp) - opt_root = my_utils.clean_path(opt_root) - check_for_existance([inp]) - if os.path.exists(inp) == False: - yield ( - i18n("输入路径不存在"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - return - if os.path.isfile(inp): - n_parts = 1 - elif os.path.isdir(inp): - pass - else: - yield ( - i18n("输入路径存在但不可用"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - return - if ps_slice == []: - for i_part in range(n_parts): - cmd = '"%s" -s tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s' % ( - python_exec, - inp, - opt_root, - threshold, - min_length, - min_interval, - hop_size, - max_sil_kept, - _max, - alpha, - i_part, - n_parts, - ) - print(cmd) - p = Popen(cmd, shell=True) - ps_slice.append(p) - yield ( - process_info(process_name_slice, "opened"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - for p in ps_slice: - p.wait() - ps_slice = [] - yield ( - process_info(process_name_slice, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - {"__type__": "update", "value": opt_root}, - {"__type__": "update", "value": opt_root}, - {"__type__": "update", "value": opt_root}, - ) - else: - yield ( - process_info(process_name_slice, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - {"__type__": "update"}, - {"__type__": "update"}, - {"__type__": "update"}, - ) - - -def close_slice(): - global ps_slice - if ps_slice != []: - for p_slice in ps_slice: - try: - kill_process(p_slice.pid, process_name_slice) - except: - traceback.print_exc() - ps_slice = [] - return ( - process_info(process_name_slice, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -ps1a = [] -process_name_1a = i18n("文本分词与特征提取") - - -def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir): - global ps1a - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): - check_details([inp_text, inp_wav_dir], is_dataset_processing=True) - exp_name = exp_name.rstrip(" ") - if ps1a == []: - opt_dir = "%s/%s" % (exp_root, exp_name) - config = { - "inp_text": inp_text, - "inp_wav_dir": inp_wav_dir, - "exp_name": exp_name, - "opt_dir": opt_dir, - "bert_pretrained_dir": bert_pretrained_dir, - } - gpu_names = gpu_numbers.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - "is_half": str(is_half), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1a.append(p) - yield ( - process_info(process_name_1a, "running"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1a: - p.wait() - opt = [] - for i_part in range(all_parts): - txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) - with open(txt_path, "r", encoding="utf8") as f: - opt += 
f.read().strip("\n").split("\n") - os.remove(txt_path) - path_text = "%s/2-name2text.txt" % opt_dir - with open(path_text, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - ps1a = [] - if len("".join(opt)) > 0: - yield ( - process_info(process_name_1a, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - else: - yield ( - process_info(process_name_1a, "failed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - else: - yield ( - process_info(process_name_1a, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - - -def close1a(): - global ps1a - if ps1a != []: - for p1a in ps1a: - try: - kill_process(p1a.pid, process_name_1a) - except: - traceback.print_exc() - ps1a = [] - return ( - process_info(process_name_1a, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -sv_path = "GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt" -ps1b = [] -process_name_1b = i18n("语音自监督特征提取") - - -def open1b(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): - global ps1b - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): - check_details([inp_text, inp_wav_dir], is_dataset_processing=True) - exp_name = exp_name.rstrip(" ") - if ps1b == []: - config = { - "inp_text": inp_text, - "inp_wav_dir": inp_wav_dir, - "exp_name": exp_name, - "opt_dir": "%s/%s" % (exp_root, exp_name), - "cnhubert_base_dir": ssl_pretrained_dir, - "sv_path": sv_path, - "is_half": str(is_half), - } - gpu_names = gpu_numbers.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1b.append(p) - yield ( - process_info(process_name_1b, "running"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1b: - p.wait() - ps1b = [] - if "Pro" in version: - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1b.append(p) - for p in ps1b: - p.wait() - ps1b = [] - yield ( - process_info(process_name_1b, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - else: - yield ( - process_info(process_name_1b, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - - -def close1b(): - global ps1b - if ps1b != []: - for p1b in ps1b: - try: - kill_process(p1b.pid, process_name_1b) - except: - traceback.print_exc() - ps1b = [] - return ( - process_info(process_name_1b, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -ps1c = [] -process_name_1c = i18n("语义Token提取") - - -def open1c(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, pretrained_s2G_path): - global ps1c - inp_text = my_utils.clean_path(inp_text) 
- if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): - check_details([inp_text, inp_wav_dir], is_dataset_processing=True) - exp_name = exp_name.rstrip(" ") - if ps1c == []: - opt_dir = "%s/%s" % (exp_root, exp_name) - config_file = ( - "GPT_SoVITS/configs/s2.json" - if version not in {"v2Pro", "v2ProPlus"} - else f"GPT_SoVITS/configs/s2{version}.json" - ) - config = { - "inp_text": inp_text, - "exp_name": exp_name, - "opt_dir": opt_dir, - "pretrained_s2G": pretrained_s2G_path, - "s2config_path": config_file, - "is_half": str(is_half), - } - gpu_names = gpu_numbers.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1c.append(p) - yield ( - process_info(process_name_1c, "running"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1c: - p.wait() - opt = ["item_name\tsemantic_audio"] - path_semantic = "%s/6-name2semantic.tsv" % opt_dir - for i_part in range(all_parts): - semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) - with open(semantic_path, "r", encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(semantic_path) - with open(path_semantic, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - ps1c = [] - yield ( - process_info(process_name_1c, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - else: - yield ( - process_info(process_name_1c, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - - -def close1c(): - global ps1c - if ps1c != []: - for p1c in ps1c: - try: - kill_process(p1c.pid, process_name_1c) - except: - traceback.print_exc() - ps1c = [] - return ( - process_info(process_name_1c, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -ps1abc = [] -process_name_1abc = i18n("训练集格式化一键三连") - - -def open1abc( - version, - inp_text, - inp_wav_dir, - exp_name, - gpu_numbers1a, - gpu_numbers1Ba, - gpu_numbers1c, - bert_pretrained_dir, - ssl_pretrained_dir, - pretrained_s2G_path, -): - global ps1abc - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): - check_details([inp_text, inp_wav_dir], is_dataset_processing=True) - exp_name = exp_name.rstrip(" ") - if ps1abc == []: - opt_dir = "%s/%s" % (exp_root, exp_name) - try: - #############################1a - path_text = "%s/2-name2text.txt" % opt_dir - if os.path.exists(path_text) == False or ( - os.path.exists(path_text) == True - and len(open(path_text, "r", encoding="utf8").read().strip("\n").split("\n")) < 2 - ): - config = { - "inp_text": inp_text, - "inp_wav_dir": inp_wav_dir, - "exp_name": exp_name, - "opt_dir": opt_dir, - "bert_pretrained_dir": bert_pretrained_dir, - "is_half": str(is_half), - } - gpu_names = gpu_numbers1a.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s 
GPT_SoVITS/prepare_datasets/1-get-text.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield ( - i18n("进度") + ": 1A-Doing", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1abc: - p.wait() - - opt = [] - for i_part in range(all_parts): # txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part) - txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) - with open(txt_path, "r", encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(txt_path) - with open(path_text, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - assert len("".join(opt)) > 0, process_info(process_name_1a, "failed") - yield ( - i18n("进度") + ": 1A-Done", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - ps1abc = [] - #############################1b - config = { - "inp_text": inp_text, - "inp_wav_dir": inp_wav_dir, - "exp_name": exp_name, - "opt_dir": opt_dir, - "cnhubert_base_dir": ssl_pretrained_dir, - "sv_path": sv_path, - } - gpu_names = gpu_numbers1Ba.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield ( - i18n("进度") + ": 1A-Done, 1B-Doing", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1abc: - p.wait() - ps1abc = [] - if "Pro" in version: - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - for p in ps1abc: - p.wait() - ps1abc = [] - yield ( - i18n("进度") + ": 1A-Done, 1B-Done", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - #############################1c - path_semantic = "%s/6-name2semantic.tsv" % opt_dir - if os.path.exists(path_semantic) == False or ( - os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31 - ): - config_file = ( - "GPT_SoVITS/configs/s2.json" - if version not in {"v2Pro", "v2ProPlus"} - else f"GPT_SoVITS/configs/s2{version}.json" - ) - config = { - "inp_text": inp_text, - "exp_name": exp_name, - "opt_dir": opt_dir, - "pretrained_s2G": pretrained_s2G_path, - "s2config_path": config_file, - } - gpu_names = gpu_numbers1c.split("-") - all_parts = len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), - } - ) - os.environ.update(config) - cmd = '"%s" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' % python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield ( - i18n("进度") + ": 1A-Done, 1B-Done, 1C-Doing", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - for p in ps1abc: - p.wait() - - opt = ["item_name\tsemantic_audio"] - for i_part in range(all_parts): - semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) - with open(semantic_path, "r", encoding="utf8") as f: - opt += 
f.read().strip("\n").split("\n") - os.remove(semantic_path) - with open(path_semantic, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - yield ( - i18n("进度") + ": 1A-Done, 1B-Done, 1C-Done", - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - ps1abc = [] - yield ( - process_info(process_name_1abc, "finish"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - except: - traceback.print_exc() - close1abc() - yield ( - process_info(process_name_1abc, "failed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - else: - yield ( - process_info(process_name_1abc, "occupy"), - {"__type__": "update", "visible": False}, - {"__type__": "update", "visible": True}, - ) - - -def close1abc(): - global ps1abc - if ps1abc != []: - for p1abc in ps1abc: - try: - kill_process(p1abc.pid, process_name_1abc) - except: - traceback.print_exc() - ps1abc = [] - return ( - process_info(process_name_1abc, "closed"), - {"__type__": "update", "visible": True}, - {"__type__": "update", "visible": False}, - ) - - -def switch_version(version_): - os.environ["version"] = version_ - global version - version = version_ - if pretrained_sovits_name[version] != "" and pretrained_gpt_name[version] != "": - ... - else: - gr.Warning(i18n("未下载模型") + ": " + version.upper()) - set_default() - return ( - {"__type__": "update", "value": pretrained_sovits_name[version]}, - {"__type__": "update", "value": pretrained_sovits_name[version].replace("s2G", "s2D")}, - {"__type__": "update", "value": pretrained_gpt_name[version]}, - {"__type__": "update", "value": pretrained_gpt_name[version]}, - {"__type__": "update", "value": pretrained_sovits_name[version]}, - {"__type__": "update", "value": default_batch_size, "maximum": default_max_batch_size}, - {"__type__": "update", "value": default_sovits_epoch, "maximum": max_sovits_epoch}, - {"__type__": "update", "value": default_sovits_save_every_epoch, "maximum": max_sovits_save_every_epoch}, - {"__type__": "update", "visible": True if version not in v3v4set else False}, - { - "__type__": "update", - "value": False if not if_force_ckpt else True, - "interactive": True if not if_force_ckpt else False, - }, - {"__type__": "update", "interactive": True, "value": False}, - {"__type__": "update", "visible": True if version in v3v4set else False}, - ) # {'__type__': 'update', "interactive": False if version in v3v4set else True, "value": False}, \ ####batch infer - - -if os.path.exists("GPT_SoVITS/text/G2PWModel"): - ... -else: - cmd = '"%s" -s GPT_SoVITS/download.py' % python_exec - p = Popen(cmd, shell=True) - p.wait() - - -def sync(text): - return {"__type__": "update", "value": text} - - -with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app: - gr.HTML( - top_html.format( - i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") - + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 
详见根目录LICENSE.") - ), - elem_classes="markdown", - ) - - with gr.Tabs(): - with gr.TabItem("0-" + i18n("前置数据集获取工具")): # 提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标 - with gr.Accordion(label="0a-" + i18n("UVR5人声伴奏分离&去混响去延迟工具")): - with gr.Row(): - with gr.Column(scale=3): - with gr.Row(): - uvr5_info = gr.Textbox(label=process_info(process_name_uvr5, "info")) - open_uvr5 = gr.Button( - value=process_info(process_name_uvr5, "open"), variant="primary", visible=True - ) - close_uvr5 = gr.Button( - value=process_info(process_name_uvr5, "close"), variant="primary", visible=False - ) - - with gr.Accordion(label="0b-" + i18n("语音切分工具")): - with gr.Row(): - with gr.Column(scale=3): - with gr.Row(): - slice_inp_path = gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"), value="") - slice_opt_root = gr.Textbox( - label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt" - ) - with gr.Row(): - threshold = gr.Textbox( - label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34" - ) - min_length = gr.Textbox( - label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"), - value="4000", - ) - min_interval = gr.Textbox(label=i18n("min_interval:最短切割间隔"), value="300") - hop_size = gr.Textbox( - label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"), - value="10", - ) - max_sil_kept = gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"), value="500") - with gr.Row(): - _max = gr.Slider( - minimum=0, - maximum=1, - step=0.05, - label=i18n("max:归一化后最大值多少"), - value=0.9, - interactive=True, - ) - alpha = gr.Slider( - minimum=0, - maximum=1, - step=0.05, - label=i18n("alpha_mix:混多少比例归一化后音频进来"), - value=0.25, - interactive=True, - ) - with gr.Row(): - n_process = gr.Slider( - minimum=1, - maximum=n_cpu, - step=1, - label=i18n("切割使用的进程数"), - value=4, - interactive=True, - ) - slicer_info = gr.Textbox(label=process_info(process_name_slice, "info")) - open_slicer_button = gr.Button( - value=process_info(process_name_slice, "open"), variant="primary", visible=True - ) - close_slicer_button = gr.Button( - value=process_info(process_name_slice, "close"), variant="primary", visible=False - ) - - # gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(不稳定,先别用,可能劣化模型效果!)")) - with gr.Row(visible=False): - with gr.Column(scale=3): - with gr.Row(): - denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="") - denoise_output_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/denoise_opt") - with gr.Row(): - denoise_info = gr.Textbox(label=process_info(process_name_denoise, "info")) - open_denoise_button = gr.Button( - value=process_info(process_name_denoise, "open"), variant="primary", visible=True - ) - close_denoise_button = gr.Button( - value=process_info(process_name_denoise, "close"), variant="primary", visible=False - ) - - with gr.Accordion(label="0c-" + i18n("语音识别工具")): - with gr.Row(): - with gr.Column(scale=3): - with gr.Row(): - asr_inp_dir = gr.Textbox( - label=i18n("输入文件夹路径"), value="D:\\GPT-SoVITS\\raw\\xxx", interactive=True - ) - asr_opt_dir = gr.Textbox( - label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True - ) - with gr.Row(): - asr_model = gr.Dropdown( - label=i18n("ASR 模型"), - choices=list(asr_dict.keys()), - interactive=True, - value="达摩 ASR (中文)", - ) - asr_size = gr.Dropdown( - label=i18n("ASR 模型尺寸"), choices=["large"], interactive=True, value="large" - ) - asr_lang = gr.Dropdown( - label=i18n("ASR 语言设置"), choices=["zh", "yue"], interactive=True, value="zh" - ) - asr_precision = gr.Dropdown( - label=i18n("数据类型精度"), choices=["float32"], interactive=True, value="float32" - ) - with gr.Row(): - asr_info = 
gr.Textbox(label=process_info(process_name_asr, "info")) - open_asr_button = gr.Button( - value=process_info(process_name_asr, "open"), variant="primary", visible=True - ) - close_asr_button = gr.Button( - value=process_info(process_name_asr, "close"), variant="primary", visible=False - ) - - def change_lang_choices(key): # 根据选择的模型修改可选的语言 - return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} - - def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 - return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} - - def change_precision_choices(key): # 根据选择的模型修改可选的语言 - if key == "Faster Whisper (多语种)": - if default_batch_size <= 4: - precision = "int8" - elif is_half: - precision = "float16" - else: - precision = "float32" - else: - precision = "float32" - return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} - - asr_model.change(change_lang_choices, [asr_model], [asr_lang]) - asr_model.change(change_size_choices, [asr_model], [asr_size]) - asr_model.change(change_precision_choices, [asr_model], [asr_precision]) - - with gr.Accordion(label="0d-" + i18n("语音文本校对标注工具")): - with gr.Row(): - with gr.Column(scale=3): - with gr.Row(): - path_list = gr.Textbox( - label=i18n("标注文件路径 (含文件后缀 *.list)"), - value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list", - interactive=True, - ) - label_info = gr.Textbox(label=process_info(process_name_subfix, "info")) - open_label = gr.Button( - value=process_info(process_name_subfix, "open"), variant="primary", visible=True - ) - close_label = gr.Button( - value=process_info(process_name_subfix, "close"), variant="primary", visible=False - ) - - open_label.click(change_label, [path_list], [label_info, open_label, close_label]) - close_label.click(change_label, [path_list], [label_info, open_label, close_label]) - open_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) - close_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) - - with gr.TabItem(i18n("1-GPT-SoVITS-TTS")): - with gr.Accordion(i18n("微调模型信息")): - with gr.Row(): - with gr.Row(equal_height=True): - exp_name = gr.Textbox( - label=i18n("*实验/模型名"), - value="xxx", - interactive=True, - scale=3, - ) - gpu_info_box = gr.Textbox( - label=i18n("显卡信息"), - value=gpu_info, - visible=True, - interactive=False, - scale=5, - ) - version_checkbox = gr.Radio( - label=i18n("训练模型的版本"), - value=version, - choices=["v1", "v2", "v4", "v2Pro", "v2ProPlus"], - scale=5, - ) - with gr.Accordion(label=i18n("预训练模型路径"), open=False): - with gr.Row(): - with gr.Row(equal_height=True): - pretrained_s1 = gr.Textbox( - label=i18n("预训练GPT模型路径"), - value=pretrained_gpt_name[version], - interactive=True, - lines=1, - max_lines=1, - scale=3, - ) - pretrained_s2G = gr.Textbox( - label=i18n("预训练SoVITS-G模型路径"), - value=pretrained_sovits_name[version], - interactive=True, - lines=1, - max_lines=1, - scale=5, - ) - pretrained_s2D = gr.Textbox( - label=i18n("预训练SoVITS-D模型路径"), - value=pretrained_sovits_name[version].replace("s2G", "s2D"), - interactive=True, - lines=1, - max_lines=1, - scale=5, - ) - - with gr.TabItem("1A-" + i18n("训练集格式化工具")): - with gr.Accordion(label=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹")): - with gr.Row(): - with gr.Row(): - inp_text = gr.Textbox( - label=i18n("*文本标注文件"), - value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list", - interactive=True, - scale=10, - ) - with gr.Row(): - inp_wav_dir = gr.Textbox( - label=i18n("*训练集音频文件目录"), - # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", - interactive=True, - 
placeholder=i18n( - "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" - ), - scale=10, - ) - - with gr.Accordion(label="1Aa-" + process_name_1a): - with gr.Row(): - with gr.Row(): - gpu_numbers1a = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - bert_pretrained_dir = gr.Textbox( - label=i18n("预训练中文BERT模型路径"), - value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", - interactive=False, - lines=2, - ) - with gr.Row(): - button1a_open = gr.Button( - value=process_info(process_name_1a, "open"), variant="primary", visible=True - ) - button1a_close = gr.Button( - value=process_info(process_name_1a, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1a = gr.Textbox(label=process_info(process_name_1a, "info")) - - with gr.Accordion(label="1Ab-" + process_name_1b): - with gr.Row(): - with gr.Row(): - gpu_numbers1Ba = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - cnhubert_base_dir = gr.Textbox( - label=i18n("预训练SSL模型路径"), - value="GPT_SoVITS/pretrained_models/chinese-hubert-base", - interactive=False, - lines=2, - ) - with gr.Row(): - button1b_open = gr.Button( - value=process_info(process_name_1b, "open"), variant="primary", visible=True - ) - button1b_close = gr.Button( - value=process_info(process_name_1b, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1b = gr.Textbox(label=process_info(process_name_1b, "info")) - - with gr.Accordion(label="1Ac-" + process_name_1c): - with gr.Row(): - with gr.Row(): - gpu_numbers1c = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s-%s" % (gpus, gpus), - interactive=True, - ) - with gr.Row(): - pretrained_s2G_ = gr.Textbox( - label=i18n("预训练SoVITS-G模型路径"), - value=pretrained_sovits_name[version], - interactive=False, - lines=2, - ) - with gr.Row(): - button1c_open = gr.Button( - value=process_info(process_name_1c, "open"), variant="primary", visible=True - ) - button1c_close = gr.Button( - value=process_info(process_name_1c, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1c = gr.Textbox(label=process_info(process_name_1c, "info")) - - with gr.Accordion(label="1Aabc-" + process_name_1abc): - with gr.Row(): - with gr.Row(): - button1abc_open = gr.Button( - value=process_info(process_name_1abc, "open"), variant="primary", visible=True - ) - button1abc_close = gr.Button( - value=process_info(process_name_1abc, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1abc = gr.Textbox(label=process_info(process_name_1abc, "info")) - - pretrained_s2G.change(sync, [pretrained_s2G], [pretrained_s2G_]) - open_asr_button.click( - open_asr, - [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], - [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], - ) - close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) - open_slicer_button.click( - open_slice, - [ - slice_inp_path, - slice_opt_root, - threshold, - min_length, - min_interval, - hop_size, - max_sil_kept, - _max, - alpha, - n_process, - ], - [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], - ) - close_slicer_button.click(close_slice, [], [slicer_info, open_slicer_button, close_slicer_button]) - open_denoise_button.click( - open_denoise, - [denoise_input_dir, denoise_output_dir], - [denoise_info, open_denoise_button, 
close_denoise_button, asr_inp_dir, inp_wav_dir], - ) - close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) - - button1a_open.click( - open1a, - [inp_text, inp_wav_dir, exp_name, gpu_numbers1a, bert_pretrained_dir], - [info1a, button1a_open, button1a_close], - ) - button1a_close.click(close1a, [], [info1a, button1a_open, button1a_close]) - button1b_open.click( - open1b, - [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], - [info1b, button1b_open, button1b_close], - ) - button1b_close.click(close1b, [], [info1b, button1b_open, button1b_close]) - button1c_open.click( - open1c, - [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1c, pretrained_s2G], - [info1c, button1c_open, button1c_close], - ) - button1c_close.click(close1c, [], [info1c, button1c_open, button1c_close]) - button1abc_open.click( - open1abc, - [ - version_checkbox, - inp_text, - inp_wav_dir, - exp_name, - gpu_numbers1a, - gpu_numbers1Ba, - gpu_numbers1c, - bert_pretrained_dir, - cnhubert_base_dir, - pretrained_s2G, - ], - [info1abc, button1abc_open, button1abc_close], - ) - button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]) - - with gr.TabItem("1B-" + i18n("微调训练")): - with gr.Accordion(label="1Ba-" + i18n("SoVITS 训练: 模型权重文件在 SoVITS_weights/")): - with gr.Row(): - with gr.Column(): - with gr.Row(): - batch_size = gr.Slider( - minimum=1, - maximum=default_max_batch_size, - step=1, - label=i18n("每张显卡的batch_size"), - value=default_batch_size, - interactive=True, - ) - total_epoch = gr.Slider( - minimum=1, - maximum=max_sovits_epoch, - step=1, - label=i18n("总训练轮数total_epoch,不建议太高"), - value=default_sovits_epoch, - interactive=True, - ) - with gr.Row(): - text_low_lr_rate = gr.Slider( - minimum=0.2, - maximum=0.6, - step=0.05, - label=i18n("文本模块学习率权重"), - value=0.4, - visible=True if version not in v3v4set else False, - ) # v3v4 not need - lora_rank = gr.Radio( - label=i18n("LoRA秩"), - value="32", - choices=["16", "32", "64", "128"], - visible=True if version in v3v4set else False, - ) # v1v2 not need - save_every_epoch = gr.Slider( - minimum=1, - maximum=max_sovits_save_every_epoch, - step=1, - label=i18n("保存频率save_every_epoch"), - value=default_sovits_save_every_epoch, - interactive=True, - ) - with gr.Column(): - with gr.Column(): - if_save_latest = gr.Checkbox( - label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), - value=True, - interactive=True, - show_label=True, - ) - if_save_every_weights = gr.Checkbox( - label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), - value=True, - interactive=True, - show_label=True, - ) - if_grad_ckpt = gr.Checkbox( - label="v3是否开启梯度检查点节省显存占用", - value=False, - interactive=True if version in v3v4set else False, - show_label=True, - visible=False, - ) # 只有V3s2可以用 - with gr.Row(): - gpu_numbers1Ba = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s" % (gpus), - interactive=True, - ) - with gr.Row(): - with gr.Row(): - button1Ba_open = gr.Button( - value=process_info(process_name_sovits, "open"), variant="primary", visible=True - ) - button1Ba_close = gr.Button( - value=process_info(process_name_sovits, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1Ba = gr.Textbox(label=process_info(process_name_sovits, "info")) - with gr.Accordion(label="1Bb-" + i18n("GPT 训练: 模型权重文件在 GPT_weights/")): - with gr.Row(): - with gr.Column(): - with gr.Row(): - batch_size1Bb = gr.Slider( - minimum=1, - maximum=40, - step=1, - label=i18n("每张显卡的batch_size"), - 
value=default_batch_size_s1, - interactive=True, - ) - total_epoch1Bb = gr.Slider( - minimum=2, - maximum=50, - step=1, - label=i18n("总训练轮数total_epoch"), - value=15, - interactive=True, - ) - with gr.Row(): - save_every_epoch1Bb = gr.Slider( - minimum=1, - maximum=50, - step=1, - label=i18n("保存频率save_every_epoch"), - value=5, - interactive=True, - ) - if_dpo = gr.Checkbox( - label=i18n("是否开启DPO训练选项(实验性)"), - value=False, - interactive=True, - show_label=True, - ) - with gr.Column(): - with gr.Column(): - if_save_latest1Bb = gr.Checkbox( - label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), - value=True, - interactive=True, - show_label=True, - ) - if_save_every_weights1Bb = gr.Checkbox( - label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), - value=True, - interactive=True, - show_label=True, - ) - with gr.Row(): - gpu_numbers1Bb = gr.Textbox( - label=i18n("GPU卡号以-分割,每个卡号一个进程"), - value="%s" % (gpus), - interactive=True, - ) - with gr.Row(): - with gr.Row(): - button1Bb_open = gr.Button( - value=process_info(process_name_gpt, "open"), variant="primary", visible=True - ) - button1Bb_close = gr.Button( - value=process_info(process_name_gpt, "close"), variant="primary", visible=False - ) - with gr.Row(): - info1Bb = gr.Textbox(label=process_info(process_name_gpt, "info")) - - button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]) - button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) - - with gr.TabItem("1C-" + i18n("推理")): - gr.Markdown( - value=i18n( - "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的几个是底模,体验5秒Zero Shot TTS不训练推理用。" - ) - ) - with gr.Row(): - with gr.Column(scale=2): - with gr.Row(): - GPT_dropdown = gr.Dropdown( - label=i18n("GPT模型列表"), - choices=GPT_names, - value=GPT_names[-1], - interactive=True, - ) - SoVITS_dropdown = gr.Dropdown( - label=i18n("SoVITS模型列表"), - choices=SoVITS_names, - value=SoVITS_names[0], - interactive=True, - ) - with gr.Column(scale=2): - with gr.Row(): - gpu_number_1C = gr.Textbox( - label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True - ) - refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") - refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) - with gr.Row(equal_height=True): - with gr.Row(): - batched_infer_enabled = gr.Checkbox( - label=i18n("启用并行推理版本"), value=False, interactive=True, show_label=True - ) - open_tts = gr.Button( - value=process_info(process_name_tts, "open"), variant="primary", visible=True - ) - close_tts = gr.Button( - value=process_info(process_name_tts, "close"), variant="primary", visible=False - ) - with gr.Column(): - tts_info = gr.Textbox(label=process_info(process_name_tts, "info"), scale=2) - open_tts.click( - change_tts_inference, - [ - bert_pretrained_dir, - cnhubert_base_dir, - gpu_number_1C, - GPT_dropdown, - SoVITS_dropdown, - batched_infer_enabled, - ], - [tts_info, open_tts, close_tts], - ) - close_tts.click( - change_tts_inference, - [ - bert_pretrained_dir, - cnhubert_base_dir, - gpu_number_1C, - GPT_dropdown, - SoVITS_dropdown, - batched_infer_enabled, - ], - [tts_info, open_tts, close_tts], - ) - button1Ba_open.click( - open1Ba, - [ - version_checkbox, - batch_size, - total_epoch, - exp_name, - text_low_lr_rate, - if_save_latest, - if_save_every_weights, - save_every_epoch, - gpu_numbers1Ba, - pretrained_s2G, - pretrained_s2D, - if_grad_ckpt, - lora_rank, - ], - [info1Ba, button1Ba_open, button1Ba_close, SoVITS_dropdown, GPT_dropdown], - ) - button1Bb_open.click( - open1Bb, - [ - batch_size1Bb, - total_epoch1Bb, - exp_name, - 
if_dpo, - if_save_latest1Bb, - if_save_every_weights1Bb, - save_every_epoch1Bb, - gpu_numbers1Bb, - pretrained_s1, - ], - [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], - ) - version_checkbox.change( - switch_version, - [version_checkbox], - [ - pretrained_s2G, - pretrained_s2D, - pretrained_s1, - GPT_dropdown, - SoVITS_dropdown, - batch_size, - total_epoch, - save_every_epoch, - text_low_lr_rate, - if_grad_ckpt, - batched_infer_enabled, - lora_rank, - ], - ) - - with gr.TabItem(i18n("2-GPT-SoVITS-变声")): - gr.Markdown(value=i18n("施工中,请静候佳音")) - - app.queue().launch( # concurrency_count=511, max_size=1022 - server_name="0.0.0.0", - inbrowser=True, - share=is_share, - server_port=webui_port_main, - # quiet=True, - ) +import os +import sys + +os.environ["version"] = version = "v2Pro" +now_dir = os.getcwd() +sys.path.insert(0, now_dir) +import warnings + +warnings.filterwarnings("ignore") +import json +import platform +import shutil +import signal + +import psutil +import torch +import yaml + +os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO" +torch.manual_seed(233333) +tmp = os.path.join(now_dir, "TEMP") +os.makedirs(tmp, exist_ok=True) +os.environ["TEMP"] = tmp +if os.path.exists(tmp): + for name in os.listdir(tmp): + if name == "jieba.cache": + continue + path = f"{tmp}/{name}" + delete = os.remove if os.path.isfile(path) else shutil.rmtree + try: + delete(path) + except Exception as e: + print(str(e)) + pass +import site +import traceback + +site_packages_roots = [] +for path in site.getsitepackages(): + if "packages" in path: + site_packages_roots.append(path) +if site_packages_roots == []: + site_packages_roots = [f"{now_dir}/runtime/Lib/site-packages"] +# os.environ["OPENBLAS_NUM_THREADS"] = "4" +os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" +os.environ["all_proxy"] = "" +for site_packages_root in site_packages_roots: + if os.path.exists(site_packages_root): + try: + with open(f"{site_packages_root}/users.pth", "w") as f: + f.write( + # "%s\n%s/runtime\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + f"{now_dir}\n{now_dir}/GPT_SoVITS/BigVGAN\n{now_dir}/tools\n{now_dir}/tools/asr\n{now_dir}/GPT_SoVITS\n{now_dir}/tools/uvr5" + ) + break + except PermissionError: + traceback.print_exc() +import shutil +import subprocess +from subprocess import Popen + +from tools.assets import css, js, top_html +from tools.i18n.i18n import I18nAuto, scan_language_list + +language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" +os.environ["language"] = language +i18n = I18nAuto(language=language) +from multiprocessing import cpu_count + +from config import ( + GPU_INDEX, + GPU_INFOS, + IS_GPU, + exp_root, + infer_device, + is_half, + is_share, + memset, + python_exec, + webui_port_infer_tts, + webui_port_main, + webui_port_subfix, + webui_port_uvr5, +) +from tools import my_utils +from tools.my_utils import check_details, check_for_existance + +os.environ["HF_ENDPOINT"] = "https://hf-mirror.com" +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" + +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu +import gradio as gr + +n_cpu = cpu_count() + +set_gpu_numbers = GPU_INDEX +gpu_infos = GPU_INFOS +mem = memset +is_gpu_ok = IS_GPU + +v3v4set = {"v3", "v4"} + + +def set_default(): + global \ + default_batch_size, \ + default_max_batch_size, \ + gpu_info, \ + default_sovits_epoch, \ + default_sovits_save_every_epoch, \ + max_sovits_epoch, \ + max_sovits_save_every_epoch, \ + default_batch_size_s1, \ + if_force_ckpt + if_force_ckpt = False 
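+    # UI defaults are derived from detected hardware below: per-GPU batch size is
+    # half of the smallest card's reported memory value for v1/v2-family models and
+    # an eighth for v3/v4; epoch and save-frequency caps are also tighter for v3/v4.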
+ gpu_info = "\n".join(gpu_infos) + if is_gpu_ok: + minmem = min(mem) + default_batch_size = minmem // 2 if version not in v3v4set else minmem // 8 + default_batch_size_s1 = minmem // 2 + else: + default_batch_size = default_batch_size_s1 = int(psutil.virtual_memory().total / 1024 / 1024 / 1024 / 4) + if version not in v3v4set: + default_sovits_epoch = 8 + default_sovits_save_every_epoch = 4 + max_sovits_epoch = 25 # 40 + max_sovits_save_every_epoch = 25 # 10 + else: + default_sovits_epoch = 2 + default_sovits_save_every_epoch = 1 + max_sovits_epoch = 16 # 40 # 3 #训太多=作死 + max_sovits_save_every_epoch = 10 # 10 # 3 + + default_batch_size = max(1, default_batch_size) + default_batch_size_s1 = max(1, default_batch_size_s1) + default_max_batch_size = default_batch_size * 3 + +# ==================== DRY UTILITY FUNCTIONS ==================== + +class ProcessManager: + """Centralized process management to eliminate repetitive code""" + + def __init__(self): + self.processes = {} + + def create_process(self, process_name, cmd, wait=False, env_updates=None): + """Create and manage a process with consistent UI updates""" + if self.processes.get(process_name) is None: + if env_updates: + os.environ.update(env_updates) + + print(cmd) + p = Popen(cmd, shell=True) + self.processes[process_name] = p + + if wait: + p.wait() + self.processes[process_name] = None + + return True + return False + + def kill_process(self, process_name, process_display_name=""): + """Kill a process with consistent error handling""" + if self.processes.get(process_name) is not None: + kill_process(self.processes[process_name].pid, process_display_name) + self.processes[process_name] = None + return True + return False + + def get_process_status(self, process_name): + """Get current process status""" + return self.processes.get(process_name) + +# Global process manager +process_manager = ProcessManager() + +def create_ui_yield(process_name, status, visible_states=None): + """Create consistent UI yield patterns""" + if visible_states is None: + visible_states = {"opened": (False, True), "closed": (True, False), "finish": (True, False), "occupy": (False, True)} + + visible_open, visible_close = visible_states.get(status, (True, False)) + + return ( + process_info(process_name, status), + {"__type__": "update", "visible": visible_open}, + {"__type__": "update", "visible": visible_close}, + ) + +def create_command_builder(): + """Factory for building subprocess commands""" + class CommandBuilder: + def __init__(self): + self.base_cmd = f'"{python_exec}" -s' + + def build(self, script_path, *args): + """Build a command with consistent formatting""" + cmd_parts = [self.base_cmd, script_path] + cmd_parts.extend(str(arg) for arg in args) + return " ".join(cmd_parts) + + def build_with_paths(self, script_path, *paths): + """Build command with quoted paths""" + cmd_parts = [self.base_cmd, script_path] + cmd_parts.extend(f'"{path}"' for path in paths) + return " ".join(cmd_parts) + + return CommandBuilder() + +# Global command builder +cmd_builder = create_command_builder() + +def setup_gpu_environment(gpu_numbers, is_half_flag=None): + """Setup GPU environment variables consistently""" + env_updates = { + "_CUDA_VISIBLE_DEVICES": fix_gpu_numbers(gpu_numbers.replace("-", ",")), + } + if is_half_flag is not None: + env_updates["is_half"] = str(is_half_flag) + return env_updates + +def create_multi_gpu_config(gpu_numbers, base_config, script_path): + """Create configuration for multi-GPU processing""" + gpu_names = gpu_numbers.split("-") + 
all_parts = len(gpu_names) + processes = [] + + for i_part in range(all_parts): + config = base_config.copy() + config.update({ + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + }) + os.environ.update(config) + + cmd = cmd_builder.build(script_path) + print(cmd) + p = Popen(cmd, shell=True) + processes.append(p) + + return processes + +def merge_output_files(opt_dir, file_pattern, output_file, header=None): + """Merge multiple output files into a single file""" + opt = [] + if header: + opt.append(header) + + all_parts = len([f for f in os.listdir(opt_dir) if f.startswith(file_pattern.split("-")[0])]) + + for i_part in range(all_parts): + file_path = f"{opt_dir}/{file_pattern.format(i_part)}" + if os.path.exists(file_path): + with open(file_path, "r", encoding="utf8") as f: + content = f.read().strip("\n").split("\n") + opt.extend(content) + os.remove(file_path) + + with open(output_file, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + + return opt + +# ==================== END DRY UTILITY FUNCTIONS ==================== + +def create_process_handler(process_name, process_display_name, script_path, env_updates=None, wait=False, additional_args=None): + """Generic process handler to eliminate repetitive open/close patterns""" + + def open_process(*args): + process_var = globals().get(f"p_{process_name}") + + if process_var is None: + cmd = cmd_builder.build(script_path, *(additional_args or [])) + + if process_manager.create_process(process_name, cmd, wait=wait, env_updates=env_updates): + yield create_ui_yield(process_display_name, "opened") + globals()[f"p_{process_name}"] = process_manager.get_process_status(process_name) + else: + yield create_ui_yield(process_display_name, "occupy") + else: + yield create_ui_yield(process_display_name, "occupy") + + def close_process(): + if process_manager.kill_process(process_name, process_display_name): + globals()[f"p_{process_name}"] = None + return create_ui_yield(process_display_name, "closed") + + return open_process, close_process + +set_default() + +gpus = "-".join(map(str, GPU_INDEX)) +default_gpu_numbers = infer_device.index + + +def fix_gpu_number(input): # 将越界的number强制改到界内 + try: + if int(input) not in set_gpu_numbers: + return default_gpu_numbers + except: + return input + return input + + +def fix_gpu_numbers(inputs): + output = [] + try: + for input in inputs.split(","): + output.append(str(fix_gpu_number(input))) + return ",".join(output) + except: + return inputs + + +from config import pretrained_gpt_name, pretrained_sovits_name + + +def check_pretrained_is_exist(version): + pretrained_model_list = ( + pretrained_sovits_name[version], + pretrained_sovits_name[version].replace("s2G", "s2D"), + pretrained_gpt_name[version], + "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + "GPT_SoVITS/pretrained_models/chinese-hubert-base", + ) + _ = "" + for i in pretrained_model_list: + if "s2Dv3" not in i and "s2Dv4" not in i and os.path.exists(i) == False: + _ += f"\n {i}" + if _: + print("warning: ", i18n("以下模型不存在:") + _) + + +check_pretrained_is_exist(version) +for key in pretrained_sovits_name.keys(): + if os.path.exists(pretrained_sovits_name[key]) == False: + pretrained_sovits_name[key] = "" +for key in pretrained_gpt_name.keys(): + if os.path.exists(pretrained_gpt_name[key]) == False: + pretrained_gpt_name[key] = "" + +from config import ( + GPT_weight_root, + GPT_weight_version2root, + SoVITS_weight_root, + SoVITS_weight_version2root, + 
change_choices, + get_weights_names, +) + +for root in SoVITS_weight_root + GPT_weight_root: + os.makedirs(root, exist_ok=True) +SoVITS_names, GPT_names = get_weights_names() + +p_label = None +p_uvr5 = None +p_asr = None +p_denoise = None +p_tts_inference = None + + +def kill_proc_tree(pid, including_parent=True): + try: + parent = psutil.Process(pid) + except psutil.NoSuchProcess: + # Process already terminated + return + + children = parent.children(recursive=True) + for child in children: + try: + os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + if including_parent: + try: + os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + + +system = platform.system() + + +def kill_process(pid, process_name=""): + if system == "Windows": + cmd = f"taskkill /t /f /pid {pid}" + # os.system(cmd) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + else: + kill_proc_tree(pid) + print(process_name + i18n("进程已终止")) + + +def process_info(process_name="", indicator=""): + if indicator == "opened": + return process_name + i18n("已开启") + elif indicator == "open": + return i18n("开启") + process_name + elif indicator == "closed": + return process_name + i18n("已关闭") + elif indicator == "close": + return i18n("关闭") + process_name + elif indicator == "running": + return process_name + i18n("运行中") + elif indicator == "occupy": + return process_name + i18n("占用中") + "," + i18n("需先终止才能开启下一次任务") + elif indicator == "finish": + return process_name + i18n("已完成") + elif indicator == "failed": + return process_name + i18n("失败") + elif indicator == "info": + return process_name + i18n("进程输出信息") + else: + return process_name + + +process_name_subfix = i18n("音频标注WebUI") + + +def change_label(path_list): + global p_label + if p_label is None: + check_for_existance([path_list]) + path_list = my_utils.clean_path(path_list) + cmd = cmd_builder.build_with_paths("tools/subfix_webui.py", path_list) + f" --webui_port {webui_port_subfix} --is_share {is_share}" + + if process_manager.create_process("label", cmd): + yield create_ui_yield(process_name_subfix, "opened") + p_label = process_manager.get_process_status("label") + else: + yield create_ui_yield(process_name_subfix, "occupy") + else: + process_manager.kill_process("label", process_name_subfix) + p_label = None + yield create_ui_yield(process_name_subfix, "closed") + + +process_name_uvr5 = i18n("人声分离WebUI") + + +def change_uvr5(): + global p_uvr5 + if p_uvr5 is None: + cmd = cmd_builder.build("tools/uvr5/webui.py", infer_device, is_half, webui_port_uvr5, is_share) + + if process_manager.create_process("uvr5", cmd): + yield create_ui_yield(process_name_uvr5, "opened") + p_uvr5 = process_manager.get_process_status("uvr5") + else: + yield create_ui_yield(process_name_uvr5, "occupy") + else: + process_manager.kill_process("uvr5", process_name_uvr5) + p_uvr5 = None + yield create_ui_yield(process_name_uvr5, "closed") + + +process_name_tts = i18n("TTS推理WebUI") + + +def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled): + global p_tts_inference + script = "GPT_SoVITS/inference_webui_fast.py" if batched_infer_enabled else "GPT_SoVITS/inference_webui.py" + cmd = cmd_builder.build_with_paths(script, language) + + if p_tts_inference is None: + env_updates = { + "gpt_path": gpt_path, + "sovits_path": sovits_path, + "cnhubert_base_path": cnhubert_base_path, + "bert_path": bert_path, + "_CUDA_VISIBLE_DEVICES": 
fix_gpu_number(gpu_number), + "is_half": str(is_half), + "infer_ttswebui": str(webui_port_infer_tts), + "is_share": str(is_share), + } + + if process_manager.create_process("tts_inference", cmd, env_updates=env_updates): + yield create_ui_yield(process_name_tts, "opened") + p_tts_inference = process_manager.get_process_status("tts_inference") + else: + yield create_ui_yield(process_name_tts, "occupy") + else: + process_manager.kill_process("tts_inference", process_name_tts) + p_tts_inference = None + yield create_ui_yield(process_name_tts, "closed") + + +from tools.asr.config import asr_dict + +process_name_asr = i18n("语音识别") + + +def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision): + global p_asr + if p_asr is None: + asr_inp_dir = my_utils.clean_path(asr_inp_dir) + asr_opt_dir = my_utils.clean_path(asr_opt_dir) + check_for_existance([asr_inp_dir]) + + cmd = cmd_builder.build("tools/asr/" + asr_dict[asr_model]["path"]) + cmd += f' -i "{asr_inp_dir}" -o "{asr_opt_dir}" -s {asr_model_size} -l {asr_lang} -p {asr_precision}' + + output_file_name = os.path.basename(asr_inp_dir) + output_folder = asr_opt_dir or "output/asr_opt" + output_file_path = os.path.abspath(f"{output_folder}/{output_file_name}.list") + + yield create_ui_yield(process_name_asr, "opened") + ({"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}) + + if process_manager.create_process("asr", cmd, wait=True): + yield create_ui_yield(process_name_asr, "finish") + ( + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": output_file_path}, + {"__type__": "update", "value": asr_inp_dir}, + ) + p_asr = None + else: + yield create_ui_yield(process_name_asr, "occupy") + ({"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}) + + +def close_asr(): + global p_asr + if process_manager.kill_process("asr", process_name_asr): + p_asr = None + return create_ui_yield(process_name_asr, "closed") + + +process_name_denoise = i18n("语音降噪") + + +def open_denoise(denoise_inp_dir, denoise_opt_dir): + global p_denoise + if p_denoise == None: + denoise_inp_dir = my_utils.clean_path(denoise_inp_dir) + denoise_opt_dir = my_utils.clean_path(denoise_opt_dir) + check_for_existance([denoise_inp_dir]) + + precision = "float16" if is_half == True else "float32" + cmd = cmd_builder.build_with_paths("tools/cmd-denoise.py", denoise_inp_dir, denoise_opt_dir) + f" -p {precision}" + + yield create_ui_yield(process_name_denoise, "opened") + ({"__type__": "update"}, {"__type__": "update"}) + + if process_manager.create_process("denoise", cmd, wait=True): + yield create_ui_yield(process_name_denoise, "finish") + ( + {"__type__": "update", "value": denoise_opt_dir}, + {"__type__": "update", "value": denoise_opt_dir}, + ) + p_denoise = None + else: + yield create_ui_yield(process_name_denoise, "occupy") + ({"__type__": "update"}, {"__type__": "update"}) + + +def close_denoise(): + global p_denoise + if process_manager.kill_process("denoise", process_name_denoise): + p_denoise = None + return create_ui_yield(process_name_denoise, "closed") + + +p_train_SoVITS = None +process_name_sovits = i18n("SoVITS训练") + + +def open1Ba( + version, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, +): + global p_train_SoVITS + if p_train_SoVITS == None: + exp_name = exp_name.rstrip(" ") + config_file = ( + 
"GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + with open(config_file) as f: + data = f.read() + data = json.loads(data) + s2_dir = f"{exp_root}/{exp_name}" + os.makedirs(f"{s2_dir}/logs_s2_{version}", exist_ok=True) + if check_for_existance([s2_dir], is_train=True): + check_details([s2_dir], is_train=True) + if is_half == False: + data["train"]["fp16_run"] = False + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["train"]["text_low_lr_rate"] = text_low_lr_rate + data["train"]["pretrained_s2G"] = pretrained_s2G + data["train"]["pretrained_s2D"] = pretrained_s2D + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["save_every_epoch"] = save_every_epoch + data["train"]["gpu_numbers"] = gpu_numbers1Ba + data["train"]["grad_ckpt"] = if_grad_ckpt + data["train"]["lora_rank"] = lora_rank + data["model"]["version"] = version + data["data"]["exp_dir"] = data["s2_ckpt_dir"] = s2_dir + data["save_weight_dir"] = SoVITS_weight_version2root[version] + data["name"] = exp_name + data["version"] = version + tmp_config_path = f"{tmp}/tmp_s2.json" + with open(tmp_config_path, "w") as f: + f.write(json.dumps(data)) + if version in ["v1", "v2", "v2Pro", "v2ProPlus"]: + cmd = f'"{python_exec}" -s GPT_SoVITS/s2_train.py --config "{tmp_config_path}"' + else: + cmd = f'"{python_exec}" -s GPT_SoVITS/s2_train_v3_lora.py --config "{tmp_config_path}"' + yield ( + process_info(process_name_sovits, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_train_SoVITS = Popen(cmd, shell=True) + p_train_SoVITS.wait() + p_train_SoVITS = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_sovits, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, + ) + else: + yield ( + process_info(process_name_sovits, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close1Ba(): + global p_train_SoVITS + if p_train_SoVITS is not None: + kill_process(p_train_SoVITS.pid, process_name_sovits) + p_train_SoVITS = None + return ( + process_info(process_name_sovits, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +p_train_GPT = None +process_name_gpt = i18n("GPT训练") + + +def open1Bb( + batch_size, + total_epoch, + exp_name, + if_dpo, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers, + pretrained_s1, +): + global p_train_GPT + if p_train_GPT == None: + exp_name = exp_name.rstrip(" ") + with open( + "GPT_SoVITS/configs/s1longer.yaml" if version == "v1" else "GPT_SoVITS/configs/s1longer-v2.yaml" + ) as f: + data = f.read() + data = yaml.load(data, Loader=yaml.FullLoader) + s1_dir = f"{exp_root}/{exp_name}" + os.makedirs(f"{s1_dir}/logs_s1", exist_ok=True) + if check_for_existance([s1_dir], is_train=True): + check_details([s1_dir], is_train=True) + if is_half == False: + data["train"]["precision"] = "32" + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"] = batch_size + data["train"]["epochs"] = total_epoch + data["pretrained_s1"] = pretrained_s1 
+ data["train"]["save_every_n_epoch"] = save_every_epoch + data["train"]["if_save_every_weights"] = if_save_every_weights + data["train"]["if_save_latest"] = if_save_latest + data["train"]["if_dpo"] = if_dpo + data["train"]["half_weights_save_dir"] = GPT_weight_version2root[version] + data["train"]["exp_name"] = exp_name + data["train_semantic_path"] = f"{s1_dir}/6-name2semantic.tsv" + data["train_phoneme_path"] = f"{s1_dir}/2-name2text.txt" + data["output_dir"] = f"{s1_dir}/logs_s1_{version}" + # data["version"]=version + + os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_numbers(gpu_numbers.replace("-", ",")) + os.environ["hz"] = "25hz" + tmp_config_path = f"{tmp}/tmp_s1.yaml" + with open(tmp_config_path, "w") as f: + f.write(yaml.dump(data, default_flow_style=False)) + # cmd = f'"{python_exec}" GPT_SoVITS/s1_train.py --config_file "{tmp_config_path}" --train_semantic_path "{s1_dir}/6-name2semantic.tsv" --train_phoneme_path "{s1_dir}/2-name2text.txt" --output_dir "{s1_dir}/logs_s1"' + cmd = f'"{python_exec}" -s GPT_SoVITS/s1_train.py --config_file "{tmp_config_path}" ' + yield ( + process_info(process_name_gpt, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + print(cmd) + p_train_GPT = Popen(cmd, shell=True) + p_train_GPT.wait() + p_train_GPT = None + SoVITS_dropdown_update, GPT_dropdown_update = change_choices() + yield ( + process_info(process_name_gpt, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + SoVITS_dropdown_update, + GPT_dropdown_update, + ) + else: + yield ( + process_info(process_name_gpt, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close1Bb(): + global p_train_GPT + if p_train_GPT is not None: + kill_process(p_train_GPT.pid, process_name_gpt) + p_train_GPT = None + return ( + process_info(process_name_gpt, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps_slice = [] +process_name_slice = i18n("语音切分") + + +def open_slice(inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts): + global ps_slice + inp = my_utils.clean_path(inp) + opt_root = my_utils.clean_path(opt_root) + check_for_existance([inp]) + if os.path.exists(inp) == False: + yield ( + i18n("输入路径不存在"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + return + if os.path.isfile(inp): + n_parts = 1 + elif os.path.isdir(inp): + pass + else: + yield ( + i18n("输入路径存在但不可用"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + return + if ps_slice == []: + for i_part in range(n_parts): + cmd = f'"{python_exec}" -s tools/slice_audio.py "{inp}" "{opt_root}" {threshold} {min_length} {min_interval} {hop_size} {max_sil_kept} {_max} {alpha} {i_part} {n_parts}' + print(cmd) + p = Popen(cmd, shell=True) + ps_slice.append(p) + yield ( + process_info(process_name_slice, "opened"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + for p in ps_slice: + p.wait() + ps_slice = [] + yield ( + 
process_info(process_name_slice, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + {"__type__": "update", "value": opt_root}, + ) + else: + yield ( + process_info(process_name_slice, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + {"__type__": "update"}, + {"__type__": "update"}, + {"__type__": "update"}, + ) + + +def close_slice(): + global ps_slice + if ps_slice != []: + for p_slice in ps_slice: + try: + kill_process(p_slice.pid, process_name_slice) + except: + traceback.print_exc() + ps_slice = [] + return ( + process_info(process_name_slice, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1a = [] +process_name_1a = i18n("文本分词与特征提取") + + +def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir): + global ps1a + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + exp_name = exp_name.rstrip(" ") + if ps1a == []: + opt_dir = f"{exp_root}/{exp_name}" + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + "is_half": str(is_half), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/1-get-text.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1a.append(p) + yield ( + process_info(process_name_1a, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1a: + p.wait() + opt = [] + for i_part in range(all_parts): + txt_path = f"{opt_dir}/2-name2text-{i_part}.txt" + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + path_text = f"{opt_dir}/2-name2text.txt" + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1a = [] + if len("".join(opt)) > 0: + yield ( + process_info(process_name_1a, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1a, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1a(): + global ps1a + if ps1a != []: + for p1a in ps1a: + try: + kill_process(p1a.pid, process_name_1a) + except: + traceback.print_exc() + ps1a = [] + return ( + process_info(process_name_1a, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +sv_path = "GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt" +ps1b = [] +process_name_1b = i18n("语音自监督特征提取") + + +def open1b(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir): + global ps1b + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = 
my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + exp_name = exp_name.rstrip(" ") + if ps1b == []: + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": f"{exp_root}/{exp_name}", + "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + yield ( + process_info(process_name_1b, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1b: + p.wait() + ps1b = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + for p in ps1b: + p.wait() + ps1b = [] + yield ( + process_info(process_name_1b, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1b, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1b(): + global ps1b + if ps1b != []: + for p1b in ps1b: + try: + kill_process(p1b.pid, process_name_1b) + except: + traceback.print_exc() + ps1b = [] + return ( + process_info(process_name_1b, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1c = [] +process_name_1c = i18n("语义Token提取") + + +def open1c(version, inp_text, inp_wav_dir, exp_name, gpu_numbers, pretrained_s2G_path): + global ps1c + inp_text = my_utils.clean_path(inp_text) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + exp_name = exp_name.rstrip(" ") + if ps1c == []: + opt_dir = f"{exp_root}/{exp_name}" + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": config_file, + "is_half": str(is_half), + } + gpu_names = gpu_numbers.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1c.append(p) + yield ( + process_info(process_name_1c, "running"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1c: + p.wait() + opt = ["item_name\tsemantic_audio"] + path_semantic = f"{opt_dir}/6-name2semantic.tsv" + for i_part in range(all_parts): + semantic_path = 
f"{opt_dir}/6-name2semantic-{i_part}.tsv" + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1c = [] + yield ( + process_info(process_name_1c, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1c, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1c(): + global ps1c + if ps1c != []: + for p1c in ps1c: + try: + kill_process(p1c.pid, process_name_1c) + except: + traceback.print_exc() + ps1c = [] + return ( + process_info(process_name_1c, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +ps1abc = [] +process_name_1abc = i18n("训练集格式化一键三连") + + +def open1abc( + version, + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + ssl_pretrained_dir, + pretrained_s2G_path, +): + global ps1abc + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True): + check_details([inp_text, inp_wav_dir], is_dataset_processing=True) + exp_name = exp_name.rstrip(" ") + if ps1abc == []: + opt_dir = f"{exp_root}/{exp_name}" + try: + #############################1a + path_text = f"{opt_dir}/2-name2text.txt" + if os.path.exists(path_text) == False or ( + os.path.exists(path_text) == True + and len(open(path_text, "r", encoding="utf8").read().strip("\n").split("\n")) < 2 + ): + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "bert_pretrained_dir": bert_pretrained_dir, + "is_half": str(is_half), + } + gpu_names = gpu_numbers1a.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/1-get-text.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = [] + for i_part in range(all_parts): # txt_path=f"{opt_dir}/2-name2text-{i_part}.txt" + txt_path = f"{opt_dir}/2-name2text-{i_part}.txt" + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + assert len("".join(opt)) > 0, process_info(process_name_1a, "failed") + yield ( + i18n("进度") + ": 1A-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + #############################1b + config = { + "inp_text": inp_text, + "inp_wav_dir": inp_wav_dir, + "exp_name": exp_name, + "opt_dir": opt_dir, + "cnhubert_base_dir": ssl_pretrained_dir, + "sv_path": sv_path, + } + gpu_names = gpu_numbers1Ba.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = 
f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + ps1abc = [] + if "Pro" in version: + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/2-get-sv.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + for p in ps1abc: + p.wait() + ps1abc = [] + yield ( + i18n("进度") + ": 1A-Done, 1B-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + #############################1c + path_semantic = f"{opt_dir}/6-name2semantic.tsv" + if os.path.exists(path_semantic) == False or ( + os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31 + ): + config_file = ( + "GPT_SoVITS/configs/s2.json" + if version not in {"v2Pro", "v2ProPlus"} + else f"GPT_SoVITS/configs/s2{version}.json" + ) + config = { + "inp_text": inp_text, + "exp_name": exp_name, + "opt_dir": opt_dir, + "pretrained_s2G": pretrained_s2G_path, + "s2config_path": config_file, + } + gpu_names = gpu_numbers1c.split("-") + all_parts = len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": fix_gpu_number(gpu_names[i_part]), + } + ) + os.environ.update(config) + cmd = f'"{python_exec}" -s GPT_SoVITS/prepare_datasets/3-get-semantic.py' + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Doing", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + for p in ps1abc: + p.wait() + + opt = ["item_name\tsemantic_audio"] + for i_part in range(all_parts): + semantic_path = f"{opt_dir}/6-name2semantic-{i_part}.tsv" + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + yield ( + i18n("进度") + ": 1A-Done, 1B-Done, 1C-Done", + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + ps1abc = [] + yield ( + process_info(process_name_1abc, "finish"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + except: + traceback.print_exc() + close1abc() + yield ( + process_info(process_name_1abc, "failed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + else: + yield ( + process_info(process_name_1abc, "occupy"), + {"__type__": "update", "visible": False}, + {"__type__": "update", "visible": True}, + ) + + +def close1abc(): + global ps1abc + if ps1abc != []: + for p1abc in ps1abc: + try: + kill_process(p1abc.pid, process_name_1abc) + except: + traceback.print_exc() + ps1abc = [] + return ( + process_info(process_name_1abc, "closed"), + {"__type__": "update", "visible": True}, + {"__type__": "update", "visible": False}, + ) + + +def switch_version(version_): + os.environ["version"] = version_ + global version + version = version_ + if pretrained_sovits_name[version] != "" and pretrained_gpt_name[version] != "": + ... 
+ else: + gr.Warning(i18n("未下载模型") + ": " + version.upper()) + set_default() + return ( + {"__type__": "update", "value": pretrained_sovits_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version].replace("s2G", "s2D")}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_gpt_name[version]}, + {"__type__": "update", "value": pretrained_sovits_name[version]}, + {"__type__": "update", "value": default_batch_size, "maximum": default_max_batch_size}, + {"__type__": "update", "value": default_sovits_epoch, "maximum": max_sovits_epoch}, + {"__type__": "update", "value": default_sovits_save_every_epoch, "maximum": max_sovits_save_every_epoch}, + {"__type__": "update", "visible": True if version not in v3v4set else False}, + { + "__type__": "update", + "value": False if not if_force_ckpt else True, + "interactive": True if not if_force_ckpt else False, + }, + {"__type__": "update", "interactive": True, "value": False}, + {"__type__": "update", "visible": True if version in v3v4set else False}, + ) # {'__type__': 'update', "interactive": False if version in v3v4set else True, "value": False}, \ ####batch infer + + +if os.path.exists("GPT_SoVITS/text/G2PWModel"): + ... +else: + cmd = f'"{python_exec}" -s GPT_SoVITS/download.py' + p = Popen(cmd, shell=True) + p.wait() + + +def sync(text): + return {"__type__": "update", "value": text} + + +with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app: + gr.HTML( + top_html.format( + i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + ), + elem_classes="markdown", + ) + + with gr.Tabs(): + with gr.TabItem("0-" + i18n("前置数据集获取工具")): # 提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标 + with gr.Accordion(label="0a-" + i18n("UVR5人声伴奏分离&去混响去延迟工具")): + with gr.Row(): + with gr.Column(scale=3): + with gr.Row(): + uvr5_info = gr.Textbox(label=process_info(process_name_uvr5, "info")) + open_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "open"), variant="primary", visible=True + ) + close_uvr5 = gr.Button( + value=process_info(process_name_uvr5, "close"), variant="primary", visible=False + ) + + with gr.Accordion(label="0b-" + i18n("语音切分工具")): + with gr.Row(): + with gr.Column(scale=3): + with gr.Row(): + slice_inp_path = gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"), value="") + slice_opt_root = gr.Textbox( + label=i18n("切分后的子音频的输出根目录"), value="output/slicer_opt" + ) + with gr.Row(): + threshold = gr.Textbox( + label=i18n("threshold:音量小于这个值视作静音的备选切割点"), value="-34" + ) + min_length = gr.Textbox( + label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"), + value="4000", + ) + min_interval = gr.Textbox(label=i18n("min_interval:最短切割间隔"), value="300") + hop_size = gr.Textbox( + label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"), + value="10", + ) + max_sil_kept = gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"), value="500") + with gr.Row(): + _max = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("max:归一化后最大值多少"), + value=0.9, + interactive=True, + ) + alpha = gr.Slider( + minimum=0, + maximum=1, + step=0.05, + label=i18n("alpha_mix:混多少比例归一化后音频进来"), + value=0.25, + interactive=True, + ) + with gr.Row(): + n_process = gr.Slider( + minimum=1, + maximum=n_cpu, + step=1, + label=i18n("切割使用的进程数"), + value=4, + interactive=True, + ) + slicer_info = gr.Textbox(label=process_info(process_name_slice, "info")) + open_slicer_button = gr.Button( + value=process_info(process_name_slice, 
"open"), variant="primary", visible=True + ) + close_slicer_button = gr.Button( + value=process_info(process_name_slice, "close"), variant="primary", visible=False + ) + + # gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(不稳定,先别用,可能劣化模型效果!)")) + with gr.Row(visible=False): + with gr.Column(scale=3): + with gr.Row(): + denoise_input_dir = gr.Textbox(label=i18n("输入文件夹路径"), value="") + denoise_output_dir = gr.Textbox(label=i18n("输出文件夹路径"), value="output/denoise_opt") + with gr.Row(): + denoise_info = gr.Textbox(label=process_info(process_name_denoise, "info")) + open_denoise_button = gr.Button( + value=process_info(process_name_denoise, "open"), variant="primary", visible=True + ) + close_denoise_button = gr.Button( + value=process_info(process_name_denoise, "close"), variant="primary", visible=False + ) + + with gr.Accordion(label="0c-" + i18n("语音识别工具")): + with gr.Row(): + with gr.Column(scale=3): + with gr.Row(): + asr_inp_dir = gr.Textbox( + label=i18n("输入文件夹路径"), value="D:\\GPT-SoVITS\\raw\\xxx", interactive=True + ) + asr_opt_dir = gr.Textbox( + label=i18n("输出文件夹路径"), value="output/asr_opt", interactive=True + ) + with gr.Row(): + asr_model = gr.Dropdown( + label=i18n("ASR 模型"), + choices=list(asr_dict.keys()), + interactive=True, + value="达摩 ASR (中文)", + ) + asr_size = gr.Dropdown( + label=i18n("ASR 模型尺寸"), choices=["large"], interactive=True, value="large" + ) + asr_lang = gr.Dropdown( + label=i18n("ASR 语言设置"), choices=["zh", "yue"], interactive=True, value="zh" + ) + asr_precision = gr.Dropdown( + label=i18n("数据类型精度"), choices=["float32"], interactive=True, value="float32" + ) + with gr.Row(): + asr_info = gr.Textbox(label=process_info(process_name_asr, "info")) + open_asr_button = gr.Button( + value=process_info(process_name_asr, "open"), variant="primary", visible=True + ) + close_asr_button = gr.Button( + value=process_info(process_name_asr, "close"), variant="primary", visible=False + ) + + def change_lang_choices(key): # 根据选择的模型修改可选的语言 + return {"__type__": "update", "choices": asr_dict[key]["lang"], "value": asr_dict[key]["lang"][0]} + + def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 + return {"__type__": "update", "choices": asr_dict[key]["size"], "value": asr_dict[key]["size"][-1]} + + def change_precision_choices(key): # 根据选择的模型修改可选的语言 + if key == "Faster Whisper (多语种)": + if default_batch_size <= 4: + precision = "int8" + elif is_half: + precision = "float16" + else: + precision = "float32" + else: + precision = "float32" + return {"__type__": "update", "choices": asr_dict[key]["precision"], "value": precision} + + asr_model.change(change_lang_choices, [asr_model], [asr_lang]) + asr_model.change(change_size_choices, [asr_model], [asr_size]) + asr_model.change(change_precision_choices, [asr_model], [asr_precision]) + + with gr.Accordion(label="0d-" + i18n("语音文本校对标注工具")): + with gr.Row(): + with gr.Column(scale=3): + with gr.Row(): + path_list = gr.Textbox( + label=i18n("标注文件路径 (含文件后缀 *.list)"), + value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list", + interactive=True, + ) + label_info = gr.Textbox(label=process_info(process_name_subfix, "info")) + open_label = gr.Button( + value=process_info(process_name_subfix, "open"), variant="primary", visible=True + ) + close_label = gr.Button( + value=process_info(process_name_subfix, "close"), variant="primary", visible=False + ) + + open_label.click(change_label, [path_list], [label_info, open_label, close_label]) + close_label.click(change_label, [path_list], [label_info, open_label, close_label]) + open_uvr5.click(change_uvr5, [], 
[uvr5_info, open_uvr5, close_uvr5]) + close_uvr5.click(change_uvr5, [], [uvr5_info, open_uvr5, close_uvr5]) + + with gr.TabItem(i18n("1-GPT-SoVITS-TTS")): + with gr.Accordion(i18n("微调模型信息")): + with gr.Row(): + with gr.Row(equal_height=True): + exp_name = gr.Textbox( + label=i18n("*实验/模型名"), + value="xxx", + interactive=True, + scale=3, + ) + gpu_info_box = gr.Textbox( + label=i18n("显卡信息"), + value=gpu_info, + visible=True, + interactive=False, + scale=5, + ) + version_checkbox = gr.Radio( + label=i18n("训练模型的版本"), + value=version, + choices=["v1", "v2", "v4", "v2Pro", "v2ProPlus"], + scale=5, + ) + with gr.Accordion(label=i18n("预训练模型路径"), open=False): + with gr.Row(): + with gr.Row(equal_height=True): + pretrained_s1 = gr.Textbox( + label=i18n("预训练GPT模型路径"), + value=pretrained_gpt_name[version], + interactive=True, + lines=1, + max_lines=1, + scale=3, + ) + pretrained_s2G = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[version], + interactive=True, + lines=1, + max_lines=1, + scale=5, + ) + pretrained_s2D = gr.Textbox( + label=i18n("预训练SoVITS-D模型路径"), + value=pretrained_sovits_name[version].replace("s2G", "s2D"), + interactive=True, + lines=1, + max_lines=1, + scale=5, + ) + + with gr.TabItem("1A-" + i18n("训练集格式化工具")): + with gr.Accordion(label=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹")): + with gr.Row(): + with gr.Row(): + inp_text = gr.Textbox( + label=i18n("*文本标注文件"), + value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list", + interactive=True, + scale=10, + ) + with gr.Row(): + inp_wav_dir = gr.Textbox( + label=i18n("*训练集音频文件目录"), + # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", + interactive=True, + placeholder=i18n( + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。" + ), + scale=10, + ) + + with gr.Accordion(label="1Aa-" + process_name_1a): + with gr.Row(): + with gr.Row(): + gpu_numbers1a = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value=f"{gpus}-{gpus}", + interactive=True, + ) + with gr.Row(): + bert_pretrained_dir = gr.Textbox( + label=i18n("预训练中文BERT模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large", + interactive=False, + lines=2, + ) + with gr.Row(): + button1a_open = gr.Button( + value=process_info(process_name_1a, "open"), variant="primary", visible=True + ) + button1a_close = gr.Button( + value=process_info(process_name_1a, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1a = gr.Textbox(label=process_info(process_name_1a, "info")) + + with gr.Accordion(label="1Ab-" + process_name_1b): + with gr.Row(): + with gr.Row(): + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value=f"{gpus}-{gpus}", + interactive=True, + ) + with gr.Row(): + cnhubert_base_dir = gr.Textbox( + label=i18n("预训练SSL模型路径"), + value="GPT_SoVITS/pretrained_models/chinese-hubert-base", + interactive=False, + lines=2, + ) + with gr.Row(): + button1b_open = gr.Button( + value=process_info(process_name_1b, "open"), variant="primary", visible=True + ) + button1b_close = gr.Button( + value=process_info(process_name_1b, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1b = gr.Textbox(label=process_info(process_name_1b, "info")) + + with gr.Accordion(label="1Ac-" + process_name_1c): + with gr.Row(): + with gr.Row(): + gpu_numbers1c = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value=f"{gpus}-{gpus}", + interactive=True, + ) + with gr.Row(): + pretrained_s2G_ = gr.Textbox( + label=i18n("预训练SoVITS-G模型路径"), + value=pretrained_sovits_name[version], + interactive=False, + lines=2, + 
) + with gr.Row(): + button1c_open = gr.Button( + value=process_info(process_name_1c, "open"), variant="primary", visible=True + ) + button1c_close = gr.Button( + value=process_info(process_name_1c, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1c = gr.Textbox(label=process_info(process_name_1c, "info")) + + with gr.Accordion(label="1Aabc-" + process_name_1abc): + with gr.Row(): + with gr.Row(): + button1abc_open = gr.Button( + value=process_info(process_name_1abc, "open"), variant="primary", visible=True + ) + button1abc_close = gr.Button( + value=process_info(process_name_1abc, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1abc = gr.Textbox(label=process_info(process_name_1abc, "info")) + + pretrained_s2G.change(sync, [pretrained_s2G], [pretrained_s2G_]) + open_asr_button.click( + open_asr, + [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], + [asr_info, open_asr_button, close_asr_button, path_list, inp_text, inp_wav_dir], + ) + close_asr_button.click(close_asr, [], [asr_info, open_asr_button, close_asr_button]) + open_slicer_button.click( + open_slice, + [ + slice_inp_path, + slice_opt_root, + threshold, + min_length, + min_interval, + hop_size, + max_sil_kept, + _max, + alpha, + n_process, + ], + [slicer_info, open_slicer_button, close_slicer_button, asr_inp_dir, denoise_input_dir, inp_wav_dir], + ) + close_slicer_button.click(close_slice, [], [slicer_info, open_slicer_button, close_slicer_button]) + open_denoise_button.click( + open_denoise, + [denoise_input_dir, denoise_output_dir], + [denoise_info, open_denoise_button, close_denoise_button, asr_inp_dir, inp_wav_dir], + ) + close_denoise_button.click(close_denoise, [], [denoise_info, open_denoise_button, close_denoise_button]) + + button1a_open.click( + open1a, + [inp_text, inp_wav_dir, exp_name, gpu_numbers1a, bert_pretrained_dir], + [info1a, button1a_open, button1a_close], + ) + button1a_close.click(close1a, [], [info1a, button1a_open, button1a_close]) + button1b_open.click( + open1b, + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1Ba, cnhubert_base_dir], + [info1b, button1b_open, button1b_close], + ) + button1b_close.click(close1b, [], [info1b, button1b_open, button1b_close]) + button1c_open.click( + open1c, + [version_checkbox, inp_text, inp_wav_dir, exp_name, gpu_numbers1c, pretrained_s2G], + [info1c, button1c_open, button1c_close], + ) + button1c_close.click(close1c, [], [info1c, button1c_open, button1c_close]) + button1abc_open.click( + open1abc, + [ + version_checkbox, + inp_text, + inp_wav_dir, + exp_name, + gpu_numbers1a, + gpu_numbers1Ba, + gpu_numbers1c, + bert_pretrained_dir, + cnhubert_base_dir, + pretrained_s2G, + ], + [info1abc, button1abc_open, button1abc_close], + ) + button1abc_close.click(close1abc, [], [info1abc, button1abc_open, button1abc_close]) + + with gr.TabItem("1B-" + i18n("微调训练")): + with gr.Accordion(label="1Ba-" + i18n("SoVITS 训练: 模型权重文件在 SoVITS_weights/")): + with gr.Row(): + with gr.Column(): + with gr.Row(): + batch_size = gr.Slider( + minimum=1, + maximum=default_max_batch_size, + step=1, + label=i18n("每张显卡的batch_size"), + value=default_batch_size, + interactive=True, + ) + total_epoch = gr.Slider( + minimum=1, + maximum=max_sovits_epoch, + step=1, + label=i18n("总训练轮数total_epoch,不建议太高"), + value=default_sovits_epoch, + interactive=True, + ) + with gr.Row(): + text_low_lr_rate = gr.Slider( + minimum=0.2, + maximum=0.6, + step=0.05, + label=i18n("文本模块学习率权重"), + value=0.4, + visible=True if version not in v3v4set 
else False, + ) # v3v4 not need + lora_rank = gr.Radio( + label=i18n("LoRA秩"), + value="32", + choices=["16", "32", "64", "128"], + visible=True if version in v3v4set else False, + ) # v1v2 not need + save_every_epoch = gr.Slider( + minimum=1, + maximum=max_sovits_save_every_epoch, + step=1, + label=i18n("保存频率save_every_epoch"), + value=default_sovits_save_every_epoch, + interactive=True, + ) + with gr.Column(): + with gr.Column(): + if_save_latest = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + if_grad_ckpt = gr.Checkbox( + label="v3是否开启梯度检查点节省显存占用", + value=False, + interactive=True if version in v3v4set else False, + show_label=True, + visible=False, + ) # 只有V3s2可以用 + with gr.Row(): + gpu_numbers1Ba = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value=f"{gpus}", + interactive=True, + ) + with gr.Row(): + with gr.Row(): + button1Ba_open = gr.Button( + value=process_info(process_name_sovits, "open"), variant="primary", visible=True + ) + button1Ba_close = gr.Button( + value=process_info(process_name_sovits, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1Ba = gr.Textbox(label=process_info(process_name_sovits, "info")) + with gr.Accordion(label="1Bb-" + i18n("GPT 训练: 模型权重文件在 GPT_weights/")): + with gr.Row(): + with gr.Column(): + with gr.Row(): + batch_size1Bb = gr.Slider( + minimum=1, + maximum=40, + step=1, + label=i18n("每张显卡的batch_size"), + value=default_batch_size_s1, + interactive=True, + ) + total_epoch1Bb = gr.Slider( + minimum=2, + maximum=50, + step=1, + label=i18n("总训练轮数total_epoch"), + value=15, + interactive=True, + ) + with gr.Row(): + save_every_epoch1Bb = gr.Slider( + minimum=1, + maximum=50, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + if_dpo = gr.Checkbox( + label=i18n("是否开启DPO训练选项(实验性)"), + value=False, + interactive=True, + show_label=True, + ) + with gr.Column(): + with gr.Column(): + if_save_latest1Bb = gr.Checkbox( + label=i18n("是否仅保存最新的权重文件以节省硬盘空间"), + value=True, + interactive=True, + show_label=True, + ) + if_save_every_weights1Bb = gr.Checkbox( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + value=True, + interactive=True, + show_label=True, + ) + with gr.Row(): + gpu_numbers1Bb = gr.Textbox( + label=i18n("GPU卡号以-分割,每个卡号一个进程"), + value=f"{gpus}", + interactive=True, + ) + with gr.Row(): + with gr.Row(): + button1Bb_open = gr.Button( + value=process_info(process_name_gpt, "open"), variant="primary", visible=True + ) + button1Bb_close = gr.Button( + value=process_info(process_name_gpt, "close"), variant="primary", visible=False + ) + with gr.Row(): + info1Bb = gr.Textbox(label=process_info(process_name_gpt, "info")) + + button1Ba_close.click(close1Ba, [], [info1Ba, button1Ba_open, button1Ba_close]) + button1Bb_close.click(close1Bb, [], [info1Bb, button1Bb_open, button1Bb_close]) + + with gr.TabItem("1C-" + i18n("推理")): + gr.Markdown( + value=i18n( + "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的几个是底模,体验5秒Zero Shot TTS不训练推理用。" + ) + ) + with gr.Row(): + with gr.Column(scale=2): + with gr.Row(): + GPT_dropdown = gr.Dropdown( + label=i18n("GPT模型列表"), + choices=GPT_names, + value=GPT_names[-1], + interactive=True, + ) + SoVITS_dropdown = gr.Dropdown( + label=i18n("SoVITS模型列表"), + choices=SoVITS_names, + value=SoVITS_names[0], + interactive=True, + ) + with gr.Column(scale=2): + with gr.Row(): + 
gpu_number_1C = gr.Textbox( + label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True + ) + refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") + refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + with gr.Row(equal_height=True): + with gr.Row(): + batched_infer_enabled = gr.Checkbox( + label=i18n("启用并行推理版本"), value=False, interactive=True, show_label=True + ) + open_tts = gr.Button( + value=process_info(process_name_tts, "open"), variant="primary", visible=True + ) + close_tts = gr.Button( + value=process_info(process_name_tts, "close"), variant="primary", visible=False + ) + with gr.Column(): + tts_info = gr.Textbox(label=process_info(process_name_tts, "info"), scale=2) + open_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + close_tts.click( + change_tts_inference, + [ + bert_pretrained_dir, + cnhubert_base_dir, + gpu_number_1C, + GPT_dropdown, + SoVITS_dropdown, + batched_infer_enabled, + ], + [tts_info, open_tts, close_tts], + ) + button1Ba_open.click( + open1Ba, + [ + version_checkbox, + batch_size, + total_epoch, + exp_name, + text_low_lr_rate, + if_save_latest, + if_save_every_weights, + save_every_epoch, + gpu_numbers1Ba, + pretrained_s2G, + pretrained_s2D, + if_grad_ckpt, + lora_rank, + ], + [info1Ba, button1Ba_open, button1Ba_close, SoVITS_dropdown, GPT_dropdown], + ) + button1Bb_open.click( + open1Bb, + [ + batch_size1Bb, + total_epoch1Bb, + exp_name, + if_dpo, + if_save_latest1Bb, + if_save_every_weights1Bb, + save_every_epoch1Bb, + gpu_numbers1Bb, + pretrained_s1, + ], + [info1Bb, button1Bb_open, button1Bb_close, SoVITS_dropdown, GPT_dropdown], + ) + version_checkbox.change( + switch_version, + [version_checkbox], + [ + pretrained_s2G, + pretrained_s2D, + pretrained_s1, + GPT_dropdown, + SoVITS_dropdown, + batch_size, + total_epoch, + save_every_epoch, + text_low_lr_rate, + if_grad_ckpt, + batched_infer_enabled, + lora_rank, + ], + ) + + with gr.TabItem(i18n("2-GPT-SoVITS-变声")): + gr.Markdown(value=i18n("施工中,请静候佳音")) + + app.queue().launch( # concurrency_count=511, max_size=1022 + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=webui_port_main, + # quiet=True, + )
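
Note on the update pattern used throughout the hunks above: callbacks such as `sync` and `switch_version` do not rebuild Gradio components; each returns one plain `{"__type__": "update", ...}` dict per output component, positionally matched to the `outputs=[...]` list of the `.click()`/`.change()` wiring. Below is a minimal, self-contained sketch of that pattern, for orientation only: the component names, choices, and the pretrained path are hypothetical, and it assumes a Gradio version compatible with the project's pin (recent Gradio would normally express the same dicts via `gr.update(...)`).

    import gradio as gr

    def sync(text):
        # Same idea as the webui's sync(): mirror one textbox into another.
        # Only the fields listed in the dict change; everything else is untouched.
        return {"__type__": "update", "value": text}

    def switch_version_demo(version):
        # One update dict per output component, in the same order as outputs=[...].
        v3v4set = {"v3", "v4"}
        return (
            {"__type__": "update", "value": f"pretrained/{version}/s2G.pth"},  # hypothetical path
            {"__type__": "update", "visible": version not in v3v4set},  # e.g. a control that v3/v4 does not use
        )

    with gr.Blocks() as demo:
        src = gr.Textbox(label="source")
        dst = gr.Textbox(label="mirror")
        ver = gr.Radio(choices=["v2", "v3", "v4"], value="v2", label="version")
        s2g_path = gr.Textbox(label="pretrained SoVITS-G path")
        low_lr = gr.Slider(0.2, 0.6, value=0.4, label="text_low_lr_rate")

        src.change(sync, [src], [dst])
        ver.change(switch_version_demo, [ver], [s2g_path, low_lr])

    if __name__ == "__main__":
        demo.launch()

Returning raw dicts keeps the callbacks framework-agnostic about component types, which is why the webui can drive a dozen heterogeneous widgets (sliders, radios, textboxes) from a single `version_checkbox.change` handler.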