From f1acdafd209a8cb17f34653ac05e965864c8848c Mon Sep 17 00:00:00 2001 From: jax Date: Sun, 18 Feb 2024 13:41:14 +0800 Subject: [PATCH 1/5] split train && interence ui --- GPT_SoVITS/inference_webui.py | 607 +------------------------------- GPT_SoVITS/interence_base.py | 602 ++++++++++++++++++++++++++++++++ train_base.py | 626 +++++++++++++++++++++++++++++++++ webui.py | 627 +--------------------------------- 4 files changed, 1240 insertions(+), 1222 deletions(-) create mode 100644 GPT_SoVITS/interence_base.py create mode 100644 train_base.py diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 39ae7e43..87e2ae52 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -1,606 +1,13 @@ -''' -按中英混合识别 -按日英混合识别 -多语种启动切分识别语种 -全部按中文识别 -全部按英文识别 -全部按日文识别 -''' -import os, re, logging -import LangSegment -logging.getLogger("markdown_it").setLevel(logging.ERROR) -logging.getLogger("urllib3").setLevel(logging.ERROR) -logging.getLogger("httpcore").setLevel(logging.ERROR) -logging.getLogger("httpx").setLevel(logging.ERROR) -logging.getLogger("asyncio").setLevel(logging.ERROR) -logging.getLogger("charset_normalizer").setLevel(logging.ERROR) -logging.getLogger("torchaudio._extension").setLevel(logging.ERROR) -import pdb - -if os.path.exists("./gweight.txt"): - with open("./gweight.txt", 'r', encoding="utf-8") as file: - gweight_data = file.read() - gpt_path = os.environ.get( - "gpt_path", gweight_data) -else: - gpt_path = os.environ.get( - "gpt_path", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt") - -if os.path.exists("./sweight.txt"): - with open("./sweight.txt", 'r', encoding="utf-8") as file: - sweight_data = file.read() - sovits_path = os.environ.get("sovits_path", sweight_data) -else: - sovits_path = os.environ.get("sovits_path", "GPT_SoVITS/pretrained_models/s2G488k.pth") -# gpt_path = os.environ.get( -# "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -# ) -# sovits_path = os.environ.get("sovits_path", "pretrained_models/s2G488k.pth") -cnhubert_base_path = os.environ.get( - "cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base" -) -bert_path = os.environ.get( - "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" -) -infer_ttswebui = os.environ.get("infer_ttswebui", 9872) -infer_ttswebui = int(infer_ttswebui) -is_share = os.environ.get("is_share", "False") -is_share = eval(is_share) -if "_CUDA_VISIBLE_DEVICES" in os.environ: - os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] -is_half = eval(os.environ.get("is_half", "True")) -import gradio as gr -from transformers import AutoModelForMaskedLM, AutoTokenizer -import numpy as np -import librosa, torch -from feature_extractor import cnhubert - -cnhubert.cnhubert_base_path = cnhubert_base_path - -from module.models import SynthesizerTrn -from AR.models.t2s_lightning_module import Text2SemanticLightningModule -from text import cleaned_text_to_sequence -from text.cleaner import clean_text -from time import time as ttime -from module.mel_processing import spectrogram_torch -from my_utils import load_audio +import os from tools.i18n.i18n import I18nAuto +from interence_base import sovits_path, gpt_path, change_choices, GPT_names, custom_sort_key, SoVITS_names, change_sovits_weights, change_gpt_weights, get_tts_wav, cut1, cut2, cut3, cut4, cut5 +import gradio as gr i18n = I18nAuto() - -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 - -if 
torch.cuda.is_available(): - device = "cuda" -elif torch.backends.mps.is_available(): - device = "mps" -else: - device = "cpu" - -tokenizer = AutoTokenizer.from_pretrained(bert_path) -bert_model = AutoModelForMaskedLM.from_pretrained(bert_path) -if is_half == True: - bert_model = bert_model.half().to(device) -else: - bert_model = bert_model.to(device) - - -def get_bert_feature(text, word2ph): - with torch.no_grad(): - inputs = tokenizer(text, return_tensors="pt") - for i in inputs: - inputs[i] = inputs[i].to(device) - res = bert_model(**inputs, output_hidden_states=True) - res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1] - assert len(word2ph) == len(text) - phone_level_feature = [] - for i in range(len(word2ph)): - repeat_feature = res[i].repeat(word2ph[i], 1) - phone_level_feature.append(repeat_feature) - phone_level_feature = torch.cat(phone_level_feature, dim=0) - return phone_level_feature.T - - -class DictToAttrRecursive(dict): - def __init__(self, input_dict): - super().__init__(input_dict) - for key, value in input_dict.items(): - if isinstance(value, dict): - value = DictToAttrRecursive(value) - self[key] = value - setattr(self, key, value) - - def __getattr__(self, item): - try: - return self[item] - except KeyError: - raise AttributeError(f"Attribute {item} not found") - - def __setattr__(self, key, value): - if isinstance(value, dict): - value = DictToAttrRecursive(value) - super(DictToAttrRecursive, self).__setitem__(key, value) - super().__setattr__(key, value) - - def __delattr__(self, item): - try: - del self[item] - except KeyError: - raise AttributeError(f"Attribute {item} not found") - - -ssl_model = cnhubert.get_model() -if is_half == True: - ssl_model = ssl_model.half().to(device) -else: - ssl_model = ssl_model.to(device) - - -def change_sovits_weights(sovits_path): - global vq_model, hps - dict_s2 = torch.load(sovits_path, map_location="cpu") - hps = dict_s2["config"] - hps = DictToAttrRecursive(hps) - hps.model.semantic_frame_rate = "25hz" - vq_model = SynthesizerTrn( - hps.data.filter_length // 2 + 1, - hps.train.segment_size // hps.data.hop_length, - n_speakers=hps.data.n_speakers, - **hps.model - ) - if ("pretrained" not in sovits_path): - del vq_model.enc_q - if is_half == True: - vq_model = vq_model.half().to(device) - else: - vq_model = vq_model.to(device) - vq_model.eval() - print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) - with open("./sweight.txt", "w", encoding="utf-8") as f: - f.write(sovits_path) - - -change_sovits_weights(sovits_path) - - -def change_gpt_weights(gpt_path): - global hz, max_sec, t2s_model, config - hz = 50 - dict_s1 = torch.load(gpt_path, map_location="cpu") - config = dict_s1["config"] - max_sec = config["data"]["max_sec"] - t2s_model = Text2SemanticLightningModule(config, "****", is_train=False) - t2s_model.load_state_dict(dict_s1["weight"]) - if is_half == True: - t2s_model = t2s_model.half() - t2s_model = t2s_model.to(device) - t2s_model.eval() - total = sum([param.nelement() for param in t2s_model.parameters()]) - print("Number of parameter: %.2fM" % (total / 1e6)) - with open("./gweight.txt", "w", encoding="utf-8") as f: f.write(gpt_path) - - -change_gpt_weights(gpt_path) - - -def get_spepc(hps, filename): - audio = load_audio(filename, int(hps.data.sampling_rate)) - audio = torch.FloatTensor(audio) - audio_norm = audio - audio_norm = audio_norm.unsqueeze(0) - spec = spectrogram_torch( - audio_norm, - hps.data.filter_length, - hps.data.sampling_rate, - hps.data.hop_length, - hps.data.win_length, - 
center=False, - ) - return spec - - -dict_language = { - i18n("中文"): "all_zh",#全部按中文识别 - i18n("英文"): "en",#全部按英文识别#######不变 - i18n("日文"): "all_ja",#全部按日文识别 - i18n("中英混合"): "zh",#按中英混合识别####不变 - i18n("日英混合"): "ja",#按日英混合识别####不变 - i18n("多语种混合"): "auto",#多语种启动切分识别语种 -} - - -def splite_en_inf(sentence, language): - pattern = re.compile(r'[a-zA-Z ]+') - textlist = [] - langlist = [] - pos = 0 - for match in pattern.finditer(sentence): - start, end = match.span() - if start > pos: - textlist.append(sentence[pos:start]) - langlist.append(language) - textlist.append(sentence[start:end]) - langlist.append("en") - pos = end - if pos < len(sentence): - textlist.append(sentence[pos:]) - langlist.append(language) - # Merge punctuation into previous word - for i in range(len(textlist)-1, 0, -1): - if re.match(r'^[\W_]+$', textlist[i]): - textlist[i-1] += textlist[i] - del textlist[i] - del langlist[i] - # Merge consecutive words with the same language tag - i = 0 - while i < len(langlist) - 1: - if langlist[i] == langlist[i+1]: - textlist[i] += textlist[i+1] - del textlist[i+1] - del langlist[i+1] - else: - i += 1 - - return textlist, langlist - - -def clean_text_inf(text, language): - formattext = "" - language = language.replace("all_","") - for tmp in LangSegment.getTexts(text): - if language == "ja": - if tmp["lang"] == language or tmp["lang"] == "zh": - formattext += tmp["text"] + " " - continue - if tmp["lang"] == language: - formattext += tmp["text"] + " " - while " " in formattext: - formattext = formattext.replace(" ", " ") - phones, word2ph, norm_text = clean_text(formattext, language) - phones = cleaned_text_to_sequence(phones) - return phones, word2ph, norm_text - -dtype=torch.float16 if is_half == True else torch.float32 -def get_bert_inf(phones, word2ph, norm_text, language): - language=language.replace("all_","") - if language == "zh": - bert = get_bert_feature(norm_text, word2ph).to(device)#.to(dtype) - else: - bert = torch.zeros( - (1024, len(phones)), - dtype=torch.float16 if is_half == True else torch.float32, - ).to(device) - - return bert - - -def nonen_clean_text_inf(text, language): - if(language!="auto"): - textlist, langlist = splite_en_inf(text, language) - else: - textlist=[] - langlist=[] - for tmp in LangSegment.getTexts(text): - langlist.append(tmp["lang"]) - textlist.append(tmp["text"]) - phones_list = [] - word2ph_list = [] - norm_text_list = [] - for i in range(len(textlist)): - lang = langlist[i] - phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) - phones_list.append(phones) - if lang == "zh": - word2ph_list.append(word2ph) - norm_text_list.append(norm_text) - print(word2ph_list) - phones = sum(phones_list, []) - word2ph = sum(word2ph_list, []) - norm_text = ' '.join(norm_text_list) - - return phones, word2ph, norm_text - - -def nonen_get_bert_inf(text, language): - if(language!="auto"): - textlist, langlist = splite_en_inf(text, language) - else: - textlist=[] - langlist=[] - for tmp in LangSegment.getTexts(text): - langlist.append(tmp["lang"]) - textlist.append(tmp["text"]) - print(textlist) - print(langlist) - bert_list = [] - for i in range(len(textlist)): - lang = langlist[i] - phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) - bert = get_bert_inf(phones, word2ph, norm_text, lang) - bert_list.append(bert) - bert = torch.cat(bert_list, dim=1) - - return bert - - -splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", } - - -def get_first(text): - pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]" - text = 
re.split(pattern, text)[0].strip() - return text - - -def get_cleaned_text_final(text,language): - if language in {"en","all_zh","all_ja"}: - phones, word2ph, norm_text = clean_text_inf(text, language) - elif language in {"zh", "ja","auto"}: - phones, word2ph, norm_text = nonen_clean_text_inf(text, language) - return phones, word2ph, norm_text - -def get_bert_final(phones, word2ph, text,language,device): - if language == "en": - bert = get_bert_inf(phones, word2ph, text, language) - elif language in {"zh", "ja","auto"}: - bert = nonen_get_bert_inf(text, language) - elif language == "all_zh": - bert = get_bert_feature(text, word2ph).to(device) - else: - bert = torch.zeros((1024, len(phones))).to(device) - return bert - -def merge_short_text_in_array(texts, threshold): - if (len(texts)) < 2: - return texts - result = [] - text = "" - for ele in texts: - text += ele - if len(text) >= threshold: - result.append(text) - text = "" - if (len(text) > 0): - if len(result) == 0: - result.append(text) - else: - result[len(result) - 1] += text - return result - -def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False): - if prompt_text is None or len(prompt_text) == 0: - ref_free = True - t0 = ttime() - prompt_language = dict_language[prompt_language] - text_language = dict_language[text_language] - if not ref_free: - prompt_text = prompt_text.strip("\n") - if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "." - print(i18n("实际输入的参考文本:"), prompt_text) - text = text.strip("\n") - if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text - - print(i18n("实际输入的目标文本:"), text) - zero_wav = np.zeros( - int(hps.data.sampling_rate * 0.3), - dtype=np.float16 if is_half == True else np.float32, - ) - with torch.no_grad(): - wav16k, sr = librosa.load(ref_wav_path, sr=16000) - if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000): - raise OSError(i18n("参考音频在3~10秒范围外,请更换!")) - wav16k = torch.from_numpy(wav16k) - zero_wav_torch = torch.from_numpy(zero_wav) - if is_half == True: - wav16k = wav16k.half().to(device) - zero_wav_torch = zero_wav_torch.half().to(device) - else: - wav16k = wav16k.to(device) - zero_wav_torch = zero_wav_torch.to(device) - wav16k = torch.cat([wav16k, zero_wav_torch]) - ssl_content = ssl_model.model(wav16k.unsqueeze(0))[ - "last_hidden_state" - ].transpose( - 1, 2 - ) # .float() - codes = vq_model.extract_latent(ssl_content) - - prompt_semantic = codes[0, 0] - t1 = ttime() - - if (how_to_cut == i18n("凑四句一切")): - text = cut1(text) - elif (how_to_cut == i18n("凑50字一切")): - text = cut2(text) - elif (how_to_cut == i18n("按中文句号。切")): - text = cut3(text) - elif (how_to_cut == i18n("按英文句号.切")): - text = cut4(text) - elif (how_to_cut == i18n("按标点符号切")): - text = cut5(text) - while "\n\n" in text: - text = text.replace("\n\n", "\n") - print(i18n("实际输入的目标文本(切句后):"), text) - texts = text.split("\n") - texts = merge_short_text_in_array(texts, 5) - audio_opt = [] - if not ref_free: - phones1, word2ph1, norm_text1=get_cleaned_text_final(prompt_text, prompt_language) - bert1=get_bert_final(phones1, word2ph1, norm_text1,prompt_language,device).to(dtype) - - for text in texts: - # 解决输入目标文本的空行导致报错的问题 - if (len(text.strip()) == 0): - continue - if (text[-1] not in splits): text += "。" if text_language != "en" else "." 
- print(i18n("实际输入的目标文本(每句):"), text) - phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language) - bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype) - if not ref_free: - bert = torch.cat([bert1, bert2], 1) - all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0) - else: - bert = bert2 - all_phoneme_ids = torch.LongTensor(phones2).to(device).unsqueeze(0) - - bert = bert.to(device).unsqueeze(0) - all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device) - prompt = prompt_semantic.unsqueeze(0).to(device) - t2 = ttime() - with torch.no_grad(): - # pred_semantic = t2s_model.model.infer( - pred_semantic, idx = t2s_model.model.infer_panel( - all_phoneme_ids, - all_phoneme_len, - None if ref_free else prompt, - bert, - # prompt_phone_len=ph_offset, - top_k=top_k, - top_p=top_p, - temperature=temperature, - early_stop_num=hz * max_sec, - ) - t3 = ttime() - # print(pred_semantic.shape,idx) - pred_semantic = pred_semantic[:, -idx:].unsqueeze( - 0 - ) # .unsqueeze(0)#mq要多unsqueeze一次 - refer = get_spepc(hps, ref_wav_path) # .to(device) - if is_half == True: - refer = refer.half().to(device) - else: - refer = refer.to(device) - # audio = vq_model.decode(pred_semantic, all_phoneme_ids, refer).detach().cpu().numpy()[0, 0] - audio = ( - vq_model.decode( - pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer - ) - .detach() - .cpu() - .numpy()[0, 0] - ) ###试试重建不带上prompt部分 - max_audio=np.abs(audio).max()#简单防止16bit爆音 - if max_audio>1:audio/=max_audio - audio_opt.append(audio) - audio_opt.append(zero_wav) - t4 = ttime() - print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3)) - yield hps.data.sampling_rate, (np.concatenate(audio_opt, 0) * 32768).astype( - np.int16 - ) - - -def split(todo_text): - todo_text = todo_text.replace("……", "。").replace("——", ",") - if todo_text[-1] not in splits: - todo_text += "。" - i_split_head = i_split_tail = 0 - len_text = len(todo_text) - todo_texts = [] - while 1: - if i_split_head >= len_text: - break # 结尾一定有标点,所以直接跳出即可,最后一段在上次已加入 - if todo_text[i_split_head] in splits: - i_split_head += 1 - todo_texts.append(todo_text[i_split_tail:i_split_head]) - i_split_tail = i_split_head - else: - i_split_head += 1 - return todo_texts - - -def cut1(inp): - inp = inp.strip("\n") - inps = split(inp) - split_idx = list(range(0, len(inps), 4)) - split_idx[-1] = None - if len(split_idx) > 1: - opts = [] - for idx in range(len(split_idx) - 1): - opts.append("".join(inps[split_idx[idx]: split_idx[idx + 1]])) - else: - opts = [inp] - return "\n".join(opts) - - -def cut2(inp): - inp = inp.strip("\n") - inps = split(inp) - if len(inps) < 2: - return inp - opts = [] - summ = 0 - tmp_str = "" - for i in range(len(inps)): - summ += len(inps[i]) - tmp_str += inps[i] - if summ > 50: - summ = 0 - opts.append(tmp_str) - tmp_str = "" - if tmp_str != "": - opts.append(tmp_str) - # print(opts) - if len(opts) > 1 and len(opts[-1]) < 50: ##如果最后一个太短了,和前一个合一起 - opts[-2] = opts[-2] + opts[-1] - opts = opts[:-1] - return "\n".join(opts) - - -def cut3(inp): - inp = inp.strip("\n") - return "\n".join(["%s" % item for item in inp.strip("。").split("。")]) - - -def cut4(inp): - inp = inp.strip("\n") - return "\n".join(["%s" % item for item in inp.strip(".").split(".")]) - - -# contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py -def cut5(inp): - # if not re.search(r'[^\w\s]', inp[-1]): - # inp += '。' - inp = inp.strip("\n") - punds = r'[,.;?!、,。?!;:]' - 
items = re.split(f'({punds})', inp) - items = ["".join(group) for group in zip(items[::2], items[1::2])] - opt = "\n".join(items) - return opt - - -def custom_sort_key(s): - # 使用正则表达式提取字符串中的数字部分和非数字部分 - parts = re.split('(\d+)', s) - # 将数字部分转换为整数,非数字部分保持不变 - parts = [int(part) if part.isdigit() else part for part in parts] - return parts - - -def change_choices(): - SoVITS_names, GPT_names = get_weights_names() - return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"} - - -pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth" -pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -SoVITS_weight_root = "SoVITS_weights" -GPT_weight_root = "GPT_weights" -os.makedirs(SoVITS_weight_root, exist_ok=True) -os.makedirs(GPT_weight_root, exist_ok=True) - - -def get_weights_names(): - SoVITS_names = [pretrained_sovits_name] - for name in os.listdir(SoVITS_weight_root): - if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (SoVITS_weight_root, name)) - GPT_names = [pretrained_gpt_name] - for name in os.listdir(GPT_weight_root): - if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name)) - return SoVITS_names, GPT_names - - -SoVITS_names, GPT_names = get_weights_names() +is_share = os.environ.get("is_share", "False") +is_share = eval(is_share) +infer_ttswebui = os.environ.get("infer_ttswebui", 9872) +infer_ttswebui = int(infer_ttswebui) with gr.Blocks(title="GPT-SoVITS WebUI") as app: gr.Markdown( diff --git a/GPT_SoVITS/interence_base.py b/GPT_SoVITS/interence_base.py new file mode 100644 index 00000000..3b4134cc --- /dev/null +++ b/GPT_SoVITS/interence_base.py @@ -0,0 +1,602 @@ +''' +按中英混合识别 +按日英混合识别 +多语种启动切分识别语种 +全部按中文识别 +全部按英文识别 +全部按日文识别 +''' +import os, re, logging +import LangSegment +logging.getLogger("markdown_it").setLevel(logging.ERROR) +logging.getLogger("urllib3").setLevel(logging.ERROR) +logging.getLogger("httpcore").setLevel(logging.ERROR) +logging.getLogger("httpx").setLevel(logging.ERROR) +logging.getLogger("asyncio").setLevel(logging.ERROR) +logging.getLogger("charset_normalizer").setLevel(logging.ERROR) +logging.getLogger("torchaudio._extension").setLevel(logging.ERROR) +import pdb + +if os.path.exists("./gweight.txt"): + with open("./gweight.txt", 'r', encoding="utf-8") as file: + gweight_data = file.read() + gpt_path = os.environ.get( + "gpt_path", gweight_data) +else: + gpt_path = os.environ.get( + "gpt_path", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt") + +if os.path.exists("./sweight.txt"): + with open("./sweight.txt", 'r', encoding="utf-8") as file: + sweight_data = file.read() + sovits_path = os.environ.get("sovits_path", sweight_data) +else: + sovits_path = os.environ.get("sovits_path", "GPT_SoVITS/pretrained_models/s2G488k.pth") +# gpt_path = os.environ.get( +# "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +# ) +# sovits_path = os.environ.get("sovits_path", "pretrained_models/s2G488k.pth") +cnhubert_base_path = os.environ.get( + "cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base" +) +bert_path = os.environ.get( + "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" +) +infer_ttswebui = os.environ.get("infer_ttswebui", 9872) +infer_ttswebui = int(infer_ttswebui) +is_share = os.environ.get("is_share", "False") +is_share = eval(is_share) +if "_CUDA_VISIBLE_DEVICES" in os.environ: + 
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] +is_half = eval(os.environ.get("is_half", "True")) +from transformers import AutoModelForMaskedLM, AutoTokenizer +import numpy as np +import librosa, torch +from feature_extractor import cnhubert + +cnhubert.cnhubert_base_path = cnhubert_base_path + +from module.models import SynthesizerTrn +from AR.models.t2s_lightning_module import Text2SemanticLightningModule +from text import cleaned_text_to_sequence +from text.cleaner import clean_text +from time import time as ttime +from module.mel_processing import spectrogram_torch +from my_utils import load_audio +from tools.i18n.i18n import I18nAuto + +i18n = I18nAuto() + +os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 + +if torch.cuda.is_available(): + device = "cuda" +elif torch.backends.mps.is_available(): + device = "mps" +else: + device = "cpu" + +tokenizer = AutoTokenizer.from_pretrained(bert_path) +bert_model = AutoModelForMaskedLM.from_pretrained(bert_path) +if is_half == True: + bert_model = bert_model.half().to(device) +else: + bert_model = bert_model.to(device) + + +def get_bert_feature(text, word2ph): + with torch.no_grad(): + inputs = tokenizer(text, return_tensors="pt") + for i in inputs: + inputs[i] = inputs[i].to(device) + res = bert_model(**inputs, output_hidden_states=True) + res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()[1:-1] + assert len(word2ph) == len(text) + phone_level_feature = [] + for i in range(len(word2ph)): + repeat_feature = res[i].repeat(word2ph[i], 1) + phone_level_feature.append(repeat_feature) + phone_level_feature = torch.cat(phone_level_feature, dim=0) + return phone_level_feature.T + + +class DictToAttrRecursive(dict): + def __init__(self, input_dict): + super().__init__(input_dict) + for key, value in input_dict.items(): + if isinstance(value, dict): + value = DictToAttrRecursive(value) + self[key] = value + setattr(self, key, value) + + def __getattr__(self, item): + try: + return self[item] + except KeyError: + raise AttributeError(f"Attribute {item} not found") + + def __setattr__(self, key, value): + if isinstance(value, dict): + value = DictToAttrRecursive(value) + super(DictToAttrRecursive, self).__setitem__(key, value) + super().__setattr__(key, value) + + def __delattr__(self, item): + try: + del self[item] + except KeyError: + raise AttributeError(f"Attribute {item} not found") + + +ssl_model = cnhubert.get_model() +if is_half == True: + ssl_model = ssl_model.half().to(device) +else: + ssl_model = ssl_model.to(device) + + +def change_sovits_weights(sovits_path): + global vq_model, hps + dict_s2 = torch.load(sovits_path, map_location="cpu") + hps = dict_s2["config"] + hps = DictToAttrRecursive(hps) + hps.model.semantic_frame_rate = "25hz" + vq_model = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + n_speakers=hps.data.n_speakers, + **hps.model + ) + if ("pretrained" not in sovits_path): + del vq_model.enc_q + if is_half == True: + vq_model = vq_model.half().to(device) + else: + vq_model = vq_model.to(device) + vq_model.eval() + print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) + with open("./sweight.txt", "w", encoding="utf-8") as f: + f.write(sovits_path) + + +change_sovits_weights(sovits_path) + + +def change_gpt_weights(gpt_path): + global hz, max_sec, t2s_model, config + hz = 50 + dict_s1 = torch.load(gpt_path, map_location="cpu") + config = dict_s1["config"] + max_sec = config["data"]["max_sec"] + t2s_model = 
Text2SemanticLightningModule(config, "****", is_train=False) + t2s_model.load_state_dict(dict_s1["weight"]) + if is_half == True: + t2s_model = t2s_model.half() + t2s_model = t2s_model.to(device) + t2s_model.eval() + total = sum([param.nelement() for param in t2s_model.parameters()]) + print("Number of parameter: %.2fM" % (total / 1e6)) + with open("./gweight.txt", "w", encoding="utf-8") as f: f.write(gpt_path) + + +change_gpt_weights(gpt_path) + + +def get_spepc(hps, filename): + audio = load_audio(filename, int(hps.data.sampling_rate)) + audio = torch.FloatTensor(audio) + audio_norm = audio + audio_norm = audio_norm.unsqueeze(0) + spec = spectrogram_torch( + audio_norm, + hps.data.filter_length, + hps.data.sampling_rate, + hps.data.hop_length, + hps.data.win_length, + center=False, + ) + return spec + + +dict_language = { + i18n("中文"): "all_zh",#全部按中文识别 + i18n("英文"): "en",#全部按英文识别#######不变 + i18n("日文"): "all_ja",#全部按日文识别 + i18n("中英混合"): "zh",#按中英混合识别####不变 + i18n("日英混合"): "ja",#按日英混合识别####不变 + i18n("多语种混合"): "auto",#多语种启动切分识别语种 +} + + +def splite_en_inf(sentence, language): + pattern = re.compile(r'[a-zA-Z ]+') + textlist = [] + langlist = [] + pos = 0 + for match in pattern.finditer(sentence): + start, end = match.span() + if start > pos: + textlist.append(sentence[pos:start]) + langlist.append(language) + textlist.append(sentence[start:end]) + langlist.append("en") + pos = end + if pos < len(sentence): + textlist.append(sentence[pos:]) + langlist.append(language) + # Merge punctuation into previous word + for i in range(len(textlist)-1, 0, -1): + if re.match(r'^[\W_]+$', textlist[i]): + textlist[i-1] += textlist[i] + del textlist[i] + del langlist[i] + # Merge consecutive words with the same language tag + i = 0 + while i < len(langlist) - 1: + if langlist[i] == langlist[i+1]: + textlist[i] += textlist[i+1] + del textlist[i+1] + del langlist[i+1] + else: + i += 1 + + return textlist, langlist + + +def clean_text_inf(text, language): + formattext = "" + language = language.replace("all_","") + for tmp in LangSegment.getTexts(text): + if language == "ja": + if tmp["lang"] == language or tmp["lang"] == "zh": + formattext += tmp["text"] + " " + continue + if tmp["lang"] == language: + formattext += tmp["text"] + " " + while " " in formattext: + formattext = formattext.replace(" ", " ") + phones, word2ph, norm_text = clean_text(formattext, language) + phones = cleaned_text_to_sequence(phones) + return phones, word2ph, norm_text + +dtype=torch.float16 if is_half == True else torch.float32 +def get_bert_inf(phones, word2ph, norm_text, language): + language=language.replace("all_","") + if language == "zh": + bert = get_bert_feature(norm_text, word2ph).to(device)#.to(dtype) + else: + bert = torch.zeros( + (1024, len(phones)), + dtype=torch.float16 if is_half == True else torch.float32, + ).to(device) + + return bert + + +def nonen_clean_text_inf(text, language): + if(language!="auto"): + textlist, langlist = splite_en_inf(text, language) + else: + textlist=[] + langlist=[] + for tmp in LangSegment.getTexts(text): + langlist.append(tmp["lang"]) + textlist.append(tmp["text"]) + phones_list = [] + word2ph_list = [] + norm_text_list = [] + for i in range(len(textlist)): + lang = langlist[i] + phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) + phones_list.append(phones) + if lang == "zh": + word2ph_list.append(word2ph) + norm_text_list.append(norm_text) + print(word2ph_list) + phones = sum(phones_list, []) + word2ph = sum(word2ph_list, []) + norm_text = ' '.join(norm_text_list) + + 
return phones, word2ph, norm_text + + +def nonen_get_bert_inf(text, language): + if(language!="auto"): + textlist, langlist = splite_en_inf(text, language) + else: + textlist=[] + langlist=[] + for tmp in LangSegment.getTexts(text): + langlist.append(tmp["lang"]) + textlist.append(tmp["text"]) + print(textlist) + print(langlist) + bert_list = [] + for i in range(len(textlist)): + lang = langlist[i] + phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) + bert = get_bert_inf(phones, word2ph, norm_text, lang) + bert_list.append(bert) + bert = torch.cat(bert_list, dim=1) + + return bert + + +splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", } + + +def get_first(text): + pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]" + text = re.split(pattern, text)[0].strip() + return text + + +def get_cleaned_text_final(text,language): + if language in {"en","all_zh","all_ja"}: + phones, word2ph, norm_text = clean_text_inf(text, language) + elif language in {"zh", "ja","auto"}: + phones, word2ph, norm_text = nonen_clean_text_inf(text, language) + return phones, word2ph, norm_text + +def get_bert_final(phones, word2ph, text,language,device): + if language == "en": + bert = get_bert_inf(phones, word2ph, text, language) + elif language in {"zh", "ja","auto"}: + bert = nonen_get_bert_inf(text, language) + elif language == "all_zh": + bert = get_bert_feature(text, word2ph).to(device) + else: + bert = torch.zeros((1024, len(phones))).to(device) + return bert + +def merge_short_text_in_array(texts, threshold): + if (len(texts)) < 2: + return texts + result = [] + text = "" + for ele in texts: + text += ele + if len(text) >= threshold: + result.append(text) + text = "" + if (len(text) > 0): + if len(result) == 0: + result.append(text) + else: + result[len(result) - 1] += text + return result + +def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False): + if prompt_text is None or len(prompt_text) == 0: + ref_free = True + t0 = ttime() + prompt_language = dict_language[prompt_language] + text_language = dict_language[text_language] + if not ref_free: + prompt_text = prompt_text.strip("\n") + if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "." + print(i18n("实际输入的参考文本:"), prompt_text) + text = text.strip("\n") + if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." 
+ text + + print(i18n("实际输入的目标文本:"), text) + zero_wav = np.zeros( + int(hps.data.sampling_rate * 0.3), + dtype=np.float16 if is_half == True else np.float32, + ) + with torch.no_grad(): + wav16k, sr = librosa.load(ref_wav_path, sr=16000) + if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000): + raise OSError(i18n("参考音频在3~10秒范围外,请更换!")) + wav16k = torch.from_numpy(wav16k) + zero_wav_torch = torch.from_numpy(zero_wav) + if is_half == True: + wav16k = wav16k.half().to(device) + zero_wav_torch = zero_wav_torch.half().to(device) + else: + wav16k = wav16k.to(device) + zero_wav_torch = zero_wav_torch.to(device) + wav16k = torch.cat([wav16k, zero_wav_torch]) + ssl_content = ssl_model.model(wav16k.unsqueeze(0))[ + "last_hidden_state" + ].transpose( + 1, 2 + ) # .float() + codes = vq_model.extract_latent(ssl_content) + + prompt_semantic = codes[0, 0] + t1 = ttime() + + if (how_to_cut == i18n("凑四句一切")): + text = cut1(text) + elif (how_to_cut == i18n("凑50字一切")): + text = cut2(text) + elif (how_to_cut == i18n("按中文句号。切")): + text = cut3(text) + elif (how_to_cut == i18n("按英文句号.切")): + text = cut4(text) + elif (how_to_cut == i18n("按标点符号切")): + text = cut5(text) + while "\n\n" in text: + text = text.replace("\n\n", "\n") + print(i18n("实际输入的目标文本(切句后):"), text) + texts = text.split("\n") + texts = merge_short_text_in_array(texts, 5) + audio_opt = [] + if not ref_free: + phones1, word2ph1, norm_text1=get_cleaned_text_final(prompt_text, prompt_language) + bert1=get_bert_final(phones1, word2ph1, norm_text1,prompt_language,device).to(dtype) + + for text in texts: + # 解决输入目标文本的空行导致报错的问题 + if (len(text.strip()) == 0): + continue + if (text[-1] not in splits): text += "。" if text_language != "en" else "." + print(i18n("实际输入的目标文本(每句):"), text) + phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language) + bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype) + if not ref_free: + bert = torch.cat([bert1, bert2], 1) + all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0) + else: + bert = bert2 + all_phoneme_ids = torch.LongTensor(phones2).to(device).unsqueeze(0) + + bert = bert.to(device).unsqueeze(0) + all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device) + prompt = prompt_semantic.unsqueeze(0).to(device) + t2 = ttime() + with torch.no_grad(): + # pred_semantic = t2s_model.model.infer( + pred_semantic, idx = t2s_model.model.infer_panel( + all_phoneme_ids, + all_phoneme_len, + None if ref_free else prompt, + bert, + # prompt_phone_len=ph_offset, + top_k=top_k, + top_p=top_p, + temperature=temperature, + early_stop_num=hz * max_sec, + ) + t3 = ttime() + # print(pred_semantic.shape,idx) + pred_semantic = pred_semantic[:, -idx:].unsqueeze( + 0 + ) # .unsqueeze(0)#mq要多unsqueeze一次 + refer = get_spepc(hps, ref_wav_path) # .to(device) + if is_half == True: + refer = refer.half().to(device) + else: + refer = refer.to(device) + # audio = vq_model.decode(pred_semantic, all_phoneme_ids, refer).detach().cpu().numpy()[0, 0] + audio = ( + vq_model.decode( + pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer + ) + .detach() + .cpu() + .numpy()[0, 0] + ) ###试试重建不带上prompt部分 + max_audio=np.abs(audio).max()#简单防止16bit爆音 + if max_audio>1:audio/=max_audio + audio_opt.append(audio) + audio_opt.append(zero_wav) + t4 = ttime() + print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3)) + yield hps.data.sampling_rate, (np.concatenate(audio_opt, 0) * 32768).astype( + np.int16 + ) + + +def split(todo_text): + todo_text = 
todo_text.replace("……", "。").replace("——", ",") + if todo_text[-1] not in splits: + todo_text += "。" + i_split_head = i_split_tail = 0 + len_text = len(todo_text) + todo_texts = [] + while 1: + if i_split_head >= len_text: + break # 结尾一定有标点,所以直接跳出即可,最后一段在上次已加入 + if todo_text[i_split_head] in splits: + i_split_head += 1 + todo_texts.append(todo_text[i_split_tail:i_split_head]) + i_split_tail = i_split_head + else: + i_split_head += 1 + return todo_texts + + +def cut1(inp): + inp = inp.strip("\n") + inps = split(inp) + split_idx = list(range(0, len(inps), 4)) + split_idx[-1] = None + if len(split_idx) > 1: + opts = [] + for idx in range(len(split_idx) - 1): + opts.append("".join(inps[split_idx[idx]: split_idx[idx + 1]])) + else: + opts = [inp] + return "\n".join(opts) + + +def cut2(inp): + inp = inp.strip("\n") + inps = split(inp) + if len(inps) < 2: + return inp + opts = [] + summ = 0 + tmp_str = "" + for i in range(len(inps)): + summ += len(inps[i]) + tmp_str += inps[i] + if summ > 50: + summ = 0 + opts.append(tmp_str) + tmp_str = "" + if tmp_str != "": + opts.append(tmp_str) + # print(opts) + if len(opts) > 1 and len(opts[-1]) < 50: ##如果最后一个太短了,和前一个合一起 + opts[-2] = opts[-2] + opts[-1] + opts = opts[:-1] + return "\n".join(opts) + + +def cut3(inp): + inp = inp.strip("\n") + return "\n".join(["%s" % item for item in inp.strip("。").split("。")]) + + +def cut4(inp): + inp = inp.strip("\n") + return "\n".join(["%s" % item for item in inp.strip(".").split(".")]) + + +# contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py +def cut5(inp): + # if not re.search(r'[^\w\s]', inp[-1]): + # inp += '。' + inp = inp.strip("\n") + punds = r'[,.;?!、,。?!;:]' + items = re.split(f'({punds})', inp) + items = ["".join(group) for group in zip(items[::2], items[1::2])] + opt = "\n".join(items) + return opt + + +def custom_sort_key(s): + # 使用正则表达式提取字符串中的数字部分和非数字部分 + parts = re.split('(\d+)', s) + # 将数字部分转换为整数,非数字部分保持不变 + parts = [int(part) if part.isdigit() else part for part in parts] + return parts + + +def change_choices(): + SoVITS_names, GPT_names = get_weights_names() + return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"} + + +pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth" +pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +SoVITS_weight_root = "SoVITS_weights" +GPT_weight_root = "GPT_weights" +os.makedirs(SoVITS_weight_root, exist_ok=True) +os.makedirs(GPT_weight_root, exist_ok=True) + + +def get_weights_names(): + SoVITS_names = [pretrained_sovits_name] + for name in os.listdir(SoVITS_weight_root): + if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (SoVITS_weight_root, name)) + GPT_names = [pretrained_gpt_name] + for name in os.listdir(GPT_weight_root): + if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name)) + return SoVITS_names, GPT_names + + +SoVITS_names, GPT_names = get_weights_names() diff --git a/train_base.py b/train_base.py new file mode 100644 index 00000000..1c3d5866 --- /dev/null +++ b/train_base.py @@ -0,0 +1,626 @@ +import os,shutil,sys,pdb,re +now_dir = os.getcwd() +sys.path.append(now_dir) +import json,yaml,warnings,torch +import platform +import psutil +import signal + +warnings.filterwarnings("ignore") +torch.manual_seed(233333) +tmp = os.path.join(now_dir, "TEMP") +os.makedirs(tmp, exist_ok=True) +os.environ["TEMP"] = tmp 
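+# Clear leftover files from the TEMP directory on startup, keeping jieba's cache.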
+if(os.path.exists(tmp)): + for name in os.listdir(tmp): + if(name=="jieba.cache"):continue + path="%s/%s"%(tmp,name) + delete=os.remove if os.path.isfile(path) else shutil.rmtree + try: + delete(path) + except Exception as e: + print(str(e)) + pass +import site +site_packages_roots = [] +for path in site.getsitepackages(): + if "packages" in path: + site_packages_roots.append(path) +if(site_packages_roots==[]):site_packages_roots=["%s/runtime/Lib/site-packages" % now_dir] +#os.environ["OPENBLAS_NUM_THREADS"] = "4" +os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" +os.environ["all_proxy"] = "" +for site_packages_root in site_packages_roots: + if os.path.exists(site_packages_root): + try: + with open("%s/users.pth" % (site_packages_root), "w") as f: + f.write( + "%s\n%s/tools\n%s/tools/damo_asr\n%s/GPT_SoVITS\n%s/tools/uvr5" + % (now_dir, now_dir, now_dir, now_dir, now_dir) + ) + break + except PermissionError: + pass +from tools import my_utils +import traceback +import shutil +import pdb +from subprocess import Popen +import signal +from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share +from tools.i18n.i18n import I18nAuto +i18n = I18nAuto() +from scipy.io import wavfile +from tools.my_utils import load_audio +from multiprocessing import cpu_count + +os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu + +n_cpu=cpu_count() + +ngpu = torch.cuda.device_count() +gpu_infos = [] +mem = [] +if_gpu_ok = False + +# 判断是否有能用来训练和加速推理的N卡 +if torch.cuda.is_available() or ngpu != 0: + for i in range(ngpu): + gpu_name = torch.cuda.get_device_name(i) + if any(value in gpu_name.upper()for value in ["10","16","20","30","40","A2","A3","A4","P4","A50","500","A60","70","80","90","M4","T4","TITAN","L4","4060"]): + # A10#A100#V100#A40#P40#M40#K80#A4500 + if_gpu_ok = True # 至少有一张能用的N卡 + gpu_infos.append("%s\t%s" % (i, gpu_name)) + mem.append(int(torch.cuda.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4)) +# 判断是否支持mps加速 +if torch.backends.mps.is_available(): + if_gpu_ok = True + gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) + mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 + +if if_gpu_ok and len(gpu_infos) > 0: + gpu_info = "\n".join(gpu_infos) + default_batch_size = min(mem) // 2 +else: + gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") + default_batch_size = 1 +gpus = "-".join([i[0] for i in gpu_infos]) + +pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth" +pretrained_gpt_name="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +def get_weights_names(): + SoVITS_names = [pretrained_sovits_name] + for name in os.listdir(SoVITS_weight_root): + if name.endswith(".pth"):SoVITS_names.append(name) + GPT_names = [pretrained_gpt_name] + for name in os.listdir(GPT_weight_root): + if name.endswith(".ckpt"): GPT_names.append(name) + return SoVITS_names,GPT_names +SoVITS_weight_root="SoVITS_weights" +GPT_weight_root="GPT_weights" +os.makedirs(SoVITS_weight_root,exist_ok=True) +os.makedirs(GPT_weight_root,exist_ok=True) +SoVITS_names,GPT_names = get_weights_names() + +def custom_sort_key(s): + # 使用正则表达式提取字符串中的数字部分和非数字部分 + parts = re.split('(\d+)', s) + # 将数字部分转换为整数,非数字部分保持不变 + parts = [int(part) if part.isdigit() else part for part in parts] + return parts + +def change_choices(): + SoVITS_names, GPT_names = get_weights_names() + return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": 
sorted(GPT_names,key=custom_sort_key), "__type__": "update"} + +p_label=None +p_uvr5=None +p_asr=None +p_tts_inference=None + +def kill_proc_tree(pid, including_parent=True): + try: + parent = psutil.Process(pid) + except psutil.NoSuchProcess: + # Process already terminated + return + + children = parent.children(recursive=True) + for child in children: + try: + os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + if including_parent: + try: + os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL + except OSError: + pass + +system=platform.system() +def kill_process(pid): + if(system=="Windows"): + cmd = "taskkill /t /f /pid %s" % pid + os.system(cmd) + else: + kill_proc_tree(pid) + + +def change_label(if_label,path_list): + global p_label + if(if_label==True and p_label==None): + path_list=my_utils.clean_path(path_list) + cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share) + yield i18n("打标工具WebUI已开启") + print(cmd) + p_label = Popen(cmd, shell=True) + elif(if_label==False and p_label!=None): + kill_process(p_label.pid) + p_label=None + yield i18n("打标工具WebUI已关闭") + +def change_uvr5(if_uvr5): + global p_uvr5 + if(if_uvr5==True and p_uvr5==None): + cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share) + yield i18n("UVR5已开启") + print(cmd) + p_uvr5 = Popen(cmd, shell=True) + elif(if_uvr5==False and p_uvr5!=None): + kill_process(p_uvr5.pid) + p_uvr5=None + yield i18n("UVR5已关闭") + + +from tools.asr.config import asr_dict +def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang): + global p_asr + if(p_asr==None): + asr_inp_dir=my_utils.clean_path(asr_inp_dir) + cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}' + cmd += f' -i "{asr_inp_dir}"' + cmd += f' -o "{asr_opt_dir}"' + cmd += f' -s {asr_model_size}' + cmd += f' -l {asr_lang}' + cmd += " -p %s"%("float16"if is_half==True else "float32") + + yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + print(cmd) + p_asr = Popen(cmd, shell=True) + p_asr.wait() + p_asr=None + yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} + # return None + +def close_asr(): + global p_asr + if(p_asr!=None): + kill_process(p_asr.pid) + p_asr=None + return "已终止ASR进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + +p_train_SoVITS=None +def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D): + global p_train_SoVITS + if(p_train_SoVITS==None): + with open("GPT_SoVITS/configs/s2.json")as f: + data=f.read() + data=json.loads(data) + s2_dir="%s/%s"%(exp_root,exp_name) + os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True) + if(is_half==False): + data["train"]["fp16_run"]=False + batch_size=max(1,batch_size//2) + data["train"]["batch_size"]=batch_size + data["train"]["epochs"]=total_epoch + data["train"]["text_low_lr_rate"]=text_low_lr_rate + data["train"]["pretrained_s2G"]=pretrained_s2G + data["train"]["pretrained_s2D"]=pretrained_s2D + data["train"]["if_save_latest"]=if_save_latest + data["train"]["if_save_every_weights"]=if_save_every_weights + data["train"]["save_every_epoch"]=save_every_epoch + 
data["train"]["gpu_numbers"]=gpu_numbers1Ba + data["data"]["exp_dir"]=data["s2_ckpt_dir"]=s2_dir + data["save_weight_dir"]=SoVITS_weight_root + data["name"]=exp_name + tmp_config_path="%s/tmp_s2.json"%tmp + with open(tmp_config_path,"w")as f:f.write(json.dumps(data)) + + cmd = '"%s" GPT_SoVITS/s2_train.py --config "%s"'%(python_exec,tmp_config_path) + yield "SoVITS训练开始:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + print(cmd) + p_train_SoVITS = Popen(cmd, shell=True) + p_train_SoVITS.wait() + p_train_SoVITS=None + yield "SoVITS训练完成",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} + +def close1Ba(): + global p_train_SoVITS + if(p_train_SoVITS!=None): + kill_process(p_train_SoVITS.pid) + p_train_SoVITS=None + return "已终止SoVITS训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + +p_train_GPT=None +def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1): + global p_train_GPT + if(p_train_GPT==None): + with open("GPT_SoVITS/configs/s1longer.yaml")as f: + data=f.read() + data=yaml.load(data, Loader=yaml.FullLoader) + s1_dir="%s/%s"%(exp_root,exp_name) + os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True) + if(is_half==False): + data["train"]["precision"]="32" + batch_size = max(1, batch_size // 2) + data["train"]["batch_size"]=batch_size + data["train"]["epochs"]=total_epoch + data["pretrained_s1"]=pretrained_s1 + data["train"]["save_every_n_epoch"]=save_every_epoch + data["train"]["if_save_every_weights"]=if_save_every_weights + data["train"]["if_save_latest"]=if_save_latest + data["train"]["if_dpo"]=if_dpo + data["train"]["half_weights_save_dir"]=GPT_weight_root + data["train"]["exp_name"]=exp_name + data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir + data["train_phoneme_path"]="%s/2-name2text.txt"%s1_dir + data["output_dir"]="%s/logs_s1"%s1_dir + + os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_numbers.replace("-",",") + os.environ["hz"]="25hz" + tmp_config_path="%s/tmp_s1.yaml"%tmp + with open(tmp_config_path, "w") as f:f.write(yaml.dump(data, default_flow_style=False)) + # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) + cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" '%(python_exec,tmp_config_path) + yield "GPT训练开始:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + print(cmd) + p_train_GPT = Popen(cmd, shell=True) + p_train_GPT.wait() + p_train_GPT=None + yield "GPT训练完成",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的GPT训练任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} + +def close1Bb(): + global p_train_GPT + if(p_train_GPT!=None): + kill_process(p_train_GPT.pid) + p_train_GPT=None + return "已终止GPT训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + +ps_slice=[] +def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_parts): + global ps_slice + inp = my_utils.clean_path(inp) + opt_root = my_utils.clean_path(opt_root) + if(os.path.exists(inp)==False): + yield 
"输入路径不存在",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + return + if os.path.isfile(inp):n_parts=1 + elif os.path.isdir(inp):pass + else: + yield "输入路径存在但既不是文件也不是文件夹",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + return + if (ps_slice == []): + for i_part in range(n_parts): + cmd = '"%s" tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s''' % (python_exec,inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, i_part, n_parts) + print(cmd) + p = Popen(cmd, shell=True) + ps_slice.append(p) + yield "切割执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps_slice: + p.wait() + ps_slice=[] + yield "切割结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的切割任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + +def close_slice(): + global ps_slice + if (ps_slice != []): + for p_slice in ps_slice: + try: + kill_process(p_slice.pid) + except: + traceback.print_exc() + ps_slice=[] + return "已终止所有切割进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} + +ps1a=[] +def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir): + global ps1a + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if (ps1a == []): + opt_dir="%s/%s"%(exp_root,exp_name) + config={ + "inp_text":inp_text, + "inp_wav_dir":inp_wav_dir, + "exp_name":exp_name, + "opt_dir":opt_dir, + "bert_pretrained_dir":bert_pretrained_dir, + } + gpu_names=gpu_numbers.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + "is_half": str(is_half) + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1a.append(p) + yield "文本进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1a: + p.wait() + opt = [] + for i_part in range(all_parts): + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + path_text = "%s/2-name2text.txt" % opt_dir + with open(path_text, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1a=[] + yield "文本进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的文本任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + +def close1a(): + global ps1a + if (ps1a != []): + for p1a in ps1a: + try: + kill_process(p1a.pid) + except: + traceback.print_exc() + ps1a=[] + return "已终止所有1a进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} + +ps1b=[] +def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir): + global ps1b + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if (ps1b == []): + config={ + "inp_text":inp_text, + "inp_wav_dir":inp_wav_dir, + "exp_name":exp_name, + "opt_dir":"%s/%s"%(exp_root,exp_name), + "cnhubert_base_dir":ssl_pretrained_dir, + "is_half": str(is_half) + } + gpu_names=gpu_numbers.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + 
"all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1b.append(p) + yield "SSL提取进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1b: + p.wait() + ps1b=[] + yield "SSL提取进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的SSL提取任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + +def close1b(): + global ps1b + if (ps1b != []): + for p1b in ps1b: + try: + kill_process(p1b.pid) + except: + traceback.print_exc() + ps1b=[] + return "已终止所有1b进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} + +ps1c=[] +def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path): + global ps1c + inp_text = my_utils.clean_path(inp_text) + if (ps1c == []): + opt_dir="%s/%s"%(exp_root,exp_name) + config={ + "inp_text":inp_text, + "exp_name":exp_name, + "opt_dir":opt_dir, + "pretrained_s2G":pretrained_s2G_path, + "s2config_path":"GPT_SoVITS/configs/s2.json", + "is_half": str(is_half) + } + gpu_names=gpu_numbers.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1c.append(p) + yield "语义token提取进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1c: + p.wait() + opt = ["item_name\tsemantic_audio"] + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r", encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w", encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + ps1c=[] + yield "语义token提取进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的语义token提取任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + +def close1c(): + global ps1c + if (ps1c != []): + for p1c in ps1c: + try: + kill_process(p1c.pid) + except: + traceback.print_exc() + ps1c=[] + return "已终止所有语义token进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} +#####inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,cnhubert_base_dir,pretrained_s2G +ps1abc=[] +def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,ssl_pretrained_dir,pretrained_s2G_path): + global ps1abc + inp_text = my_utils.clean_path(inp_text) + inp_wav_dir = my_utils.clean_path(inp_wav_dir) + if (ps1abc == []): + opt_dir="%s/%s"%(exp_root,exp_name) + try: + #############################1a + path_text="%s/2-name2text.txt" % opt_dir + if(os.path.exists(path_text)==False or (os.path.exists(path_text)==True and len(open(path_text,"r",encoding="utf8").read().strip("\n").split("\n"))<2)): + config={ + "inp_text":inp_text, + "inp_wav_dir":inp_wav_dir, + "exp_name":exp_name, + "opt_dir":opt_dir, + "bert_pretrained_dir":bert_pretrained_dir, + "is_half": str(is_half) + } + 
gpu_names=gpu_numbers1a.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield "进度:1a-ing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1abc:p.wait() + + opt = [] + for i_part in range(all_parts):#txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part) + txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) + with open(txt_path, "r",encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(txt_path) + with open(path_text, "w",encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + + yield "进度:1a-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + ps1abc=[] + #############################1b + config={ + "inp_text":inp_text, + "inp_wav_dir":inp_wav_dir, + "exp_name":exp_name, + "opt_dir":opt_dir, + "cnhubert_base_dir":ssl_pretrained_dir, + } + gpu_names=gpu_numbers1Ba.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield "进度:1a-done, 1b-ing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1abc:p.wait() + yield "进度:1a1b-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + ps1abc=[] + #############################1c + path_semantic = "%s/6-name2semantic.tsv" % opt_dir + if(os.path.exists(path_semantic)==False or (os.path.exists(path_semantic)==True and os.path.getsize(path_semantic)<31)): + config={ + "inp_text":inp_text, + "exp_name":exp_name, + "opt_dir":opt_dir, + "pretrained_s2G":pretrained_s2G_path, + "s2config_path":"GPT_SoVITS/configs/s2.json", + } + gpu_names=gpu_numbers1c.split("-") + all_parts=len(gpu_names) + for i_part in range(all_parts): + config.update( + { + "i_part": str(i_part), + "all_parts": str(all_parts), + "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], + } + ) + os.environ.update(config) + cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py'%python_exec + print(cmd) + p = Popen(cmd, shell=True) + ps1abc.append(p) + yield "进度:1a1b-done, 1cing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + for p in ps1abc:p.wait() + + opt = ["item_name\tsemantic_audio"] + for i_part in range(all_parts): + semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) + with open(semantic_path, "r",encoding="utf8") as f: + opt += f.read().strip("\n").split("\n") + os.remove(semantic_path) + with open(path_semantic, "w",encoding="utf8") as f: + f.write("\n".join(opt) + "\n") + yield "进度:all-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + ps1abc = [] + yield "一键三连进程结束", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} + except: + traceback.print_exc() + close1abc() + yield "一键三连中途报错", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} + else: + yield "已有正在进行的一键三连任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, 
{"__type__": "update", "visible": True} + +def close1abc(): + global ps1abc + if (ps1abc != []): + for p1abc in ps1abc: + try: + kill_process(p1abc.pid) + except: + traceback.print_exc() + ps1abc=[] + return "已终止所有一键三连进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} diff --git a/webui.py b/webui.py index cff7cdb2..d5a4b549 100644 --- a/webui.py +++ b/webui.py @@ -1,176 +1,12 @@ -import os,shutil,sys,pdb,re -now_dir = os.getcwd() -sys.path.append(now_dir) -import json,yaml,warnings,torch -import platform -import psutil -import signal -warnings.filterwarnings("ignore") -torch.manual_seed(233333) -tmp = os.path.join(now_dir, "TEMP") -os.makedirs(tmp, exist_ok=True) -os.environ["TEMP"] = tmp -if(os.path.exists(tmp)): - for name in os.listdir(tmp): - if(name=="jieba.cache"):continue - path="%s/%s"%(tmp,name) - delete=os.remove if os.path.isfile(path) else shutil.rmtree - try: - delete(path) - except Exception as e: - print(str(e)) - pass -import site -site_packages_roots = [] -for path in site.getsitepackages(): - if "packages" in path: - site_packages_roots.append(path) -if(site_packages_roots==[]):site_packages_roots=["%s/runtime/Lib/site-packages" % now_dir] -#os.environ["OPENBLAS_NUM_THREADS"] = "4" -os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" -os.environ["all_proxy"] = "" -for site_packages_root in site_packages_roots: - if os.path.exists(site_packages_root): - try: - with open("%s/users.pth" % (site_packages_root), "w") as f: - f.write( - "%s\n%s/tools\n%s/tools/damo_asr\n%s/GPT_SoVITS\n%s/tools/uvr5" - % (now_dir, now_dir, now_dir, now_dir, now_dir) - ) - break - except PermissionError: - pass -from tools import my_utils -import traceback -import shutil -import pdb +import os import gradio as gr -from subprocess import Popen -import signal -from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share from tools.i18n.i18n import I18nAuto i18n = I18nAuto() -from scipy.io import wavfile -from tools.my_utils import load_audio -from multiprocessing import cpu_count - -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu - -n_cpu=cpu_count() - -ngpu = torch.cuda.device_count() -gpu_infos = [] -mem = [] -if_gpu_ok = False - -# 判断是否有能用来训练和加速推理的N卡 -if torch.cuda.is_available() or ngpu != 0: - for i in range(ngpu): - gpu_name = torch.cuda.get_device_name(i) - if any(value in gpu_name.upper()for value in ["10","16","20","30","40","A2","A3","A4","P4","A50","500","A60","70","80","90","M4","T4","TITAN","L4","4060"]): - # A10#A100#V100#A40#P40#M40#K80#A4500 - if_gpu_ok = True # 至少有一张能用的N卡 - gpu_infos.append("%s\t%s" % (i, gpu_name)) - mem.append(int(torch.cuda.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4)) -# 判断是否支持mps加速 -if torch.backends.mps.is_available(): - if_gpu_ok = True - gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) - mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 - -if if_gpu_ok and len(gpu_infos) > 0: - gpu_info = "\n".join(gpu_infos) - default_batch_size = min(mem) // 2 -else: - gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") - default_batch_size = 1 -gpus = "-".join([i[0] for i in gpu_infos]) - -pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth" -pretrained_gpt_name="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -def get_weights_names(): - SoVITS_names = [pretrained_sovits_name] - for name in os.listdir(SoVITS_weight_root): - if 
name.endswith(".pth"):SoVITS_names.append(name) - GPT_names = [pretrained_gpt_name] - for name in os.listdir(GPT_weight_root): - if name.endswith(".ckpt"): GPT_names.append(name) - return SoVITS_names,GPT_names -SoVITS_weight_root="SoVITS_weights" -GPT_weight_root="GPT_weights" -os.makedirs(SoVITS_weight_root,exist_ok=True) -os.makedirs(GPT_weight_root,exist_ok=True) -SoVITS_names,GPT_names = get_weights_names() - -def custom_sort_key(s): - # 使用正则表达式提取字符串中的数字部分和非数字部分 - parts = re.split('(\d+)', s) - # 将数字部分转换为整数,非数字部分保持不变 - parts = [int(part) if part.isdigit() else part for part in parts] - return parts - -def change_choices(): - SoVITS_names, GPT_names = get_weights_names() - return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names,key=custom_sort_key), "__type__": "update"} - -p_label=None -p_uvr5=None -p_asr=None -p_tts_inference=None - -def kill_proc_tree(pid, including_parent=True): - try: - parent = psutil.Process(pid) - except psutil.NoSuchProcess: - # Process already terminated - return - - children = parent.children(recursive=True) - for child in children: - try: - os.kill(child.pid, signal.SIGTERM) # or signal.SIGKILL - except OSError: - pass - if including_parent: - try: - os.kill(parent.pid, signal.SIGTERM) # or signal.SIGKILL - except OSError: - pass - -system=platform.system() -def kill_process(pid): - if(system=="Windows"): - cmd = "taskkill /t /f /pid %s" % pid - os.system(cmd) - else: - kill_proc_tree(pid) - - -def change_label(if_label,path_list): - global p_label - if(if_label==True and p_label==None): - path_list=my_utils.clean_path(path_list) - cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share) - yield i18n("打标工具WebUI已开启") - print(cmd) - p_label = Popen(cmd, shell=True) - elif(if_label==False and p_label!=None): - kill_process(p_label.pid) - p_label=None - yield i18n("打标工具WebUI已关闭") - -def change_uvr5(if_uvr5): - global p_uvr5 - if(if_uvr5==True and p_uvr5==None): - cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share) - yield i18n("UVR5已开启") - print(cmd) - p_uvr5 = Popen(cmd, shell=True) - elif(if_uvr5==False and p_uvr5!=None): - kill_process(p_uvr5.pid) - p_uvr5=None - yield i18n("UVR5已关闭") +from train_base import gpu_info, n_cpu, SoVITS_names, pretrained_sovits_name, pretrained_gpt_name, custom_sort_key, GPT_names, default_batch_size, kill_process, SoVITS_weight_root, GPT_weight_root, change_choices, change_label, change_uvr5, open_asr, change_tts_inference, open1Ba, open1Bb, close1Bb, open_slice, close_asr, open1a, close1a, open1b, close1Ba, close_slice, close1b, open1c, close1c, open1abc, close1abc, gpus +from tools.asr.config import asr_dict +from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share +from subprocess import Popen def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path): global p_tts_inference @@ -192,459 +28,6 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path p_tts_inference=None yield i18n("TTS推理进程已关闭") -from tools.asr.config import asr_dict -def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang): - global p_asr - if(p_asr==None): - asr_inp_dir=my_utils.clean_path(asr_inp_dir) - cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}' - cmd += f' -i "{asr_inp_dir}"' - cmd += f' 
-o "{asr_opt_dir}"' - cmd += f' -s {asr_model_size}' - cmd += f' -l {asr_lang}' - cmd += " -p %s"%("float16"if is_half==True else "float32") - - yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} - print(cmd) - p_asr = Popen(cmd, shell=True) - p_asr.wait() - p_asr=None - yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} - # return None - -def close_asr(): - global p_asr - if(p_asr!=None): - kill_process(p_asr.pid) - p_asr=None - return "已终止ASR进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - -p_train_SoVITS=None -def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D): - global p_train_SoVITS - if(p_train_SoVITS==None): - with open("GPT_SoVITS/configs/s2.json")as f: - data=f.read() - data=json.loads(data) - s2_dir="%s/%s"%(exp_root,exp_name) - os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True) - if(is_half==False): - data["train"]["fp16_run"]=False - batch_size=max(1,batch_size//2) - data["train"]["batch_size"]=batch_size - data["train"]["epochs"]=total_epoch - data["train"]["text_low_lr_rate"]=text_low_lr_rate - data["train"]["pretrained_s2G"]=pretrained_s2G - data["train"]["pretrained_s2D"]=pretrained_s2D - data["train"]["if_save_latest"]=if_save_latest - data["train"]["if_save_every_weights"]=if_save_every_weights - data["train"]["save_every_epoch"]=save_every_epoch - data["train"]["gpu_numbers"]=gpu_numbers1Ba - data["data"]["exp_dir"]=data["s2_ckpt_dir"]=s2_dir - data["save_weight_dir"]=SoVITS_weight_root - data["name"]=exp_name - tmp_config_path="%s/tmp_s2.json"%tmp - with open(tmp_config_path,"w")as f:f.write(json.dumps(data)) - - cmd = '"%s" GPT_SoVITS/s2_train.py --config "%s"'%(python_exec,tmp_config_path) - yield "SoVITS训练开始:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} - print(cmd) - p_train_SoVITS = Popen(cmd, shell=True) - p_train_SoVITS.wait() - p_train_SoVITS=None - yield "SoVITS训练完成",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} - -def close1Ba(): - global p_train_SoVITS - if(p_train_SoVITS!=None): - kill_process(p_train_SoVITS.pid) - p_train_SoVITS=None - return "已终止SoVITS训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - -p_train_GPT=None -def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1): - global p_train_GPT - if(p_train_GPT==None): - with open("GPT_SoVITS/configs/s1longer.yaml")as f: - data=f.read() - data=yaml.load(data, Loader=yaml.FullLoader) - s1_dir="%s/%s"%(exp_root,exp_name) - os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True) - if(is_half==False): - data["train"]["precision"]="32" - batch_size = max(1, batch_size // 2) - data["train"]["batch_size"]=batch_size - data["train"]["epochs"]=total_epoch - data["pretrained_s1"]=pretrained_s1 - data["train"]["save_every_n_epoch"]=save_every_epoch - data["train"]["if_save_every_weights"]=if_save_every_weights - data["train"]["if_save_latest"]=if_save_latest - data["train"]["if_dpo"]=if_dpo - data["train"]["half_weights_save_dir"]=GPT_weight_root - 
data["train"]["exp_name"]=exp_name - data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir - data["train_phoneme_path"]="%s/2-name2text.txt"%s1_dir - data["output_dir"]="%s/logs_s1"%s1_dir - - os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_numbers.replace("-",",") - os.environ["hz"]="25hz" - tmp_config_path="%s/tmp_s1.yaml"%tmp - with open(tmp_config_path, "w") as f:f.write(yaml.dump(data, default_flow_style=False)) - # cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir) - cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" '%(python_exec,tmp_config_path) - yield "GPT训练开始:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} - print(cmd) - p_train_GPT = Popen(cmd, shell=True) - p_train_GPT.wait() - p_train_GPT=None - yield "GPT训练完成",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的GPT训练任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} - -def close1Bb(): - global p_train_GPT - if(p_train_GPT!=None): - kill_process(p_train_GPT.pid) - p_train_GPT=None - return "已终止GPT训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - -ps_slice=[] -def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_parts): - global ps_slice - inp = my_utils.clean_path(inp) - opt_root = my_utils.clean_path(opt_root) - if(os.path.exists(inp)==False): - yield "输入路径不存在",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - return - if os.path.isfile(inp):n_parts=1 - elif os.path.isdir(inp):pass - else: - yield "输入路径存在但既不是文件也不是文件夹",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - return - if (ps_slice == []): - for i_part in range(n_parts): - cmd = '"%s" tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s''' % (python_exec,inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, i_part, n_parts) - print(cmd) - p = Popen(cmd, shell=True) - ps_slice.append(p) - yield "切割执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps_slice: - p.wait() - ps_slice=[] - yield "切割结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的切割任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - -def close_slice(): - global ps_slice - if (ps_slice != []): - for p_slice in ps_slice: - try: - kill_process(p_slice.pid) - except: - traceback.print_exc() - ps_slice=[] - return "已终止所有切割进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - -ps1a=[] -def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir): - global ps1a - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if (ps1a == []): - opt_dir="%s/%s"%(exp_root,exp_name) - config={ - "inp_text":inp_text, - "inp_wav_dir":inp_wav_dir, - "exp_name":exp_name, - "opt_dir":opt_dir, - "bert_pretrained_dir":bert_pretrained_dir, - } - gpu_names=gpu_numbers.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - "is_half": str(is_half) - } - ) - os.environ.update(config) - cmd = '"%s" 
GPT_SoVITS/prepare_datasets/1-get-text.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1a.append(p) - yield "文本进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1a: - p.wait() - opt = [] - for i_part in range(all_parts): - txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) - with open(txt_path, "r", encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(txt_path) - path_text = "%s/2-name2text.txt" % opt_dir - with open(path_text, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - ps1a=[] - yield "文本进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的文本任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - -def close1a(): - global ps1a - if (ps1a != []): - for p1a in ps1a: - try: - kill_process(p1a.pid) - except: - traceback.print_exc() - ps1a=[] - return "已终止所有1a进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - -ps1b=[] -def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir): - global ps1b - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if (ps1b == []): - config={ - "inp_text":inp_text, - "inp_wav_dir":inp_wav_dir, - "exp_name":exp_name, - "opt_dir":"%s/%s"%(exp_root,exp_name), - "cnhubert_base_dir":ssl_pretrained_dir, - "is_half": str(is_half) - } - gpu_names=gpu_numbers.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - } - ) - os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1b.append(p) - yield "SSL提取进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1b: - p.wait() - ps1b=[] - yield "SSL提取进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的SSL提取任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - -def close1b(): - global ps1b - if (ps1b != []): - for p1b in ps1b: - try: - kill_process(p1b.pid) - except: - traceback.print_exc() - ps1b=[] - return "已终止所有1b进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - -ps1c=[] -def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path): - global ps1c - inp_text = my_utils.clean_path(inp_text) - if (ps1c == []): - opt_dir="%s/%s"%(exp_root,exp_name) - config={ - "inp_text":inp_text, - "exp_name":exp_name, - "opt_dir":opt_dir, - "pretrained_s2G":pretrained_s2G_path, - "s2config_path":"GPT_SoVITS/configs/s2.json", - "is_half": str(is_half) - } - gpu_names=gpu_numbers.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - } - ) - os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1c.append(p) - yield "语义token提取进程执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1c: - p.wait() - opt = ["item_name\tsemantic_audio"] - path_semantic = "%s/6-name2semantic.tsv" % opt_dir - for i_part in range(all_parts): - semantic_path = 
"%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) - with open(semantic_path, "r", encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(semantic_path) - with open(path_semantic, "w", encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - ps1c=[] - yield "语义token提取进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} - else: - yield "已有正在进行的语义token提取任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - -def close1c(): - global ps1c - if (ps1c != []): - for p1c in ps1c: - try: - kill_process(p1c.pid) - except: - traceback.print_exc() - ps1c=[] - return "已终止所有语义token进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} -#####inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,cnhubert_base_dir,pretrained_s2G -ps1abc=[] -def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,ssl_pretrained_dir,pretrained_s2G_path): - global ps1abc - inp_text = my_utils.clean_path(inp_text) - inp_wav_dir = my_utils.clean_path(inp_wav_dir) - if (ps1abc == []): - opt_dir="%s/%s"%(exp_root,exp_name) - try: - #############################1a - path_text="%s/2-name2text.txt" % opt_dir - if(os.path.exists(path_text)==False or (os.path.exists(path_text)==True and len(open(path_text,"r",encoding="utf8").read().strip("\n").split("\n"))<2)): - config={ - "inp_text":inp_text, - "inp_wav_dir":inp_wav_dir, - "exp_name":exp_name, - "opt_dir":opt_dir, - "bert_pretrained_dir":bert_pretrained_dir, - "is_half": str(is_half) - } - gpu_names=gpu_numbers1a.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - } - ) - os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/1-get-text.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield "进度:1a-ing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1abc:p.wait() - - opt = [] - for i_part in range(all_parts):#txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part) - txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) - with open(txt_path, "r",encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(txt_path) - with open(path_text, "w",encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - - yield "进度:1a-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - ps1abc=[] - #############################1b - config={ - "inp_text":inp_text, - "inp_wav_dir":inp_wav_dir, - "exp_name":exp_name, - "opt_dir":opt_dir, - "cnhubert_base_dir":ssl_pretrained_dir, - } - gpu_names=gpu_numbers1Ba.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - } - ) - os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield "进度:1a-done, 1b-ing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1abc:p.wait() - yield "进度:1a1b-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - ps1abc=[] - #############################1c - path_semantic = "%s/6-name2semantic.tsv" % opt_dir - 
if(os.path.exists(path_semantic)==False or (os.path.exists(path_semantic)==True and os.path.getsize(path_semantic)<31)): - config={ - "inp_text":inp_text, - "exp_name":exp_name, - "opt_dir":opt_dir, - "pretrained_s2G":pretrained_s2G_path, - "s2config_path":"GPT_SoVITS/configs/s2.json", - } - gpu_names=gpu_numbers1c.split("-") - all_parts=len(gpu_names) - for i_part in range(all_parts): - config.update( - { - "i_part": str(i_part), - "all_parts": str(all_parts), - "_CUDA_VISIBLE_DEVICES": gpu_names[i_part], - } - ) - os.environ.update(config) - cmd = '"%s" GPT_SoVITS/prepare_datasets/3-get-semantic.py'%python_exec - print(cmd) - p = Popen(cmd, shell=True) - ps1abc.append(p) - yield "进度:1a1b-done, 1cing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps1abc:p.wait() - - opt = ["item_name\tsemantic_audio"] - for i_part in range(all_parts): - semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part) - with open(semantic_path, "r",encoding="utf8") as f: - opt += f.read().strip("\n").split("\n") - os.remove(semantic_path) - with open(path_semantic, "w",encoding="utf8") as f: - f.write("\n".join(opt) + "\n") - yield "进度:all-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - ps1abc = [] - yield "一键三连进程结束", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - except: - traceback.print_exc() - close1abc() - yield "一键三连中途报错", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - else: - yield "已有正在进行的一键三连任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} - -def close1abc(): - global ps1abc - if (ps1abc != []): - for p1abc in ps1abc: - try: - kill_process(p1abc.pid) - except: - traceback.print_exc() - ps1abc=[] - return "已终止所有一键三连进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} - with gr.Blocks(title="GPT-SoVITS WebUI") as app: gr.Markdown( value= From 280d5c617d4664066bd1c5c7ba74a0b2114ab56d Mon Sep 17 00:00:00 2001 From: jax Date: Sun, 18 Feb 2024 13:57:56 +0800 Subject: [PATCH 2/5] fix import --- webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webui.py b/webui.py index d5a4b549..0f3f89fb 100644 --- a/webui.py +++ b/webui.py @@ -3,7 +3,7 @@ import os import gradio as gr from tools.i18n.i18n import I18nAuto i18n = I18nAuto() -from train_base import gpu_info, n_cpu, SoVITS_names, pretrained_sovits_name, pretrained_gpt_name, custom_sort_key, GPT_names, default_batch_size, kill_process, SoVITS_weight_root, GPT_weight_root, change_choices, change_label, change_uvr5, open_asr, change_tts_inference, open1Ba, open1Bb, close1Bb, open_slice, close_asr, open1a, close1a, open1b, close1Ba, close_slice, close1b, open1c, close1c, open1abc, close1abc, gpus +from train_base import gpu_info, n_cpu, SoVITS_names, pretrained_sovits_name, pretrained_gpt_name, custom_sort_key, GPT_names, default_batch_size, kill_process, SoVITS_weight_root, GPT_weight_root, change_choices, change_label, change_uvr5, open_asr, open1Ba, open1Bb, close1Bb, open_slice, close_asr, open1a, close1a, open1b, close1Ba, close_slice, close1b, open1c, close1c, open1abc, close1abc, gpus from tools.asr.config import asr_dict from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share from subprocess import Popen From b4aaa221a822d283517304278aa7973b9a372b87 Mon Sep 17 00:00:00 2001 From: jax Date: Sun, 18 
Feb 2024 14:14:04 +0800 Subject: [PATCH 3/5] fix var local --- train_base.py | 1 - webui.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/train_base.py b/train_base.py index 1c3d5866..73472932 100644 --- a/train_base.py +++ b/train_base.py @@ -116,7 +116,6 @@ def change_choices(): p_label=None p_uvr5=None p_asr=None -p_tts_inference=None def kill_proc_tree(pid, including_parent=True): try: diff --git a/webui.py b/webui.py index 0f3f89fb..df7822c0 100644 --- a/webui.py +++ b/webui.py @@ -8,6 +8,7 @@ from tools.asr.config import asr_dict from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share from subprocess import Popen +p_tts_inference=None def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path): global p_tts_inference if(if_tts==True and p_tts_inference==None): From 865341e2165c3db09cbe900427e98138cfed87a2 Mon Sep 17 00:00:00 2001 From: jax Date: Sun, 18 Feb 2024 21:29:36 +0800 Subject: [PATCH 4/5] fix --- GPT_SoVITS/{interence_base.py => inference_base.py} | 0 GPT_SoVITS/inference_webui.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename GPT_SoVITS/{interence_base.py => inference_base.py} (100%) diff --git a/GPT_SoVITS/interence_base.py b/GPT_SoVITS/inference_base.py similarity index 100% rename from GPT_SoVITS/interence_base.py rename to GPT_SoVITS/inference_base.py diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 87e2ae52..6776f3e9 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -1,6 +1,6 @@ import os from tools.i18n.i18n import I18nAuto -from interence_base import sovits_path, gpt_path, change_choices, GPT_names, custom_sort_key, SoVITS_names, change_sovits_weights, change_gpt_weights, get_tts_wav, cut1, cut2, cut3, cut4, cut5 +from inference_base import sovits_path, gpt_path, change_choices, GPT_names, custom_sort_key, SoVITS_names, change_sovits_weights, change_gpt_weights, get_tts_wav, cut1, cut2, cut3, cut4, cut5 import gradio as gr i18n = I18nAuto() From 0f960d2f7f79b6a979af9e9a38cb0269be083c3d Mon Sep 17 00:00:00 2001 From: jax Date: Tue, 20 Feb 2024 09:59:22 +0800 Subject: [PATCH 5/5] remove script --- train_script.py | 80 ------------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 train_script.py diff --git a/train_script.py b/train_script.py deleted file mode 100644 index a380e333..00000000 --- a/train_script.py +++ /dev/null @@ -1,80 +0,0 @@ - -from subprocess import Popen -from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix -from train_base import open1abc, open1Ba, open1Bb, gpus, default_batch_size, open_slice, open_asr -import os - -current_working_directory = os.getcwd() -inp_text_dir = current_working_directory + "/" + "output/asr_opt" -inp_text = inp_text_dir + "/slicer_opt.list" -inp_wav_dir = current_working_directory + "/" + "output/slicer_opt" -exp_name = "jax_clone_voice" -gpu_numbers="%s-%s"%(gpus,gpus) -bert_pretrained_dir = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" -ssl_pretrained_dir = "GPT_SoVITS/pretrained_models/chinese-hubert-base" -pretrained_s2G_path = "GPT_SoVITS/pretrained_models/s2G488k.pth" - -def slice_audio(inp,opt_root=inp_wav_dir): - openSliceGenerator = open_slice(inp,opt_root,"-34","4000","300","10","500",0.9,0.25,4) - for value in openSliceGenerator: - print(value) - -def 
asr(asr_inp_dir=inp_wav_dir, asr_opt_dir=inp_text_dir, asr_model="达摩 ASR (中文)", asr_model_size="large", asr_lang="zh"): - openASRGenerator = open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang) - for value in openASRGenerator: - print(value) - -def train_prepare(inp_text=inp_text,inp_wav_dir =inp_wav_dir ,exp_name = exp_name,gpu_numbers1a = gpu_numbers,gpu_numbers1Ba=gpu_numbers,gpu_numbers1c=gpu_numbers,bert_pretrained_dir=bert_pretrained_dir,ssl_pretrained_dir=ssl_pretrained_dir,pretrained_s2G_path=pretrained_s2G_path): - open1abcGenerator = open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,ssl_pretrained_dir,pretrained_s2G_path) - for value in open1abcGenerator: - print(value) - -batch_size = default_batch_size -total_epoch = 8 -text_low_lr_rate = 0.4 -if_save_latest = True -if_save_every_weights = True -save_every_epoch = 4 -gpu_numbers1Ba = "%s" % (gpus) -pretrained_s2D = "GPT_SoVITS/pretrained_models/s2D488k.pth" -pretrained_s2G = "GPT_SoVITS/pretrained_models/s2G488k.pth" - -def train_SoVITS(batch_size=batch_size,total_epoch=total_epoch,exp_name=exp_name,text_low_lr_rate=text_low_lr_rate,if_save_latest=if_save_latest,if_save_every_weights=if_save_every_weights,save_every_epoch=save_every_epoch,gpu_numbers1Ba=gpu_numbers1Ba,pretrained_s2G=pretrained_s2G,pretrained_s2D=pretrained_s2D): - open1BaGenerator = open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D) - for value in open1BaGenerator: - print(value) - -gpt_batch_size = default_batch_size -gpt_total_epoch = 15 -gpt_if_save_latest = True -gpt_if_save_every_weights = True -gpt_save_every_epoch = 5 -gpu_numbers1Bb = "%s" % (gpus) -pretrained_s1 = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -if_dpo = False - - -def train_GPT(batch_size=default_batch_size,total_epoch=gpt_total_epoch,exp_name=exp_name,if_dpo=if_dpo, if_save_latest=gpt_if_save_latest, - if_save_every_weights=gpt_if_save_every_weights,save_every_epoch=gpt_save_every_epoch,gpu_numbers=gpu_numbers1Bb,pretrained_s1=pretrained_s1): - open1BbGenerator = open1Bb(batch_size, total_epoch, exp_name, if_dpo, if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1) - for value in open1BbGenerator: - print(value) - - - - - - -# train_prepare(inp_text=inp_text, inp_wav_dir=inp_wav_dir, exp_name=exp_name, gpu_numbers1a=gpu_numbers, gpu_numbers1Ba=gpu_numbers, gpu_numbers1c=gpu_numbers, -# bert_pretrained_dir=bert_pretrained_dir, ssl_pretrained_dir=ssl_pretrained_dir, pretrained_s2G_path=pretrained_s2G_path ) - -# train_SoVITS(batch_size=batch_size, total_epoch=total_epoch, exp_name=exp_name, -# text_low_lr_rate=text_low_lr_rate, if_save_latest=if_save_latest, -# if_save_every_weights=if_save_every_weights, save_every_epoch=save_every_epoch, -# gpu_numbers1Ba=gpu_numbers1Ba, pretrained_s2D=pretrained_s2D, pretrained_s2G=pretrained_s2G) - - -# train_GPT(batch_size=gpt_batch_size, total_epoch=gpt_total_epoch, exp_name=exp_name, if_dpo=if_dpo, if_save_latest=gpt_if_save_latest, -# if_save_every_weights=gpt_if_save_every_weights, save_every_epoch=gpt_save_every_epoch, -# gpu_numbers=gpu_numbers1Bb, pretrained_s1=pretrained_s1 -# ) \ No newline at end of file
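
For reference, the workflow of the removed train_script.py can still be reproduced by importing the task generators from train_base.py. The sketch below is a minimal reconstruction under the signatures shown in the diffs above; the source-audio path, experiment name, and hyperparameter values are illustrative assumptions, not prescribed defaults:

# Minimal driver sketch: reproduces the removed train_script.py workflow
# using only the task generators defined in train_base.py. The input path,
# experiment name, and hyperparameters below are illustrative assumptions.
import os
from train_base import (open_slice, open_asr, open1abc, open1Ba, open1Bb,
                        gpus, default_batch_size)

cwd = os.getcwd()
raw_input = cwd + "/output/raw"              # illustrative: file or folder of source audio
sliced_dir = cwd + "/output/slicer_opt"      # slicer output, reused as inp_wav_dir
asr_dir = cwd + "/output/asr_opt"            # ASR output folder
inp_text = asr_dir + "/slicer_opt.list"      # transcript list written by the ASR step
exp_name = "my_clone_voice"                  # illustrative experiment name
gpu_numbers = "%s-%s" % (gpus, gpus)

def run(task):
    # every train_base task is a generator yielding a status string plus
    # gradio update dicts; when scripting, just drain it and print progress
    for status in task:
        print(status)

# slice the source audio, then transcribe the slices (Damo ASR, Chinese)
run(open_slice(raw_input, sliced_dir, "-34", "4000", "300", "10", "500", 0.9, 0.25, 4))
run(open_asr(sliced_dir, asr_dir, "达摩 ASR (中文)", "large", "zh"))

# one-click dataset prep: text/BERT (1a), SSL features (1b), semantic tokens (1c)
run(open1abc(inp_text, sliced_dir, exp_name, gpu_numbers, gpu_numbers, gpu_numbers,
             "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
             "GPT_SoVITS/pretrained_models/chinese-hubert-base",
             "GPT_SoVITS/pretrained_models/s2G488k.pth"))

# SoVITS training (1Ba), then GPT training (1Bb)
run(open1Ba(default_batch_size, 8, exp_name, 0.4, True, True, 4, gpus,
            "GPT_SoVITS/pretrained_models/s2G488k.pth",
            "GPT_SoVITS/pretrained_models/s2D488k.pth"))
run(open1Bb(default_batch_size, 15, exp_name, False, True, True, 5, gpus,
            "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"))

Each call blocks until its subprocesses finish (the generators wait on their Popen handles internally), so the steps run strictly in sequence, matching the order the WebUI enforces.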