From f5c01be48f7388b3b6d94d90b64e20abb0cadc99 Mon Sep 17 00:00:00 2001
From: XXXXRT666
Date: Thu, 8 Aug 2024 02:06:08 +0800
Subject: [PATCH] Fix some bugs and add some checks

---
 GPT_SoVITS/prepare_datasets/1-get-text.py     |  3 +-
 .../prepare_datasets/2-get-hubert-wav32k.py   |  3 +-
 GPT_SoVITS/prepare_datasets/3-get-semantic.py |  3 +-
 tools/my_utils.py                             | 79 ++++++++++++++++++-
 webui.py                                      | 52 ++++--------
 5 files changed, 99 insertions(+), 41 deletions(-)

diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py
index 7af6c100..bdeacc7b 100644
--- a/GPT_SoVITS/prepare_datasets/1-get-text.py
+++ b/GPT_SoVITS/prepare_datasets/1-get-text.py
@@ -7,7 +7,8 @@ inp_wav_dir = os.environ.get("inp_wav_dir")
 exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
-os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
+if "_CUDA_VISIBLE_DEVICES" in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
 import torch
diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
index 82f3c69d..27b61f27 100644
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -6,7 +6,8 @@ inp_wav_dir= os.environ.get("inp_wav_dir")
 exp_name= os.environ.get("exp_name")
 i_part= os.environ.get("i_part")
 all_parts= os.environ.get("all_parts")
-os.environ["CUDA_VISIBLE_DEVICES"]= os.environ.get("_CUDA_VISIBLE_DEVICES")
+if "_CUDA_VISIBLE_DEVICES" in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 from feature_extractor import cnhubert
 opt_dir= os.environ.get("opt_dir")
 cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")
diff --git a/GPT_SoVITS/prepare_datasets/3-get-semantic.py b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
index bbf7688b..a29a6629 100644
--- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py
+++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
@@ -4,7 +4,8 @@ inp_text = os.environ.get("inp_text")
 exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
-os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
+if "_CUDA_VISIBLE_DEVICES" in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 pretrained_s2G = os.environ.get("pretrained_s2G")
 s2config_path = os.environ.get("s2config_path")
diff --git a/tools/my_utils.py b/tools/my_utils.py
index 53544f8d..56db27cf 100644
--- a/tools/my_utils.py
+++ b/tools/my_utils.py
@@ -1,7 +1,10 @@
 import platform,os,traceback
 import ffmpeg
 import numpy as np
-
+import gradio as gr
+from i18n.i18n import I18nAuto
+import pandas as pd
+i18n = I18nAuto(language=os.environ.get('language','Auto'))
 
 def load_audio(file, sr):
     try:
@@ -20,7 +23,7 @@
         )
     except Exception as e:
         traceback.print_exc()
-        raise RuntimeError(f"Failed to load audio: {e}")
+        raise RuntimeError(i18n("Failed to load audio:")+str(e))
 
     return np.frombuffer(out, np.float32).flatten()
 
@@ -30,3 +33,75 @@ def clean_path(path_str:str):
         return clean_path(path_str[0:-1])
     path_str = path_str.replace('/', os.sep).replace('\\', os.sep)
     return path_str.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")
+
+
+def check_for_existance(file_list:list=None,is_train=False,is_dataset_processing=False):
+    files_status=[]
+    if is_train == True and file_list:
+        file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
+        file_list.append(os.path.join(file_list[0],'3-bert'))
+        file_list.append(os.path.join(file_list[0],'4-cnhubert'))
+        file_list.append(os.path.join(file_list[0],'5-wav32k'))
+        file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
+    for file in file_list:
+        if os.path.exists(file):files_status.append(True)
+        else:files_status.append(False)
+
+    if sum(files_status)!=len(files_status):
+        if is_train:
+            for file,status in zip(file_list,files_status):
+                if status:pass
+                else:gr.Warning(file)
+            gr.Warning(i18n('以下文件或文件夹不存在:'))
+        elif is_dataset_processing:
+            if not files_status[0]:
+                gr.Warning(file_list[0])
+            if not files_status[1] and file_list[1]:
+                gr.Warning(file_list[1])
+            if not files_status[0] or (not files_status[1] and file_list[1]):gr.Warning(i18n('以下文件或文件夹不存在:'))
+        else:
+            if file_list[0]:
+                gr.Warning(file_list[0])
+                gr.Warning(i18n('以下文件或文件夹不存在:'))
+            else:
+                gr.Warning(i18n('路径不能为空'))
+
+def check_details(path_list=None,is_train=False,is_dataset_processing=False):
+    if is_dataset_processing:
+        list_path, audio_path = path_list
+        if (not list_path.endswith('.list')):
+            gr.Warning(i18n('请填入正确的list路径'))
+            return
+        if audio_path:
+            if not os.path.isdir(audio_path):
+                gr.Warning(i18n('请填入正确的音频文件夹路径'))
+                return
+        with open(list_path,"r",encoding="utf8")as f:
+            line=f.readline().strip("\n")
+        wav_name, spk_name, language, text = line.split("|")
+        wav_name=clean_path(wav_name)
+        if (audio_path != "" and audio_path != None):
+            wav_name = os.path.basename(wav_name)
+            wav_path = "%s/%s"%(audio_path, wav_name)
+        else:
+            wav_path=wav_name
+        if os.path.exists(wav_path):
+            ...
+        else:
+            gr.Warning(i18n('路径错误'))
+    if is_train:
+        path_list.append(os.path.join(path_list[0],'2-name2text.txt'))
+        path_list.append(os.path.join(path_list[0],'4-cnhubert'))
+        path_list.append(os.path.join(path_list[0],'5-wav32k'))
+        path_list.append(os.path.join(path_list[0],'6-name2semantic.tsv'))
+        phone_path, hubert_path, wav_path, semantic_path = path_list[1:]
+        with open(phone_path,"r",encoding="utf8") as f:
+            if f.read(1):...
+            else:gr.Warning(i18n('缺少音素数据集'))
+        if os.listdir(hubert_path):...
+        else:gr.Warning(i18n('缺少Hubert数据集'))
+        if os.listdir(wav_path):...
+        else:gr.Warning(i18n('缺少音频数据集'))
+        df = pd.read_csv(semantic_path,delimiter='\t')
+        if len(df) > 1:...
+        else:gr.Warning(i18n('缺少语义数据集'))
diff --git a/webui.py b/webui.py
index 73612bfa..a2be41bd 100644
--- a/webui.py
+++ b/webui.py
@@ -56,7 +56,7 @@ language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
 os.environ["language"]=language
 i18n = I18nAuto(language=language)
 from scipy.io import wavfile
-from tools.my_utils import load_audio
+from tools.my_utils import load_audio, check_for_existance, check_details
 from multiprocessing import cpu_count
 # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
 try:
@@ -248,7 +248,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_
     if(p_asr==None):
         asr_inp_dir=my_utils.clean_path(asr_inp_dir)
         asr_opt_dir=my_utils.clean_path(asr_opt_dir)
-        check_for_exists([asr_inp_dir])
+        check_for_existance([asr_inp_dir])
         cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}'
         cmd += f' -i "{asr_inp_dir}"'
         cmd += f' -o "{asr_opt_dir}"'
@@ -279,7 +279,7 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir):
     if(p_denoise==None):
         denoise_inp_dir=my_utils.clean_path(denoise_inp_dir)
         denoise_opt_dir=my_utils.clean_path(denoise_opt_dir)
-        check_for_exists([denoise_inp_dir])
+        check_for_existance([denoise_inp_dir])
         cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s'%(python_exec,denoise_inp_dir,denoise_opt_dir,"float16"if is_half==True else "float32")
 
         yield "语音降噪任务开启:%s"%cmd, {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}, {"__type__":"update"}
@@ -308,7 +308,8 @@ def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_s
         data=json.loads(data)
         s2_dir="%s/%s"%(exp_root,exp_name)
         os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True)
-        check_for_exists([s2_dir],is_train=True)
+        check_for_existance([s2_dir],is_train=True)
+        check_details([s2_dir],is_train=True)
         if(is_half==False):
             data["train"]["fp16_run"]=False
             batch_size=max(1,batch_size//2)
@@ -355,7 +356,8 @@ def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_
         data=yaml.load(data, Loader=yaml.FullLoader)
         s1_dir="%s/%s"%(exp_root,exp_name)
         os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True)
-        check_for_exists([s1_dir],is_train=True)
+        check_for_existance([s1_dir],is_train=True)
+        check_details([s1_dir],is_train=True)
         if(is_half==False):
             data["train"]["precision"]="32"
             batch_size = max(1, batch_size // 2)
@@ -400,7 +402,7 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
     global ps_slice
     inp = my_utils.clean_path(inp)
     opt_root = my_utils.clean_path(opt_root)
-    check_for_exists([inp])
+    check_for_existance([inp])
     if(os.path.exists(inp)==False):
         yield "输入路径不存在", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
         return
@@ -439,7 +441,8 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
     global ps1a
     inp_text = my_utils.clean_path(inp_text)
     inp_wav_dir = my_utils.clean_path(inp_wav_dir)
-    check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
+    check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True)
+    check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
     if (ps1a == []):
         opt_dir="%s/%s"%(exp_root,exp_name)
         config={
@@ -501,7 +504,8 @@ def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir):
     global ps1b
     inp_text = my_utils.clean_path(inp_text)
     inp_wav_dir = my_utils.clean_path(inp_wav_dir)
-    check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
+    check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True)
+    check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
     if (ps1b == []):
         config={
             "inp_text":inp_text,
@@ -549,7 +553,8 @@ ps1c=[]
 def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path):
     global ps1c
     inp_text = my_utils.clean_path(inp_text)
-    check_for_exists([inp_text,''], is_dataset_processing=True)
+    check_for_existance([inp_text,''], is_dataset_processing=True)
+    check_details([inp_text,''], is_dataset_processing=True)
     if (ps1c == []):
         opt_dir="%s/%s"%(exp_root,exp_name)
         config={
@@ -608,7 +613,8 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
     global ps1abc
     inp_text = my_utils.clean_path(inp_text)
     inp_wav_dir = my_utils.clean_path(inp_wav_dir)
-    check_for_exists([inp_text,inp_wav_dir])
+    check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True)
+    check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
     if (ps1abc == []):
         opt_dir="%s/%s"%(exp_root,exp_name)
         try:
@@ -745,32 +751,6 @@ def switch_version(version_):
         gr.Warning(i18n(f'未下载{version.upper()}模型'))
     return {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D")}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}
 
-def check_for_exists(file_list=None,is_train=False,is_dataset_processing=False):
-    missing_files=[]
-    if is_train == True and file_list:
-        file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
-        file_list.append(os.path.join(file_list[0],'3-bert'))
-        file_list.append(os.path.join(file_list[0],'4-cnhubert'))
-        file_list.append(os.path.join(file_list[0],'5-wav32k'))
-        file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
-    for file in file_list:
-        if os.path.exists(file):pass
-        else:missing_files.append(file)
-    if missing_files:
-        if is_train:
-            for missing_file in missing_files:
-                if missing_file != '':
-                    gr.Warning(missing_file)
-            gr.Warning(i18n('以下文件或文件夹不存在:'))
-        else:
-            for missing_file in missing_files:
-                if missing_file != '':
-                    gr.Warning(missing_file)
-            if file_list[-1]==[''] and is_dataset_processing:
-                pass
-            else:
-                gr.Warning(i18n('以下文件或文件夹不存在:'))
-
 if os.path.exists('GPT_SoVITS/text/G2PWModel'):...
 else:
     cmd = '"%s" GPT_SoVITS/download.py'%python_exec
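
The two helpers moved into tools/my_utils.py are meant to be called back to back before each job starts, which is what the webui.py hunks above do: check_for_existance warns about missing files or folders, check_details then inspects their contents. A minimal sketch of the call pattern (the paths below are hypothetical placeholders, not paths from this patch):

    from tools.my_utils import check_for_existance, check_details

    # Dataset-formatting steps pass [list_file, wav_dir] with is_dataset_processing=True:
    check_for_existance(["output/asr_opt/demo.list", "output/slicer_opt"], is_dataset_processing=True)
    check_details(["output/asr_opt/demo.list", "output/slicer_opt"], is_dataset_processing=True)

    # Training steps pass only the experiment directory; the helpers derive
    # 2-name2text.txt, 4-cnhubert, 5-wav32k and 6-name2semantic.tsv from it:
    check_for_existance(["logs/exp_name"], is_train=True)
    check_details(["logs/exp_name"], is_train=True)

Note that both helpers only emit gr.Warning toasts, so they must run inside the Gradio app; they do not abort the calling job on their own.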