# Reference-audio helpers introduced by the "Update inference_webui.py" patch
# to GPT_SoVITS/inference_webui.py (reference audio/text auto-fill driven by
# the selected SoVITS model).

# First run of five consecutive CJK ideographs plus the rest of that line.
_CHINESE_PREFIX_RE = re.compile(r'([\u4e00-\u9fa5]{5}).*')

# "<name>_e<digits>_s<digits>.pth"; <name> is the file-name stem, captured
# without any directory component (neither "/" nor "\\").
_SOVITS_WEIGHT_RE = re.compile(r'([^/\\]+)_e\d+_s\d+\.pth')


def replace_chinese(text):
    """Shorten *text* after its first run of five Chinese characters.

    The first five consecutive characters in the range U+4E00..U+9FA5 are
    kept, and everything following them on the same line is replaced by
    ``...``. Text without such a run is returned unchanged.
    """
    return _CHINESE_PREFIX_RE.sub(r'\1...', text)


def _parse_name2text(lines):
    """Map the first tab-separated field of each line to its fourth field.

    Lines with fewer than four fields, or with an empty 2nd/3rd/4th field,
    are skipped. When a name occurs more than once the first line wins.
    """
    mapping = {}
    for line in lines:
        fields = line.rstrip("\r\n").split("\t", 3)
        if len(fields) == 4 and all(fields[1:]):
            mapping.setdefault(fields[0], fields[3])
    return mapping


def init_wav_list(sovits_path):
    """Build the reference-audio choices for a SoVITS weight file.

    The name preceding ``_e<digits>_s<digits>.pth`` in *sovits_path* selects
    ``./logs/<name>/``. Every file in its ``5-wav32k`` directory that has an
    entry in ``2-name2text.txt`` becomes one choice, labelled with the
    shortened text and the clip duration in whole seconds.

    Returns:
        A ``(labels, lookup)`` tuple. ``labels`` is a list starting with the
        placeholder entry; ``lookup`` maps each label to
        ``(wav_path, reference_text)``. ``([], {})`` is returned when
        *sovits_path* does not look like a trained weight file. If the logs
        directory or text file cannot be read, only the placeholder is
        returned instead of raising.
    """
    match = _SOVITS_WEIGHT_RE.search(sovits_path)
    if not match:
        return [], {}

    exp_name = match.group(1)
    wav_dir = f"./logs/{exp_name}/5-wav32k/"
    res_text = ["请选择参考音频"]
    res_wavs = {}

    try:
        with open(f'./logs/{exp_name}/2-name2text.txt', 'r', encoding='utf-8') as f:
            name_to_text = _parse_name2text(f)
        # Sorted so the dropdown order does not depend on os.listdir() order.
        wav_names = sorted(os.listdir(wav_dir))
    except OSError as e:
        # Missing training logs must not take the whole UI down.
        print(f"Reference audio unavailable for {exp_name}: {e}")
        return res_text, res_wavs

    for file_name in wav_names:
        wav_file = f"{wav_dir}{file_name}"
        if not os.path.isfile(wav_file):
            continue

        # Exact file-name lookup; the name is never interpreted as a regex.
        extracted_text = name_to_text.get(file_name)
        if extracted_text is None:
            print("No match found")
            continue

        # sr=None keeps the native sample rate: only the duration is needed,
        # so resampling would be wasted work.
        audio_data, sample_rate = librosa.load(wav_file, sr=None)
        duration = int(librosa.get_duration(y=audio_data, sr=sample_rate))

        base_key = f"{replace_chinese(extracted_text)}_{duration}秒"
        key = base_key
        suffix = 2
        # Keep labels unique so one clip cannot silently replace another.
        while key in res_wavs:
            key = f"{base_key}({suffix})"
            suffix += 1

        res_text.append(key)
        res_wavs[key] = (wav_file, extracted_text)

    return res_text, res_wavs
# NOTE(review): in the collapsed patch this definition shares its line with
# partial hunks for change_sovits_weights and the Gradio dropdown wiring;
# those fragments are incomplete in this view and are not reproduced here.
def change_wav(audio_name):
    """Return the reference audio path and text for a dropdown label.

    Looks *audio_name* up in the module-level ``reference_dict``. A label
    that is not a key (for example the placeholder entry) falls back to the
    first entry of the dict.

    Returns:
        ``(wav_path, reference_text)``. When ``reference_dict`` is empty
        there is nothing to fall back to, so ``(None, "")`` is returned
        instead of raising ``IndexError``.
    """
    if not reference_dict:
        # presumably None/"" clear the bound audio and text components;
        # verify against the Gradio version in use.
        return None, ""

    try:
        entry = reference_dict[audio_name]
    except (KeyError, TypeError):
        # Unknown or unhashable selection: use the first available clip.
        entry = next(iter(reference_dict.values()))
    return entry[0], entry[1]
gr.Textbox(label=i18n("需要合成的文本"), value="")