diff --git a/Ref_Audio_Selector/config_param/config_manager.py b/Ref_Audio_Selector/config_param/config_manager.py
index 5aaa1b6..63d2335 100644
--- a/Ref_Audio_Selector/config_param/config_manager.py
+++ b/Ref_Audio_Selector/config_param/config_manager.py
@@ -25,6 +25,7 @@ class ParamReadWriteManager:
 
         self.text_url = 'text_url'
         self.text_param = 'text_param'
+        self.refer_type_param = 'refer_type_param'
         self.ref_path_param = 'ref_path_param'
        self.ref_text_param = 'ref_text_param'
         self.emotion_param = 'emotion_param'
diff --git a/Ref_Audio_Selector/ref_audio_selector_webui.py b/Ref_Audio_Selector/ref_audio_selector_webui.py
index 6249193..bfbb6b7 100644
--- a/Ref_Audio_Selector/ref_audio_selector_webui.py
+++ b/Ref_Audio_Selector/ref_audio_selector_webui.py
@@ -146,7 +146,7 @@ def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path
 
 
 # 根据参考音频和测试文本,执行批量推理
 def model_inference(text_work_space_dir, text_role, slider_request_concurrency_num, text_refer_audio_file_dir,
-                    text_url,
+                    text_url, dropdown_refer_type_param,
                     text_text, text_ref_path, text_ref_text, text_emotion, text_test_content_dir):
     text_work_space_dir, text_refer_audio_file_dir, text_test_content_dir \
@@ -173,7 +173,8 @@ def model_inference(text_work_space_dir, text_role, slider_request_concurrency_n
     text_asr_audio_dir = os.path.join(inference_dir,
                                       params.inference_audio_text_aggregation_dir)
 
-    url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text)
+    url_composer = audio_inference.TTSURLComposer(text_url, dropdown_refer_type_param, text_emotion, text_text,
+                                                  text_ref_path, text_ref_text)
     url_composer.is_valid()
     text_list = common.read_text_file_to_list(text_test_content_dir)
     if text_list is None or len(text_list) == 0:
@@ -403,8 +404,9 @@ def create_config(text_work_space_dir, text_role, text_template, text_refer_audi
 
 
 # 基于请求路径和参数,合成完整的请求路径
-def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion):
-    url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text)
+def whole_url(text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text, text_emotion):
+    url_composer = audio_inference.TTSURLComposer(text_url, dropdown_refer_type_param, text_emotion, text_text,
+                                                  text_ref_path, text_ref_text)
     if url_composer.is_emotion():
         text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False)
     else:
@@ -585,6 +587,16 @@ def save_work_dir(text_work_space_dir, text_role):
     return role_dir
 
 
+def chang_refer_type_param(selected_value):
+    rw_param.write(rw_param.refer_type_param, selected_value)
+    if selected_value == "参考音频":
+        return {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": False,
+                                                                                                  "__type__": "update"}
+    else:
+        return {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": True,
+                                                                                                    "__type__": "update"}
+
+
 def init_ui():
     init.init_all()
 
@@ -789,11 +801,16 @@
                                          value=init.text_url_default)
                 with gr.Row():
                     text_text = gr.Text(label=i18n("请输入文本参数名"), value=init.text_text_default)
+                    dropdown_refer_type_param = gr.Dropdown(label=i18n("类型"), choices=["参考音频", "角色情绪"],
+                                                            value=init.dropdown_refer_type_param_default, interactive=True)
                     text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"),
-                                            value=init.text_ref_path_default)
+                                            value=init.text_ref_path_default, visible=True)
                     text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"),
-                                            value=init.text_ref_text_default)
-                    text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value=init.text_emotion_default)
+                                            value=init.text_ref_text_default, visible=True)
+                    text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value=init.text_emotion_default,
+                                           visible=False)
+                    dropdown_refer_type_param.change(chang_refer_type_param, [dropdown_refer_type_param],
+                                                     [text_ref_path, text_ref_text, text_emotion])
                 text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False)
 
                 text_text.blur(lambda value: rw_param.write(rw_param.text_param, value), [text_text], [])
@@ -801,19 +818,26 @@
                 text_ref_text.blur(lambda value: rw_param.write(rw_param.ref_text_param, value), [text_ref_text], [])
                 text_emotion.blur(lambda value: rw_param.write(rw_param.emotion_param, value), [text_emotion], [])
 
-                text_url.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
+                text_url.input(whole_url,
+                               [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text, text_emotion],
                                [text_whole_url])
                 text_url.blur(save_generate_audio_url, [text_url], [])
-                text_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
+                text_text.input(whole_url, [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
+                                            text_emotion],
                                 [text_whole_url])
                 text_text.blur(save_text_param, [text_text], [])
-                text_ref_path.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
+                text_ref_path.input(whole_url,
+                                    [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
+                                     text_emotion],
                                     [text_whole_url])
                 text_ref_path.blur(save_ref_path_param, [text_ref_path], [])
-                text_ref_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
+                text_ref_text.input(whole_url,
+                                    [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
+                                     text_emotion],
                                     [text_whole_url])
                 text_ref_text.blur(save_ref_text_param, [text_ref_text], [])
-                text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
+                text_emotion.input(whole_url, [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
+                                               text_emotion],
                                    [text_whole_url])
                 text_emotion.blur(save_emotion_param, [text_emotion], [])
                 gr.Markdown(value=i18n("2.3:配置待推理文本,一句一行,尽量保证文本多样性,不同情绪、不同类型的都来一点"))
@@ -955,7 +979,7 @@
             button_sample_result_open.click(open_file, [text_refer_audio_file_dir], [])
             button_model_inference.click(model_inference,
                                          [text_work_space_dir, text_role, slider_request_concurrency_num,
-                                          text_refer_audio_file_dir, text_url,
+                                          text_refer_audio_file_dir, text_url, dropdown_refer_type_param,
                                           text_text, text_ref_path, text_ref_text, text_emotion,
                                           text_test_content],
                                          [text_model_inference_info, text_asr_audio_dir, text_inference_audio_file_dir])
diff --git a/Ref_Audio_Selector/tool/audio_inference.py b/Ref_Audio_Selector/tool/audio_inference.py
index 8729c5d..e1aa8c2 100644
--- a/Ref_Audio_Selector/tool/audio_inference.py
+++ b/Ref_Audio_Selector/tool/audio_inference.py
@@ -52,8 +52,10 @@ class SetModelURLComposer:
 
 
 class TTSURLComposer:
-    def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
+    def __init__(self, base_url, refer_type_param, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
         self.base_url = base_url
+        # 角色情绪 or 参考音频
+        self.refer_type_param = refer_type_param
         self.emotion_param_name = emotion_param_name
         self.text_param_name = text_param_name
         self.ref_path_param_name = ref_path_param_name
@@ -70,7 +72,7 @@ class TTSURLComposer:
             raise ValueError("请输入至少一个参考or情绪的参数")
 
     def is_emotion(self):
-        return self.emotion_param_name is not None and self.emotion_param_name != ''
+        return self.refer_type_param == '角色情绪'
 
     def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True):
         params = {
diff --git a/Ref_Audio_Selector/ui_init/init_ui_param.py b/Ref_Audio_Selector/ui_init/init_ui_param.py
index b3e72a2..df7bf17 100644
--- a/Ref_Audio_Selector/ui_init/init_ui_param.py
+++ b/Ref_Audio_Selector/ui_init/init_ui_param.py
@@ -46,6 +46,8 @@ text_api_v2_sovits_model_param_default = None
 # 请求地址
 text_url_default = None
 # 文本参数名
 text_text_default = None
+# 参考参数类型
+dropdown_refer_type_param_default = None
 # 参考音频路径参数名
 text_ref_path_default = None
 # 参考音频文本参数名
@@ -110,7 +112,7 @@ def init_first():
 
 def init_second():
     global text_api_set_model_base_url_default, text_api_gpt_param_default, text_api_sovits_param_default, text_api_v2_set_gpt_model_base_url_default, text_api_v2_gpt_model_param_default
-    global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_text_default, text_ref_path_default
+    global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_text_default, dropdown_refer_type_param_default, text_ref_path_default
     global text_ref_text_default, text_emotion_default, text_test_content_default, slider_request_concurrency_num_default, slider_request_concurrency_max_num
     text_api_set_model_base_url_default = empty_default(rw_param.read(rw_param.api_set_model_base_url),
                                                         'http://localhost:9880')
@@ -129,6 +131,7 @@ def init_second():
     text_url_default = empty_default(rw_param.read(rw_param.text_url),
                                      'http://localhost:9880?prompt_language=中文&text_language=中文&cut_punc=')
     text_text_default = empty_default(rw_param.read(rw_param.text_param), 'text')
+    dropdown_refer_type_param_default = empty_default(rw_param.read(rw_param.refer_type_param), '参考音频')
     text_ref_path_default = empty_default(rw_param.read(rw_param.ref_path_param), 'refer_wav_path')
     text_ref_text_default = empty_default(rw_param.read(rw_param.ref_text_param), 'prompt_text')
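
Reviewer note (not part of the patch): the substantive behavior change here is in TTSURLComposer.is_emotion(). Previously "emotion mode" was inferred from whether emotion_param_name happened to be non-empty; now it is selected explicitly via the new dropdown_refer_type_param dropdown, which switches between '参考音频' (reference audio) and '角色情绪' (character/role emotion), is persisted with rw_param.write(rw_param.refer_type_param, ...), and defaults to '参考音频' on first run. The handler chang_refer_type_param then shows or hides the matching parameter-name fields. Below is a minimal standalone sketch of the same Gradio show/hide pattern, assuming Gradio 3.x; the component names are placeholders, not the repo's, and gr.update(visible=...) is simply the documented shorthand for the literal {"visible": ..., "__type__": "update"} dicts the patch returns.

import gradio as gr

def toggle_refer_type(selected_value):
    # One update dict per output component, in the same order as the
    # outputs list passed to .change() below.
    if selected_value == "参考音频":  # reference-audio mode: show path/text, hide emotion
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
    # 角色情绪 (character emotion) mode: hide path/text, show emotion
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

with gr.Blocks() as demo:
    refer_type = gr.Dropdown(label="类型", choices=["参考音频", "角色情绪"], value="参考音频")
    ref_path = gr.Text(label="refer_wav_path", visible=True)
    ref_text = gr.Text(label="prompt_text", visible=True)
    emotion = gr.Text(label="emotion", visible=False)
    # Mirrors dropdown_refer_type_param.change(chang_refer_type_param, ...) in the patch.
    refer_type.change(toggle_refer_type, [refer_type], [ref_path, ref_text, emotion])

if __name__ == "__main__":
    demo.launch()

One thing worth checking in review: the dropdown's .change handler only persists the value and toggles visibility; it does not recompute text_whole_url, so after switching modes the preview URL refreshes only on the next .input event of one of the text fields.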