参考类型,添加选择

This commit is contained in:
Downupanddownup 2024-04-29 11:23:41 +08:00
parent 5280d17d2f
commit c26fa983a4
4 changed files with 46 additions and 16 deletions

View File

@ -25,6 +25,7 @@ class ParamReadWriteManager:
self.text_url = 'text_url' self.text_url = 'text_url'
self.text_param = 'text_param' self.text_param = 'text_param'
self.refer_type_param = 'refer_type_param'
self.ref_path_param = 'ref_path_param' self.ref_path_param = 'ref_path_param'
self.ref_text_param = 'ref_text_param' self.ref_text_param = 'ref_text_param'
self.emotion_param = 'emotion_param' self.emotion_param = 'emotion_param'

View File

@ -146,7 +146,7 @@ def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path
# 根据参考音频和测试文本,执行批量推理 # 根据参考音频和测试文本,执行批量推理
def model_inference(text_work_space_dir, text_role, slider_request_concurrency_num, text_refer_audio_file_dir, def model_inference(text_work_space_dir, text_role, slider_request_concurrency_num, text_refer_audio_file_dir,
text_url, text_url, dropdown_refer_type_param,
text_text, text_ref_path, text_ref_text, text_emotion, text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content_dir): text_test_content_dir):
text_work_space_dir, text_refer_audio_file_dir, text_test_content_dir \ text_work_space_dir, text_refer_audio_file_dir, text_test_content_dir \
@ -173,7 +173,8 @@ def model_inference(text_work_space_dir, text_role, slider_request_concurrency_n
text_asr_audio_dir = os.path.join(inference_dir, text_asr_audio_dir = os.path.join(inference_dir,
params.inference_audio_text_aggregation_dir) params.inference_audio_text_aggregation_dir)
url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer = audio_inference.TTSURLComposer(text_url, dropdown_refer_type_param, text_emotion, text_text,
text_ref_path, text_ref_text)
url_composer.is_valid() url_composer.is_valid()
text_list = common.read_text_file_to_list(text_test_content_dir) text_list = common.read_text_file_to_list(text_test_content_dir)
if text_list is None or len(text_list) == 0: if text_list is None or len(text_list) == 0:
@ -403,8 +404,9 @@ def create_config(text_work_space_dir, text_role, text_template, text_refer_audi
# 基于请求路径和参数,合成完整的请求路径 # 基于请求路径和参数,合成完整的请求路径
def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion): def whole_url(text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text, text_emotion):
url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer = audio_inference.TTSURLComposer(text_url, dropdown_refer_type_param, text_emotion, text_text,
text_ref_path, text_ref_text)
if url_composer.is_emotion(): if url_composer.is_emotion():
text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False) text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False)
else: else:
@ -585,6 +587,16 @@ def save_work_dir(text_work_space_dir, text_role):
return role_dir return role_dir
def chang_refer_type_param(selected_value):
    """Persist the selected reference type and toggle the parameter inputs.

    The selection is written through ``rw_param`` under the
    ``refer_type_param`` key on every call. Three Gradio update payloads
    are then returned, in the order the ``change`` listener wires its
    outputs: reference-audio path field, reference-audio text field,
    emotion field.

    Args:
        selected_value: Current value of the reference-type dropdown.

    Returns:
        A 3-tuple of update dicts. When ``selected_value`` is "参考音频"
        the two reference-audio fields are shown and the emotion field is
        hidden; for any other value the visibility is reversed.
    """
    rw_param.write(rw_param.refer_type_param, selected_value)

    # Any value other than the reference-audio option selects the emotion field.
    show_refer_audio = selected_value == "参考音频"

    # Build a separate dict per output component rather than sharing one object.
    return (
        {"visible": show_refer_audio, "__type__": "update"},
        {"visible": show_refer_audio, "__type__": "update"},
        {"visible": not show_refer_audio, "__type__": "update"},
    )
def init_ui(): def init_ui():
init.init_all() init.init_all()
@ -789,11 +801,16 @@ def init_ui():
value=init.text_url_default) value=init.text_url_default)
with gr.Row(): with gr.Row():
text_text = gr.Text(label=i18n("请输入文本参数名"), value=init.text_text_default) text_text = gr.Text(label=i18n("请输入文本参数名"), value=init.text_text_default)
dropdown_refer_type_param = gr.Dropdown(label=i18n("类型"), choices=["参考音频", "角色情绪"],
value=init.dropdown_refer_type_param_default, interactive=True)
text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"), text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"),
value=init.text_ref_path_default) value=init.text_ref_path_default, visible=True)
text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"), text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"),
value=init.text_ref_text_default) value=init.text_ref_text_default, visible=True)
text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value=init.text_emotion_default) text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value=init.text_emotion_default,
visible=False)
dropdown_refer_type_param.change(chang_refer_type_param, [dropdown_refer_type_param],
[text_ref_path, text_ref_text, text_emotion])
text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False) text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False)
text_text.blur(lambda value: rw_param.write(rw_param.text_param, value), [text_text], []) text_text.blur(lambda value: rw_param.write(rw_param.text_param, value), [text_text], [])
@ -801,19 +818,26 @@ def init_ui():
text_ref_text.blur(lambda value: rw_param.write(rw_param.ref_text_param, value), [text_ref_text], []) text_ref_text.blur(lambda value: rw_param.write(rw_param.ref_text_param, value), [text_ref_text], [])
text_emotion.blur(lambda value: rw_param.write(rw_param.emotion_param, value), [text_emotion], []) text_emotion.blur(lambda value: rw_param.write(rw_param.emotion_param, value), [text_emotion], [])
text_url.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion], text_url.input(whole_url,
[text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text, text_emotion],
[text_whole_url]) [text_whole_url])
text_url.blur(save_generate_audio_url, [text_url], []) text_url.blur(save_generate_audio_url, [text_url], [])
text_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion], text_text.input(whole_url, [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
text_emotion],
[text_whole_url]) [text_whole_url])
text_text.blur(save_text_param, [text_text], []) text_text.blur(save_text_param, [text_text], [])
text_ref_path.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion], text_ref_path.input(whole_url,
[text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
text_emotion],
[text_whole_url]) [text_whole_url])
text_ref_path.blur(save_ref_path_param, [text_ref_path], []) text_ref_path.blur(save_ref_path_param, [text_ref_path], [])
text_ref_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion], text_ref_text.input(whole_url,
[text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
text_emotion],
[text_whole_url]) [text_whole_url])
text_ref_text.blur(save_ref_text_param, [text_ref_text], []) text_ref_text.blur(save_ref_text_param, [text_ref_text], [])
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion], text_emotion.input(whole_url, [text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
text_emotion],
[text_whole_url]) [text_whole_url])
text_emotion.blur(save_emotion_param, [text_emotion], []) text_emotion.blur(save_emotion_param, [text_emotion], [])
gr.Markdown(value=i18n("2.3:配置待推理文本,一句一行,尽量保证文本多样性,不同情绪、不同类型的都来一点")) gr.Markdown(value=i18n("2.3:配置待推理文本,一句一行,尽量保证文本多样性,不同情绪、不同类型的都来一点"))
@ -955,7 +979,7 @@ def init_ui():
button_sample_result_open.click(open_file, [text_refer_audio_file_dir], []) button_sample_result_open.click(open_file, [text_refer_audio_file_dir], [])
button_model_inference.click(model_inference, button_model_inference.click(model_inference,
[text_work_space_dir, text_role, slider_request_concurrency_num, [text_work_space_dir, text_role, slider_request_concurrency_num,
text_refer_audio_file_dir, text_url, text_refer_audio_file_dir, text_url, dropdown_refer_type_param,
text_text, text_ref_path, text_ref_text, text_emotion, text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content], text_test_content],
[text_model_inference_info, text_asr_audio_dir, text_inference_audio_file_dir]) [text_model_inference_info, text_asr_audio_dir, text_inference_audio_file_dir])

View File

@ -52,8 +52,10 @@ class SetModelURLComposer:
class TTSURLComposer: class TTSURLComposer:
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name): def __init__(self, base_url, refer_type_param, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
self.base_url = base_url self.base_url = base_url
# 角色情绪 or 参考音频
self.refer_type_param = refer_type_param
self.emotion_param_name = emotion_param_name self.emotion_param_name = emotion_param_name
self.text_param_name = text_param_name self.text_param_name = text_param_name
self.ref_path_param_name = ref_path_param_name self.ref_path_param_name = ref_path_param_name
@ -70,7 +72,7 @@ class TTSURLComposer:
raise ValueError("请输入至少一个参考or情绪的参数") raise ValueError("请输入至少一个参考or情绪的参数")
def is_emotion(self): def is_emotion(self):
return self.emotion_param_name is not None and self.emotion_param_name != '' return self.refer_type_param == '角色情绪'
def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True): def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True):
params = { params = {

View File

@ -46,6 +46,8 @@ text_api_v2_sovits_model_param_default = None
text_url_default = None text_url_default = None
# 文本参数名 # 文本参数名
text_text_default = None text_text_default = None
# 参考参数类型
dropdown_refer_type_param_default = None
# 参考音频路径参数名 # 参考音频路径参数名
text_ref_path_default = None text_ref_path_default = None
# 参考音频文本参数名 # 参考音频文本参数名
@ -110,7 +112,7 @@ def init_first():
def init_second(): def init_second():
global text_api_set_model_base_url_default, text_api_gpt_param_default, text_api_sovits_param_default, text_api_v2_set_gpt_model_base_url_default, text_api_v2_gpt_model_param_default global text_api_set_model_base_url_default, text_api_gpt_param_default, text_api_sovits_param_default, text_api_v2_set_gpt_model_base_url_default, text_api_v2_gpt_model_param_default
global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_text_default, text_ref_path_default global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_text_default, dropdown_refer_type_param_default, text_ref_path_default
global text_ref_text_default, text_emotion_default, text_test_content_default, slider_request_concurrency_num_default, slider_request_concurrency_max_num global text_ref_text_default, text_emotion_default, text_test_content_default, slider_request_concurrency_num_default, slider_request_concurrency_max_num
text_api_set_model_base_url_default = empty_default(rw_param.read(rw_param.api_set_model_base_url), text_api_set_model_base_url_default = empty_default(rw_param.read(rw_param.api_set_model_base_url),
@ -129,6 +131,7 @@ def init_second():
text_url_default = empty_default(rw_param.read(rw_param.text_url), text_url_default = empty_default(rw_param.read(rw_param.text_url),
'http://localhost:9880?prompt_language=中文&text_language=中文&cut_punc=') 'http://localhost:9880?prompt_language=中文&text_language=中文&cut_punc=')
text_text_default = empty_default(rw_param.read(rw_param.text_param), 'text') text_text_default = empty_default(rw_param.read(rw_param.text_param), 'text')
dropdown_refer_type_param_default = empty_default(rw_param.read(rw_param.refer_type_param), '参考音频')
text_ref_path_default = empty_default(rw_param.read(rw_param.ref_path_param), 'refer_wav_path') text_ref_path_default = empty_default(rw_param.read(rw_param.ref_path_param), 'refer_wav_path')
text_ref_text_default = empty_default(rw_param.read(rw_param.ref_text_param), 'prompt_text') text_ref_text_default = empty_default(rw_param.read(rw_param.ref_text_param), 'prompt_text')