bug修复

This commit is contained in:
Downupanddownup 2024-04-30 10:04:41 +08:00
parent 2dc36d3d60
commit fa45c5ac4f
3 changed files with 43 additions and 12 deletions

View File

@ -71,7 +71,8 @@ def convert_from_list(text_work_space_dir, text_role, text_list_input):
return i18n(text_convert_from_list_info), text_sample_dir
def start_similarity_analysis(work_space_dir, sample_dir, speaker_verification, base_voice_path, need_similarity_output):
def start_similarity_analysis(work_space_dir, sample_dir, speaker_verification, base_voice_path,
need_similarity_output):
similarity_list = None
similarity_file_dir = None
@ -129,7 +130,8 @@ def sample(text_work_space_dir, text_role, text_sample_dir, dropdown_speaker_ver
ref_audio_dir = os.path.join(base_role_dir, params.reference_audio_dir)
time_consuming, (similarity_list, _, _) \
= time_util.time_monitor(start_similarity_analysis)(base_role_dir, text_sample_dir, dropdown_speaker_verification, text_base_voice_path,
= time_util.time_monitor(start_similarity_analysis)(base_role_dir, text_sample_dir,
dropdown_speaker_verification, text_base_voice_path,
checkbox_similarity_output)
text_sample_info = f"耗时:{time_consuming:0.1f}秒;抽样成功:生成目录{ref_audio_dir}"
@ -334,8 +336,9 @@ def similarity_audio_output(text_work_space_dir, text_role, text_base_audio_path
raise Exception("说话人验证模型不能为空")
time_consuming, (similarity_list, similarity_file, similarity_file_dir) \
= time_util.time_monitor(start_similarity_analysis)(base_role_dir,text_compare_audio_dir,
dropdown_speaker_verification, text_base_audio_path, True)
= time_util.time_monitor(start_similarity_analysis)(base_role_dir, text_compare_audio_dir,
dropdown_speaker_verification, text_base_audio_path,
True)
if similarity_list is None:
raise Exception("相似度分析失败")
@ -383,6 +386,8 @@ def create_config(text_work_space_dir, text_role, text_template, text_refer_audi
text_work_space_dir, text_refer_audio_file_dir \
= common.batch_clean_paths([text_work_space_dir, text_refer_audio_file_dir])
global hide_config_file
config_file = None
text_create_config_info = None
try:
@ -834,7 +839,7 @@ def init_ui():
visible=False)
dropdown_refer_type_param.change(chang_refer_type_param, [dropdown_refer_type_param],
[text_ref_path, text_ref_text, text_emotion])
text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False)
text_whole_url = gr.Text(label=i18n("完整地址"), value=init.text_whole_url_default, interactive=False)
text_text.blur(lambda value: rw_param.write(rw_param.text_param, value), [text_text], [])
text_ref_path.blur(lambda value: rw_param.write(rw_param.ref_path_param, value), [text_ref_path], [])
@ -849,6 +854,11 @@ def init_ui():
text_emotion],
[text_whole_url])
text_text.blur(lambda value: rw_param.write(rw_param.text_param, value), [text_text], [])
dropdown_refer_type_param.change(whole_url,
[text_url, dropdown_refer_type_param, text_text, text_ref_path,
text_ref_text,
text_emotion],
[text_whole_url])
text_ref_path.input(whole_url,
[text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text,
text_emotion],
@ -883,7 +893,7 @@ def init_ui():
button_model_inference = gr.Button(i18n("开启批量推理"), variant="primary", scale=4)
text_model_inference_info = gr.Text(label=i18n("批量推理结果"), value="", interactive=False, scale=4)
button_model_inference_result_open = gr.Button(i18n("打开目录"), variant="primary", scale=1)
with gr.Tab(label=i18n("第三步:进行参考音频效果校验与筛选")):
with gr.Tab(label=i18n("第三步:进行参考音频推理效果准确度校验")):
gr.Markdown(value=i18n("3.1启动asr获取推理音频文本"))
text_asr_audio_dir = gr.Text(label=i18n("待asr的音频所在目录"), value=init.text_asr_audio_dir_default,
interactive=True)
@ -918,7 +928,8 @@ def init_ui():
value=init.text_text_similarity_analysis_path_default,
interactive=True)
slider_text_similarity_amplification_boundary = gr.Slider(minimum=0, maximum=1, step=0.01,
label=i18n("文本相似度放大边界因为原始模型输出的相似度差异太小所以进行了一次放大放大逻辑为边界值以下归0边界值到1的区间重新映射到0-1"),
label=i18n(
"文本相似度放大边界因为原始模型输出的相似度差异太小所以进行了一次放大放大逻辑为边界值以下归0边界值到1的区间重新映射到0-1"),
value=init.slider_text_similarity_amplification_boundary_default,
interactive=True)
slider_text_similarity_amplification_boundary.change(
@ -979,7 +990,8 @@ def init_ui():
button_similarity_audio_output_result_open = gr.Button(i18n("打开目录"), variant="primary", scale=1)
button_similarity_audio_output.click(similarity_audio_output,
[text_work_space_dir, text_role, text_base_audio_path,
text_compare_audio_dir, dropdown_speaker_verification_2], [text_similarity_audio_output_info])
text_compare_audio_dir, dropdown_speaker_verification_2],
[text_similarity_audio_output_info])
button_similarity_audio_output_result_open.click(lambda: open_file(hide_voice_similarity_dir), [], [])
gr.Markdown(value=i18n("4.2:如果发现存在低音质的推理音频,那么就去参考音频目录下,把原参考音频删了"))
gr.Markdown(value=i18n("4.3:删除参考音频之后,按下面的操作,会将推理音频目录下对应的音频也删掉"))
@ -989,7 +1001,8 @@ def init_ui():
button_sync_ref_audio.click(sync_ref_audio, [text_work_space_dir, text_role, text_refer_audio_file_dir,
text_inference_audio_file_dir], [text_sync_ref_info])
with gr.Tab("第五步:生成参考音频配置文本"):
gr.Markdown(value=i18n("5.1:编辑模板,占位符说明:\${emotion}表示相对路径加音频文件名;\${ref_path}表示音频相对角色目录的文件路径;\${ref_text}:表示音频文本"))
gr.Markdown(value=i18n(
"5.1:编辑模板,占位符说明:\${emotion}表示相对路径加音频文件名;\${ref_path}表示音频相对角色目录的文件路径;\${ref_text}:表示音频文本"))
text_template = gr.Text(label=i18n("模板内容"), value=init.text_template_default, lines=10)
text_template.blur(lambda value: rw_param.write(rw_param.text_template, value), [text_template], [])
gr.Markdown(value=i18n("5.2:生成配置"))
@ -1001,7 +1014,8 @@ def init_ui():
[text_work_space_dir, text_role, text_template, text_refer_audio_file_dir],
[text_create_config_info])
button_create_config_result_open.click(lambda: open_file(hide_config_file), [], [])
button_sample.click(sample, [text_work_space_dir, text_role, text_sample_dir, dropdown_speaker_verification_1, text_base_voice_path,
button_sample.click(sample, [text_work_space_dir, text_role, text_sample_dir, dropdown_speaker_verification_1,
text_base_voice_path,
slider_subsection_num, slider_sample_num, checkbox_similarity_output],
[text_sample_info, text_refer_audio_file_dir])
button_sample_result_open.click(open_file, [text_refer_audio_file_dir], [])

View File

@ -184,7 +184,7 @@ def generate_audio_files_for_emotion_group(url_composer, text_list, emotion_list
end_time = time.perf_counter() # 获取计时终点
elapsed_time = end_time - start_time # 计算执行耗时
# 记录日志内容
log_message = f"进程ID: {os.getpid()}, generate_audio_files_for_emotion_group 执行耗时: {elapsed_time:.6f}"
log_message = f"进程ID: {os.getpid()}, generate_audio_files_for_emotion_group 执行耗时: {elapsed_time:.6f};推理数量: {has_generated_count}"
p_logger.info(log_message)

View File

@ -1,6 +1,7 @@
import os
import multiprocessing
import Ref_Audio_Selector.config_param.config_params as params
import Ref_Audio_Selector.tool.audio_inference as audio_inference
import Ref_Audio_Selector.common.common as common
rw_param = params.config_manager.get_rw_param()
@ -44,6 +45,8 @@ text_api_v2_set_sovits_model_base_url_default = None
text_api_v2_sovits_model_param_default = None
# 推理服务请求地址与参数
text_url_default = None
# 推理服务请求完整地址
text_whole_url_default = None
# 文本参数名
text_text_default = None
# 参考参数类型
@ -112,7 +115,7 @@ def init_first():
def init_second():
global text_api_set_model_base_url_default, text_api_gpt_param_default, text_api_sovits_param_default, text_api_v2_set_gpt_model_base_url_default, text_api_v2_gpt_model_param_default
global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_text_default, dropdown_refer_type_param_default, text_ref_path_default
global text_api_v2_set_sovits_model_base_url_default, text_api_v2_sovits_model_param_default, text_url_default, text_whole_url_default, text_text_default, dropdown_refer_type_param_default, text_ref_path_default
global text_ref_text_default, text_emotion_default, text_test_content_default, slider_request_concurrency_num_default, slider_request_concurrency_max_num
text_api_set_model_base_url_default = empty_default(rw_param.read(rw_param.api_set_model_base_url),
@ -137,6 +140,9 @@ def init_second():
text_ref_text_default = empty_default(rw_param.read(rw_param.ref_text_param), 'prompt_text')
text_emotion_default = empty_default(rw_param.read(rw_param.emotion_param), 'emotion')
text_whole_url_default = whole_url(text_url_default, dropdown_refer_type_param_default, text_text_default,
text_ref_path_default, text_ref_text_default, text_emotion_default)
text_test_content_default = empty_default(rw_param.read(rw_param.test_content_path), params.default_test_text_path)
slider_request_concurrency_max_num = multiprocessing.cpu_count()
@ -146,6 +152,17 @@ def init_second():
slider_request_concurrency_num_default = min(int(slider_request_concurrency_num_default), slider_request_concurrency_max_num)
# Compose the complete request URL from the base request URL and the parameter names
def whole_url(text_url, dropdown_refer_type_param, text_text, text_ref_path, text_ref_text, text_emotion):
    """Return a preview of the full inference request URL.

    A ``TTSURLComposer`` is built from the given URL and parameter settings;
    depending on ``is_emotion()`` the URL is produced either by
    ``build_url_with_emotion`` or by ``build_url_with_ref``. Fixed placeholder
    literals are passed in place of real text / emotion / reference values.

    Note that the composer receives ``text_emotion`` *before* ``text_text``,
    which differs from this function's own parameter order.
    """
    composer = audio_inference.TTSURLComposer(
        text_url,
        dropdown_refer_type_param,
        text_emotion,
        text_text,
        text_ref_path,
        text_ref_text,
    )
    # NOTE(review): the trailing False is forwarded unchanged to the builder;
    # its meaning is defined by TTSURLComposer — confirm there.
    if not composer.is_emotion():
        return composer.build_url_with_ref('测试内容', '参考路径', '参考文本', False)
    return composer.build_url_with_emotion('测试内容', '情绪类型', False)
def init_third():
global text_asr_audio_dir_default, text_text_similarity_analysis_path_default, slider_text_similarity_amplification_boundary_default, text_text_similarity_result_path_default