From d855eecc7bd3438523ea32c309669b55b8b66a0e Mon Sep 17 00:00:00 2001 From: Downupanddownup Date: Thu, 25 Apr 2024 18:50:52 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=9B=AE=E5=BD=95=E4=BF=9D?= =?UTF-8?q?=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Ref_Audio_Selector/config.ini | 4 - Ref_Audio_Selector/config/config_manager.py | 29 +++- Ref_Audio_Selector/file/base_info/role.txt | 0 .../file/base_info/work_dir.txt | 0 .../ref_audio_selector_webui.py | 137 ++++++++++++------ 5 files changed, 121 insertions(+), 49 deletions(-) create mode 100644 Ref_Audio_Selector/file/base_info/role.txt create mode 100644 Ref_Audio_Selector/file/base_info/work_dir.txt diff --git a/Ref_Audio_Selector/config.ini b/Ref_Audio_Selector/config.ini index 1aef25d..81bcb96 100644 --- a/Ref_Audio_Selector/config.ini +++ b/Ref_Audio_Selector/config.ini @@ -1,10 +1,6 @@ # config.ini [Base] -# 工作目录 -work_dir = -# 角色 -role = # 参考音频目录 reference_audio_dir = refer_audio diff --git a/Ref_Audio_Selector/config/config_manager.py b/Ref_Audio_Selector/config/config_manager.py index 6a7a188..4183f2a 100644 --- a/Ref_Audio_Selector/config/config_manager.py +++ b/Ref_Audio_Selector/config/config_manager.py @@ -1,5 +1,27 @@ import configparser -import re +import Ref_Audio_Selector.common.common as common + + +class ParamReadWriteManager: + def __init__(self): + self.work_dir_path = 'Ref_Audio_Selector/file/base_info/work_dir.txt' + self.role_path = 'Ref_Audio_Selector/file/base_info/role.txt' + + def read_work_dir(self): + content = common.read_file(self.work_dir_path) + return content.strip() + + def read_role(self): + content = common.read_file(self.role_path) + return content.strip() + + def write_work_dir(self, work_dir_content): + clean_content = work_dir_content.strip() + common.write_text_to_file(clean_content, self.work_dir_path) + + def write_role(self, role_content): + clean_content = role_content.strip() + common.write_text_to_file(clean_content, self.role_path) class ConfigManager: @@ -36,11 +58,16 @@ class ConfigManager: _config = ConfigManager() +_param_read_write_manager = ParamReadWriteManager() def get_config(): return _config +def get_rw_param(): + return _param_read_write_manager + + if __name__ == '__main__': print(_config.print()) diff --git a/Ref_Audio_Selector/file/base_info/role.txt b/Ref_Audio_Selector/file/base_info/role.txt new file mode 100644 index 0000000..e69de29 diff --git a/Ref_Audio_Selector/file/base_info/work_dir.txt b/Ref_Audio_Selector/file/base_info/work_dir.txt new file mode 100644 index 0000000..e69de29 diff --git a/Ref_Audio_Selector/ref_audio_selector_webui.py b/Ref_Audio_Selector/ref_audio_selector_webui.py index 7eeba39..3cab594 100644 --- a/Ref_Audio_Selector/ref_audio_selector_webui.py +++ b/Ref_Audio_Selector/ref_audio_selector_webui.py @@ -20,24 +20,37 @@ i18n = I18nAuto() p_similarity = None p_asr = None p_text_similarity = None +rw_param = params.config_manager.get_rw_param() # 校验基础信息 -def check_base_info(text_work_space_dir): +def check_base_info(text_work_space_dir, text_role): if text_work_space_dir is None or text_work_space_dir == '': raise Exception("工作目录不能为空") + if text_role is None or text_role == '': + raise Exception("角色不能为空") + base_role_dir = os.path.join(text_work_space_dir, text_role) + # 判断目录是否存在 + if not os.path.exists(base_role_dir): + # 如果不存在,则创建目录 + os.makedirs(base_role_dir, exist_ok=True) + return base_role_dir # 从list文件,提取参考音频 -def convert_from_list(text_work_space_dir, text_list_input): - ref_audio_all = os.path.join(text_work_space_dir, - params.list_to_convert_reference_audio_dir) - text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}" - text_sample_dir = ref_audio_all +def convert_from_list(text_work_space_dir, text_role, text_list_input): + text_convert_from_list_info = None + text_sample_dir = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_list_input is None or text_list_input == '': raise Exception("list文件路径不能为空") + + ref_audio_all = os.path.join(base_role_dir, + params.list_to_convert_reference_audio_dir) + text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}" + text_sample_dir = ref_audio_all + audio_similarity.convert_from_list(text_list_input, ref_audio_all) except Exception as e: traceback.print_exc() @@ -79,12 +92,12 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_ # 基于一个基准音频,从参考音频目录中进行分段抽样 -def sample(text_work_space_dir, text_sample_dir, text_base_voice_path, +def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path, text_subsection_num, text_sample_num, checkbox_similarity_output): - ref_audio_dir = os.path.join(text_work_space_dir, params.reference_audio_dir) - text_sample_info = f"抽样成功:生成目录{ref_audio_dir}" + ref_audio_dir = None + text_sample_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_sample_dir is None or text_sample_dir == '': raise Exception("参考音频抽样目录不能为空,请先完成上一步操作") if text_base_voice_path is None or text_base_voice_path == '': @@ -94,7 +107,10 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path, if text_sample_num is None or text_sample_num == '': raise Exception("每段随机抽样个数不能为空") - similarity_list, _, _ = start_similarity_analysis(text_work_space_dir, text_sample_dir, + ref_audio_dir = os.path.join(base_role_dir, params.reference_audio_dir) + text_sample_info = f"抽样成功:生成目录{ref_audio_dir}" + + similarity_list, _, _ = start_similarity_analysis(base_role_dir, text_sample_dir, text_base_voice_path, checkbox_similarity_output) if similarity_list is None: @@ -113,15 +129,14 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path, # 根据参考音频和测试文本,执行批量推理 -def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_url, +def model_inference(text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, text_text, text_ref_path, text_ref_text, text_emotion, text_test_content): - inference_dir = os.path.join(text_work_space_dir, params.inference_audio_dir) - text_asr_audio_dir = os.path.join(inference_dir, - params.inference_audio_text_aggregation_dir) - text_model_inference_info = f"推理成功:生成目录{inference_dir}" + inference_dir = None + text_asr_audio_dir = None + text_model_inference_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_model_inference_voice_dir is None or text_model_inference_voice_dir == '': raise Exception("待推理的参考音频所在目录不能为空,请先完成上一步操作") if text_url is None or text_url == '': @@ -133,6 +148,12 @@ def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_ur if (text_ref_path is None or text_ref_path == '') and (text_ref_text is None or text_ref_text == '') and ( text_emotion is None or text_emotion == ''): raise Exception("参考音频路径/文本和角色情绪二选一填写,不能全部为空") + + inference_dir = os.path.join(base_role_dir, params.inference_audio_dir) + text_asr_audio_dir = os.path.join(inference_dir, + params.inference_audio_text_aggregation_dir) + text_model_inference_info = f"推理成功:生成目录{inference_dir}" + url_composer = audio_inference.URLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer.is_valid() text_list = common.read_text_file_to_list(text_test_content) @@ -151,13 +172,13 @@ def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_ur # 对推理生成音频执行asr -def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model, +def asr(text_work_space_dir, text_role, text_asr_audio_dir, dropdown_asr_model, dropdown_asr_size, dropdown_asr_lang): asr_file = None text_text_similarity_analysis_path = None text_asr_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_asr_audio_dir is None or text_asr_audio_dir == '': raise Exception("待asr的音频所在目录不能为空,请先完成上一步操作") if dropdown_asr_model is None or dropdown_asr_model == '': @@ -166,7 +187,7 @@ def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model, raise Exception("asr模型大小不能为空") if dropdown_asr_lang is None or dropdown_asr_lang == '': raise Exception("asr语言不能为空") - asr_file = open_asr(text_asr_audio_dir, text_work_space_dir, dropdown_asr_model, dropdown_asr_size, + asr_file = open_asr(text_asr_audio_dir, base_role_dir, dropdown_asr_model, dropdown_asr_size, dropdown_asr_lang) text_text_similarity_analysis_path = asr_file text_asr_info = f"asr成功:生成文件{asr_file}" @@ -209,14 +230,16 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang): # 对asr生成的文件,与原本的文本内容,进行相似度分析 -def text_similarity_analysis(text_work_space_dir, +def text_similarity_analysis(text_work_space_dir, text_role, text_text_similarity_analysis_path): - similarity_dir = os.path.join(text_work_space_dir, params.text_similarity_output_dir) - text_text_similarity_analysis_info = f"相似度分析成功:生成目录{similarity_dir}" + similarity_dir = None + text_text_similarity_analysis_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_text_similarity_analysis_path is None or text_text_similarity_analysis_path == '': raise Exception("asr生成的文件路径不能为空,请先完成上一步操作") + similarity_dir = os.path.join(base_role_dir, params.text_similarity_output_dir) + text_text_similarity_analysis_info = f"相似度分析成功:生成目录{similarity_dir}" open_text_similarity_analysis(text_text_similarity_analysis_path, similarity_dir) except Exception as e: traceback.print_exc() @@ -244,17 +267,17 @@ def open_text_similarity_analysis(asr_file_path, output_dir, similarity_enlarge_ # 根据一个参考音频,对指定目录下的音频进行相似度分析,并输出到另一个目录 -def similarity_audio_output(text_work_space_dir, text_base_audio_path, +def similarity_audio_output(text_work_space_dir, text_role, text_base_audio_path, text_compare_audio_dir): text_similarity_audio_output_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_base_audio_path is None or text_base_audio_path == '': raise Exception("基准音频路径不能为空") if text_compare_audio_dir is None or text_compare_audio_dir == '': raise Exception("待分析的音频所在目录不能为空") similarity_list, similarity_file, similarity_file_dir = start_similarity_analysis( - text_work_space_dir, text_compare_audio_dir, text_base_audio_path, True) + base_role_dir, text_compare_audio_dir, text_base_audio_path, True) if similarity_list is None: raise Exception("相似度分析失败") @@ -268,11 +291,11 @@ def similarity_audio_output(text_work_space_dir, text_base_audio_path, # 根据参考音频目录的删除情况,将其同步到推理生成的音频目录中,即参考音频目录下,删除了几个参考音频,就在推理目录下,将这些参考音频生成的音频文件移除 -def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir, +def sync_ref_audio(text_work_space_dir, text_role, text_sync_ref_audio_dir, text_sync_inference_audio_dir): text_sync_ref_audio_info = None try: - check_base_info(text_work_space_dir) + check_base_info(text_work_space_dir, text_role) if text_sync_ref_audio_dir is None or text_sync_ref_audio_dir == '': raise Exception("参考音频目录不能为空") if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '': @@ -287,17 +310,19 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir, # 根据模板和参考音频目录,生成参考音频配置内容 -def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2): - config_file = os.path.join(text_work_space_dir, f'{params.reference_audio_config_filename}.json') - text_create_config_info = f"配置生成成功:生成文件{config_file}" +def create_config(text_work_space_dir, text_role, text_template, text_sync_ref_audio_dir2): + config_file = None + text_create_config_info = None try: - check_base_info(text_work_space_dir) + base_role_dir = check_base_info(text_work_space_dir, text_role) if text_template is None or text_template == '': raise Exception("参考音频抽样目录不能为空") if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '': raise Exception("参考音频目录不能为空") + config_file = os.path.join(base_role_dir, f'{params.reference_audio_config_filename}.json') + text_create_config_info = f"配置生成成功:生成文件{config_file}" ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2) - audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(), + audio_config.generate_audio_config(base_role_dir, text_template, ref_audio_manager.get_ref_audio_list(), config_file) except Exception as e: traceback.print_exc() @@ -315,9 +340,33 @@ def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion): return text_whole_url +def save_work_dir(text_work_space_dir, text_role): + rw_param.write_work_dir(text_work_space_dir) + if text_role is not None and text_role != '': + return text_role + else: + role_dir = '' + for i in range(1, 101): + dir_name = os.path.join(text_work_space_dir, f"role_{i}") + if not os.path.isdir(dir_name): + role_dir = dir_name + break + rw_param.write_role(role_dir) + return role_dir + + +def save_role(text_role): + rw_param.write_role(text_role) + + with gr.Blocks() as app: gr.Markdown(value=i18n("基本介绍:这是一个从训练素材中,批量提取参考音频,并进行效果评估与配置生成的工具")) - text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), value="") + with gr.Row(): + text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), + value=rw_param.read_work_dir()) + text_role = gr.Text(label=i18n("角色名称"), value=rw_param.read_role()) + text_work_space_dir.input(save_work_dir, [text_work_space_dir, text_role], [text_role]) + text_role.input(save_role, [text_role], []) with gr.Accordion(label=i18n("第一步:基于训练素材,生成待选参考音频列表"), open=False): gr.Markdown(value=i18n("1.1:选择list文件,并提取3-10秒的素材作为参考候选")) text_list_input = gr.Text(label=i18n("请输入list文件路径"), value="") @@ -326,7 +375,7 @@ with gr.Blocks() as app: text_convert_from_list_info = gr.Text(label=i18n("参考列表生成结果"), value="", interactive=False) gr.Markdown(value=i18n("1.2:选择基准音频,执行相似度匹配,并分段随机抽样")) text_sample_dir = gr.Text(label=i18n("参考音频抽样目录"), value="", interactive=False) - button_convert_from_list.click(convert_from_list, [text_work_space_dir, text_list_input], + button_convert_from_list.click(convert_from_list, [text_work_space_dir, text_role, text_list_input], [text_convert_from_list_info, text_sample_dir]) with gr.Row(): text_base_voice_path = gr.Text(label=i18n("请输入基准音频路径"), value="") @@ -369,7 +418,7 @@ with gr.Blocks() as app: gr.Markdown(value=i18n("3.1:启动asr,获取推理音频文本")) text_asr_audio_dir = gr.Text(label=i18n("待asr的音频所在目录"), value="", interactive=False) button_model_inference.click(model_inference, - [text_work_space_dir, text_model_inference_voice_dir, text_url, + [text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, text_text, text_ref_path, text_ref_text, text_emotion, text_test_content], [text_model_inference_info, text_asr_audio_dir]) with gr.Row(): @@ -396,13 +445,13 @@ with gr.Blocks() as app: text_asr_info = gr.Text(label=i18n("asr结果"), value="", interactive=False) gr.Markdown(value=i18n("3.2:启动文本相似度分析")) text_text_similarity_analysis_path = gr.Text(label=i18n("待分析的文件路径"), value="", interactive=False) - button_asr.click(asr, [text_work_space_dir, text_asr_audio_dir, dropdown_asr_model, + button_asr.click(asr, [text_work_space_dir, text_role, text_asr_audio_dir, dropdown_asr_model, dropdown_asr_size, dropdown_asr_lang], [text_asr_info, text_text_similarity_analysis_path]) with gr.Row(): button_text_similarity_analysis = gr.Button(i18n("启动文本相似度分析"), variant="primary") text_text_similarity_analysis_info = gr.Text(label=i18n("文本相似度分析结果"), value="", interactive=False) - button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, + button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_role, text_text_similarity_analysis_path], [text_text_similarity_analysis_info]) gr.Markdown(value=i18n("3.3:根据相似度分析结果,重点检查最后几条是否存在复读等问题")) @@ -414,7 +463,7 @@ with gr.Blocks() as app: button_similarity_audio_output = gr.Button(i18n("输出相似度-参考音频到临时目录"), variant="primary") text_similarity_audio_output_info = gr.Text(label=i18n("输出结果"), value="", interactive=False) button_similarity_audio_output.click(similarity_audio_output, - [text_work_space_dir, text_base_audio_path, + [text_work_space_dir, text_role, text_base_audio_path, text_compare_audio_dir], [text_similarity_audio_output_info]) with gr.Row(): text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=True) @@ -422,7 +471,7 @@ with gr.Blocks() as app: with gr.Row(): button_sync_ref_audio = gr.Button(i18n("将参考音频的删除情况,同步到推理音频目录"), variant="primary") text_sync_ref_info = gr.Text(label=i18n("同步结果"), value="", interactive=False) - button_sync_ref_audio.click(sync_ref_audio, [text_work_space_dir, text_sync_ref_audio_dir, + button_sync_ref_audio.click(sync_ref_audio, [text_work_space_dir, text_role, text_sync_ref_audio_dir, text_sync_inference_audio_dir], [text_sync_ref_info]) with gr.Accordion("第四步:生成参考音频配置文本", open=False): gr.Markdown(value=i18n("4.1:编辑模板")) @@ -436,9 +485,9 @@ with gr.Blocks() as app: button_create_config = gr.Button(i18n("生成配置"), variant="primary") text_create_config_info = gr.Text(label=i18n("生成结果"), value="", interactive=False) button_create_config.click(create_config, - [text_work_space_dir, text_template, text_sync_ref_audio_dir2], + [text_work_space_dir, text_role, text_template, text_sync_ref_audio_dir2], [text_create_config_info]) - button_sample.click(sample, [text_work_space_dir, text_sample_dir, text_base_voice_path, + button_sample.click(sample, [text_work_space_dir, text_role, text_sample_dir, text_base_voice_path, text_subsection_num, text_sample_num, checkbox_similarity_output], [text_sample_info, text_model_inference_voice_dir, text_sync_ref_audio_dir, text_sync_ref_audio_dir2])