diff --git a/Ref_Audio_Selector/common/common.py b/Ref_Audio_Selector/common/common.py index 9742a44..5957fc3 100644 --- a/Ref_Audio_Selector/common/common.py +++ b/Ref_Audio_Selector/common/common.py @@ -1,4 +1,7 @@ from tools import my_utils +from config import python_exec, is_half +import subprocess +import sys import os @@ -118,6 +121,34 @@ def check_path_existence_and_return(path): return "" +def open_file(filepath): + if sys.platform.startswith('darwin'): + subprocess.run(['open', filepath]) # macOS + elif os.name == 'nt': # For Windows + os.startfile(filepath) + elif os.name == 'posix': # For Linux, Unix, etc. + subprocess.run(['xdg-open', filepath]) + + +def start_new_service(script_path): + # 对于Windows系统 + if sys.platform.startswith('win'): + cmd = f'start cmd /k {python_exec} {script_path}' + # 对于Mac或者Linux系统 + else: + cmd = f'xterm -e {python_exec} {script_path}' + + proc = subprocess.Popen(cmd, shell=True) + + # 关闭之前启动的子进程 + # proc.terminate() + + # 或者如果需要强制关闭可以使用 + # proc.kill() + + return proc + + if __name__ == '__main__': dir = r'C:\Users\Administrator\Desktop/test' dir2 = r'"C:\Users\Administrator\Desktop\test2"' diff --git a/Ref_Audio_Selector/ref_audio_selector_webui.py b/Ref_Audio_Selector/ref_audio_selector_webui.py index 0167033..41ce14a 100644 --- a/Ref_Audio_Selector/ref_audio_selector_webui.py +++ b/Ref_Audio_Selector/ref_audio_selector_webui.py @@ -6,10 +6,12 @@ import gradio as gr from Ref_Audio_Selector.config_param.log_config import logger +import Ref_Audio_Selector.tool.model_manager as model_manager import Ref_Audio_Selector.tool.audio_similarity as audio_similarity import Ref_Audio_Selector.tool.audio_inference as audio_inference import Ref_Audio_Selector.tool.audio_config as audio_config -import Ref_Audio_Selector.tool.delete_inference_with_ref as delete_inference_with_ref +import Ref_Audio_Selector.tool.audio_check as audio_check +import Ref_Audio_Selector.tool.text_check as text_check import Ref_Audio_Selector.common.common as 
common import Ref_Audio_Selector.config_param.config_params as params import Ref_Audio_Selector.common.time_util as time_util @@ -148,7 +150,7 @@ def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path # 根据参考音频和测试文本,执行批量推理 -def model_inference(text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, +def model_inference(text_work_space_dir, text_role, slider_request_concurrency_num, text_model_inference_voice_dir, text_url, text_text, text_ref_path, text_ref_text, text_emotion, text_test_content_dir): text_work_space_dir, text_model_inference_voice_dir, text_test_content_dir \ @@ -175,7 +177,7 @@ def model_inference(text_work_space_dir, text_role, text_model_inference_voice_d text_asr_audio_dir = os.path.join(inference_dir, params.inference_audio_text_aggregation_dir) - url_composer = audio_inference.URLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) + url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer.is_valid() text_list = common.read_text_file_to_list(text_test_content_dir) if text_list is None or len(text_list) == 0: @@ -187,7 +189,8 @@ def model_inference(text_work_space_dir, text_role, text_model_inference_voice_d time_consuming, _ = time_util.time_monitor(audio_inference.generate_audio_files_parallel)(url_composer, text_list, ref_audio_manager.get_ref_audio_list(), - inference_dir, 3) + inference_dir, + slider_request_concurrency_num) text_model_inference_info = f"耗时:{time_consuming:0.1f}秒;推理成功:生成目录{inference_dir}" @@ -267,7 +270,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang): # 对asr生成的文件,与原本的文本内容,进行相似度分析 -def text_similarity_analysis(text_work_space_dir, text_role, +def text_similarity_analysis(text_work_space_dir, text_role, slider_text_similarity_amplification_boundary, text_text_similarity_analysis_path): text_work_space_dir, text_text_similarity_analysis_path \ = 
common.batch_clean_paths([text_work_space_dir, text_text_similarity_analysis_path]) @@ -281,7 +284,7 @@ def text_similarity_analysis(text_work_space_dir, text_role, similarity_dir = os.path.join(base_role_dir, params.text_similarity_output_dir) time_consuming, _ = time_util.time_monitor(open_text_similarity_analysis)(text_text_similarity_analysis_path, - similarity_dir) + similarity_dir, slider_text_similarity_amplification_boundary) text_text_similarity_analysis_info = f"耗时:{time_consuming:0.1f}秒;相似度分析成功:生成目录{similarity_dir}" @@ -357,10 +360,9 @@ def sync_ref_audio(text_work_space_dir, text_role, text_sync_ref_audio_dir, if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '': raise Exception("推理生成的音频目录不能为空") time_consuming, (delete_text_wav_num, delete_emotion_dir_num) \ - = time_util.time_monitor(delete_inference_with_ref.sync_ref_audio)(text_sync_ref_audio_dir, + = time_util.time_monitor(audio_check.sync_ref_audio)(text_sync_ref_audio_dir, text_sync_inference_audio_dir) - # delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio( - # text_sync_ref_audio_dir, text_sync_inference_audio_dir) + text_sync_ref_audio_info = (f"耗时:{time_consuming:0.1f}秒;推理音频目录{text_sync_inference_audio_dir}下," f"text目录删除了{delete_text_wav_num}个推理音频,emotion目录下,删除了{delete_emotion_dir_num}个目录") except Exception as e: @@ -402,7 +404,7 @@ def create_config(text_work_space_dir, text_role, text_template, text_sync_ref_a # 基于请求路径和参数,合成完整的请求路径 def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion): - url_composer = audio_inference.URLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) + url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) if url_composer.is_emotion(): text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False) else: @@ -410,6 +412,111 @@ def whole_url(text_url, text_text, text_ref_path, text_ref_text, 
text_emotion): return text_whole_url +def start_api(): + text_start_api_info = None + try: + proc = common.start_new_service('api.py') + text_start_api_info = "启动完成" + except Exception as e: + logger.error("发生异常: \n%s", traceback.format_exc()) + text_start_api_info = f"发生异常:{e}" + return text_start_api_info + + +def refresh_api_model(): + return ({"choices": model_manager.get_gpt_model_names(), "__type__": "update"}, + {"choices": model_manager.get_sovits_model_names(), "__type__": "update"}) + + +def api_set_model_whole_url(text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param): + url = audio_inference.SetModelURLComposer("all", text_api_set_model_base_url, text_api_gpt_param, text_api_sovits_param) + return url.build_get_url([dropdown_api_gpt_models, dropdown_api_sovits_models], False) + + +def start_api_set_model(text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param): + text_api_start_set_model_request_info = None + try: + if dropdown_api_gpt_models is None or dropdown_api_gpt_models == '': + raise Exception("GPT模型不能为空") + if dropdown_api_sovits_models is None or dropdown_api_sovits_models == '': + raise Exception("Sovits模型不能为空") + url = audio_inference.SetModelURLComposer("all", text_api_set_model_base_url, text_api_gpt_param, text_api_sovits_param) + url.is_valid() + time_consuming, result = time_util.time_monitor(audio_inference.start_api_set_model)(url, dropdown_api_gpt_models, dropdown_api_sovits_models) + text_api_start_set_model_request_info = f"耗时:{time_consuming:0.1f}秒;请求结果:{result}" + except Exception as e: + logger.error("发生异常: \n%s", traceback.format_exc()) + text_api_start_set_model_request_info = f"发生异常:{e}" + return text_api_start_set_model_request_info + + +def refresh_api_v2_gpt_model(): + return {"choices": model_manager.get_gpt_model_names(), "__type__": "update"} + + +def 
api_v2_set_gpt_whole_url(text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models): + url = audio_inference.SetModelURLComposer("gpt", text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, None) + return url.build_get_url([dropdown_api_v2_gpt_models], False) + +def start_api_v2_set_gpt_model(text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models): + text_api_v2_start_set_gpt_model_request_info = None + try: + if dropdown_api_v2_gpt_models is None or dropdown_api_v2_gpt_models == '': + raise Exception("GPT模型不能为空") + url = audio_inference.SetModelURLComposer("gpt", text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, None) + url.is_valid() + time_consuming, result = time_util.time_monitor(audio_inference.start_api_v2_set_gpt_model)(url, dropdown_api_v2_gpt_models) + text_api_v2_start_set_gpt_model_request_info = f"耗时:{time_consuming:0.1f}秒;请求结果:{result}" + except Exception as e: + logger.error("发生异常: \n%s", traceback.format_exc()) + text_api_v2_start_set_gpt_model_request_info = f"发生异常:{e}" + return text_api_v2_start_set_gpt_model_request_info + + +def refresh_api_v2_sovits_model(): + return {"choices": model_manager.get_sovits_model_names(), "__type__": "update"} + +def api_v2_set_sovits_whole_url(text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models): + url = audio_inference.SetModelURLComposer("sovits", text_api_v2_set_sovits_model_base_url, None, text_api_v2_sovits_model_param) + return url.build_get_url([dropdown_api_v2_sovits_models], False) + + +def start_api_v2_set_sovits_model(text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models): + text_api_v2_start_set_sovits_model_request_info = None + try: + if dropdown_api_v2_sovits_models is None or dropdown_api_v2_sovits_models == '': + raise Exception("Sovits模型不能为空") + url = audio_inference.SetModelURLComposer("sovits", 
text_api_v2_set_sovits_model_base_url, None, text_api_v2_sovits_model_param) + url.is_valid() + time_consuming, result = time_util.time_monitor(audio_inference.start_api_v2_set_sovits_model)(url, dropdown_api_v2_sovits_models) + text_api_v2_start_set_sovits_model_request_info = f"耗时:{time_consuming:0.1f}秒;请求结果:{result}" + except Exception as e: + logger.error("发生异常: \n%s", traceback.format_exc()) + text_api_v2_start_set_sovits_model_request_info = f"发生异常:{e}" + return text_api_v2_start_set_sovits_model_request_info + + +def open_file(file_path): + common.open_file(my_utils.clean_path(file_path)) + + +def delete_ref_audio_below_boundary(ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir, slider_audio_text_similarity_boundary): + text_delete_ref_audio_below_boundary_info = None + ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir = common.batch_clean_paths([ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir]) + try: + if ref_audio_path is None or ref_audio_path == '': + raise Exception("参考音频路径不能为空") + if text_text_similarity_result_path is None or text_text_similarity_result_path == '': + raise Exception("文本相似度结果路径不能为空") + time_consuming, count = time_util.time_monitor(text_check.delete_ref_audio_below_boundary)(ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir, slider_audio_text_similarity_boundary) + text_delete_ref_audio_below_boundary_info = f"耗时:{time_consuming:0.1f}秒;删除参考音频数量:{count}" + except Exception as e: + logger.error("发生异常: \n%s", traceback.format_exc()) + text_delete_ref_audio_below_boundary_info = f"发生异常:{e}" + return text_delete_ref_audio_below_boundary_info + + + def save_generate_audio_url(generate_audio_url): rw_param.write(rw_param.generate_audio_url, generate_audio_url) @@ -461,8 +568,8 @@ if __name__ == '__main__': text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), value=default_work_space_dir) text_role = 
gr.Text(label=i18n("角色名称"), value=default_role) - text_work_space_dir.input(save_work_dir, [text_work_space_dir, text_role], [text_role]) - text_role.input(save_role, [text_role], []) + text_work_space_dir.blur(save_work_dir, [text_work_space_dir, text_role], [text_role]) + text_role.blur(save_role, [text_role], []) with gr.Tab(label=i18n("第一步:基于训练素材,生成待选参考音频列表")): gr.Markdown(value=i18n("1.1:选择list文件,并提取3-10秒的素材作为参考候选")) text_list_input = gr.Text(label=i18n("请输入list文件路径"), value="") @@ -493,47 +600,62 @@ if __name__ == '__main__': value=default_model_inference_voice_dir, interactive=True) gr.Markdown(value=i18n("2.1:启动推理服务,并配置模型参数")) with gr.Accordion(label=i18n("详情")): - with gr.Tab(label=i18n("主项目下api服务")): + with gr.Tab(label=i18n("主项目下api.py服务")): gr.Markdown(value=i18n("2.1.1:启动服务")) with gr.Row(): - gr.Button(i18n("启动api"), variant="primary") - gr.Text(label=i18n("api启动信息"), value="", interactive=False) + button_start_api = gr.Button(i18n("启动api"), variant="primary") + text_start_api_info = gr.Text(label=i18n("api启动信息"), value="", interactive=False) + button_start_api.click(start_api, [], [text_start_api_info]) gr.Markdown(value=i18n("2.1.2:设置模型参数")) - gr.Text(label=i18n("请输入api服务模型切换接口地址"), value="", interactive=True) + text_api_set_model_base_url = gr.Text(label=i18n("请输入api服务模型切换接口地址"), value="", interactive=True) with gr.Row(): - gr.Dropdown(label=i18n("GPT模型列表"), choices=[], value="", interactive=True) - gr.Dropdown(label=i18n("SoVITS模型列表"), choices=[], value="", interactive=True) - gr.Button(i18n("刷新模型路径"), variant="primary") + dropdown_api_gpt_models = gr.Dropdown(label=i18n("GPT模型列表"), choices=model_manager.get_gpt_model_names(), value="", interactive=True) + dropdown_api_sovits_models = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=model_manager.get_sovits_model_names(), value="", interactive=True) + button_refresh_api_model = gr.Button(i18n("刷新模型路径"), variant="primary") + button_refresh_api_model.click(refresh_api_model, [], 
[dropdown_api_gpt_models, dropdown_api_sovits_models]) with gr.Row(): - gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) - gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) + text_api_gpt_param = gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) + text_api_sovits_param = gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) gr.Markdown(value=i18n("2.1.3:发起设置请求")) - gr.Text(label=i18n("完整的模型参数设置请求地址"), value="", interactive=False) + text_api_set_model_whole_url = gr.Text(label=i18n("完整的模型参数设置请求地址"), value="", interactive=False) + dropdown_api_gpt_models.change(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url]) + dropdown_api_sovits_models.change(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url]) + text_api_gpt_param.input(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url]) + text_api_sovits_param.input(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url]) with gr.Row(): - gr.Button(i18n("发起模型设置请求"), variant="primary") - gr.Text(label=i18n("设置请求结果"), value="", interactive=False) - with gr.Tab(label=i18n("fast项目下api_v2服务")): + button_api_start_set_model_request = gr.Button(i18n("发起模型设置请求"), variant="primary") + text_api_start_set_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False) + button_api_start_set_model_request.click(start_api_set_model, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], 
[text_api_start_set_model_request_info]) + with gr.Tab(label=i18n("fast项目下api_v2.py服务")): gr.Markdown(value=i18n("2.1.1:请到你的项目下,启动服务")) gr.Markdown(value=i18n("2.1.2:设置GPT模型参数")) - gr.Text(label=i18n("请输入api服务GPT模型切换接口地址"), value="", interactive=True) + text_api_v2_set_gpt_model_base_url = gr.Text(label=i18n("请输入api服务GPT模型切换接口地址"), value="", interactive=True) with gr.Row(): - gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) - gr.Dropdown(label=i18n("GPT模型列表"), choices=[], value="", interactive=True) - gr.Button(i18n("刷新模型路径"), variant="primary") - gr.Text(label=i18n("完整的GPT模型参数设置请求地址"), value="", interactive=False) + text_api_v2_gpt_model_param = gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) + dropdown_api_v2_gpt_models = gr.Dropdown(label=i18n("GPT模型列表"), choices=model_manager.get_gpt_model_names(), value="", interactive=True) + button_api_v2_refresh_gpt = gr.Button(i18n("刷新模型路径"), variant="primary") + button_api_v2_refresh_gpt.click(refresh_api_v2_gpt_model, [], [dropdown_api_v2_gpt_models]) + text_api_v2_set_gpt_model_whole_url = gr.Text(label=i18n("完整的GPT模型参数设置请求地址"), value="", interactive=False) + text_api_v2_gpt_model_param.input(api_v2_set_gpt_whole_url, [text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_set_gpt_model_whole_url]) + dropdown_api_v2_gpt_models.change(api_v2_set_gpt_whole_url, [text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_set_gpt_model_whole_url]) with gr.Row(): - gr.Button(i18n("发起GPT模型设置请求"), variant="primary") - gr.Text(label=i18n("设置请求结果"), value="", interactive=False) + button_api_v2_start_set_gpt_model_request = gr.Button(i18n("发起GPT模型设置请求"), variant="primary") + text_api_v2_start_set_gpt_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False) + button_api_v2_start_set_gpt_model_request.click(start_api_v2_set_gpt_model, [text_api_v2_set_gpt_model_base_url, 
text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_start_set_gpt_model_request_info]) gr.Markdown(value=i18n("2.1.3:设置SoVITS模型参数")) - gr.Text(label=i18n("请输入api服务SoVITS模型切换接口地址"), value="", interactive=True) + text_api_v2_set_sovits_model_base_url = gr.Text(label=i18n("请输入api服务SoVITS模型切换接口地址"), value="", interactive=True) with gr.Row(): - gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) - gr.Dropdown(label=i18n("SoVITS模型列表"), choices=[], value="", interactive=True) - gr.Button(i18n("刷新模型路径"), variant="primary") - gr.Text(label=i18n("完整的SoVITS模型参数设置请求地址"), value="", interactive=False) + text_api_v2_sovits_model_param = gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) + dropdown_api_v2_sovits_models = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=model_manager.get_sovits_model_names(), value="", interactive=True) + button_api_v2_refresh_sovits = gr.Button(i18n("刷新模型路径"), variant="primary") + button_api_v2_refresh_sovits.click(refresh_api_v2_sovits_model, [], [dropdown_api_v2_sovits_models]) + text_api_v2_set_sovits_model_whole_url = gr.Text(label=i18n("完整的SoVITS模型参数设置请求地址"), value="", interactive=False) + text_api_v2_sovits_model_param.input(api_v2_set_sovits_whole_url, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_set_sovits_model_whole_url]) + dropdown_api_v2_sovits_models.change(api_v2_set_sovits_whole_url, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_set_sovits_model_whole_url]) with gr.Row(): - gr.Button(i18n("发起SoVITS模型设置请求"), variant="primary") - gr.Text(label=i18n("设置请求结果"), value="", interactive=False) + button_api_v2_start_set_sovits_model_request = gr.Button(i18n("发起SoVITS模型设置请求"), variant="primary") + text_api_v2_start_set_sovits_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False) + 
button_api_v2_start_set_sovits_model_request.click(start_api_v2_set_sovits_model, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_start_set_sovits_model_request_info]) with gr.Tab(label=i18n("第三方推理服务")): gr.Markdown(value=i18n("启动第三方推理服务,并完成参考音频打包,模型参数设置等操作")) gr.Markdown(value=i18n("2.2:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步," @@ -567,7 +689,7 @@ if __name__ == '__main__': default_test_content_path = params.default_test_text_path text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path) gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的")) - gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3, + slider_request_concurrency_num = gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3, interactive=True) with gr.Row(): button_model_inference = gr.Button(i18n("开启批量推理"), variant="primary") @@ -606,7 +728,7 @@ if __name__ == '__main__': with gr.Row(): text_text_similarity_analysis_path = gr.Text(label=i18n("待分析的文件路径"), value=default_text_similarity_analysis_path, interactive=True) - gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("文本相似度放大边界"), value=0.90, + slider_text_similarity_amplification_boundary = gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("文本相似度放大边界"), value=0.90, interactive=True) button_asr.click(asr, [text_work_space_dir, text_role, text_asr_audio_dir, dropdown_asr_model, dropdown_asr_size, dropdown_asr_lang], @@ -615,18 +737,22 @@ if __name__ == '__main__': button_text_similarity_analysis = gr.Button(i18n("启动文本相似度分析"), variant="primary") text_text_similarity_analysis_info = gr.Text(label=i18n("文本相似度分析结果"), value="", interactive=False) - button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_role, + button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_role, 
slider_text_similarity_amplification_boundary, text_text_similarity_analysis_path], [text_text_similarity_analysis_info]) gr.Markdown(value=i18n("3.3:根据相似度分析结果,重点检查最后几条是否存在复读等问题")) with gr.Row(): - gr.Text(label=i18n("文本相似度分析结果文件所在路径"), value="", interactive=True) - gr.Button(i18n("打开文本相似度分析结果文件"), variant="primary") - gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("音频文本相似度边界值"), value=0.80, + text_text_similarity_result_path = gr.Text(label=i18n("文本相似度分析结果文件所在路径"), value="", interactive=True) + button_open_text_similarity_result = gr.Button(i18n("打开文本相似度分析结果文件"), variant="primary") + button_open_text_similarity_result.click(open_file, [text_text_similarity_result_path], []) + slider_audio_text_similarity_boundary = gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("音频文本相似度边界值"), value=0.80, interactive=True) + text_sync_inference_audio_dir2 = gr.Text(label=i18n("被同步的推理音频路径"), + value="", interactive=True) with gr.Row(): - gr.Button(i18n("删除音频文本相似度边界值以下的参考音频"), variant="primary") - gr.Text(label=i18n("删除结果"), value="", interactive=True) + button_delete_ref_audio_below_boundary = gr.Button(i18n("删除音频文本相似度边界值以下的参考音频"), variant="primary") + text_delete_ref_audio_below_boundary_info = gr.Text(label=i18n("删除结果"), value="", interactive=True) + button_delete_ref_audio_below_boundary.click(delete_ref_audio_below_boundary, [text_model_inference_voice_dir, text_text_similarity_result_path, text_sync_inference_audio_dir2, slider_audio_text_similarity_boundary], [text_delete_ref_audio_below_boundary_info]) with gr.Tab(label=i18n("第四步:校验参考音频音质")): gr.Markdown(value=i18n("4.1:对结果按音频相似度排序,或许有用吧,主要还是耳朵听")) with gr.Row(): @@ -676,7 +802,7 @@ if __name__ == '__main__': [text_sample_info, text_model_inference_voice_dir, text_sync_ref_audio_dir, text_sync_ref_audio_dir2]) button_model_inference.click(model_inference, - [text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, + [text_work_space_dir, text_role, slider_request_concurrency_num, 
text_model_inference_voice_dir, text_url, text_text, text_ref_path, text_ref_text, text_emotion, text_test_content], [text_model_inference_info, text_asr_audio_dir, text_sync_inference_audio_dir]) diff --git a/Ref_Audio_Selector/tool/delete_inference_with_ref.py b/Ref_Audio_Selector/tool/audio_check.py similarity index 100% rename from Ref_Audio_Selector/tool/delete_inference_with_ref.py rename to Ref_Audio_Selector/tool/audio_check.py diff --git a/Ref_Audio_Selector/tool/audio_inference.py b/Ref_Audio_Selector/tool/audio_inference.py index f6b4569..2535dec 100644 --- a/Ref_Audio_Selector/tool/audio_inference.py +++ b/Ref_Audio_Selector/tool/audio_inference.py @@ -3,16 +3,55 @@ import os import requests import itertools import multiprocessing -from multiprocessing import Pool from concurrent.futures import ProcessPoolExecutor import numpy as np import Ref_Audio_Selector.config_param.config_params as params -from Ref_Audio_Selector.common.time_util import timeit_decorator from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote from Ref_Audio_Selector.config_param.log_config import logger, p_logger -class URLComposer: +class SetModelURLComposer: + def __init__(self, type, base_url, gpt_param_name, sovits_param_name): + self.type = type + self.base_url = base_url + self.gpt_param_name = gpt_param_name + self.sovits_param_name = sovits_param_name + + def is_valid(self): + if self.base_url is None or self.base_url == '': + raise Exception("请求地址不能为空") + if self.type in ['gpt', 'all']: + if self.gpt_param_name is None or self.gpt_param_name == '': + raise Exception("GPT参数名不能为空") + if self.type in ['sovits', 'all']: + if self.sovits_param_name is None or self.sovits_param_name == '': + raise Exception("Sovits参数名不能为空") + + def build_get_url(self, value_array, need_url_encode=True): + params = {} + if self.type == 'gpt': + params[self.gpt_param_name] = value_array[0] + if self.type == 'sovits': + params[self.sovits_param_name] = value_array[0] + if 
self.type == 'all': + params[self.gpt_param_name] = value_array[0] + params[self.sovits_param_name] = value_array[1] + return append_params_to_url(self.base_url, params, need_url_encode) + + def build_post_url(self, value_array, need_url_encode=True): + url = append_params_to_url(self.base_url, {}, need_url_encode) + params = {} + if self.type == 'gpt': + params[self.gpt_param_name] = value_array[0] + if self.type == 'sovits': + params[self.sovits_param_name] = value_array[0] + if self.type == 'all': + params[self.gpt_param_name] = value_array[0] + params[self.sovits_param_name] = value_array[1] + return url, params + + +class TTSURLComposer: def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name): self.base_url = base_url self.emotion_param_name = emotion_param_name @@ -34,30 +73,26 @@ class URLComposer: return self.emotion_param_name is not None and self.emotion_param_name != '' def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True): - if not self.emotion_param_name: - raise ValueError("Emotion parameter name is not set.") params = { self.text_param_name: text_value, self.emotion_param_name: emotion_value, } - return self._append_params_to_url(params, need_url_encode) + return append_params_to_url(self.base_url, params, need_url_encode) def build_url_with_ref(self, text_value, ref_path_value, ref_text_value, need_url_encode=True): - if self.emotion_param_name: - raise ValueError("Cannot use reference parameters when emotion parameter is set.") params = { self.text_param_name: text_value, self.ref_path_param_name: ref_path_value, self.ref_text_param_name: ref_text_value, } - return self._append_params_to_url(params, need_url_encode) + return append_params_to_url(self.base_url, params, need_url_encode) - def _append_params_to_url(self, params, need_url_encode): - url_with_params = self.base_url - if params: - query_params = '&'.join([f"{k}={v}" for k, v in params.items()]) - 
url_with_params += '?' + query_params if '?' not in self.base_url else '&' + query_params - return url_with_params if not need_url_encode else safe_encode_query_params(url_with_params) + +def append_params_to_url(url_with_params, params, need_url_encode): + if params: + query_params = '&'.join([f"{k}={v}" for k, v in params.items()]) + url_with_params += '?' + query_params if '?' not in url_with_params else '&' + query_params + return url_with_params if not need_url_encode else safe_encode_query_params(url_with_params) def safe_encode_query_params(original_url): @@ -87,8 +122,9 @@ def generate_audio_files_parallel(url_composer, text_list, emotion_list, output_ emotion_groups = np.array_split(emotion_list, num_processes) with ProcessPoolExecutor(max_workers=num_processes) as executor: - futures = [executor.submit(generate_audio_files_for_emotion_group, url_composer, text_list, group, output_dir_path) - for group in emotion_groups] + futures = [ + executor.submit(generate_audio_files_for_emotion_group, url_composer, text_list, group, output_dir_path) + for group in emotion_groups] for future in futures: future.result() # 等待所有进程完成 @@ -162,3 +198,33 @@ def inference_audio_from_api(url): return response.content else: raise Exception(f"Failed to fetch audio from API. 
Server responded with status code {response.status_code}.")
+
+
+def start_api_set_model(set_model_url_composer, gpt_models, sovits_models):
+    url, post_body = set_model_url_composer.build_post_url(gpt_models, sovits_models)
+    response = requests.post(url, json=post_body)
+    if response.status_code == 200:
+        result = response.text
+        return result
+    else:
+        return f'请求失败,状态码:{response.status_code}'
+
+
+def start_api_v2_set_gpt_model(set_model_url_composer, gpt_models):
+    url = set_model_url_composer.build_get_url([gpt_models])
+    response = requests.get(url)
+    if response.status_code == 200:
+        result = response.text
+        return result
+    else:
+        return f'请求失败,状态码:{response.status_code}'
+
+
+def start_api_v2_set_sovits_model(set_model_url_composer, sovits_models):
+    url = set_model_url_composer.build_get_url([sovits_models])
+    response = requests.get(url)
+    if response.status_code == 200:
+        result = response.text
+        return result
+    else:
+        return f'请求失败,状态码:{response.status_code}'
diff --git a/Ref_Audio_Selector/tool/model_manager.py b/Ref_Audio_Selector/tool/model_manager.py
new file mode 100644
index 0000000..53e352e
--- /dev/null
+++ b/Ref_Audio_Selector/tool/model_manager.py
@@ -0,0 +1,34 @@
+import os
+import re
+
+pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth"
+pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
+SoVITS_weight_root = "SoVITS_weights"
+GPT_weight_root = "GPT_weights"
+os.makedirs(SoVITS_weight_root, exist_ok=True)
+os.makedirs(GPT_weight_root, exist_ok=True)
+
+
+def custom_sort_key(s):
+    # 使用正则表达式提取字符串中的数字部分和非数字部分
+    parts = re.split(r'(\d+)', s)
+    # 将数字部分转换为整数,非数字部分保持不变
+    parts = [int(part) if part.isdigit() else part for part in parts]
+    return parts
+
+
+def get_gpt_model_names():
+    gpt_names = [pretrained_gpt_name]
+    for name in os.listdir(GPT_weight_root):
+        if name.endswith(".ckpt"): gpt_names.append("%s/%s" % (GPT_weight_root, name))
+    gpt_names.sort(key=custom_sort_key)
+    return gpt_names
+
+
+def get_sovits_model_names():
+    sovits_names = [pretrained_sovits_name]
+    for name in os.listdir(SoVITS_weight_root):
+        if name.endswith(".pth"): sovits_names.append("%s/%s" % (SoVITS_weight_root, name))
+    sovits_names.sort(key=custom_sort_key)
+    return sovits_names
+
diff --git a/Ref_Audio_Selector/tool/text_check.py b/Ref_Audio_Selector/tool/text_check.py
new file mode 100644
index 0000000..98c299d
--- /dev/null
+++ b/Ref_Audio_Selector/tool/text_check.py
@@ -0,0 +1,77 @@
+import os
+import Ref_Audio_Selector.common.common as common
+import Ref_Audio_Selector.tool.audio_check as audio_check
+from Ref_Audio_Selector.config_param.log_config import logger
+
+
+def parse_text_similarity_result_txt(file_path):
+    """
+    解析指定格式的txt文件,每行格式:f"{item['average_similarity_score']}|{item['count']}|{item['emotion']}"
+
+    :param file_path: txt文件的路径
+    :return: 包含解析后数据的字典列表
+    """
+    data_list = []
+    with open(file_path, 'r', encoding='utf-8') as file:
+        for line in file:
+            # 使用'|'作为分隔符分割每行数据
+            parts = line.strip().split('|')
+            if len(parts) == 3:
+                # 将分割后的字符串转换为浮点数、整数和字符串
+                try:
+                    item = {
+                        'average_similarity_score': float(parts[0]),
+                        'count': int(parts[1]),
+                        'emotion': parts[2]
+                    }
+                    data_list.append(item)
+                except ValueError as e:
+                    # 如果转换失败,打印错误信息并跳过该行
+                    logger.error(f"Error parsing line: {line.strip()} - {e}")
+
+    return data_list
+
+
+def remove_low_similarity_files(ref_audio_list, report_list, audio_text_similarity_boundary):
+    """
+    根据条件删除低相似度音频文件并返回删除数量。
+
+    :param ref_audio_list: 包含音频路径和情感属性的列表
+    :param report_list: 包含相似度评分和情感属性的列表
+    :param audio_text_similarity_boundary: 相似度阈值
+    :return: 删除的文件数量
+    """
+    deleted_count = 0
+
+    # 筛选出平均相似度低于阈值的报告
+    low_similarity_reports = [report for report in report_list if
+                              report['average_similarity_score'] < audio_text_similarity_boundary]
+
+    # 遍历低相似度报告,查找并删除对应音频文件
+    for report in low_similarity_reports:
+        emotion = report['emotion']
+        # 查找ref_audio_list中相同情感的音频文件路径
+        matching_refs = [ref for ref in ref_audio_list if ref['emotion'] == emotion]
+        for match in matching_refs:
+            ref_path = match['ref_path']
+            # 检查文件是否存在,然后尝试删除
+            if os.path.exists(ref_path):
+                try:
+                    os.remove(ref_path)
+                    deleted_count += 1
+                    logger.info(f"Deleted file: {ref_path}")
+                except Exception as e:
+                    logger.error(f"Error deleting file {ref_path}: {e}")
+            else:
+                logger.error(f"File not found: {ref_path}")
+
+    return deleted_count
+
+
+def delete_ref_audio_below_boundary(ref_audio_path, text_similarity_result_path, sync_inference_audio_dir,
+                                    audio_text_similarity_boundary):
+    ref_audio_list = common.RefAudioListManager(ref_audio_path).get_ref_audio_list()
+    report_list = parse_text_similarity_result_txt(text_similarity_result_path)
+    count = remove_low_similarity_files(ref_audio_list, report_list, audio_text_similarity_boundary)
+    audio_check.sync_ref_audio(ref_audio_path, sync_inference_audio_dir)
+    return count
diff --git a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
index 96ce218..c4eccd6 100644
--- a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
+++ b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
@@ -56,7 +56,7 @@ def calculate_average_similarity_by_emotion(data_list):
         similarity_score = item['similarity_score']
         result_dict[emotion].append(similarity_score)
 
-    average_scores = [{'emotion': emotion, 'average_similarity_score': sum(scores) / len(scores)}
+    average_scores = [{'emotion': emotion, 'average_similarity_score': sum(scores) / len(scores), 'count': len(scores)}
                       for emotion, scores in result_dict.items()]
 
     average_scores.sort(key=lambda x: x['average_similarity_score'], reverse=True)
@@ -123,7 +123,7 @@ def process(asr_file_path, output_dir, similarity_enlarge_boundary):
     average_similarity_file = os.path.join(output_dir,
                                            f'{params.text_emotion_average_similarity_report_filename}.txt')
     average_similarity_content = \
-        '\n'.join([f"{item['average_similarity_score']}|{item['emotion']}" for item in average_similarity_list])
+        '\n'.join([f"{item['average_similarity_score']}|{item['count']}|{item['emotion']}" for item in average_similarity_list])
     common.write_text_to_file(average_similarity_content, average_similarity_file)
 
     emotion_detail_list = group_and_sort_by_field(records, 'emotion')