添加事件绑定和实现

This commit is contained in:
Downupanddownup 2024-04-28 14:10:02 +08:00
parent 25b65cdfd0
commit 9264f7e38e
7 changed files with 401 additions and 67 deletions

View File

@ -1,4 +1,7 @@
from tools import my_utils from tools import my_utils
from config import python_exec, is_half
import subprocess
import sys
import os import os
@ -118,6 +121,34 @@ def check_path_existence_and_return(path):
return "" return ""
def open_file(filepath):
    """Open *filepath* with the platform's default application."""
    if sys.platform.startswith('darwin'):
        subprocess.run(['open', filepath])  # macOS: Finder's "open"
        return
    if os.name == 'nt':
        os.startfile(filepath)  # Windows: shell file association
        return
    if os.name == 'posix':
        subprocess.run(['xdg-open', filepath])  # Linux/Unix: freedesktop opener
def start_new_service(script_path):
    """Launch *script_path* as a separate service process in its own terminal window.

    Returns the subprocess.Popen handle so the caller can later stop the
    service with terminate()/kill() if needed.
    """
    # On Windows: open a new cmd window; /k keeps the window open after the
    # script starts, so its output stays visible.
    if sys.platform.startswith('win'):
        cmd = f'start cmd /k {python_exec} {script_path}'
    # On macOS/Linux: run the script inside an xterm window.
    # NOTE(review): assumes xterm is installed — not true on all desktops; confirm.
    else:
        cmd = f'xterm -e {python_exec} {script_path}'
    # shell=True is required for the Windows `start` builtin. Paths are not
    # quoted here — NOTE(review): confirm callers never pass paths with spaces.
    proc = subprocess.Popen(cmd, shell=True)
    # To stop the previously started child process:
    # proc.terminate()
    # or, to force-kill it:
    # proc.kill()
    return proc
if __name__ == '__main__': if __name__ == '__main__':
dir = r'C:\Users\Administrator\Desktop/test' dir = r'C:\Users\Administrator\Desktop/test'
dir2 = r'"C:\Users\Administrator\Desktop\test2"' dir2 = r'"C:\Users\Administrator\Desktop\test2"'

View File

@ -6,10 +6,12 @@ import gradio as gr
from Ref_Audio_Selector.config_param.log_config import logger from Ref_Audio_Selector.config_param.log_config import logger
import Ref_Audio_Selector.tool.model_manager as model_manager
import Ref_Audio_Selector.tool.audio_similarity as audio_similarity import Ref_Audio_Selector.tool.audio_similarity as audio_similarity
import Ref_Audio_Selector.tool.audio_inference as audio_inference import Ref_Audio_Selector.tool.audio_inference as audio_inference
import Ref_Audio_Selector.tool.audio_config as audio_config import Ref_Audio_Selector.tool.audio_config as audio_config
import Ref_Audio_Selector.tool.delete_inference_with_ref as delete_inference_with_ref import Ref_Audio_Selector.tool.audio_check as audio_check
import Ref_Audio_Selector.tool.text_check as text_check
import Ref_Audio_Selector.common.common as common import Ref_Audio_Selector.common.common as common
import Ref_Audio_Selector.config_param.config_params as params import Ref_Audio_Selector.config_param.config_params as params
import Ref_Audio_Selector.common.time_util as time_util import Ref_Audio_Selector.common.time_util as time_util
@ -148,7 +150,7 @@ def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path
# 根据参考音频和测试文本,执行批量推理 # 根据参考音频和测试文本,执行批量推理
def model_inference(text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, def model_inference(text_work_space_dir, text_role, slider_request_concurrency_num, text_model_inference_voice_dir, text_url,
text_text, text_ref_path, text_ref_text, text_emotion, text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content_dir): text_test_content_dir):
text_work_space_dir, text_model_inference_voice_dir, text_test_content_dir \ text_work_space_dir, text_model_inference_voice_dir, text_test_content_dir \
@ -175,7 +177,7 @@ def model_inference(text_work_space_dir, text_role, text_model_inference_voice_d
text_asr_audio_dir = os.path.join(inference_dir, text_asr_audio_dir = os.path.join(inference_dir,
params.inference_audio_text_aggregation_dir) params.inference_audio_text_aggregation_dir)
url_composer = audio_inference.URLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text)
url_composer.is_valid() url_composer.is_valid()
text_list = common.read_text_file_to_list(text_test_content_dir) text_list = common.read_text_file_to_list(text_test_content_dir)
if text_list is None or len(text_list) == 0: if text_list is None or len(text_list) == 0:
@ -187,7 +189,8 @@ def model_inference(text_work_space_dir, text_role, text_model_inference_voice_d
time_consuming, _ = time_util.time_monitor(audio_inference.generate_audio_files_parallel)(url_composer, time_consuming, _ = time_util.time_monitor(audio_inference.generate_audio_files_parallel)(url_composer,
text_list, text_list,
ref_audio_manager.get_ref_audio_list(), ref_audio_manager.get_ref_audio_list(),
inference_dir, 3) inference_dir,
slider_request_concurrency_num)
text_model_inference_info = f"耗时:{time_consuming:0.1f}秒;推理成功:生成目录{inference_dir}" text_model_inference_info = f"耗时:{time_consuming:0.1f}秒;推理成功:生成目录{inference_dir}"
@ -267,7 +270,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
# 对asr生成的文件与原本的文本内容进行相似度分析 # 对asr生成的文件与原本的文本内容进行相似度分析
def text_similarity_analysis(text_work_space_dir, text_role, def text_similarity_analysis(text_work_space_dir, text_role, slider_text_similarity_amplification_boundary,
text_text_similarity_analysis_path): text_text_similarity_analysis_path):
text_work_space_dir, text_text_similarity_analysis_path \ text_work_space_dir, text_text_similarity_analysis_path \
= common.batch_clean_paths([text_work_space_dir, text_text_similarity_analysis_path]) = common.batch_clean_paths([text_work_space_dir, text_text_similarity_analysis_path])
@ -281,7 +284,7 @@ def text_similarity_analysis(text_work_space_dir, text_role,
similarity_dir = os.path.join(base_role_dir, params.text_similarity_output_dir) similarity_dir = os.path.join(base_role_dir, params.text_similarity_output_dir)
time_consuming, _ = time_util.time_monitor(open_text_similarity_analysis)(text_text_similarity_analysis_path, time_consuming, _ = time_util.time_monitor(open_text_similarity_analysis)(text_text_similarity_analysis_path,
similarity_dir) similarity_dir, slider_text_similarity_amplification_boundary)
text_text_similarity_analysis_info = f"耗时:{time_consuming:0.1f}秒;相似度分析成功:生成目录{similarity_dir}" text_text_similarity_analysis_info = f"耗时:{time_consuming:0.1f}秒;相似度分析成功:生成目录{similarity_dir}"
@ -357,10 +360,9 @@ def sync_ref_audio(text_work_space_dir, text_role, text_sync_ref_audio_dir,
if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '': if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '':
raise Exception("推理生成的音频目录不能为空") raise Exception("推理生成的音频目录不能为空")
time_consuming, (delete_text_wav_num, delete_emotion_dir_num) \ time_consuming, (delete_text_wav_num, delete_emotion_dir_num) \
= time_util.time_monitor(delete_inference_with_ref.sync_ref_audio)(text_sync_ref_audio_dir, = time_util.time_monitor(audio_check.sync_ref_audio)(text_sync_ref_audio_dir,
text_sync_inference_audio_dir) text_sync_inference_audio_dir)
# delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio(
# text_sync_ref_audio_dir, text_sync_inference_audio_dir)
text_sync_ref_audio_info = (f"耗时:{time_consuming:0.1f}秒;推理音频目录{text_sync_inference_audio_dir}下," text_sync_ref_audio_info = (f"耗时:{time_consuming:0.1f}秒;推理音频目录{text_sync_inference_audio_dir}下,"
f"text目录删除了{delete_text_wav_num}个推理音频emotion目录下删除了{delete_emotion_dir_num}个目录") f"text目录删除了{delete_text_wav_num}个推理音频emotion目录下删除了{delete_emotion_dir_num}个目录")
except Exception as e: except Exception as e:
@ -402,7 +404,7 @@ def create_config(text_work_space_dir, text_role, text_template, text_sync_ref_a
# 基于请求路径和参数,合成完整的请求路径 # 基于请求路径和参数,合成完整的请求路径
def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion): def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion):
url_composer = audio_inference.URLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text) url_composer = audio_inference.TTSURLComposer(text_url, text_emotion, text_text, text_ref_path, text_ref_text)
if url_composer.is_emotion(): if url_composer.is_emotion():
text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False) text_whole_url = url_composer.build_url_with_emotion('测试内容', '情绪类型', False)
else: else:
@ -410,6 +412,111 @@ def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion):
return text_whole_url return text_whole_url
def start_api():
    """Start the main project's api.py as a new service process; return status text."""
    try:
        common.start_new_service('api.py')
        return "启动完成"
    except Exception as e:
        # Log the full traceback, surface only the short message to the UI.
        logger.error("发生异常: \n%s", traceback.format_exc())
        return f"发生异常:{e}"
def refresh_api_model():
    """Re-scan the model folders and return gradio update dicts for both dropdowns."""
    gpt_update = {"choices": model_manager.get_gpt_model_names(), "__type__": "update"}
    sovits_update = {"choices": model_manager.get_sovits_model_names(), "__type__": "update"}
    return gpt_update, sovits_update
def api_set_model_whole_url(text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param):
    """Preview the GET url that would switch both GPT and SoVITS models."""
    composer = audio_inference.SetModelURLComposer(
        "all", text_api_set_model_base_url, text_api_gpt_param, text_api_sovits_param)
    # False: show the raw (un-encoded) url in the UI preview.
    return composer.build_get_url([dropdown_api_gpt_models, dropdown_api_sovits_models], False)
def start_api_set_model(text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param):
    """Send the combined GPT+SoVITS model-switch request; return a status message."""
    try:
        if dropdown_api_gpt_models is None or dropdown_api_gpt_models == '':
            raise Exception("GPT模型不能为空")
        if dropdown_api_sovits_models is None or dropdown_api_sovits_models == '':
            raise Exception("Sovits模型不能为空")
        composer = audio_inference.SetModelURLComposer(
            "all", text_api_set_model_base_url, text_api_gpt_param, text_api_sovits_param)
        composer.is_valid()
        elapsed, result = time_util.time_monitor(audio_inference.start_api_set_model)(
            composer, dropdown_api_gpt_models, dropdown_api_sovits_models)
        return f"耗时:{elapsed:0.1f}秒;请求结果:{result}"
    except Exception as e:
        logger.error("发生异常: \n%s", traceback.format_exc())
        return f"发生异常:{e}"
def refresh_api_v2_gpt_model():
    """Return a gradio update dict with freshly scanned GPT model names."""
    choices = model_manager.get_gpt_model_names()
    return {"choices": choices, "__type__": "update"}
def api_v2_set_gpt_whole_url(text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models):
    """Preview the GET url that would switch the GPT model on api_v2."""
    composer = audio_inference.SetModelURLComposer(
        "gpt", text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, None)
    # False: keep the preview url un-encoded for readability.
    return composer.build_get_url([dropdown_api_v2_gpt_models], False)
def start_api_v2_set_gpt_model(text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models):
    """Send the GPT model-switch request to the api_v2 service; return status text."""
    try:
        if dropdown_api_v2_gpt_models is None or dropdown_api_v2_gpt_models == '':
            raise Exception("GPT模型不能为空")
        composer = audio_inference.SetModelURLComposer(
            "gpt", text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, None)
        composer.is_valid()
        elapsed, result = time_util.time_monitor(audio_inference.start_api_v2_set_gpt_model)(
            composer, dropdown_api_v2_gpt_models)
        return f"耗时:{elapsed:0.1f}秒;请求结果:{result}"
    except Exception as e:
        logger.error("发生异常: \n%s", traceback.format_exc())
        return f"发生异常:{e}"
def refresh_api_v2_sovits_model():
    """Return a gradio update dict with freshly scanned SoVITS model names."""
    choices = model_manager.get_sovits_model_names()
    return {"choices": choices, "__type__": "update"}
def api_v2_set_sovits_whole_url(text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models):
    """Preview the GET url that would switch the SoVITS model on api_v2."""
    composer = audio_inference.SetModelURLComposer(
        "sovits", text_api_v2_set_sovits_model_base_url, None, text_api_v2_sovits_model_param)
    # False: keep the preview url un-encoded for readability.
    return composer.build_get_url([dropdown_api_v2_sovits_models], False)
def start_api_v2_set_sovits_model(text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models):
    """Send the SoVITS model-switch request to the api_v2 service; return status text."""
    try:
        if dropdown_api_v2_sovits_models is None or dropdown_api_v2_sovits_models == '':
            raise Exception("Sovits模型不能为空")
        composer = audio_inference.SetModelURLComposer(
            "sovits", text_api_v2_set_sovits_model_base_url, None, text_api_v2_sovits_model_param)
        composer.is_valid()
        elapsed, result = time_util.time_monitor(audio_inference.start_api_v2_set_sovits_model)(
            composer, dropdown_api_v2_sovits_models)
        return f"耗时:{elapsed:0.1f}秒;请求结果:{result}"
    except Exception as e:
        logger.error("发生异常: \n%s", traceback.format_exc())
        return f"发生异常:{e}"
def open_file(file_path):
    """Normalize *file_path* and open it with the system's default handler."""
    cleaned = my_utils.clean_path(file_path)
    common.open_file(cleaned)
def delete_ref_audio_below_boundary(ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir, slider_audio_text_similarity_boundary):
    """Delete reference audios whose text similarity is below the boundary; return status text."""
    # Path cleanup happens before the try block, matching the rest of the handlers.
    ref_audio_path, text_text_similarity_result_path, text_sync_inference_audio_dir = \
        common.batch_clean_paths([ref_audio_path, text_text_similarity_result_path,
                                  text_sync_inference_audio_dir])
    try:
        if ref_audio_path is None or ref_audio_path == '':
            raise Exception("参考音频路径不能为空")
        if text_text_similarity_result_path is None or text_text_similarity_result_path == '':
            raise Exception("文本相似度结果路径不能为空")
        elapsed, count = time_util.time_monitor(text_check.delete_ref_audio_below_boundary)(
            ref_audio_path, text_text_similarity_result_path,
            text_sync_inference_audio_dir, slider_audio_text_similarity_boundary)
        return f"耗时:{elapsed:0.1f}秒;删除参考音频数量:{count}"
    except Exception as e:
        logger.error("发生异常: \n%s", traceback.format_exc())
        return f"发生异常:{e}"
def save_generate_audio_url(generate_audio_url): def save_generate_audio_url(generate_audio_url):
rw_param.write(rw_param.generate_audio_url, generate_audio_url) rw_param.write(rw_param.generate_audio_url, generate_audio_url)
@ -461,8 +568,8 @@ if __name__ == '__main__':
text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"),
value=default_work_space_dir) value=default_work_space_dir)
text_role = gr.Text(label=i18n("角色名称"), value=default_role) text_role = gr.Text(label=i18n("角色名称"), value=default_role)
text_work_space_dir.input(save_work_dir, [text_work_space_dir, text_role], [text_role]) text_work_space_dir.blur(save_work_dir, [text_work_space_dir, text_role], [text_role])
text_role.input(save_role, [text_role], []) text_role.blur(save_role, [text_role], [])
with gr.Tab(label=i18n("第一步:基于训练素材,生成待选参考音频列表")): with gr.Tab(label=i18n("第一步:基于训练素材,生成待选参考音频列表")):
gr.Markdown(value=i18n("1.1选择list文件并提取3-10秒的素材作为参考候选")) gr.Markdown(value=i18n("1.1选择list文件并提取3-10秒的素材作为参考候选"))
text_list_input = gr.Text(label=i18n("请输入list文件路径"), value="") text_list_input = gr.Text(label=i18n("请输入list文件路径"), value="")
@ -493,47 +600,62 @@ if __name__ == '__main__':
value=default_model_inference_voice_dir, interactive=True) value=default_model_inference_voice_dir, interactive=True)
gr.Markdown(value=i18n("2.1:启动推理服务,并配置模型参数")) gr.Markdown(value=i18n("2.1:启动推理服务,并配置模型参数"))
with gr.Accordion(label=i18n("详情")): with gr.Accordion(label=i18n("详情")):
with gr.Tab(label=i18n("主项目下api服务")): with gr.Tab(label=i18n("主项目下api.py服务")):
gr.Markdown(value=i18n("2.1.1:启动服务")) gr.Markdown(value=i18n("2.1.1:启动服务"))
with gr.Row(): with gr.Row():
gr.Button(i18n("启动api"), variant="primary") button_start_api = gr.Button(i18n("启动api"), variant="primary")
gr.Text(label=i18n("api启动信息"), value="", interactive=False) text_start_api_info = gr.Text(label=i18n("api启动信息"), value="", interactive=False)
button_start_api.click(start_api, [], [text_start_api_info])
gr.Markdown(value=i18n("2.1.2:设置模型参数")) gr.Markdown(value=i18n("2.1.2:设置模型参数"))
gr.Text(label=i18n("请输入api服务模型切换接口地址"), value="", interactive=True) text_api_set_model_base_url = gr.Text(label=i18n("请输入api服务模型切换接口地址"), value="", interactive=True)
with gr.Row(): with gr.Row():
gr.Dropdown(label=i18n("GPT模型列表"), choices=[], value="", interactive=True) dropdown_api_gpt_models = gr.Dropdown(label=i18n("GPT模型列表"), choices=model_manager.get_gpt_model_names(), value="", interactive=True)
gr.Dropdown(label=i18n("SoVITS模型列表"), choices=[], value="", interactive=True) dropdown_api_sovits_models = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=model_manager.get_sovits_model_names(), value="", interactive=True)
gr.Button(i18n("刷新模型路径"), variant="primary") button_refresh_api_model = gr.Button(i18n("刷新模型路径"), variant="primary")
button_refresh_api_model.click(refresh_api_model, [], [dropdown_api_gpt_models, dropdown_api_sovits_models])
with gr.Row(): with gr.Row():
gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) text_api_gpt_param = gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True)
gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) text_api_sovits_param = gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True)
gr.Markdown(value=i18n("2.1.3:发起设置请求")) gr.Markdown(value=i18n("2.1.3:发起设置请求"))
gr.Text(label=i18n("完整的模型参数设置请求地址"), value="", interactive=False) text_api_set_model_whole_url = gr.Text(label=i18n("完整的模型参数设置请求地址"), value="", interactive=False)
dropdown_api_gpt_models.change(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url])
dropdown_api_sovits_models.change(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url])
text_api_gpt_param.input(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url])
text_api_sovits_param.input(api_set_model_whole_url, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_set_model_whole_url])
with gr.Row(): with gr.Row():
gr.Button(i18n("发起模型设置请求"), variant="primary") button_api_start_set_model_request = gr.Button(i18n("发起模型设置请求"), variant="primary")
gr.Text(label=i18n("设置请求结果"), value="", interactive=False) text_api_start_set_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False)
with gr.Tab(label=i18n("fast项目下api_v2服务")): button_api_start_set_model_request.click(start_api_set_model, [text_api_set_model_base_url, dropdown_api_gpt_models, dropdown_api_sovits_models, text_api_gpt_param, text_api_sovits_param], [text_api_start_set_model_request_info])
with gr.Tab(label=i18n("fast项目下api_v2.py服务")):
gr.Markdown(value=i18n("2.1.1:请到你的项目下,启动服务")) gr.Markdown(value=i18n("2.1.1:请到你的项目下,启动服务"))
gr.Markdown(value=i18n("2.1.2设置GPT模型参数")) gr.Markdown(value=i18n("2.1.2设置GPT模型参数"))
gr.Text(label=i18n("请输入api服务GPT模型切换接口地址"), value="", interactive=True) text_api_v2_set_gpt_model_base_url = gr.Text(label=i18n("请输入api服务GPT模型切换接口地址"), value="", interactive=True)
with gr.Row(): with gr.Row():
gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True) text_api_v2_gpt_model_param = gr.Text(label=i18n("GPT模型参数名"), value="", interactive=True)
gr.Dropdown(label=i18n("GPT模型列表"), choices=[], value="", interactive=True) dropdown_api_v2_gpt_models = gr.Dropdown(label=i18n("GPT模型列表"), choices=model_manager.get_gpt_model_names(), value="", interactive=True)
gr.Button(i18n("刷新模型路径"), variant="primary") button_api_v2_refresh_gpt = gr.Button(i18n("刷新模型路径"), variant="primary")
gr.Text(label=i18n("完整的GPT模型参数设置请求地址"), value="", interactive=False) button_api_v2_refresh_gpt.click(refresh_api_v2_gpt_model, [], [dropdown_api_v2_gpt_models])
text_api_v2_set_gpt_model_whole_url = gr.Text(label=i18n("完整的GPT模型参数设置请求地址"), value="", interactive=False)
text_api_v2_gpt_model_param.input(api_v2_set_gpt_whole_url, [text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_set_gpt_model_whole_url])
dropdown_api_v2_gpt_models.change(api_v2_set_gpt_whole_url, [text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_set_gpt_model_whole_url])
with gr.Row(): with gr.Row():
gr.Button(i18n("发起GPT模型设置请求"), variant="primary") button_api_v2_start_set_gpt_model_request = gr.Button(i18n("发起GPT模型设置请求"), variant="primary")
gr.Text(label=i18n("设置请求结果"), value="", interactive=False) text_api_v2_start_set_gpt_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False)
button_api_v2_start_set_gpt_model_request.click(start_api_v2_set_gpt_model, [text_api_v2_set_gpt_model_base_url, text_api_v2_gpt_model_param, dropdown_api_v2_gpt_models], [text_api_v2_start_set_gpt_model_request_info])
gr.Markdown(value=i18n("2.1.3设置SoVITS模型参数")) gr.Markdown(value=i18n("2.1.3设置SoVITS模型参数"))
gr.Text(label=i18n("请输入api服务SoVITS模型切换接口地址"), value="", interactive=True) text_api_v2_set_sovits_model_base_url = gr.Text(label=i18n("请输入api服务SoVITS模型切换接口地址"), value="", interactive=True)
with gr.Row(): with gr.Row():
gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True) text_api_v2_sovits_model_param = gr.Text(label=i18n("SoVITS模型参数名"), value="", interactive=True)
gr.Dropdown(label=i18n("SoVITS模型列表"), choices=[], value="", interactive=True) dropdown_api_v2_sovits_models = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=model_manager.get_sovits_model_names(), value="", interactive=True)
gr.Button(i18n("刷新模型路径"), variant="primary") button_api_v2_refresh_sovits = gr.Button(i18n("刷新模型路径"), variant="primary")
gr.Text(label=i18n("完整的SoVITS模型参数设置请求地址"), value="", interactive=False) button_api_v2_refresh_sovits.click(refresh_api_v2_sovits_model, [], [dropdown_api_v2_sovits_models])
text_api_v2_set_sovits_model_whole_url = gr.Text(label=i18n("完整的SoVITS模型参数设置请求地址"), value="", interactive=False)
text_api_v2_sovits_model_param.input(api_v2_set_sovits_whole_url, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_set_sovits_model_whole_url])
dropdown_api_v2_sovits_models.change(api_v2_set_sovits_whole_url, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_set_sovits_model_whole_url])
with gr.Row(): with gr.Row():
gr.Button(i18n("发起SoVITS模型设置请求"), variant="primary") button_api_v2_start_set_sovits_model_request = gr.Button(i18n("发起SoVITS模型设置请求"), variant="primary")
gr.Text(label=i18n("设置请求结果"), value="", interactive=False) text_api_v2_start_set_sovits_model_request_info = gr.Text(label=i18n("设置请求结果"), value="", interactive=False)
button_api_v2_start_set_sovits_model_request.click(start_api_v2_set_sovits_model, [text_api_v2_set_sovits_model_base_url, text_api_v2_sovits_model_param, dropdown_api_v2_sovits_models], [text_api_v2_start_set_sovits_model_request_info])
with gr.Tab(label=i18n("第三方推理服务")): with gr.Tab(label=i18n("第三方推理服务")):
gr.Markdown(value=i18n("启动第三方推理服务,并完成参考音频打包,模型参数设置等操作")) gr.Markdown(value=i18n("启动第三方推理服务,并完成参考音频打包,模型参数设置等操作"))
gr.Markdown(value=i18n("2.2:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步," gr.Markdown(value=i18n("2.2:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步,"
@ -567,7 +689,7 @@ if __name__ == '__main__':
default_test_content_path = params.default_test_text_path default_test_content_path = params.default_test_text_path
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path) text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的")) gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3, slider_request_concurrency_num = gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3,
interactive=True) interactive=True)
with gr.Row(): with gr.Row():
button_model_inference = gr.Button(i18n("开启批量推理"), variant="primary") button_model_inference = gr.Button(i18n("开启批量推理"), variant="primary")
@ -606,7 +728,7 @@ if __name__ == '__main__':
with gr.Row(): with gr.Row():
text_text_similarity_analysis_path = gr.Text(label=i18n("待分析的文件路径"), text_text_similarity_analysis_path = gr.Text(label=i18n("待分析的文件路径"),
value=default_text_similarity_analysis_path, interactive=True) value=default_text_similarity_analysis_path, interactive=True)
gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("文本相似度放大边界"), value=0.90, slider_text_similarity_amplification_boundary = gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("文本相似度放大边界"), value=0.90,
interactive=True) interactive=True)
button_asr.click(asr, [text_work_space_dir, text_role, text_asr_audio_dir, dropdown_asr_model, button_asr.click(asr, [text_work_space_dir, text_role, text_asr_audio_dir, dropdown_asr_model,
dropdown_asr_size, dropdown_asr_lang], dropdown_asr_size, dropdown_asr_lang],
@ -615,18 +737,22 @@ if __name__ == '__main__':
button_text_similarity_analysis = gr.Button(i18n("启动文本相似度分析"), variant="primary") button_text_similarity_analysis = gr.Button(i18n("启动文本相似度分析"), variant="primary")
text_text_similarity_analysis_info = gr.Text(label=i18n("文本相似度分析结果"), value="", text_text_similarity_analysis_info = gr.Text(label=i18n("文本相似度分析结果"), value="",
interactive=False) interactive=False)
button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_role, button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_role, slider_text_similarity_amplification_boundary,
text_text_similarity_analysis_path], text_text_similarity_analysis_path],
[text_text_similarity_analysis_info]) [text_text_similarity_analysis_info])
gr.Markdown(value=i18n("3.3:根据相似度分析结果,重点检查最后几条是否存在复读等问题")) gr.Markdown(value=i18n("3.3:根据相似度分析结果,重点检查最后几条是否存在复读等问题"))
with gr.Row(): with gr.Row():
gr.Text(label=i18n("文本相似度分析结果文件所在路径"), value="", interactive=True) text_text_similarity_result_path = gr.Text(label=i18n("文本相似度分析结果文件所在路径"), value="", interactive=True)
gr.Button(i18n("打开文本相似度分析结果文件"), variant="primary") button_open_text_similarity_result = gr.Button(i18n("打开文本相似度分析结果文件"), variant="primary")
gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("音频文本相似度边界值"), value=0.80, button_open_text_similarity_result.click(open_file, [text_text_similarity_result_path], [])
slider_audio_text_similarity_boundary = gr.Slider(minimum=0, maximum=1, step=0.01, label=i18n("音频文本相似度边界值"), value=0.80,
interactive=True) interactive=True)
text_sync_inference_audio_dir2 = gr.Text(label=i18n("被同步的推理音频路径"),
value="", interactive=True)
with gr.Row(): with gr.Row():
gr.Button(i18n("删除音频文本相似度边界值以下的参考音频"), variant="primary") button_delete_ref_audio_below_boundary = gr.Button(i18n("删除音频文本相似度边界值以下的参考音频"), variant="primary")
gr.Text(label=i18n("删除结果"), value="", interactive=True) text_delete_ref_audio_below_boundary_info = gr.Text(label=i18n("删除结果"), value="", interactive=True)
button_delete_ref_audio_below_boundary.click(delete_ref_audio_below_boundary, [text_model_inference_voice_dir, text_text_similarity_result_path, text_sync_inference_audio_dir2, slider_audio_text_similarity_boundary], [text_delete_ref_audio_below_boundary_info])
with gr.Tab(label=i18n("第四步:校验参考音频音质")): with gr.Tab(label=i18n("第四步:校验参考音频音质")):
gr.Markdown(value=i18n("4.1:对结果按音频相似度排序,或许有用吧,主要还是耳朵听")) gr.Markdown(value=i18n("4.1:对结果按音频相似度排序,或许有用吧,主要还是耳朵听"))
with gr.Row(): with gr.Row():
@ -676,7 +802,7 @@ if __name__ == '__main__':
[text_sample_info, text_model_inference_voice_dir, text_sync_ref_audio_dir, [text_sample_info, text_model_inference_voice_dir, text_sync_ref_audio_dir,
text_sync_ref_audio_dir2]) text_sync_ref_audio_dir2])
button_model_inference.click(model_inference, button_model_inference.click(model_inference,
[text_work_space_dir, text_role, text_model_inference_voice_dir, text_url, [text_work_space_dir, text_role, slider_request_concurrency_num, text_model_inference_voice_dir, text_url,
text_text, text_ref_path, text_ref_text, text_emotion, text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content], text_test_content],
[text_model_inference_info, text_asr_audio_dir, text_sync_inference_audio_dir]) [text_model_inference_info, text_asr_audio_dir, text_sync_inference_audio_dir])

View File

@ -3,16 +3,55 @@ import os
import requests import requests
import itertools import itertools
import multiprocessing import multiprocessing
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ProcessPoolExecutor
import numpy as np import numpy as np
import Ref_Audio_Selector.config_param.config_params as params import Ref_Audio_Selector.config_param.config_params as params
from Ref_Audio_Selector.common.time_util import timeit_decorator
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
from Ref_Audio_Selector.config_param.log_config import logger, p_logger from Ref_Audio_Selector.config_param.log_config import logger, p_logger
class SetModelURLComposer:
    """Builds model-switch request urls for the 'gpt', 'sovits' or 'all' api variants."""

    def __init__(self, type, base_url, gpt_param_name, sovits_param_name):
        self.type = type
        self.base_url = base_url
        self.gpt_param_name = gpt_param_name
        self.sovits_param_name = sovits_param_name

    def is_valid(self):
        """Raise when the base url or a parameter name required by *type* is missing."""
        if self.base_url is None or self.base_url == '':
            raise Exception("请求地址不能为空")
        if self.type in ['gpt', 'all']:
            if self.gpt_param_name is None or self.gpt_param_name == '':
                raise Exception("GPT参数名不能为空")
        if self.type in ['sovits', 'all']:
            if self.sovits_param_name is None or self.sovits_param_name == '':
                raise Exception("Sovits参数名不能为空")

    def _collect_params(self, value_array):
        # Map positional model names onto the configured parameter names.
        query = {}
        if self.type == 'gpt':
            query[self.gpt_param_name] = value_array[0]
        if self.type == 'sovits':
            query[self.sovits_param_name] = value_array[0]
        if self.type == 'all':
            query[self.gpt_param_name] = value_array[0]
            query[self.sovits_param_name] = value_array[1]
        return query

    def build_get_url(self, value_array, need_url_encode=True):
        """Return a GET url with the model params appended to the query string."""
        return append_params_to_url(self.base_url, self._collect_params(value_array), need_url_encode)

    def build_post_url(self, value_array, need_url_encode=True):
        """Return (url, body-params) for a POST model-switch request."""
        url = append_params_to_url(self.base_url, {}, need_url_encode)
        return url, self._collect_params(value_array)
class TTSURLComposer:
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name): def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
self.base_url = base_url self.base_url = base_url
self.emotion_param_name = emotion_param_name self.emotion_param_name = emotion_param_name
@ -34,30 +73,26 @@ class URLComposer:
return self.emotion_param_name is not None and self.emotion_param_name != '' return self.emotion_param_name is not None and self.emotion_param_name != ''
def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True): def build_url_with_emotion(self, text_value, emotion_value, need_url_encode=True):
if not self.emotion_param_name:
raise ValueError("Emotion parameter name is not set.")
params = { params = {
self.text_param_name: text_value, self.text_param_name: text_value,
self.emotion_param_name: emotion_value, self.emotion_param_name: emotion_value,
} }
return self._append_params_to_url(params, need_url_encode) return append_params_to_url(self.base_url, params, need_url_encode)
def build_url_with_ref(self, text_value, ref_path_value, ref_text_value, need_url_encode=True): def build_url_with_ref(self, text_value, ref_path_value, ref_text_value, need_url_encode=True):
if self.emotion_param_name:
raise ValueError("Cannot use reference parameters when emotion parameter is set.")
params = { params = {
self.text_param_name: text_value, self.text_param_name: text_value,
self.ref_path_param_name: ref_path_value, self.ref_path_param_name: ref_path_value,
self.ref_text_param_name: ref_text_value, self.ref_text_param_name: ref_text_value,
} }
return self._append_params_to_url(params, need_url_encode) return append_params_to_url(self.base_url, params, need_url_encode)
def _append_params_to_url(self, params, need_url_encode):
url_with_params = self.base_url def append_params_to_url(url_with_params, params, need_url_encode):
if params: if params:
query_params = '&'.join([f"{k}={v}" for k, v in params.items()]) query_params = '&'.join([f"{k}={v}" for k, v in params.items()])
url_with_params += '?' + query_params if '?' not in self.base_url else '&' + query_params url_with_params += '?' + query_params if '?' not in url_with_params else '&' + query_params
return url_with_params if not need_url_encode else safe_encode_query_params(url_with_params) return url_with_params if not need_url_encode else safe_encode_query_params(url_with_params)
def safe_encode_query_params(original_url): def safe_encode_query_params(original_url):
@ -87,8 +122,9 @@ def generate_audio_files_parallel(url_composer, text_list, emotion_list, output_
emotion_groups = np.array_split(emotion_list, num_processes) emotion_groups = np.array_split(emotion_list, num_processes)
with ProcessPoolExecutor(max_workers=num_processes) as executor: with ProcessPoolExecutor(max_workers=num_processes) as executor:
futures = [executor.submit(generate_audio_files_for_emotion_group, url_composer, text_list, group, output_dir_path) futures = [
for group in emotion_groups] executor.submit(generate_audio_files_for_emotion_group, url_composer, text_list, group, output_dir_path)
for group in emotion_groups]
for future in futures: for future in futures:
future.result() # 等待所有进程完成 future.result() # 等待所有进程完成
@ -162,3 +198,33 @@ def inference_audio_from_api(url):
return response.content return response.content
else: else:
raise Exception(f"Failed to fetch audio from API. Server responded with status code {response.status_code}.") raise Exception(f"Failed to fetch audio from API. Server responded with status code {response.status_code}.")
def start_api_set_model(set_model_url_composer, gpt_models, sovits_models):
    """
    Switch both the GPT and SoVITS models through a single POST request.

    :param set_model_url_composer: URL builder exposing build_post_url()
    :param gpt_models: GPT model weight path to load
    :param sovits_models: SoVITS model weight path to load
    :return: response body text on HTTP 200, otherwise a failure message
             containing the status code
    """
    url, payload = set_model_url_composer.build_post_url(gpt_models, sovits_models)
    response = requests.post(url, json=payload)
    if response.status_code != 200:
        return f'请求失败,状态码:{response.status_code}'
    return response.text
def start_api_v2_set_gpt_model(set_model_url_composer, gpt_models):
    """
    Switch the GPT model through a GET request (api v2 style).

    :param set_model_url_composer: URL builder exposing build_get_url()
    :param gpt_models: GPT model weight path to load
    :return: response body text on HTTP 200, otherwise a failure message
             containing the status code
    """
    request_url = set_model_url_composer.build_get_url([gpt_models])
    response = requests.get(request_url)
    if response.status_code != 200:
        return f'请求失败,状态码:{response.status_code}'
    return response.text
def start_api_v2_set_sovits_model(set_model_url_composer, sovits_models):
    """
    Switch the SoVITS model through a GET request (api v2 style).

    :param set_model_url_composer: URL builder exposing build_get_url()
    :param sovits_models: SoVITS model weight path to load
    :return: response body text on HTTP 200, otherwise a failure message
             containing the status code
    """
    request_url = set_model_url_composer.build_get_url([sovits_models])
    response = requests.get(request_url)
    if response.status_code != 200:
        return f'请求失败,状态码:{response.status_code}'
    return response.text

View File

@ -0,0 +1,34 @@
import os
import re
# Paths of the pretrained checkpoints bundled with GPT-SoVITS; they are
# placed first in the model lists built below.
pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth"
pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
# Directories scanned for user-trained weight files.
SoVITS_weight_root = "SoVITS_weights"
GPT_weight_root = "GPT_weights"
# Ensure both weight directories exist so later os.listdir calls cannot fail.
os.makedirs(SoVITS_weight_root, exist_ok=True)
os.makedirs(GPT_weight_root, exist_ok=True)
def custom_sort_key(s):
    """
    Natural-sort key: split *s* into alternating text and digit runs so
    that e.g. 'model10' sorts after 'model2'.

    :param s: string (typically a model file name) to build the key from
    :return: list of str / int parts usable as a sort key
    """
    # Raw string for the regex: '(\d+)' without r-prefix is an invalid
    # escape sequence and warns on modern Python. The capturing group makes
    # re.split keep the digit runs in the result.
    parts = re.split(r'(\d+)', s)
    # Digit runs become ints so numeric segments compare numerically.
    return [int(part) if part.isdigit() else part for part in parts]
def get_gpt_model_names():
    """
    Return the available GPT model paths: the bundled pretrained checkpoint
    plus every .ckpt file found in the GPT weights directory.

    :return: naturally sorted list of GPT model paths
    """
    gpt_names = [pretrained_gpt_name]
    # Bug fix: GPT checkpoints (.ckpt) live under GPT_weight_root; the old
    # code scanned SoVITS_weight_root, which holds .pth files, so trained
    # GPT models were never listed.
    for name in os.listdir(GPT_weight_root):
        if name.endswith(".ckpt"):
            gpt_names.append("%s/%s" % (GPT_weight_root, name))
    # Bug fix: sorted() returns a new list that was previously discarded;
    # sort in place so the returned list is actually ordered.
    gpt_names.sort(key=custom_sort_key)
    return gpt_names
def get_sovits_model_names():
    """
    Return the available SoVITS model paths: the bundled pretrained
    checkpoint plus every .pth file found in the SoVITS weights directory.

    :return: naturally sorted list of SoVITS model paths
    """
    sovits_names = [pretrained_sovits_name]
    # Bug fix: SoVITS weights (.pth) live under SoVITS_weight_root; the old
    # code scanned GPT_weight_root, which holds .ckpt files, so trained
    # SoVITS models were never listed.
    for name in os.listdir(SoVITS_weight_root):
        if name.endswith(".pth"):
            sovits_names.append("%s/%s" % (SoVITS_weight_root, name))
    # Bug fix: sorted() returns a new list that was previously discarded;
    # sort in place so the returned list is actually ordered.
    sovits_names.sort(key=custom_sort_key)
    return sovits_names

View File

@ -0,0 +1,77 @@
import os
import Ref_Audio_Selector.common.common as common
import Ref_Audio_Selector.tool.audio_check as audio_check
from Ref_Audio_Selector.config_param.log_config import logger
def parse_text_similarity_result_txt(file_path):
    """
    Parse the similarity report txt file; each line is formatted as
    f"{item['average_similarity_score']}|{item['count']}|{item['emotion']}".

    :param file_path: path of the txt report file
    :return: list of dicts with keys 'average_similarity_score' (float),
             'count' (int) and 'emotion' (str); malformed lines are skipped
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as fp:
        for raw_line in fp:
            stripped = raw_line.strip()
            fields = stripped.split('|')
            # Lines without exactly three '|'-separated fields are ignored.
            if len(fields) != 3:
                continue
            score_text, count_text, emotion = fields
            try:
                records.append({
                    'average_similarity_score': float(score_text),
                    'count': int(count_text),
                    'emotion': emotion,
                })
            except ValueError as e:
                # Numeric fields failed to convert: log and skip the line.
                logger.error(f"Error parsing line: {stripped} - {e}")
    return records
def remove_low_similarity_files(ref_audio_list, report_list, audio_text_similarity_boundary):
    """
    Delete the audio files of every emotion group whose average similarity
    score fell below the threshold, and report how many were removed.

    :param ref_audio_list: iterable of dicts carrying 'ref_path' and 'emotion'
    :param report_list: list of dicts carrying 'average_similarity_score' and 'emotion'
    :param audio_text_similarity_boundary: similarity threshold
    :return: number of files actually deleted
    """
    deleted_count = 0
    for report in report_list:
        # Only emotion groups scoring below the boundary are purged.
        if report['average_similarity_score'] >= audio_text_similarity_boundary:
            continue
        target_emotion = report['emotion']
        for ref in ref_audio_list:
            if ref['emotion'] != target_emotion:
                continue
            ref_path = ref['ref_path']
            if not os.path.exists(ref_path):
                logger.error(f"File not found: {ref_path}")
                continue
            try:
                os.remove(ref_path)
                deleted_count += 1
                logger.info(f"Deleted file: {ref_path}")
            except Exception as e:
                # Deletion failures (permissions, locks) are logged, not fatal.
                logger.error(f"Error deleting file {ref_path}: {e}")
    return deleted_count
def delete_ref_audio_below_boundary(ref_audio_path, text_similarity_result_path, sync_inference_audio_dir,
                                    audio_text_similarity_boundary):
    """
    Remove reference audios whose text similarity falls below the boundary,
    then re-sync the inference audio directory against what remains.

    NOTE(review): a common.RefAudioListManager instance is handed to
    remove_low_similarity_files, which iterates it as dicts with
    'ref_path'/'emotion' keys — confirm the manager supports that, or pass
    its underlying list instead.

    :param ref_audio_path: root directory of the reference audios
    :param text_similarity_result_path: path of the similarity report txt
    :param sync_inference_audio_dir: inference audio dir to sync afterwards
    :param audio_text_similarity_boundary: similarity threshold
    :return: number of deleted audio files
    """
    audio_manager = common.RefAudioListManager(ref_audio_path)
    similarity_reports = parse_text_similarity_result_txt(text_similarity_result_path)
    deleted_total = remove_low_similarity_files(audio_manager, similarity_reports,
                                                audio_text_similarity_boundary)
    audio_check.sync_ref_audio(ref_audio_path, sync_inference_audio_dir)
    return deleted_total

View File

@ -56,7 +56,7 @@ def calculate_average_similarity_by_emotion(data_list):
similarity_score = item['similarity_score'] similarity_score = item['similarity_score']
result_dict[emotion].append(similarity_score) result_dict[emotion].append(similarity_score)
average_scores = [{'emotion': emotion, 'average_similarity_score': sum(scores) / len(scores)} average_scores = [{'emotion': emotion, 'average_similarity_score': sum(scores) / len(scores), 'count': len(scores)}
for emotion, scores in result_dict.items()] for emotion, scores in result_dict.items()]
average_scores.sort(key=lambda x: x['average_similarity_score'], reverse=True) average_scores.sort(key=lambda x: x['average_similarity_score'], reverse=True)
@ -123,7 +123,7 @@ def process(asr_file_path, output_dir, similarity_enlarge_boundary):
average_similarity_file = os.path.join(output_dir, average_similarity_file = os.path.join(output_dir,
f'{params.text_emotion_average_similarity_report_filename}.txt') f'{params.text_emotion_average_similarity_report_filename}.txt')
average_similarity_content = \ average_similarity_content = \
'\n'.join([f"{item['average_similarity_score']}|{item['emotion']}" for item in average_similarity_list]) '\n'.join([f"{item['average_similarity_score']}|{item['count']}|{item['emotion']}" for item in average_similarity_list])
common.write_text_to_file(average_similarity_content, average_similarity_file) common.write_text_to_file(average_similarity_content, average_similarity_file)
emotion_detail_list = group_and_sort_by_field(records, 'emotion') emotion_detail_list = group_and_sort_by_field(records, 'emotion')