添加说话人确认模型切换

This commit is contained in:
Downupanddownup 2024-04-29 14:14:16 +08:00
parent 5081168918
commit 8182908f7d
3 changed files with 55 additions and 16 deletions

View File

@ -8,6 +8,18 @@ GPT_weight_root = "GPT_weights"
os.makedirs(SoVITS_weight_root, exist_ok=True)
os.makedirs(GPT_weight_root, exist_ok=True)
speaker_verification_models = {
'speech_campplus_sv_zh-cn_16k-common': {
'task': 'speaker-verification',
'model': 'Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
'model_revision': 'v1.0.0'
},
'speech_eres2net_sv_zh-cn_16k-common': {
'task': 'speaker-verification',
'model': 'Ref_Audio_Selector/tool/speaker_verification/models/speech_eres2net_sv_zh-cn_16k-common',
'model_revision': 'v1.0.5'
}
}
def custom_sort_key(s):
# 使用正则表达式提取字符串中的数字部分和非数字部分

View File

@ -71,7 +71,7 @@ def convert_from_list(text_work_space_dir, text_role, text_list_input):
return i18n(text_convert_from_list_info), text_sample_dir
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
def start_similarity_analysis(work_space_dir, sample_dir, speaker_verification, base_voice_path, need_similarity_output):
similarity_list = None
similarity_file_dir = None
@ -87,6 +87,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
cmd += f' -r "{base_voice_path}"'
cmd += f' -c "{sample_dir}"'
cmd += f' -o {similarity_file}'
cmd += f' -m {speaker_verification}'
logger.info(cmd)
p_similarity = Popen(cmd, shell=True)
@ -105,7 +106,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
# 基于一个基准音频,从参考音频目录中进行分段抽样
def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path,
def sample(text_work_space_dir, text_role, text_sample_dir, dropdown_speaker_verification, text_base_voice_path,
slider_subsection_num, slider_sample_num, checkbox_similarity_output):
text_work_space_dir, text_sample_dir, text_base_voice_path \
= common.batch_clean_paths([text_work_space_dir, text_sample_dir, text_base_voice_path])
@ -122,11 +123,13 @@ def sample(text_work_space_dir, text_role, text_sample_dir, text_base_voice_path
raise Exception("分段数不能为空")
if slider_sample_num is None or slider_sample_num == '':
raise Exception("每段随机抽样个数不能为空")
if dropdown_speaker_verification is None or dropdown_speaker_verification == '':
raise Exception("说话人确认算法不能为空")
ref_audio_dir = os.path.join(base_role_dir, params.reference_audio_dir)
time_consuming, (similarity_list, _, _) \
= time_util.time_monitor(start_similarity_analysis)(base_role_dir, text_sample_dir, text_base_voice_path,
= time_util.time_monitor(start_similarity_analysis)(base_role_dir, text_sample_dir, dropdown_speaker_verification, text_base_voice_path,
checkbox_similarity_output)
text_sample_info = f"耗时:{time_consuming:0.1f}秒;抽样成功:生成目录{ref_audio_dir}"
@ -315,7 +318,7 @@ hide_voice_similarity_dir = ''
# 根据一个参考音频,对指定目录下的音频进行相似度分析,并输出到另一个目录
def similarity_audio_output(text_work_space_dir, text_role, text_base_audio_path,
text_compare_audio_dir):
text_compare_audio_dir, dropdown_speaker_verification):
global hide_voice_similarity_dir
text_work_space_dir, text_base_audio_path, text_compare_audio_dir \
= common.batch_clean_paths([text_work_space_dir, text_base_audio_path, text_compare_audio_dir])
@ -327,10 +330,12 @@ def similarity_audio_output(text_work_space_dir, text_role, text_base_audio_path
raise Exception("基准音频路径不能为空")
if text_compare_audio_dir is None or text_compare_audio_dir == '':
raise Exception("待分析的音频所在目录不能为空")
if dropdown_speaker_verification is None or dropdown_speaker_verification == '':
raise Exception("说话人验证模型不能为空")
time_consuming, (similarity_list, similarity_file, similarity_file_dir) \
= time_util.time_monitor(start_similarity_analysis)(base_role_dir,
text_compare_audio_dir, text_base_audio_path, True)
= time_util.time_monitor(start_similarity_analysis)(base_role_dir,text_compare_audio_dir,
dropdown_speaker_verification, text_base_audio_path, True)
if similarity_list is None:
raise Exception("相似度分析失败")
@ -657,8 +662,14 @@ def init_ui():
scale=4)
button_convert_from_list_result_dir = gr.Button(i18n("打开目录"), variant="primary", scale=1)
gr.Markdown(value=i18n("1.2:选择基准音频,执行相似度匹配,并分段随机抽样"))
text_sample_dir = gr.Text(label=i18n("参考音频抽样目录"), value=init.text_sample_dir_default,
interactive=True)
with gr.Row():
text_sample_dir = gr.Text(label=i18n("参考音频抽样目录"), value=init.text_sample_dir_default,
interactive=True)
dropdown_speaker_verification_1 = gr.Dropdown(label=i18n("说话人确认算法"),
choices=list(
model_manager.speaker_verification_models.keys()),
value='speech_campplus_sv_zh-cn_16k-common',
interactive=True)
button_convert_from_list_result_dir.click(open_file, [text_sample_dir], [])
button_convert_from_list.click(convert_from_list, [text_work_space_dir, text_role, text_list_input],
[text_convert_from_list_info, text_sample_dir])
@ -975,6 +986,11 @@ def init_ui():
with gr.Row():
text_base_audio_path = gr.Text(label=i18n("请输入基准音频"), value="")
text_compare_audio_dir = gr.Text(label=i18n("请输入待比较的音频文件目录"), value="")
dropdown_speaker_verification_2 = gr.Dropdown(label=i18n("说话人确认算法"),
choices=list(
model_manager.speaker_verification_models.keys()),
value='speech_campplus_sv_zh-cn_16k-common',
interactive=True)
with gr.Row():
button_similarity_audio_output = gr.Button(i18n("输出相似度-参考音频到临时目录"), variant="primary",
scale=4)
@ -983,7 +999,7 @@ def init_ui():
button_similarity_audio_output_result_open = gr.Button(i18n("打开目录"), variant="primary", scale=1)
button_similarity_audio_output.click(similarity_audio_output,
[text_work_space_dir, text_role, text_base_audio_path,
text_compare_audio_dir], [text_similarity_audio_output_info])
text_compare_audio_dir, dropdown_speaker_verification_2], [text_similarity_audio_output_info])
button_similarity_audio_output_result_open.click(lambda: open_file(hide_voice_similarity_dir), [], [])
gr.Markdown(value=i18n("4.2:如果发现存在低音质的推理音频,那么就去参考音频目录下,把原参考音频删了"))
gr.Markdown(value=i18n("4.3:删除参考音频之后,按下面的操作,会将推理音频目录下对应的音频也删掉"))
@ -1005,7 +1021,7 @@ def init_ui():
[text_work_space_dir, text_role, text_template, text_refer_audio_file_dir],
[text_create_config_info])
button_create_config_result_open.click(lambda: open_file(hide_config_file), [], [])
button_sample.click(sample, [text_work_space_dir, text_role, text_sample_dir, text_base_voice_path,
button_sample.click(sample, [text_work_space_dir, text_role, text_sample_dir, dropdown_speaker_verification_1, text_base_voice_path,
slider_subsection_num, slider_sample_num, checkbox_similarity_output],
[text_sample_info, text_refer_audio_file_dir])
button_sample_result_open.click(open_file, [text_refer_audio_file_dir], [])

View File

@ -6,18 +6,24 @@ import platform
import Ref_Audio_Selector.config_param.config_params as params
import Ref_Audio_Selector.config_param.log_config as log_config
from Ref_Audio_Selector.common.time_util import timeit_decorator
from Ref_Audio_Selector.common.model_manager import speaker_verification_models as models
from modelscope.pipelines import pipeline
sv_pipeline = pipeline(
task='speaker-verification',
model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
model_revision='v1.0.0'
)
def init_model(model_type='speech_campplus_sv_zh-cn_16k-common'):
log_config.logger.info(f'人声识别模型类型:{model_type}')
return pipeline(
task=models[model_type]['task'],
model=models[model_type]['model'],
model_revision=models[model_type]['model_revision']
)
@timeit_decorator
def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path, output_file_path):
def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path, output_file_path, model_type):
sv_pipeline = init_model(model_type)
# Step 1: 获取比较音频目录下所有音频文件的路径
comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if
f.endswith('.wav')]
@ -113,6 +119,10 @@ def parse_arguments():
parser.add_argument("-o", "--output_file", type=str, required=True,
help="Path to the output file where results will be written.")
# Model Type
parser.add_argument("-m", "--model_type", type=str, required=True,
help="Path to the model type.")
return parser.parse_args()
@ -122,6 +132,7 @@ if __name__ == '__main__':
reference_audio_path=cmd.reference_audio,
comparison_dir_path=cmd.comparison_dir,
output_file_path=cmd.output_file,
model_type=cmd.model_type,
)
# compare_audio_and_generate_report(