diff --git a/Ref_Audio_Selector/config.ini b/Ref_Audio_Selector/config.ini
new file mode 100644
index 0000000..1aef25d
--- /dev/null
+++ b/Ref_Audio_Selector/config.ini
@@ -0,0 +1,45 @@
+# config.ini
+
+[Base]
+# Working directory
+work_dir =
+# Role
+role =
+# Reference audio directory
+reference_audio_dir = refer_audio
+
+[AudioSample]
+# Directory for candidate reference audio extracted from the list file
+list_to_convert_reference_audio_dir = refer_audio_all
+# Audio similarity directory
+audio_similarity_dir = similarity
+
+[Inference]
+# Default test text path
+default_test_text_path = Ref_Audio_Selector/file/test_content/test_content.txt
+# Inference audio directory
+inference_audio_dir = inference_audio
+# Directory aggregating inference audio by text
+inference_audio_text_aggregation_dir = text
+# Directory aggregating inference audio by emotion
+inference_audio_emotion_aggregation_dir = emotion
+
+[ResultCheck]
+# ASR output file name
+asr_filename = asr
+# Text similarity output directory
+text_similarity_output_dir = text_similarity
+# File name of the average text similarity report per emotion
+text_emotion_average_similarity_report_filename = average_similarity
+# File name of the text similarity detail grouped by emotion
+text_similarity_by_emotion_detail_filename = emotion_group_detail
+# File name of the text similarity detail grouped by text
+text_similarity_by_text_detail_filename = text_group_detail
+
+[AudioConfig]
+# Default template file path
+default_template_path = Ref_Audio_Selector/file/config_template/ref_audio_template.txt
+# Reference audio config file name
+reference_audio_config_filename = refer_audio
+
+[Other]
\ No newline at end of file
diff --git a/Ref_Audio_Selector/config/__init__.py b/Ref_Audio_Selector/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Ref_Audio_Selector/config/config_manager.py b/Ref_Audio_Selector/config/config_manager.py
new file mode 100644
index 0000000..6c87ef9
--- /dev/null
+++ b/Ref_Audio_Selector/config/config_manager.py
@@ -0,0 +1,70 @@
+import configparser
+import io
+import re
+
+
+class ConfigManager:
+    def __init__(self):
+        self.config_path = 'Ref_Audio_Selector/config.ini'
+        self.comments = []
+        self.config = None
+        self.read_with_comments()
+
+    def read_with_comments(self):
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+
+        self.comments = []
+        for i, line in enumerate(lines):
+            if line.startswith(';') or line.startswith('#'):
+                self.comments.append((i, line))
+
+        self.config = configparser.ConfigParser()
+        self.config.read_string(''.join(lines))
+
+    def write_with_comments(self):
+        output_lines = []
+
+        # Write the config entries first; ConfigParser.write() expects a
+        # file-like object and returns None, so serialize through a buffer.
+        buffer = io.StringIO()
+        self.config.write(buffer)
+        output_lines.extend(buffer.getvalue().splitlines(True))  # keep line endings
+
+        # Then re-insert the original comments
+        for index, comment in sorted(self.comments, reverse=True):  # insert from the end so earlier indexes stay valid
+            while len(output_lines) < index + 1:
+                output_lines.append('\n')  # pad with blank lines
+            output_lines.insert(index, comment)
+
+        with open(self.config_path, 'w', encoding='utf-8') as f:
+            f.writelines(output_lines)
+
+    def get_base(self, key):
+        return self.config.get('Base', key)
+
+    def set_base(self, key, value):
+        self.config.set('Base', key, value)
+        self.write_with_comments()
+
+    def get_audio_sample(self, key):
+        return self.config.get('AudioSample', key)
+
+    def get_inference(self, key):
+        return self.config.get('Inference', key)
+
+    def get_result_check(self, key):
+        return self.config.get('ResultCheck', key)
+
+    def get_audio_config(self, key):
+        return self.config.get('AudioConfig', key)
+
+    def get_other(self, key):
+        return self.config.get('Other', key)
+
+
+_config = ConfigManager()
+
+
+def get_config():
+    return _config
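For reference, a minimal usage sketch of the ConfigManager accessors added above (illustration only, not part of the diff; it assumes the process runs from the repository root so the hard-coded path Ref_Audio_Selector/config.ini resolves, and the variable names and the 'example_role' value are placeholders):

import Ref_Audio_Selector.config.config_manager as config_manager

config = config_manager.get_config()  # module-level singleton created at import time

# Each get_* accessor reads one key from the matching [Section] of config.ini.
ref_dir = config.get_base('reference_audio_dir')                          # 'refer_audio'
text_dir = config.get_inference('inference_audio_text_aggregation_dir')   # 'text'
asr_name = config.get_result_check('asr_filename')                        # 'asr'

# set_base() updates the value and rewrites config.ini, re-inserting the
# comments that were captured when the file was first read.
config.set_base('role', 'example_role')  # 'example_role' is a placeholder value
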
diff --git a/Ref_Audio_Selector/ref_audio_selector_webui.py b/Ref_Audio_Selector/ref_audio_selector_webui.py
index c96523a..8cb1a57 100644
--- a/Ref_Audio_Selector/ref_audio_selector_webui.py
+++ b/Ref_Audio_Selector/ref_audio_selector_webui.py
@@ -8,6 +8,7 @@ import Ref_Audio_Selector.tool.audio_inference as audio_inference
 import Ref_Audio_Selector.tool.audio_config as audio_config
 import Ref_Audio_Selector.tool.delete_inference_with_ref as delete_inference_with_ref
 import Ref_Audio_Selector.common.common as common
+import Ref_Audio_Selector.config.config_manager as config_manager
 from tools.i18n.i18n import I18nAuto
 from config import python_exec, is_half
 from tools import my_utils
@@ -15,6 +16,7 @@ from tools.asr.config import asr_dict
 from subprocess import Popen
 
 i18n = I18nAuto()
+config = config_manager.get_config()
 
 p_similarity = None
 p_asr = None
@@ -29,7 +31,8 @@ def check_base_info(text_work_space_dir):
 
 # Extract candidate reference audio from the list file
 def convert_from_list(text_work_space_dir, text_list_input):
-    ref_audio_all = os.path.join(text_work_space_dir, 'refer_audio_all')
+    ref_audio_all = os.path.join(text_work_space_dir,
+                                 config.get_audio_sample('list_to_convert_reference_audio_dir'))
     text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}"
     text_sample_dir = ref_audio_all
     try:
@@ -48,7 +51,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
     similarity_list = None
     similarity_file_dir = None
 
-    similarity_dir = os.path.join(work_space_dir, 'similarity')
+    similarity_dir = os.path.join(work_space_dir, config.get_audio_sample('audio_similarity_dir'))
     os.makedirs(similarity_dir, exist_ok=True)
 
     base_voice_file_name = common.get_filename_without_extension(base_voice_path)
@@ -79,7 +82,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
 # Based on one base audio file, sample the reference audio directory in segments
 def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
            text_subsection_num, text_sample_num, checkbox_similarity_output):
-    ref_audio_dir = os.path.join(text_work_space_dir, 'refer_audio')
+    ref_audio_dir = os.path.join(text_work_space_dir, config.get_base('reference_audio_dir'))
     text_sample_info = f"抽样成功:生成目录{ref_audio_dir}"
     try:
         check_base_info(text_work_space_dir)
@@ -114,8 +117,9 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
 def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_url,
                     text_text, text_ref_path, text_ref_text, text_emotion,
                     text_test_content):
-    inference_dir = os.path.join(text_work_space_dir, 'inference_audio')
-    text_asr_audio_dir = os.path.join(inference_dir, 'text')
+    inference_dir = os.path.join(text_work_space_dir, config.get_inference('inference_audio_dir'))
+    text_asr_audio_dir = os.path.join(inference_dir,
+                                      config.get_inference('inference_audio_text_aggregation_dir'))
     text_model_inference_info = f"推理成功:生成目录{inference_dir}"
     try:
         check_base_info(text_work_space_dir)
@@ -164,7 +168,7 @@ def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
         if dropdown_asr_lang is None or dropdown_asr_lang == '':
             raise Exception("asr语言不能为空")
         asr_file = open_asr(text_asr_audio_dir, text_work_space_dir, dropdown_asr_model, dropdown_asr_size,
-                           dropdown_asr_lang)
+                            dropdown_asr_lang)
         text_text_similarity_analysis_path = asr_file
         text_asr_info = f"asr成功:生成文件{asr_file}"
     except Exception as e:
@@ -198,7 +202,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
         output_dir_abs = os.path.abspath(asr_opt_dir)
         output_file_name = os.path.basename(asr_inp_dir)
         # Build the output file path
-        output_file_path = os.path.join(output_dir_abs, f'{output_file_name}_asr.list')
+        output_file_path = os.path.join(output_dir_abs, f'{config.get_result_check("asr_filename")}.list')
         return output_file_path
 
     else:
@@ -208,7 +212,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
 # Run a similarity analysis between the ASR output file and the original text
 def text_similarity_analysis(text_work_space_dir,
                              text_text_similarity_analysis_path):
-    similarity_dir = os.path.join(text_work_space_dir, 'text_similarity')
+    similarity_dir = os.path.join(text_work_space_dir, config.get_result_check('text_similarity_output_dir'))
     text_text_similarity_analysis_info = f"相似度分析成功:生成目录{similarity_dir}"
     try:
         check_base_info(text_work_space_dir)
@@ -243,7 +247,7 @@ def open_text_similarity_analysis(asr_file_path, output_dir, similarity_enlarge_
 # Based on one reference audio file, run similarity analysis on the audio in the given directory and write the results to another directory
 def similarity_audio_output(text_work_space_dir, text_base_audio_path,
                             text_compare_audio_dir):
-    text_similarity_audio_output_info = "相似度分析成功:生成目录XXX"
+    text_similarity_audio_output_info = None
     try:
         check_base_info(text_work_space_dir)
         if text_base_audio_path is None or text_base_audio_path == '':
@@ -274,7 +278,8 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
             raise Exception("参考音频目录不能为空")
         if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '':
             raise Exception("推理生成的音频目录不能为空")
-        delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio(text_sync_ref_audio_dir, text_sync_inference_audio_dir)
+        delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio(text_sync_ref_audio_dir,
+                                                                                               text_sync_inference_audio_dir)
         text_sync_ref_audio_info = f"推理音频目录{text_sync_inference_audio_dir}下,text目录删除了{delete_text_wav_num}个参考音频,emotion目录下,删除了{delete_emotion_dir_num}个目录"
     except Exception as e:
         traceback.print_exc()
@@ -284,7 +289,7 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
 
 # Generate the reference audio config content from the template and the reference audio directory
 def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
-    config_file = os.path.join(text_work_space_dir, 'refer_audio.json')
+    config_file = os.path.join(text_work_space_dir, f'{config.get_audio_config("reference_audio_config_filename")}.json')
     text_create_config_info = f"配置生成成功:生成文件{config_file}"
     try:
         check_base_info(text_work_space_dir)
@@ -293,7 +298,8 @@ def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
         if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
             raise Exception("参考音频目录不能为空")
         ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
-        audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(), config_file)
+        audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(),
+                                           config_file)
     except Exception as e:
         traceback.print_exc()
         text_create_config_info = f"发生异常:{e}"
@@ -353,7 +359,7 @@ with gr.Blocks() as app:
             text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
                                [text_whole_url])
            gr.Markdown(value=i18n("2.2:配置待推理文本,一句一行,不要太多,10条即可"))
-            default_test_content_path = 'Ref_Audio_Selector/tool/test_content/test_content.txt'
+            default_test_content_path = config.get_inference('default_test_text_path')
             text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
             gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
             gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
@@ -421,7 +427,7 @@ with gr.Blocks() as app:
                                      text_sync_inference_audio_dir], [text_sync_ref_info])
         with gr.Accordion("第四步:生成参考音频配置文本", open=False):
             gr.Markdown(value=i18n("4.1:编辑模板"))
-            default_template_path = 'Ref_Audio_Selector/tool/config_template/ref_audio_template.txt'
+            default_template_path = config.get_audio_config('default_template_path')
             default_template_content = common.read_file(default_template_path)
             text_template_path = gr.Text(label=i18n("模板文件路径"), value=default_template_path, interactive=False)
             text_template = gr.Text(label=i18n("模板内容"), value=default_template_content, lines=10)
diff --git a/Ref_Audio_Selector/tool/asr/funasr_asr_multi_level_dir.py b/Ref_Audio_Selector/tool/asr/funasr_asr_multi_level_dir.py
index a67822d..24753b5 100644
--- a/Ref_Audio_Selector/tool/asr/funasr_asr_multi_level_dir.py
+++ b/Ref_Audio_Selector/tool/asr/funasr_asr_multi_level_dir.py
@@ -3,9 +3,10 @@ import argparse
 import os
 import traceback
 
+import Ref_Audio_Selector.config.config_manager as config_manager
 from tqdm import tqdm
-
 from funasr import AutoModel
+config = config_manager.get_config()
 
 path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
 path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
@@ -84,7 +85,7 @@ def execute_asr_multi_level_dir(input_folder, output_folder, model_size, languag
     os.makedirs(output_dir_abs, exist_ok=True)
 
     # Build the output file path
-    output_file_path = os.path.join(output_dir_abs, f'{output_file_name}_asr.list')
+    output_file_path = os.path.join(output_dir_abs, f'{config.get_result_check("asr_filename")}.list')
 
     # Write the output to the file
     with open(output_file_path, "w", encoding="utf-8") as f:
diff --git a/Ref_Audio_Selector/tool/audio_inference.py b/Ref_Audio_Selector/tool/audio_inference.py
index 01adee7..f8bec4d 100644
--- a/Ref_Audio_Selector/tool/audio_inference.py
+++ b/Ref_Audio_Selector/tool/audio_inference.py
@@ -1,8 +1,11 @@
 import os
 import requests
 import itertools
+import Ref_Audio_Selector.config.config_manager as config_manager
 from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
 
+config = config_manager.get_config()
+
 
 class URLComposer:
     def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
@@ -78,9 +81,9 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
     os.makedirs(output_dir, exist_ok=True)
 
     # Create subdirectories for text and emotion categories
-    text_subdir = os.path.join(output_dir, 'text')
+    text_subdir = os.path.join(output_dir, config.get_inference('inference_audio_text_aggregation_dir'))
     os.makedirs(text_subdir, exist_ok=True)
-    emotion_subdir = os.path.join(output_dir, 'emotion')
+    emotion_subdir = os.path.join(output_dir, config.get_inference('inference_audio_emotion_aggregation_dir'))
     os.makedirs(emotion_subdir, exist_ok=True)
 
     # Compute the Cartesian product
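As a sanity check on the path refactor above, a minimal sketch (illustration only, not part of the diff) showing that with the default values shipped in config.ini the config-driven lookups resolve to the same 'text' and 'emotion' subdirectories the removed hard-coded literals produced; output_dir is a hypothetical placeholder and the snippet assumes it runs from the repository root so ConfigManager finds Ref_Audio_Selector/config.ini:

import os

import Ref_Audio_Selector.config.config_manager as config_manager

config = config_manager.get_config()
output_dir = 'work_dir/inference_audio'  # hypothetical output directory

# Defaults in config.ini: inference_audio_text_aggregation_dir = text,
#                         inference_audio_emotion_aggregation_dir = emotion
text_subdir = os.path.join(output_dir, config.get_inference('inference_audio_text_aggregation_dir'))
emotion_subdir = os.path.join(output_dir, config.get_inference('inference_audio_emotion_aggregation_dir'))

assert text_subdir == os.path.join(output_dir, 'text')
assert emotion_subdir == os.path.join(output_dir, 'emotion')
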
diff --git a/Ref_Audio_Selector/tool/delete_inference_with_ref.py b/Ref_Audio_Selector/tool/delete_inference_with_ref.py
index d653499..3a34c49 100644
--- a/Ref_Audio_Selector/tool/delete_inference_with_ref.py
+++ b/Ref_Audio_Selector/tool/delete_inference_with_ref.py
@@ -1,7 +1,9 @@
 import os
 import shutil
 import Ref_Audio_Selector.common.common as common
+import Ref_Audio_Selector.config.config_manager as config_manager
 
+config = config_manager.get_config()
 
 def remove_matching_audio_files_in_text_dir(text_dir, emotions_list):
     count = 0
@@ -51,8 +53,8 @@ def delete_emotion_subdirectories(emotion_dir, emotions_list):
 def sync_ref_audio(ref_audio_dir, inference_audio_dir):
     ref_audio_manager = common.RefAudioListManager(ref_audio_dir)
     ref_list = ref_audio_manager.get_ref_audio_list()
-    text_dir = os.path.join(inference_audio_dir, 'text')
-    emotion_dir = os.path.join(inference_audio_dir, 'emotion')
+    text_dir = os.path.join(inference_audio_dir, config.get_inference('inference_audio_text_aggregation_dir'))
+    emotion_dir = os.path.join(inference_audio_dir, config.get_inference('inference_audio_emotion_aggregation_dir'))
     delete_text_wav_num = remove_matching_audio_files_in_text_dir(text_dir, ref_list)
     delete_emotion_dir_num = delete_emotion_subdirectories(emotion_dir, ref_list)
     return delete_text_wav_num, delete_emotion_dir_num
diff --git a/Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py b/Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py
index 4b8ded7..8a97d9b 100644
--- a/Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py
+++ b/Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py
@@ -2,6 +2,7 @@ import argparse
 import os
 
 from modelscope.pipelines import pipeline
+
 sv_pipeline = pipeline(
     task='speaker-verification',
     model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
@@ -11,7 +12,8 @@ sv_pipeline = pipeline(
 
 def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path, output_file_path):
     # Step 1: collect the paths of all audio files in the comparison directory
-    comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if f.endswith('.wav')]
+    comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if
+                              f.endswith('.wav')]
 
     # Step 2: compare the reference audio with each file in the directory and collect similarity scores with their paths
     similarity_scores = []
@@ -31,10 +33,10 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
         open(output_file_path, 'w').close()  # Create an empty file
 
     # Step 5: write the sorted results to the output file (UTF-8, Chinese-safe)
-    formatted_scores = [f'{item["score"]}|{item["path"]}' for item in similarity_scores]
+    formatted_scores = [f'{item["score"]}|{item["path"]}' for item in similarity_scores]
     with open(output_file_path, 'w', encoding='utf-8') as f:
         # join with '\n' so each entry is written on its own line
-        content = '\n'.join(formatted_scores )
+        content = '\n'.join(formatted_scores)
         f.write(content)
 
 
@@ -60,7 +62,7 @@ if __name__ == '__main__':
     cmd = parse_arguments()
     print(cmd)
     compare_audio_and_generate_report(
-        reference_audio_path = cmd.reference_audio,
-        comparison_dir_path = cmd.comparison_dir,
-        output_file_path = cmd.output_file,
-    )
\ No newline at end of file
+        reference_audio_path=cmd.reference_audio,
+        comparison_dir_path=cmd.comparison_dir,
+        output_file_path=cmd.output_file,
+    )
diff --git a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
index 7ec2bd9..bc5791f 100644
--- a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
+++ b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py
@@ -3,8 +3,11 @@ import argparse
 from collections import defaultdict
 from operator import itemgetter
 import Ref_Audio_Selector.tool.text_comparison.text_comparison as text_comparison
+import Ref_Audio_Selector.config.config_manager as config_manager
 import Ref_Audio_Selector.common.common as common
 
+config = config_manager.get_config()
+
 
 def parse_asr_file(file_path):
     output = []
@@ -96,19 +99,20 @@ def process(asr_file_path, output_dir, similarity_enlarge_boundary):
     calculate_similarity_and_append_to_list(records, similarity_enlarge_boundary)
 
     average_similarity_list = calculate_average_similarity_by_emotion(records)
-    average_similarity_file = os.path.join(output_dir, 'average_similarity.txt')
+    average_similarity_file = os.path.join(output_dir,
+                                           f'{config.get_result_check("text_emotion_average_similarity_report_filename")}.txt')
     average_similarity_content = \
         '\n'.join([f"{item['average_similarity_score']}|{item['emotion']}" for item in average_similarity_list])
 
     common.write_text_to_file(average_similarity_content, average_similarity_file)
 
     emotion_detail_list = group_and_sort_by_field(records, 'emotion')
-    emotion_detail_file = os.path.join(output_dir, 'emotion_group_detail.txt')
+    emotion_detail_file = os.path.join(output_dir, f'{config.get_result_check("text_similarity_by_emotion_detail_filename")}.txt')
     format_list_to_text(emotion_detail_list, emotion_detail_file)
 
     original_text_detail_list = group_and_sort_by_field(records, 'original_text')
-    original_text_detail_file = os.path.join(output_dir, 'text_group_detail.txt')
+    original_text_detail_file = os.path.join(output_dir, f'{config.get_result_check("text_similarity_by_text_detail_filename")}.txt')
     format_list_to_text(original_text_detail_list, original_text_detail_file)
 
     print('文本相似度分析完成。')
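For orientation, a small sketch (illustration only, not part of the diff) of the report files process() writes when the default config.ini values above are used; output_dir is a hypothetical placeholder:

import os

output_dir = 'work_dir/text_similarity'  # hypothetical output directory

# With the default config.ini values the three reports resolve to:
#   average_similarity.txt    (text_emotion_average_similarity_report_filename)
#   emotion_group_detail.txt  (text_similarity_by_emotion_detail_filename)
#   text_group_detail.txt     (text_similarity_by_text_detail_filename)
average_report = os.path.join(output_dir, 'average_similarity.txt')

# average_similarity.txt holds one "<average_similarity_score>|<emotion>" entry per line,
# so it can be read back with a simple split:
with open(average_report, encoding='utf-8') as f:
    rows = [line.strip().split('|', 1) for line in f if line.strip()]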