mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-05-21 13:39:17 +08:00
添加配置文件管理
This commit is contained in:
parent
b6f0bb36ef
commit
ecbc7d0b1e
45
Ref_Audio_Selector/config.ini
Normal file
45
Ref_Audio_Selector/config.ini
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# config.ini
|
||||||
|
|
||||||
|
[Base]
|
||||||
|
# 工作目录
|
||||||
|
work_dir =
|
||||||
|
# 角色
|
||||||
|
role =
|
||||||
|
# 参考音频目录
|
||||||
|
reference_audio_dir = refer_audio
|
||||||
|
|
||||||
|
[AudioSample]
|
||||||
|
# list转换待选参考音频目录
|
||||||
|
list_to_convert_reference_audio_dir = refer_audio_all
|
||||||
|
# 音频相似度目录
|
||||||
|
audio_similarity_dir = similarity
|
||||||
|
|
||||||
|
[Inference]
|
||||||
|
# 默认测试文本位置
|
||||||
|
default_test_text_path = Ref_Audio_Selector/file/test_content/test_content.txt
|
||||||
|
# 推理音频目录
|
||||||
|
inference_audio_dir = inference_audio
|
||||||
|
# 推理音频文本聚合目录
|
||||||
|
inference_audio_text_aggregation_dir = text
|
||||||
|
# 推理音频情绪聚合目录
|
||||||
|
inference_audio_emotion_aggregation_dir = emotion
|
||||||
|
|
||||||
|
[ResultCheck]
|
||||||
|
# asr输出文件
|
||||||
|
asr_filename = asr
|
||||||
|
# 文本相似度输出目录
|
||||||
|
text_similarity_output_dir = text_similarity
|
||||||
|
# 文本情绪平均相似度报告文件名
|
||||||
|
text_emotion_average_similarity_report_filename = average_similarity
|
||||||
|
# 文本相似度按情绪聚合明细文件名
|
||||||
|
text_similarity_by_emotion_detail_filename = emotion_group_detail
|
||||||
|
# 文本相似度按文本聚合明细文件名
|
||||||
|
text_similarity_by_text_detail_filename = text_group_detail
|
||||||
|
|
||||||
|
[AudioConfig]
|
||||||
|
# 默认模板文件位置
|
||||||
|
default_template_path = Ref_Audio_Selector/file/config_template/ref_audio_template.txt
|
||||||
|
# 参考音频配置文件名
|
||||||
|
reference_audio_config_filename = refer_audio
|
||||||
|
|
||||||
|
[Other]
|
0
Ref_Audio_Selector/config/__init__.py
Normal file
0
Ref_Audio_Selector/config/__init__.py
Normal file
67
Ref_Audio_Selector/config/config_manager.py
Normal file
67
Ref_Audio_Selector/config/config_manager.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import configparser
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
    """Read and update an INI file while keeping its comments and layout.

    ``configparser`` discards comments when it serialises a configuration, so
    this class keeps the raw lines of the file and, on write, edits them in
    place: values that changed are rewritten, options that are new are appended
    to the end of their section, options that were removed are dropped, and
    every other line (comments, blank lines, untouched options) is copied
    through unchanged.

    Attributes:
        config_path: Path of the INI file that is read and rewritten.
        comments: ``(line_index, line_text)`` pairs for every line of the file
            that starts with ``#`` or ``;`` in column 0.
        config: The ``configparser.ConfigParser`` holding the parsed values.
    """

    # A section header such as ``[Base]`` (matched against the stripped line).
    _SECTION_RE = re.compile(r'^\[(?P<name>.+)\]')
    # The option name in front of the first ``=`` or ``:`` delimiter.
    _OPTION_RE = re.compile(r'^(?P<key>[^=:]+?)\s*[=:]')
    _COMMENT_PREFIXES = ('#', ';')

    def __init__(self, config_path='Ref_Audio_Selector/config.ini'):
        """Load the configuration stored at *config_path*.

        Args:
            config_path: Location of the INI file. The default is relative to
                the current working directory.

        Raises:
            OSError: If the file cannot be opened.
            configparser.Error: If the file is not valid INI.
        """
        self.config_path = config_path
        self.comments = []
        self.config = None
        self._lines = []
        self.read_with_comments()

    def read_with_comments(self):
        """(Re)load the file, remembering its raw lines and its comments."""
        with open(self.config_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Only the last line can lack a newline; normalise it so that lines
        # appended after it later do not get glued onto it.
        if lines and not lines[-1].endswith('\n'):
            lines[-1] += '\n'

        parser = configparser.ConfigParser()
        parser.read_string(''.join(lines))

        # Assign only after a successful parse so a broken file does not leave
        # the instance half-updated.
        self._lines = lines
        self.comments = self._collect_comments(lines)
        self.config = parser

    def write_with_comments(self):
        """Write the current values back to the file, preserving comments."""
        rendered = self._render_lines()
        with open(self.config_path, 'w', encoding='utf-8') as f:
            f.writelines(rendered)

        # The rendered text is now the on-disk state; later writes start from it.
        self._lines = rendered
        self.comments = self._collect_comments(rendered)

    @classmethod
    def _collect_comments(cls, lines):
        """Return ``(index, line)`` for each line starting with ``#`` or ``;``."""
        return [(index, line) for index, line in enumerate(lines)
                if line.startswith(cls._COMMENT_PREFIXES)]

    @staticmethod
    def _format_option(key, value):
        """Format one ``key = value`` entry, tab-indenting continuation lines."""
        text = f'{key} = {value}'.replace('\n', '\n\t')
        # An empty value is written as "key =" without a trailing space.
        return text.rstrip() + '\n'

    def _missing_options(self, section, seen):
        """Return lines for options of *section* that the file does not contain.

        *seen* holds the ``(section, key)`` pairs that already have a line.
        Values that merely repeat an entry of the default section are treated
        as inherited and are not written out.
        """
        default_name = self.config.default_section
        if section is None:
            return []
        if section != default_name and not self.config.has_section(section):
            return []

        inherited = self.config.defaults()
        lines = []
        for key, value in self.config.items(section, raw=True):
            if (section, key) in seen:
                continue
            if section != default_name and inherited.get(key) == value:
                continue
            lines.append(self._format_option(key, value))
        return lines

    def _render_lines(self):
        """Merge the current values of ``self.config`` into the stored lines."""
        # Values as they are in the stored text; an option line is rewritten
        # only when its value differs from this baseline.
        baseline = configparser.ConfigParser()
        baseline.read_string(''.join(self._lines))

        rendered = []
        seen = set()            # (section, key) pairs that own a line in the file
        file_sections = set()   # section names that have a header in the file
        section = None          # section currently being copied
        anchor = 0              # index just past the section's last header/option
        option_indent = None    # indent of the most recent option line
        keep_continuation = True

        for line in self._lines:
            stripped = line.strip()
            if not stripped or stripped.startswith(self._COMMENT_PREFIXES):
                rendered.append(line)
                continue

            indent = len(line) - len(line.lstrip())
            if option_indent is not None and indent > option_indent:
                # Continuation of a multi-line value: kept only when the
                # owning option line itself was kept verbatim.
                if keep_continuation:
                    rendered.append(line)
                    anchor = len(rendered)
                continue

            header = self._SECTION_RE.match(stripped)
            if header:
                # Leaving a section: add its new options right after its
                # last entry, ahead of any trailing blank lines.
                rendered[anchor:anchor] = self._missing_options(section, seen)
                section = header.group('name')
                file_sections.add(section)
                option_indent = None
                rendered.append(line)
                anchor = len(rendered)
                continue

            option = self._OPTION_RE.match(stripped)
            if section is None or option is None:
                rendered.append(line)
                continue

            option_indent = indent
            key_text = option.group('key')
            key = self.config.optionxform(key_text)
            seen.add((section, key))

            if not self.config.has_option(section, key):
                # The option was removed from the configuration: drop it.
                keep_continuation = False
                continue

            current = self.config.get(section, key, raw=True)
            original = baseline.get(section, key, raw=True, fallback=None)
            if current == original:
                rendered.append(line)
                keep_continuation = True
            else:
                rendered.append(self._format_option(key_text, current))
                keep_continuation = False
            anchor = len(rendered)

        rendered[anchor:anchor] = self._missing_options(section, seen)

        # Sections that exist only in memory are appended at the end.
        default_name = self.config.default_section
        for name in [default_name] + self.config.sections():
            if name in file_sections:
                continue
            extra = self._missing_options(name, seen)
            if name == default_name and not extra:
                continue
            if rendered and rendered[-1].strip():
                rendered.append('\n')
            rendered.append(f'[{name}]\n')
            rendered.extend(extra)

        return rendered

    def get_base(self, key):
        """Return option *key* from the ``[Base]`` section."""
        return self.config.get('Base', key)

    def set_base(self, key, value):
        """Set option *key* in ``[Base]`` to the string *value* and save the file."""
        self.config.set('Base', key, value)
        self.write_with_comments()

    def get_audio_sample(self, key):
        """Return option *key* from the ``[AudioSample]`` section."""
        return self.config.get('AudioSample', key)

    def get_inference(self, key):
        """Return option *key* from the ``[Inference]`` section."""
        return self.config.get('Inference', key)

    def get_result_check(self, key):
        """Return option *key* from the ``[ResultCheck]`` section."""
        return self.config.get('ResultCheck', key)

    def get_audio_config(self, key):
        """Return option *key* from the ``[AudioConfig]`` section."""
        return self.config.get('AudioConfig', key)

    def get_other(self, key):
        """Return option *key* from the ``[Other]`` section."""
        return self.config.get('Other', key)
|
||||||
|
|
||||||
|
|
||||||
|
_config = ConfigManager()
|
||||||
|
|
||||||
|
|
||||||
|
def get_config():
    """Return the shared ConfigManager instance created when this module loads."""
    manager = _config
    return manager
|
@ -8,6 +8,7 @@ import Ref_Audio_Selector.tool.audio_inference as audio_inference
|
|||||||
import Ref_Audio_Selector.tool.audio_config as audio_config
|
import Ref_Audio_Selector.tool.audio_config as audio_config
|
||||||
import Ref_Audio_Selector.tool.delete_inference_with_ref as delete_inference_with_ref
|
import Ref_Audio_Selector.tool.delete_inference_with_ref as delete_inference_with_ref
|
||||||
import Ref_Audio_Selector.common.common as common
|
import Ref_Audio_Selector.common.common as common
|
||||||
|
import Ref_Audio_Selector.config.config_manager as config_manager
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
from config import python_exec, is_half
|
from config import python_exec, is_half
|
||||||
from tools import my_utils
|
from tools import my_utils
|
||||||
@ -15,6 +16,7 @@ from tools.asr.config import asr_dict
|
|||||||
from subprocess import Popen
|
from subprocess import Popen
|
||||||
|
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
|
config = config_manager.get_config()
|
||||||
|
|
||||||
p_similarity = None
|
p_similarity = None
|
||||||
p_asr = None
|
p_asr = None
|
||||||
@ -29,7 +31,8 @@ def check_base_info(text_work_space_dir):
|
|||||||
|
|
||||||
# 从list文件,提取参考音频
|
# 从list文件,提取参考音频
|
||||||
def convert_from_list(text_work_space_dir, text_list_input):
|
def convert_from_list(text_work_space_dir, text_list_input):
|
||||||
ref_audio_all = os.path.join(text_work_space_dir, 'refer_audio_all')
|
ref_audio_all = os.path.join(text_work_space_dir,
|
||||||
|
config.get_audio_sample('list_to_convert_reference_audio_dir'))
|
||||||
text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}"
|
text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}"
|
||||||
text_sample_dir = ref_audio_all
|
text_sample_dir = ref_audio_all
|
||||||
try:
|
try:
|
||||||
@ -48,7 +51,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
|
|||||||
similarity_list = None
|
similarity_list = None
|
||||||
similarity_file_dir = None
|
similarity_file_dir = None
|
||||||
|
|
||||||
similarity_dir = os.path.join(work_space_dir, 'similarity')
|
similarity_dir = os.path.join(work_space_dir, config.get_audio_sample('audio_similarity_dir'))
|
||||||
os.makedirs(similarity_dir, exist_ok=True)
|
os.makedirs(similarity_dir, exist_ok=True)
|
||||||
|
|
||||||
base_voice_file_name = common.get_filename_without_extension(base_voice_path)
|
base_voice_file_name = common.get_filename_without_extension(base_voice_path)
|
||||||
@ -79,7 +82,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
|
|||||||
# 基于一个基准音频,从参考音频目录中进行分段抽样
|
# 基于一个基准音频,从参考音频目录中进行分段抽样
|
||||||
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
||||||
text_subsection_num, text_sample_num, checkbox_similarity_output):
|
text_subsection_num, text_sample_num, checkbox_similarity_output):
|
||||||
ref_audio_dir = os.path.join(text_work_space_dir, 'refer_audio')
|
ref_audio_dir = os.path.join(text_work_space_dir, config.get_base('reference_audio_dir'))
|
||||||
text_sample_info = f"抽样成功:生成目录{ref_audio_dir}"
|
text_sample_info = f"抽样成功:生成目录{ref_audio_dir}"
|
||||||
try:
|
try:
|
||||||
check_base_info(text_work_space_dir)
|
check_base_info(text_work_space_dir)
|
||||||
@ -114,8 +117,9 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
|||||||
def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_url,
|
def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_url,
|
||||||
text_text, text_ref_path, text_ref_text, text_emotion,
|
text_text, text_ref_path, text_ref_text, text_emotion,
|
||||||
text_test_content):
|
text_test_content):
|
||||||
inference_dir = os.path.join(text_work_space_dir, 'inference_audio')
|
inference_dir = os.path.join(text_work_space_dir, config.get_inference('inference_audio_dir'))
|
||||||
text_asr_audio_dir = os.path.join(inference_dir, 'text')
|
text_asr_audio_dir = os.path.join(inference_dir,
|
||||||
|
config.get_inference('inference_audio_text_aggregation_dir'))
|
||||||
text_model_inference_info = f"推理成功:生成目录{inference_dir}"
|
text_model_inference_info = f"推理成功:生成目录{inference_dir}"
|
||||||
try:
|
try:
|
||||||
check_base_info(text_work_space_dir)
|
check_base_info(text_work_space_dir)
|
||||||
@ -164,7 +168,7 @@ def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
|
|||||||
if dropdown_asr_lang is None or dropdown_asr_lang == '':
|
if dropdown_asr_lang is None or dropdown_asr_lang == '':
|
||||||
raise Exception("asr语言不能为空")
|
raise Exception("asr语言不能为空")
|
||||||
asr_file = open_asr(text_asr_audio_dir, text_work_space_dir, dropdown_asr_model, dropdown_asr_size,
|
asr_file = open_asr(text_asr_audio_dir, text_work_space_dir, dropdown_asr_model, dropdown_asr_size,
|
||||||
dropdown_asr_lang)
|
dropdown_asr_lang)
|
||||||
text_text_similarity_analysis_path = asr_file
|
text_text_similarity_analysis_path = asr_file
|
||||||
text_asr_info = f"asr成功:生成文件{asr_file}"
|
text_asr_info = f"asr成功:生成文件{asr_file}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -198,7 +202,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
|
|||||||
output_dir_abs = os.path.abspath(asr_opt_dir)
|
output_dir_abs = os.path.abspath(asr_opt_dir)
|
||||||
output_file_name = os.path.basename(asr_inp_dir)
|
output_file_name = os.path.basename(asr_inp_dir)
|
||||||
# 构造输出文件路径
|
# 构造输出文件路径
|
||||||
output_file_path = os.path.join(output_dir_abs, f'{output_file_name}_asr.list')
|
output_file_path = os.path.join(output_dir_abs, f'{config.get_result_check("asr_filename")}.list')
|
||||||
return output_file_path
|
return output_file_path
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -208,7 +212,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
|
|||||||
# 对asr生成的文件,与原本的文本内容,进行相似度分析
|
# 对asr生成的文件,与原本的文本内容,进行相似度分析
|
||||||
def text_similarity_analysis(text_work_space_dir,
|
def text_similarity_analysis(text_work_space_dir,
|
||||||
text_text_similarity_analysis_path):
|
text_text_similarity_analysis_path):
|
||||||
similarity_dir = os.path.join(text_work_space_dir, 'text_similarity')
|
similarity_dir = os.path.join(text_work_space_dir, config.get_result_check('text_similarity_output_dir'))
|
||||||
text_text_similarity_analysis_info = f"相似度分析成功:生成目录{similarity_dir}"
|
text_text_similarity_analysis_info = f"相似度分析成功:生成目录{similarity_dir}"
|
||||||
try:
|
try:
|
||||||
check_base_info(text_work_space_dir)
|
check_base_info(text_work_space_dir)
|
||||||
@ -243,7 +247,7 @@ def open_text_similarity_analysis(asr_file_path, output_dir, similarity_enlarge_
|
|||||||
# 根据一个参考音频,对指定目录下的音频进行相似度分析,并输出到另一个目录
|
# 根据一个参考音频,对指定目录下的音频进行相似度分析,并输出到另一个目录
|
||||||
def similarity_audio_output(text_work_space_dir, text_base_audio_path,
|
def similarity_audio_output(text_work_space_dir, text_base_audio_path,
|
||||||
text_compare_audio_dir):
|
text_compare_audio_dir):
|
||||||
text_similarity_audio_output_info = "相似度分析成功:生成目录XXX"
|
text_similarity_audio_output_info = None
|
||||||
try:
|
try:
|
||||||
check_base_info(text_work_space_dir)
|
check_base_info(text_work_space_dir)
|
||||||
if text_base_audio_path is None or text_base_audio_path == '':
|
if text_base_audio_path is None or text_base_audio_path == '':
|
||||||
@ -274,7 +278,8 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
|
|||||||
raise Exception("参考音频目录不能为空")
|
raise Exception("参考音频目录不能为空")
|
||||||
if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '':
|
if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '':
|
||||||
raise Exception("推理生成的音频目录不能为空")
|
raise Exception("推理生成的音频目录不能为空")
|
||||||
delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio(text_sync_ref_audio_dir, text_sync_inference_audio_dir)
|
delete_text_wav_num, delete_emotion_dir_num = delete_inference_with_ref.sync_ref_audio(text_sync_ref_audio_dir,
|
||||||
|
text_sync_inference_audio_dir)
|
||||||
text_sync_ref_audio_info = f"推理音频目录{text_sync_inference_audio_dir}下,text目录删除了{delete_text_wav_num}个参考音频,emotion目录下,删除了{delete_emotion_dir_num}个目录"
|
text_sync_ref_audio_info = f"推理音频目录{text_sync_inference_audio_dir}下,text目录删除了{delete_text_wav_num}个参考音频,emotion目录下,删除了{delete_emotion_dir_num}个目录"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
@ -284,7 +289,7 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
|
|||||||
|
|
||||||
# 根据模板和参考音频目录,生成参考音频配置内容
|
# 根据模板和参考音频目录,生成参考音频配置内容
|
||||||
def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
|
def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
|
||||||
config_file = os.path.join(text_work_space_dir, 'refer_audio.json')
|
config_file = os.path.join(text_work_space_dir, f'{config.get_audio_config("reference_audio_config_filename")}.json')
|
||||||
text_create_config_info = f"配置生成成功:生成文件{config_file}"
|
text_create_config_info = f"配置生成成功:生成文件{config_file}"
|
||||||
try:
|
try:
|
||||||
check_base_info(text_work_space_dir)
|
check_base_info(text_work_space_dir)
|
||||||
@ -293,7 +298,8 @@ def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
|
|||||||
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
|
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
|
||||||
raise Exception("参考音频目录不能为空")
|
raise Exception("参考音频目录不能为空")
|
||||||
ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
|
ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
|
||||||
audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(), config_file)
|
audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(),
|
||||||
|
config_file)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
text_create_config_info = f"发生异常:{e}"
|
text_create_config_info = f"发生异常:{e}"
|
||||||
@ -353,7 +359,7 @@ with gr.Blocks() as app:
|
|||||||
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
||||||
[text_whole_url])
|
[text_whole_url])
|
||||||
gr.Markdown(value=i18n("2.2:配置待推理文本,一句一行,不要太多,10条即可"))
|
gr.Markdown(value=i18n("2.2:配置待推理文本,一句一行,不要太多,10条即可"))
|
||||||
default_test_content_path = 'Ref_Audio_Selector/tool/test_content/test_content.txt'
|
default_test_content_path = config.get_inference('default_test_text_path')
|
||||||
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
|
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
|
||||||
gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
|
gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
|
||||||
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
||||||
@ -421,7 +427,7 @@ with gr.Blocks() as app:
|
|||||||
text_sync_inference_audio_dir], [text_sync_ref_info])
|
text_sync_inference_audio_dir], [text_sync_ref_info])
|
||||||
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
|
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
|
||||||
gr.Markdown(value=i18n("4.1:编辑模板"))
|
gr.Markdown(value=i18n("4.1:编辑模板"))
|
||||||
default_template_path = 'Ref_Audio_Selector/tool/config_template/ref_audio_template.txt'
|
default_template_path = config.get_audio_config('default_template_path')
|
||||||
default_template_content = common.read_file(default_template_path)
|
default_template_content = common.read_file(default_template_path)
|
||||||
text_template_path = gr.Text(label=i18n("模板文件路径"), value=default_template_path, interactive=False)
|
text_template_path = gr.Text(label=i18n("模板文件路径"), value=default_template_path, interactive=False)
|
||||||
text_template = gr.Text(label=i18n("模板内容"), value=default_template_content, lines=10)
|
text_template = gr.Text(label=i18n("模板内容"), value=default_template_content, lines=10)
|
||||||
|
@ -3,9 +3,10 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
|
import Ref_Audio_Selector.config.config_manager as config_manager
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from funasr import AutoModel
|
from funasr import AutoModel
|
||||||
|
config = config_manager.get_config()
|
||||||
|
|
||||||
path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
||||||
path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
||||||
@ -84,7 +85,7 @@ def execute_asr_multi_level_dir(input_folder, output_folder, model_size, languag
|
|||||||
os.makedirs(output_dir_abs, exist_ok=True)
|
os.makedirs(output_dir_abs, exist_ok=True)
|
||||||
|
|
||||||
# 构造输出文件路径
|
# 构造输出文件路径
|
||||||
output_file_path = os.path.join(output_dir_abs, f'{output_file_name}_asr.list')
|
output_file_path = os.path.join(output_dir_abs, f'{config.get_result_check("asr_filename")}.list')
|
||||||
|
|
||||||
# 将输出写入文件
|
# 将输出写入文件
|
||||||
with open(output_file_path, "w", encoding="utf-8") as f:
|
with open(output_file_path, "w", encoding="utf-8") as f:
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import itertools
|
import itertools
|
||||||
|
import Ref_Audio_Selector.config.config_manager as config_manager
|
||||||
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
|
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
|
||||||
|
|
||||||
|
config = config_manager.get_config()
|
||||||
|
|
||||||
|
|
||||||
class URLComposer:
|
class URLComposer:
|
||||||
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
|
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
|
||||||
@ -78,9 +81,9 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
|
|||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
# Create subdirectories for text and emotion categories
|
# Create subdirectories for text and emotion categories
|
||||||
text_subdir = os.path.join(output_dir, 'text')
|
text_subdir = os.path.join(output_dir, config.get_inference('inference_audio_text_aggregation_dir'))
|
||||||
os.makedirs(text_subdir, exist_ok=True)
|
os.makedirs(text_subdir, exist_ok=True)
|
||||||
emotion_subdir = os.path.join(output_dir, 'emotion')
|
emotion_subdir = os.path.join(output_dir, config.get_inference('inference_audio_emotion_aggregation_dir'))
|
||||||
os.makedirs(emotion_subdir, exist_ok=True)
|
os.makedirs(emotion_subdir, exist_ok=True)
|
||||||
|
|
||||||
# 计算笛卡尔积
|
# 计算笛卡尔积
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import Ref_Audio_Selector.common.common as common
|
import Ref_Audio_Selector.common.common as common
|
||||||
|
import Ref_Audio_Selector.config.config_manager as config_manager
|
||||||
|
|
||||||
|
config = config_manager.get_config()
|
||||||
|
|
||||||
def remove_matching_audio_files_in_text_dir(text_dir, emotions_list):
|
def remove_matching_audio_files_in_text_dir(text_dir, emotions_list):
|
||||||
count = 0
|
count = 0
|
||||||
@ -51,8 +53,8 @@ def delete_emotion_subdirectories(emotion_dir, emotions_list):
|
|||||||
def sync_ref_audio(ref_audio_dir, inference_audio_dir):
|
def sync_ref_audio(ref_audio_dir, inference_audio_dir):
|
||||||
ref_audio_manager = common.RefAudioListManager(ref_audio_dir)
|
ref_audio_manager = common.RefAudioListManager(ref_audio_dir)
|
||||||
ref_list = ref_audio_manager.get_ref_audio_list()
|
ref_list = ref_audio_manager.get_ref_audio_list()
|
||||||
text_dir = os.path.join(inference_audio_dir, 'text')
|
text_dir = os.path.join(inference_audio_dir, config.get_inference('inference_audio_text_aggregation_dir'))
|
||||||
emotion_dir = os.path.join(inference_audio_dir, 'emotion')
|
emotion_dir = os.path.join(inference_audio_dir, config.get_inference('inference_audio_emotion_aggregation_dir'))
|
||||||
delete_text_wav_num = remove_matching_audio_files_in_text_dir(text_dir, ref_list)
|
delete_text_wav_num = remove_matching_audio_files_in_text_dir(text_dir, ref_list)
|
||||||
delete_emotion_dir_num = delete_emotion_subdirectories(emotion_dir, ref_list)
|
delete_emotion_dir_num = delete_emotion_subdirectories(emotion_dir, ref_list)
|
||||||
return delete_text_wav_num, delete_emotion_dir_num
|
return delete_text_wav_num, delete_emotion_dir_num
|
||||||
|
@ -2,6 +2,7 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from modelscope.pipelines import pipeline
|
from modelscope.pipelines import pipeline
|
||||||
|
|
||||||
sv_pipeline = pipeline(
|
sv_pipeline = pipeline(
|
||||||
task='speaker-verification',
|
task='speaker-verification',
|
||||||
model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
|
model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
|
||||||
@ -11,7 +12,8 @@ sv_pipeline = pipeline(
|
|||||||
|
|
||||||
def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path, output_file_path):
|
def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path, output_file_path):
|
||||||
# Step 1: 获取比较音频目录下所有音频文件的路径
|
# Step 1: 获取比较音频目录下所有音频文件的路径
|
||||||
comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if f.endswith('.wav')]
|
comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if
|
||||||
|
f.endswith('.wav')]
|
||||||
|
|
||||||
# Step 2: 用参考音频依次比较音频目录下的每个音频,获取相似度分数及对应路径
|
# Step 2: 用参考音频依次比较音频目录下的每个音频,获取相似度分数及对应路径
|
||||||
similarity_scores = []
|
similarity_scores = []
|
||||||
@ -31,10 +33,10 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
|
|||||||
open(output_file_path, 'w').close() # Create an empty file
|
open(output_file_path, 'w').close() # Create an empty file
|
||||||
|
|
||||||
# Step 5: 将排序后的结果写入输出结果文件(支持中文)
|
# Step 5: 将排序后的结果写入输出结果文件(支持中文)
|
||||||
formatted_scores = [f'{item["score"]}|{item["path"]}' for item in similarity_scores]
|
formatted_scores = [f'{item["score"]}|{item["path"]}' for item in similarity_scores]
|
||||||
with open(output_file_path, 'w', encoding='utf-8') as f:
|
with open(output_file_path, 'w', encoding='utf-8') as f:
|
||||||
# 使用'\n'将每个字符串分开,使其写入不同行
|
# 使用'\n'将每个字符串分开,使其写入不同行
|
||||||
content = '\n'.join(formatted_scores )
|
content = '\n'.join(formatted_scores)
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
|
|
||||||
@ -60,7 +62,7 @@ if __name__ == '__main__':
|
|||||||
cmd = parse_arguments()
|
cmd = parse_arguments()
|
||||||
print(cmd)
|
print(cmd)
|
||||||
compare_audio_and_generate_report(
|
compare_audio_and_generate_report(
|
||||||
reference_audio_path = cmd.reference_audio,
|
reference_audio_path=cmd.reference_audio,
|
||||||
comparison_dir_path = cmd.comparison_dir,
|
comparison_dir_path=cmd.comparison_dir,
|
||||||
output_file_path = cmd.output_file,
|
output_file_path=cmd.output_file,
|
||||||
)
|
)
|
||||||
|
@ -3,8 +3,11 @@ import argparse
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import Ref_Audio_Selector.tool.text_comparison.text_comparison as text_comparison
|
import Ref_Audio_Selector.tool.text_comparison.text_comparison as text_comparison
|
||||||
|
import Ref_Audio_Selector.config.config_manager as config_manager
|
||||||
import Ref_Audio_Selector.common.common as common
|
import Ref_Audio_Selector.common.common as common
|
||||||
|
|
||||||
|
config = config_manager.get_config()
|
||||||
|
|
||||||
|
|
||||||
def parse_asr_file(file_path):
|
def parse_asr_file(file_path):
|
||||||
output = []
|
output = []
|
||||||
@ -96,19 +99,20 @@ def process(asr_file_path, output_dir, similarity_enlarge_boundary):
|
|||||||
calculate_similarity_and_append_to_list(records, similarity_enlarge_boundary)
|
calculate_similarity_and_append_to_list(records, similarity_enlarge_boundary)
|
||||||
average_similarity_list = calculate_average_similarity_by_emotion(records)
|
average_similarity_list = calculate_average_similarity_by_emotion(records)
|
||||||
|
|
||||||
average_similarity_file = os.path.join(output_dir, 'average_similarity.txt')
|
average_similarity_file = os.path.join(output_dir,
|
||||||
|
f'{config.get_result_check("text_emotion_average_similarity_report_filename")}.txt')
|
||||||
average_similarity_content = \
|
average_similarity_content = \
|
||||||
'\n'.join([f"{item['average_similarity_score']}|{item['emotion']}" for item in average_similarity_list])
|
'\n'.join([f"{item['average_similarity_score']}|{item['emotion']}" for item in average_similarity_list])
|
||||||
common.write_text_to_file(average_similarity_content, average_similarity_file)
|
common.write_text_to_file(average_similarity_content, average_similarity_file)
|
||||||
|
|
||||||
emotion_detail_list = group_and_sort_by_field(records, 'emotion')
|
emotion_detail_list = group_and_sort_by_field(records, 'emotion')
|
||||||
|
|
||||||
emotion_detail_file = os.path.join(output_dir, 'emotion_group_detail.txt')
|
emotion_detail_file = os.path.join(output_dir, f'{config.get_result_check("emotion_group_detail")}.txt')
|
||||||
format_list_to_text(emotion_detail_list, emotion_detail_file)
|
format_list_to_text(emotion_detail_list, emotion_detail_file)
|
||||||
|
|
||||||
original_text_detail_list = group_and_sort_by_field(records, 'original_text')
|
original_text_detail_list = group_and_sort_by_field(records, 'original_text')
|
||||||
|
|
||||||
original_text_detail_file = os.path.join(output_dir, 'text_group_detail.txt')
|
original_text_detail_file = os.path.join(output_dir, f'{config.get_result_check("text_group_detail")}.txt')
|
||||||
format_list_to_text(original_text_detail_list, original_text_detail_file)
|
format_list_to_text(original_text_detail_list, original_text_detail_file)
|
||||||
|
|
||||||
print('文本相似度分析完成。')
|
print('文本相似度分析完成。')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user