调整目录结构

This commit is contained in:
Downupanddownup 2024-04-24 18:57:36 +08:00
parent a1fc00a9d8
commit 4cbbe2a258
5 changed files with 52 additions and 60 deletions

View File

@ -12,20 +12,18 @@ i18n = I18nAuto()
# 校验基础信息
def check_base_info(text_work_space_dir):
    """Validate the base information shared by every step of the tool.

    Args:
        text_work_space_dir: Working directory under which the files
            generated by the later steps are saved.

    Raises:
        Exception: If ``text_work_space_dir`` is ``None`` or an empty string.
    """
    if text_work_space_dir is None or text_work_space_dir == '':
        raise Exception(i18n("工作目录不能为空"))
# 从list文件提取参考音频
def convert_from_list(text_work_space_dir, text_character, text_list_input):
def convert_from_list(text_work_space_dir, text_list_input):
ref_audio_all = os.path.join(text_work_space_dir, 'refer_audio_all')
text_convert_from_list_info = f"转换成功:生成目录{ref_audio_all}"
text_sample_dir = ref_audio_all
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_list_input is None or text_list_input == '':
raise Exception(i18n("list文件路径不能为空"))
audio_similarity.convert_from_list(text_list_input, ref_audio_all)
@ -36,12 +34,12 @@ def convert_from_list(text_work_space_dir, text_character, text_list_input):
# 基于一个基准音频,从参考音频目录中进行分段抽样
def sample(text_work_space_dir, text_character, text_sample_dir, text_base_voice_path,
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
text_subsection_num, text_sample_num, checkbox_similarity_output):
ref_audio_dir = os.path.join(text_work_space_dir, 'refer_audio')
text_sample_info = f"抽样成功:生成目录{ref_audio_dir}"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_sample_dir is None or text_sample_dir == '':
raise Exception(i18n("参考音频抽样目录不能为空,请先完成上一步操作"))
if text_base_voice_path is None or text_base_voice_path == '':
@ -70,14 +68,14 @@ def sample(text_work_space_dir, text_character, text_sample_dir, text_base_voice
# 根据参考音频和测试文本,执行批量推理
def model_inference(text_work_space_dir, text_character, text_model_inference_voice_dir, text_url,
def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_url,
text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content):
inference_dir = os.path.join(text_work_space_dir, 'inference_audio')
text_asr_audio_dir = os.path.join(inference_dir, 'text')
text_model_inference_info = f"推理成功:生成目录{inference_dir}"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_model_inference_voice_dir is None or text_model_inference_voice_dir == '':
raise Exception(i18n("待推理的参考音频所在目录不能为空,请先完成上一步操作"))
if text_url is None or text_url == '':
@ -106,13 +104,13 @@ def model_inference(text_work_space_dir, text_character, text_model_inference_vo
# 对推理生成音频执行asr
def asr(text_work_space_dir, text_character, text_asr_audio_dir, dropdown_asr_model,
def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
dropdown_asr_size, dropdown_asr_lang):
asr_file = None
text_text_similarity_analysis_path = None
text_asr_info = None
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_asr_audio_dir is None or text_asr_audio_dir == '':
raise Exception(i18n("待asr的音频所在目录不能为空请先完成上一步操作"))
if dropdown_asr_model is None or dropdown_asr_model == '':
@ -132,12 +130,12 @@ def asr(text_work_space_dir, text_character, text_asr_audio_dir, dropdown_asr_mo
# 对asr生成的文件与原本的文本内容进行相似度分析
def text_similarity_analysis(text_work_space_dir, text_character,
def text_similarity_analysis(text_work_space_dir,
text_text_similarity_analysis_path):
similarity_file = os.path.join(text_work_space_dir, 'similarity.txt')
text_text_similarity_analysis_info = f"相似度分析成功:生成文件{similarity_file}"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_text_similarity_analysis_path is None or text_text_similarity_analysis_path == '':
raise Exception(i18n("asr生成的文件路径不能为空请先完成上一步操作"))
pass
@ -147,11 +145,11 @@ def text_similarity_analysis(text_work_space_dir, text_character,
# 根据一个参考音频,对指定目录下的音频进行相似度分析,并输出到另一个目录
def similarity_audio_output(text_work_space_dir, text_character, text_base_audio_path,
def similarity_audio_output(text_work_space_dir, text_base_audio_path,
text_compare_audio_dir):
text_similarity_audio_output_info = "相似度分析成功生成目录XXX"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_base_audio_path is None or text_base_audio_path == '':
raise Exception(i18n("基准音频路径不能为空"))
if text_compare_audio_dir is None or text_compare_audio_dir == '':
@ -170,11 +168,11 @@ def similarity_audio_output(text_work_space_dir, text_character, text_base_audio
# 根据参考音频目录的删除情况,将其同步到推理生成的音频目录中,即参考音频目录下,删除了几个参考音频,就在推理目录下,将这些参考音频生成的音频文件移除
def sync_ref_audio(text_work_space_dir, text_character, text_sync_ref_audio_dir,
def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
text_sync_inference_audio_dir):
text_sync_ref_audio_info = "同步参考音频成功生成目录XXX"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_sync_ref_audio_dir is None or text_sync_ref_audio_dir == '':
raise Exception(i18n("参考音频目录不能为空"))
if text_sync_inference_audio_dir is None or text_sync_inference_audio_dir == '':
@ -186,11 +184,11 @@ def sync_ref_audio(text_work_space_dir, text_character, text_sync_ref_audio_dir,
# 根据模板和参考音频目录,生成参考音频配置内容
def create_config(text_work_space_dir, text_character, text_template, text_sync_ref_audio_dir2):
def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
config_file = os.path.join(text_work_space_dir, 'refer_audio.json')
text_create_config_info = f"配置生成成功:生成文件{config_file}"
try:
check_base_info(text_work_space_dir, text_character)
check_base_info(text_work_space_dir)
if text_template is None or text_template == '':
raise Exception(i18n("参考音频抽样目录不能为空"))
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
@ -214,9 +212,7 @@ def whole_url(text_url, text_text, text_ref_path, text_ref_text, text_emotion):
with gr.Blocks() as app:
gr.Markdown(value=i18n("基本介绍:这是一个从训练素材中,批量提取参考音频,并进行效果评估与配置生成的工具"))
with gr.Row():
text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), value="")
text_character = gr.Text(label=i18n("请输入角色名称"), value="")
text_work_space_dir = gr.Text(label=i18n("工作目录,后续操作所生成文件都会保存在此目录下"), value="")
with gr.Accordion(label=i18n("第一步:基于训练素材,生成待选参考音频列表"), open=False):
gr.Markdown(value=i18n("1.1选择list文件并提取3-10秒的素材作为参考候选"))
text_list_input = gr.Text(label=i18n("请输入list文件路径"), value="")
@ -225,7 +221,7 @@ with gr.Blocks() as app:
text_convert_from_list_info = gr.Text(label=i18n("参考列表生成结果"), value="", interactive=False)
gr.Markdown(value=i18n("1.2:选择基准音频,执行相似度匹配,并分段随机抽样"))
text_sample_dir = gr.Text(label=i18n("参考音频抽样目录"), value="", interactive=False)
button_convert_from_list.click(convert_from_list, [text_work_space_dir, text_character, text_list_input],
button_convert_from_list.click(convert_from_list, [text_work_space_dir, text_list_input],
[text_convert_from_list_info, text_sample_dir])
with gr.Row():
text_base_voice_path = gr.Text(label=i18n("请输入基准音频路径"), value="")
@ -267,7 +263,7 @@ with gr.Blocks() as app:
gr.Markdown(value=i18n("3.1启动asr获取推理音频文本"))
text_asr_audio_dir = gr.Text(label=i18n("待asr的音频所在目录"), value="", interactive=False)
button_model_inference.click(model_inference,
[text_work_space_dir, text_character, text_model_inference_voice_dir, text_url,
[text_work_space_dir, text_model_inference_voice_dir, text_url,
text_text, text_ref_path, text_ref_text, text_emotion,
text_test_content], [text_model_inference_info, text_asr_audio_dir])
with gr.Row():
@ -294,13 +290,13 @@ with gr.Blocks() as app:
text_asr_info = gr.Text(label=i18n("asr结果"), value="", interactive=False)
gr.Markdown(value=i18n("3.2:启动文本相似度分析"))
text_text_similarity_analysis_path = gr.Text(label=i18n("待分析的文件路径"), value="", interactive=False)
button_asr.click(asr, [text_work_space_dir, text_character, text_asr_audio_dir, dropdown_asr_model,
button_asr.click(asr, [text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
dropdown_asr_size, dropdown_asr_lang],
[text_asr_info, text_text_similarity_analysis_path])
with gr.Row():
button_text_similarity_analysis = gr.Button(i18n("启动文本相似度分析"), variant="primary")
text_text_similarity_analysis_info = gr.Text(label=i18n("文本相似度分析结果"), value="", interactive=False)
button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir, text_character,
button_text_similarity_analysis.click(text_similarity_analysis, [text_work_space_dir,
text_text_similarity_analysis_path],
[text_text_similarity_analysis_info])
gr.Markdown(value=i18n("3.3:根据相似度分析结果,重点检查最后几条是否存在复读等问题"))
@ -312,7 +308,7 @@ with gr.Blocks() as app:
button_similarity_audio_output = gr.Button(i18n("输出相似度-参考音频到临时目录"), variant="primary")
text_similarity_audio_output_info = gr.Text(label=i18n("输出结果"), value="", interactive=False)
button_similarity_audio_output.click(similarity_audio_output,
[text_work_space_dir, text_character, text_base_audio_path,
[text_work_space_dir, text_base_audio_path,
text_compare_audio_dir], [text_similarity_audio_output_info])
with gr.Row():
text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=False)
@ -320,7 +316,7 @@ with gr.Blocks() as app:
with gr.Row():
button_sync_ref_audio = gr.Button(i18n("将参考音频的删除情况,同步到推理音频目录"), variant="primary")
text_sync_ref_info = gr.Text(label=i18n("同步结果"), value="", interactive=False)
button_sync_ref_audio.click(sync_ref_audio, [text_work_space_dir, text_character, text_sync_ref_audio_dir,
button_sync_ref_audio.click(sync_ref_audio, [text_work_space_dir, text_sync_ref_audio_dir,
text_sync_inference_audio_dir], [text_sync_ref_info])
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
gr.Markdown(value=i18n("4.1:编辑模板"))
@ -332,9 +328,9 @@ with gr.Blocks() as app:
button_create_config = gr.Button(i18n("生成配置"), variant="primary")
text_create_config_info = gr.Text(label=i18n("生成结果"), value="", interactive=False)
button_create_config.click(create_config,
[text_work_space_dir, text_character, text_template, text_sync_ref_audio_dir2],
[text_work_space_dir, text_template, text_sync_ref_audio_dir2],
[text_create_config_info])
button_sample.click(sample, [text_work_space_dir, text_character, text_sample_dir, text_base_voice_path,
button_sample.click(sample, [text_work_space_dir, text_sample_dir, text_base_voice_path,
text_subsection_num, text_sample_num, checkbox_similarity_output],
[text_sample_info, text_model_inference_voice_dir, text_sync_ref_audio_dir,
text_sync_ref_audio_dir2])

View File

@ -1,12 +1,14 @@
import os
from config import python_exec,is_half
from config import python_exec, is_half
from tools import my_utils
from tools.asr.config import asr_dict
from subprocess import Popen
def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
global p_asr
if(p_asr==None):
asr_inp_dir=my_utils.clean_path(asr_inp_dir)
if (p_asr == None):
asr_inp_dir = my_utils.clean_path(asr_inp_dir)
asr_py_path = asr_dict[asr_model]["path"]
if asr_py_path == 'funasr_asr.py':
asr_py_path = 'funasr_asr_multi_level_dir.py'
@ -17,18 +19,18 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
cmd += f' -o "{asr_opt_dir}"'
cmd += f' -s {asr_model_size}'
cmd += f' -l {asr_lang}'
cmd += " -p %s"%("float16"if is_half==True else "float32")
cmd += " -p %s" % ("float16" if is_half == True else "float32")
print(cmd)
p_asr = Popen(cmd, shell=True)
p_asr.wait()
p_asr=None
p_asr = None
output_dir_abs = os.path.abspath(asr_opt_dir)
output_file_name = os.path.basename(asr_inp_dir)
# 构造输出文件路径
output_file_path = os.path.join(output_dir_abs, f'{output_file_name}.list')
return output_file_path
else:
return None
return None

View File

@ -2,6 +2,7 @@ import os
import requests
import urllib.parse
class URLComposer:
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
self.base_url = base_url
@ -9,18 +10,17 @@ class URLComposer:
self.text_param_name = text_param_name
self.ref_path_param_name = ref_path_param_name
self.ref_text_param_name = ref_text_param_name
def is_valid(self):
if self.base_url is None or self.base_url == '':
raise ValueError("请输入url")
if self.text_param_name is None or self.text_param_name == '':
raise ValueError("请输入text参数名")
if self.emotion_param_name is None and self.ref_path_param_name is None and self.ref_text_param_name is None:
raise ValueError("请输入至少一个参考or情绪的参数")
def is_emotion(self):
return self.emotion_param_name is not None and self.emotion_param_name != ''
@ -49,10 +49,9 @@ class URLComposer:
query_params = '&'.join([f"{k}={v}" for k, v in params.items()])
url_with_params += '?' + query_params if '?' not in self.base_url else '&' + query_params
return url_with_params
def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path):
def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path):
# Ensure the output directory exists
output_dir = Path(output_dir_path)
output_dir.mkdir(parents=True, exist_ok=True)
@ -65,12 +64,12 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
for text, emotion in zip(text_list, emotion_list):
# Generate audio byte stream using the create_audio function
if url_composer.is_emotion():
real_url = url_composer.build_url_with_emotion(text, emotion['emotion'])
else:
real_url = url_composer.build_url_with_ref(text, emotion['ref_path'], emotion['ref_text'])
audio_bytes = inference_audio_from_api(real_url)
emotion_name = emotion['emotion']
@ -88,11 +87,9 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
f.write(audio_bytes)
with open(emotion_file_path, 'wb') as f:
f.write(audio_bytes)
def inference_audio_from_api(url):
# 发起GET请求
response = requests.get(url, stream=True)
@ -101,4 +98,4 @@ def inference_audio_from_api(url):
# 返回音频数据的字节流
return response.content
else:
raise Exception(f"Failed to fetch audio from API. Server responded with status code {response.status_code}.")
raise Exception(f"Failed to fetch audio from API. Server responded with status code {response.status_code}.")

View File

@ -3,6 +3,7 @@ import shutil
from config import python_exec
from subprocess import Popen
def convert_from_list(list_file, output_dir):
# 创建输出目录,如果它不存在的话
if not os.path.exists(output_dir):
@ -61,15 +62,15 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
start = i * step
end = (i + 1) * step
end = min(end, len(similarity_list)) # 防止最后一段越界
num = min(sample_num, len(similarity_list[start:end]))
# 随机采样
random.shuffle(similarity_list[start:end])
sampled_subsection = similarity_list[start:start+num]
sampled_subsection = similarity_list[start:start + num]
# 创建并进入子目录
subdir_name = f'subsection_{i+1}'
subdir_name = f'subsection_{i + 1}'
subdir_path = os.path.join(output_audio_dir, subdir_name)
os.makedirs(subdir_path, exist_ok=True)
@ -82,9 +83,7 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
print("Sampling completed.")
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
similarity_list = None
similarity_dir = os.path.join(work_space_dir, 'similarity')
@ -94,7 +93,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
similarity_file = os.path.join(similarity_dir, f'{base_voice_file_name}.txt')
global p_similarity
if(p_similarity==None):
if (p_similarity == None):
cmd = f'"{python_exec}" tools/speaker_verification/voice_similarity.py '
cmd += f' -r "{base_voice_path}"'
cmd += f' -c "{sample_dir}"'
@ -109,7 +108,7 @@ def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_
similarity_file_dir = os.path.dirname(similarity_dir, base_voice_file_name)
ref_audio_opt.copy_and_move(similarity_file_dir, similarity_list)
p_similarity=None
p_similarity = None
return similarity_list, similarity_file, similarity_file_dir
else:
return similarity_list, None, None
@ -145,7 +144,6 @@ def parse_similarity_file(file_path):
def copy_and_move(output_audio_directory, similarity_scores):
# 确保新目录存在
if not os.path.exists(output_audio_directory):
os.makedirs(output_audio_directory)
@ -178,5 +176,3 @@ def get_filename_without_extension(file_path):
base_name = os.path.basename(file_path) # Get the base name (file name with extension)
file_name, file_extension = os.path.splitext(base_name) # Split the base name into file name and extension
return file_name # Return the file name without extension

View File

@ -1,3 +1,4 @@
import os
import torch
from transformers import AutoTokenizer, AutoModel
from scipy.spatial.distance import cosine