Config file generation, audio sampling, and audio inference testing

Downupanddownup 2024-04-25 00:22:58 +08:00
parent 4cbbe2a258
commit 2c8f6bd4c9
8 changed files with 169 additions and 88 deletions

View File

@ -19,6 +19,8 @@ class RefAudioListManager:
if file.endswith('.wav'):
# Convert the relative path into an absolute path
audio_abs_path = os.path.join(subdir, file)
if category not in self.audio_dict:
self.audio_dict[category] = []
self.audio_dict[category].append(audio_abs_path)
def get_audio_list(self):
@ -64,4 +66,30 @@ def read_text_file_to_list(file_path):
with open(file_path, mode='r', encoding='utf-8') as file:
# Read all lines and store them in a list
lines = file.read().splitlines()
return lines
return lines
def get_filename_without_extension(file_path):
"""
Given a file path string, returns the file name without its extension.
Parameters:
file_path (str): The full path to the file.
Returns:
str: The file name without its extension.
"""
base_name = os.path.basename(file_path) # Get the base name (file name with extension)
file_name, file_extension = os.path.splitext(base_name) # Split the base name into file name and extension
return file_name # Return the file name without extension
def read_file(file_path):
# Open and read the file using a with statement
with open(file_path, 'r', encoding='utf-8') as file: # 'r' opens the file in read mode
# Read the entire file content at once
file_content = file.read()
# The file is closed automatically when the with block ends
# file_content now holds the full text of the file
return file_content
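
A minimal usage sketch of the two helpers added above; the wav path is hypothetical, while the template path is the one used later in this diff, and the calls go through the common module exactly as the web UI does:

# Hedged sketch: the wav path below is invented, only the helper calls are from this diff
import Ref_Audio_Selector.common.common as common

name = common.get_filename_without_extension('D:/work/refer_audio/base_voice.wav')
print(name)  # -> 'base_voice'

template_str = common.read_file('Ref_Audio_Selector/tool/config_template/ref_audio_template.txt')
print(len(template_str))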

View File

@ -1,4 +1,5 @@
import os.path
import traceback
import gradio as gr
import Ref_Audio_Selector.tool.audio_similarity as audio_similarity
@ -7,9 +8,13 @@ import Ref_Audio_Selector.tool.audio_asr as audio_asr
import Ref_Audio_Selector.tool.audio_config as audio_config
import Ref_Audio_Selector.common.common as common
from tools.i18n.i18n import I18nAuto
from config import python_exec
from subprocess import Popen
i18n = I18nAuto()
p_similarity = None
# Validate basic info
def check_base_info(text_work_space_dir):
@ -28,11 +33,44 @@ def convert_from_list(text_work_space_dir, text_list_input):
raise Exception(i18n("list文件路径不能为空"))
audio_similarity.convert_from_list(text_list_input, ref_audio_all)
except Exception as e:
traceback.print_exc()
text_convert_from_list_info = f"发生异常:{e}"
text_sample_dir = ''
return [text_convert_from_list_info, text_sample_dir]
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
similarity_list = None
similarity_file_dir = None
similarity_dir = os.path.join(work_space_dir, 'similarity')
os.makedirs(similarity_dir, exist_ok=True)
base_voice_file_name = common.get_filename_without_extension(base_voice_path)
similarity_file = os.path.join(similarity_dir, f'{base_voice_file_name}.txt')
global p_similarity
if p_similarity is None:
cmd = f'"{python_exec}" Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py '
cmd += f' -r "{base_voice_path}"'
cmd += f' -c "{sample_dir}"'
cmd += f' -o "{similarity_file}"'
print(cmd)
p_similarity = Popen(cmd, shell=True)
p_similarity.wait()
if need_similarity_output:
similarity_list = audio_similarity.parse_similarity_file(similarity_file)
similarity_file_dir = os.path.join(similarity_dir, base_voice_file_name)
audio_similarity.copy_and_move(similarity_file_dir, similarity_list)
p_similarity = None
return similarity_list, similarity_file, similarity_file_dir
else:
return similarity_list, None, None
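
start_similarity_analysis shells out to the speaker-verification script via Popen rather than importing it; a hedged sketch of one call and the command it would assemble (all paths invented):

# Hedged sketch: the paths are invented; the command shape mirrors the cmd string above
similarity_list, similarity_file, similarity_file_dir = start_similarity_analysis(
    work_space_dir='D:/work/voice_a',
    sample_dir='D:/work/voice_a/refer_audio',
    base_voice_path='D:/work/voice_a/base.wav',
    need_similarity_output=True)
# Roughly spawns:
#   "<python_exec>" Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py \
#       -r "D:/work/voice_a/base.wav" -c "D:/work/voice_a/refer_audio" \
#       -o "D:/work/voice_a/similarity/base.txt"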
# Based on one base audio file, sample the reference audio directory in segments
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
text_subsection_num, text_sample_num, checkbox_similarity_output):
@ -49,15 +87,16 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
if text_sample_num is None or text_sample_num == '':
raise Exception(i18n("每段随机抽样个数不能为空"))
similarity_list = audio_similarity.start_similarity_analysis(text_work_space_dir, text_sample_dir,
text_base_voice_path, checkbox_similarity_output)
similarity_list, _, _ = start_similarity_analysis(text_work_space_dir, text_sample_dir,
text_base_voice_path, checkbox_similarity_output)
if similarity_list is None:
raise Exception(i18n("相似度分析失败"))
audio_similarity.sample(ref_audio_dir, similarity_list, text_subsection_num, text_sample_num)
audio_similarity.sample(ref_audio_dir, similarity_list, int(text_subsection_num), int(text_sample_num))
except Exception as e:
traceback.print_exc()
text_sample_info = f"发生异常:{e}"
ref_audio_dir = ''
text_model_inference_voice_dir = ref_audio_dir
@ -98,6 +137,7 @@ def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_ur
audio_inference.generate_audio_files(url_composer, text_list, ref_audio_manager.get_ref_audio_list(),
inference_dir)
except Exception as e:
traceback.print_exc()
text_model_inference_info = f"发生异常:{e}"
text_asr_audio_dir = ''
return [text_model_inference_info, text_asr_audio_dir]
@ -124,6 +164,7 @@ def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
text_text_similarity_analysis_path = asr_file
text_asr_info = f"asr成功生成文件{asr_file}"
except Exception as e:
traceback.print_exc()
text_asr_info = f"发生异常:{e}"
text_text_similarity_analysis_path = ''
return [text_asr_info, text_text_similarity_analysis_path]
@ -140,6 +181,7 @@ def text_similarity_analysis(text_work_space_dir,
raise Exception(i18n("asr生成的文件路径不能为空请先完成上一步操作"))
pass
except Exception as e:
traceback.print_exc()
text_text_similarity_analysis_info = f"发生异常:{e}"
return text_text_similarity_analysis_info
@ -154,7 +196,7 @@ def similarity_audio_output(text_work_space_dir, text_base_audio_path,
raise Exception(i18n("基准音频路径不能为空"))
if text_compare_audio_dir is None or text_compare_audio_dir == '':
raise Exception(i18n("待分析的音频所在目录不能为空"))
similarity_list, similarity_file, similarity_file_dir = audio_similarity.start_similarity_analysis(
similarity_list, similarity_file, similarity_file_dir = start_similarity_analysis(
text_work_space_dir, text_compare_audio_dir, text_base_audio_path, True)
if similarity_list is None:
@ -163,6 +205,7 @@ def similarity_audio_output(text_work_space_dir, text_base_audio_path,
text_similarity_audio_output_info = f'相似度分析成功:生成目录{similarity_file_dir},文件{similarity_file}'
except Exception as e:
traceback.print_exc()
text_similarity_audio_output_info = f"发生异常:{e}"
return text_similarity_audio_output_info
@ -179,6 +222,7 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
raise Exception(i18n("推理生成的音频目录不能为空"))
pass
except Exception as e:
traceback.print_exc()
text_sync_ref_audio_info = f"发生异常:{e}"
return text_sync_ref_audio_info
@ -194,8 +238,9 @@ def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
raise Exception(i18n("参考音频目录不能为空"))
ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
audio_config.generate_audio_config(text_template, ref_audio_manager.get_ref_audio_list(), config_file)
audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(), config_file)
except Exception as e:
traceback.print_exc()
text_create_config_info = f"发生异常:{e}"
return text_create_config_info
@ -234,14 +279,14 @@ with gr.Blocks() as app:
with gr.Accordion(label=i18n("第二步:基于参考音频和测试文本,执行批量推理"), open=False):
gr.Markdown(value=i18n("2.1:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步,"
"将参考音频打包配置到推理服务下,在推理前,请确认完整请求地址是否与正常使用时的一致,包括角色名称,尤其是文本分隔符是否正确"))
text_model_inference_voice_dir = gr.Text(label=i18n("待推理的参考音频所在目录"), value="", interactive=False)
text_model_inference_voice_dir = gr.Text(label=i18n("待推理的参考音频所在目录"), value="", interactive=True)
text_url = gr.Text(label=i18n("请输入推理服务请求地址与参数"), value="")
with gr.Row():
text_text = gr.Text(label=i18n("请输入文本参数名"), value="text")
text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"), value="text")
text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"), value="text")
text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value="text")
text_whole_url = gr.Text(label=i18n("完整地址"), value="5555555555555555", interactive=False)
text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"), value="")
text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"), value="")
text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value="emotion")
text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False)
text_url.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
[text_whole_url])
text_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
@ -253,7 +298,8 @@ with gr.Blocks() as app:
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
[text_whole_url])
gr.Markdown(value=i18n("2.2配置待推理文本一句一行不要太多10条即可"))
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value="text")
default_test_content_path = 'Ref_Audio_Selector/tool/test_content/test_content.txt'
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
with gr.Row():
@ -311,7 +357,7 @@ with gr.Blocks() as app:
[text_work_space_dir, text_base_audio_path,
text_compare_audio_dir], [text_similarity_audio_output_info])
with gr.Row():
text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=False)
text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=True)
text_sync_inference_audio_dir = gr.Text(label=i18n("被同步的推理音频路径"), value="", interactive=False)
with gr.Row():
button_sync_ref_audio = gr.Button(i18n("将参考音频的删除情况,同步到推理音频目录"), variant="primary")
@ -320,10 +366,12 @@ with gr.Blocks() as app:
text_sync_inference_audio_dir], [text_sync_ref_info])
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
gr.Markdown(value=i18n("4.1:编辑模板"))
text_template_path = gr.Text(label=i18n("模板文件路径"), value="", interactive=False)
text_template = gr.Text(label=i18n("模板内容"), value="text", lines=10)
default_template_path = 'Ref_Audio_Selector/tool/config_template/ref_audio_template.txt'
default_template_content = common.read_file(default_template_path)
text_template_path = gr.Text(label=i18n("模板文件路径"), value=default_template_path, interactive=False)
text_template = gr.Text(label=i18n("模板内容"), value=default_template_content, lines=10)
gr.Markdown(value=i18n("4.2:生成配置"))
text_sync_ref_audio_dir2 = gr.Text(label=i18n("参考音频路径"), value="", interactive=False)
text_sync_ref_audio_dir2 = gr.Text(label=i18n("参考音频路径"), value="", interactive=True)
with gr.Row():
button_create_config = gr.Button(i18n("生成配置"), variant="primary")
text_create_config_info = gr.Text(label=i18n("生成结果"), value="", interactive=False)

View File

@ -1,7 +1,9 @@
import os
import platform
from tools import my_utils
def generate_audio_config(template_str, audio_list, output_file_path):
def generate_audio_config(work_space_dir, template_str, audio_list, output_file_path):
# Start with an empty string that accumulates the content to be written to the file
file_content = ""
@ -11,8 +13,12 @@ def generate_audio_config(template_str, audio_list, output_file_path):
ref_path = audio_info['ref_path']
ref_text = audio_info['ref_text']
relative_path = os.path.relpath(ref_path, work_space_dir)
if platform.system() == 'Windows':
relative_path = relative_path.replace('\\', '/')
# Substitute the variables in the string template
formatted_line = template_str.replace('${emotion}', emotion).replace('${ref_path}', ref_path).replace(
formatted_line = template_str.replace('${emotion}', emotion).replace('${ref_path}', relative_path).replace(
'${ref_text}', ref_text)
# Append the formatted line to the content, separated by a comma and a newline

View File

@ -1,11 +1,12 @@
import os
import requests
import urllib.parse
from pathlib import Path
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
class URLComposer:
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
self.base_url = base_url
self.base_url = safe_encode_query_params(base_url)
self.emotion_param_name = emotion_param_name
self.text_param_name = text_param_name
self.ref_path_param_name = ref_path_param_name
@ -28,8 +29,8 @@ class URLComposer:
if not self.emotion_param_name:
raise ValueError("Emotion parameter name is not set.")
params = {
self.text_param_name: urllib.parse.quote(text_value),
self.emotion_param_name: urllib.parse.quote(emotion_value),
self.text_param_name: quote(text_value),
self.emotion_param_name: quote(emotion_value),
}
return self._append_params_to_url(params)
@ -37,9 +38,9 @@ class URLComposer:
if self.emotion_param_name:
raise ValueError("Cannot use reference parameters when emotion parameter is set.")
params = {
self.text_param_name: urllib.parse.quote(text_value),
self.ref_path_param_name: urllib.parse.quote(ref_path_value),
self.ref_text_param_name: urllib.parse.quote(ref_text_value),
self.text_param_name: quote(text_value),
self.ref_path_param_name: quote(ref_path_value),
self.ref_text_param_name: quote(ref_text_value),
}
return self._append_params_to_url(params)
@ -51,16 +52,36 @@ class URLComposer:
return url_with_params
def safe_encode_query_params(original_url):
# Parse the URL to extract its query string
parsed_url = urlparse(original_url)
query_params = parse_qs(parsed_url.query)
# Keep the first value of each parameter; urlencode below percent-encodes it exactly once
single_value_params = {k: v[0] for k, v in query_params.items()}
# Re-encode the query string
new_query_string = urlencode(single_value_params, doseq=False)
# Rebuild the full URL
new_parsed_url = parsed_url._replace(query=new_query_string)
encoded_url = urlunparse(new_parsed_url)
print(encoded_url)
return encoded_url
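
A short, hedged example of what safe_encode_query_params does to an inference URL whose query values contain raw Chinese; the URL itself is made up, and with the single-pass encoding above each value is percent-encoded once:

# Hypothetical inference URL with raw Chinese query values
url = 'http://127.0.0.1:9880?text=你好&text_language=中文'
print(safe_encode_query_params(url))
# -> http://127.0.0.1:9880?text=%E4%BD%A0%E5%A5%BD&text_language=%E4%B8%AD%E6%96%87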
def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path):
# Ensure the output directory exists
output_dir = Path(output_dir_path)
output_dir.mkdir(parents=True, exist_ok=True)
output_dir = os.path.abspath(output_dir_path)
os.makedirs(output_dir, exist_ok=True)
# Create subdirectories for text and emotion categories
text_subdir = os.path.join(output_dir, 'text')
text_subdir.mkdir(exist_ok=True)
os.makedirs(text_subdir, exist_ok=True)
emotion_subdir = os.path.join(output_dir, 'emotion')
emotion_subdir.mkdir(exist_ok=True)
os.makedirs(emotion_subdir, exist_ok=True)
for text, emotion in zip(text_list, emotion_list):
# Generate audio byte stream using the create_audio function
@ -74,18 +95,18 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
emotion_name = emotion['emotion']
# Save audio files in both directories with the desired structure
text_file_path = os.path.join(text_subdir, text, emotion_name, '.wav')
emotion_file_path = os.path.join(emotion_subdir, emotion_name, text, '.wav')
text_subdir_text = os.path.join(text_subdir, text)
os.makedirs(text_subdir_text, exist_ok=True)
text_subdir_text_file_path = os.path.join(text_subdir_text, emotion_name + '.wav')
# Ensure intermediate directories for nested file paths exist
text_file_path.parent.mkdir(parents=True, exist_ok=True)
emotion_file_path.parent.mkdir(parents=True, exist_ok=True)
emotion_subdir_emotion = os.path.join(emotion_subdir, emotion_name)
os.makedirs(emotion_subdir_emotion, exist_ok=True)
emotion_subdir_emotion_file_path = os.path.join(emotion_subdir_emotion, text + '.wav')
# Write audio bytes to the respective files
with open(text_file_path, 'wb') as f:
with open(text_subdir_text_file_path, 'wb') as f:
f.write(audio_bytes)
with open(emotion_file_path, 'wb') as f:
with open(emotion_subdir_emotion_file_path, 'wb') as f:
f.write(audio_bytes)
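
The loop writes every generated clip twice, once grouped by text and once by emotion, so results can be auditioned either per sentence or per reference emotion. With two test sentences and one emotion the output tree would look roughly like this (a sketch, not produced by running the code; names are examples only):

# Illustrative output layout of generate_audio_files
# <output_dir_path>/
#   text/
#     你知道这不可能!/
#       emotion_1.wav
#     如果有人故意破坏呢?/
#       emotion_1.wav
#   emotion/
#     emotion_1/
#       你知道这不可能!.wav
#       如果有人故意破坏呢?.wav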

View File

@ -1,7 +1,7 @@
import os
import shutil
from config import python_exec
from subprocess import Popen
import random
def convert_from_list(list_file, output_dir):
@ -70,7 +70,7 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
sampled_subsection = similarity_list[start:start + num]
# Create the subdirectory for this segment
subdir_name = f'subsection_{i + 1}'
subdir_name = f'emotion_{i + 1}'
subdir_path = os.path.join(output_audio_dir, subdir_name)
os.makedirs(subdir_path, exist_ok=True)
@ -83,37 +83,6 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
print("Sampling completed.")
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
similarity_list = None
similarity_dir = os.path.join(work_space_dir, 'similarity')
os.makedirs(similarity_dir, exist_ok=True)
base_voice_file_name = ref_audio_opt.get_filename_without_extension(base_voice_path)
similarity_file = os.path.join(similarity_dir, f'{base_voice_file_name}.txt')
global p_similarity
if (p_similarity == None):
cmd = f'"{python_exec}" tools/speaker_verification/voice_similarity.py '
cmd += f' -r "{base_voice_path}"'
cmd += f' -c "{sample_dir}"'
cmd += f' -o {similarity_file}'
print(cmd)
p_similarity = Popen(cmd, shell=True)
p_similarity.wait()
if need_similarity_output:
similarity_list = ref_audio_opt.parse_similarity_file(similarity_file)
similarity_file_dir = os.path.dirname(similarity_dir, base_voice_file_name)
ref_audio_opt.copy_and_move(similarity_file_dir, similarity_list)
p_similarity = None
return similarity_list, similarity_file, similarity_file_dir
else:
return similarity_list, None, None
def parse_similarity_file(file_path):
"""
Parse the given text file and store its contents as a list of tuples.
@ -126,7 +95,7 @@ def parse_similarity_file(file_path):
"""
result_list = []
with open(file_path, 'r') as file:
with open(file_path, 'r', encoding='utf-8') as file:
for line in file:
# Strip the trailing newline and split on '|'
score, filepath = line.strip().split('|')
@ -163,16 +132,6 @@ def copy_and_move(output_audio_directory, similarity_scores):
print("已完成复制和重命名操作。")
def get_filename_without_extension(file_path):
"""
Given a file path string, returns the file name without its extension.
Parameters:
file_path (str): The full path to the file.
Returns:
str: The file name without its extension.
"""
base_name = os.path.basename(file_path) # Get the base name (file name with extension)
file_name, file_extension = os.path.splitext(base_name) # Split the base name into file name and extension
return file_name # Return the file name without extension
if __name__ == '__main__':
similarity_list = parse_similarity_file("D:/tt/similarity/啊,除了伊甸和樱,竟然还有其他人会提起我?.txt")
sample('D:/tt/similarity/output', similarity_list, 10, 4)
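
The similarity file parsed here is plain text with one "score|path" record per line, written by voice_similarity.py below; a hedged sketch of the format and how it is consumed, with invented scores and paths:

# Hypothetical contents of D:/tt/similarity/base.txt, one "score|path" per line:
#   0.9123|D:/tt/refer_audio/emotion_1/sample_001.wav
#   0.8571|D:/tt/refer_audio/emotion_2/sample_014.wav
similarity_list = parse_similarity_file('D:/tt/similarity/base.txt')
# similarity_list holds one (score, path) entry per line; the writer sorts entries by descending score
sample('D:/tt/similarity/output', similarity_list, subsection_num=10, sample_num=4)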

View File

@ -0,0 +1,5 @@
"${emotion}": {
"ref_wav_path": "${ref_path}",
"prompt_text": "${ref_text}",
"prompt_language": "中文"
}
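
generate_audio_config substitutes ${emotion}, ${ref_path} and ${ref_text} in the template above for each reference audio and joins the entries with a comma and newline; ref_wav_path is made relative to the work space (forward slashes on Windows). One rendered entry might look like this, with all values purely illustrative:

"emotion_1": {
    "ref_wav_path": "refer_audio/emotion_1/sample_001.wav",
    "prompt_text": "这是一句示例参考文本",
    "prompt_language": "中文"
},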

View File

@ -4,7 +4,7 @@ import os
from modelscope.pipelines import pipeline
sv_pipeline = pipeline(
task='speaker-verification',
model='/tools/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
model_revision='v1.0.0'
)
@ -21,6 +21,7 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
'score': score,
'path': audio_path
})
print(f'similarity score: {score}, path: {audio_path}')
# Step 3: sort by similarity score in descending order
similarity_scores.sort(key=lambda x: x['score'], reverse=True)
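
For context, score comes from the ModelScope speaker-verification pipeline defined at the top of this file; a minimal sketch of one pairwise comparison, assuming the pipeline accepts a [reference, candidate] pair of wav paths and returns a dict with a 'score' field (paths invented):

# Hedged sketch: assumes sv_pipeline([ref_wav, cand_wav]) -> {'score': ...}; paths are invented
result = sv_pipeline(['D:/tt/base.wav', 'D:/tt/refer_audio/emotion_1/sample_001.wav'])
print(result['score'])  # higher score means the two voices are more alike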
@ -57,8 +58,9 @@ def parse_arguments():
if __name__ == '__main__':
cmd = parse_arguments()
print(cmd)
compare_audio_and_generate_report(
reference_audio_path = cmd.reference_audio,
comparison_dir = cmd.comparison_dir,
output_file = cmd.output_file,
comparison_dir_path = cmd.comparison_dir,
output_file_path = cmd.output_file,
)

View File

@ -0,0 +1,12 @@
你知道这不可能!
如果有人故意破坏呢?
也不可能!同时改变三颗卫星和一个地面观测站的数据?那这破坏也有些超自然了。
汪淼点点头,比起宇宙闪烁来,他宁愿接受这个超自然。但沙瑞山立刻抽走了他怀中这唯一的一根救命稻草。
要想最终证实这一切,其实很简单。宇宙背景辐射这样幅度的波动,已经大到我们能用肉眼觉察的程度。
你胡说什么现在是你在违反常识了背景辐射的波长是7厘米比可见光大了七八个数量级怎么能看到
用特制眼镜。
特制眼镜?
是我们为首都天文馆做的一个科普小玩意儿。现在的技术,已经能将彭齐阿斯和威尔逊在四十多年前用于发现特制背景辐射的二十英尺的喇叭形天线做成眼镜大小,
并且在这个眼镜中设置一个转换系统将接收到的背景辐射的波长压缩七个数量级将7厘米波转换成红光。
这样,观众在夜里戴上这种眼镜,就能亲眼看到宇宙的特制背景辐射,现在,也能看到宇宙闪烁。
这东西现在哪儿?能告诉我吗