mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-05-21 22:02:32 +08:00
配置文件生成、音频抽样、音频推理测试
This commit is contained in:
parent
4cbbe2a258
commit
2c8f6bd4c9
@ -19,6 +19,8 @@ class RefAudioListManager:
|
|||||||
if file.endswith('.wav'):
|
if file.endswith('.wav'):
|
||||||
# 将相对路径转换为绝对路径
|
# 将相对路径转换为绝对路径
|
||||||
audio_abs_path = os.path.join(subdir, file)
|
audio_abs_path = os.path.join(subdir, file)
|
||||||
|
if category not in self.audio_dict:
|
||||||
|
self.audio_dict[category] = []
|
||||||
self.audio_dict[category].append(audio_abs_path)
|
self.audio_dict[category].append(audio_abs_path)
|
||||||
|
|
||||||
def get_audio_list(self):
|
def get_audio_list(self):
|
||||||
@ -64,4 +66,30 @@ def read_text_file_to_list(file_path):
|
|||||||
with open(file_path, mode='r', encoding='utf-8') as file:
|
with open(file_path, mode='r', encoding='utf-8') as file:
|
||||||
# 读取所有行并存储到一个列表中
|
# 读取所有行并存储到一个列表中
|
||||||
lines = file.read().splitlines()
|
lines = file.read().splitlines()
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def get_filename_without_extension(file_path):
    """
    Return the last component of a path with its final extension removed.

    Parameters:
        file_path (str): Path to a file. The file does not need to exist;
            the path is only processed as a string.

    Returns:
        str: The base name without its extension. Only the last suffix is
        dropped, so ``"a.tar.gz"`` yields ``"a.tar"``; a name without any
        extension is returned unchanged.
    """
    base_name = os.path.basename(file_path)  # file name including extension
    # splitext returns (stem, extension); only the stem is needed here.
    return os.path.splitext(base_name)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def read_file(file_path):
    """
    Read a UTF-8 text file and return its whole content as a single string.

    Parameters:
        file_path (str): Path of the file to read.

    Returns:
        str: The complete text content of the file.
    """
    # The context manager closes the handle as soon as the read has finished.
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()
|
@ -1,4 +1,5 @@
|
|||||||
import os.path
|
import os.path
|
||||||
|
import traceback
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import Ref_Audio_Selector.tool.audio_similarity as audio_similarity
|
import Ref_Audio_Selector.tool.audio_similarity as audio_similarity
|
||||||
@ -7,9 +8,13 @@ import Ref_Audio_Selector.tool.audio_asr as audio_asr
|
|||||||
import Ref_Audio_Selector.tool.audio_config as audio_config
|
import Ref_Audio_Selector.tool.audio_config as audio_config
|
||||||
import Ref_Audio_Selector.common.common as common
|
import Ref_Audio_Selector.common.common as common
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
|
from config import python_exec
|
||||||
|
from subprocess import Popen
|
||||||
|
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
|
|
||||||
|
p_similarity = None
|
||||||
|
|
||||||
|
|
||||||
# 校验基础信息
|
# 校验基础信息
|
||||||
def check_base_info(text_work_space_dir):
|
def check_base_info(text_work_space_dir):
|
||||||
@ -28,11 +33,44 @@ def convert_from_list(text_work_space_dir, text_list_input):
|
|||||||
raise Exception(i18n("list文件路径不能为空"))
|
raise Exception(i18n("list文件路径不能为空"))
|
||||||
audio_similarity.convert_from_list(text_list_input, ref_audio_all)
|
audio_similarity.convert_from_list(text_list_input, ref_audio_all)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_convert_from_list_info = f"发生异常:{e}"
|
text_convert_from_list_info = f"发生异常:{e}"
|
||||||
text_sample_dir = ''
|
text_sample_dir = ''
|
||||||
return [text_convert_from_list_info, text_sample_dir]
|
return [text_convert_from_list_info, text_sample_dir]
|
||||||
|
|
||||||
|
|
||||||
|
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
    """
    Run the voice-similarity script in a child process and load its result.

    The script ``voice_similarity.py`` is started with the base voice as
    ``-r``, the directory to compare as ``-c`` and a result file under
    ``<work_space_dir>/similarity/<base voice name>.txt`` as ``-o``. The
    module-level ``p_similarity`` handle acts as a "busy" flag so that only
    one analysis runs at a time.

    Parameters:
        work_space_dir (str): Work-space directory; a ``similarity``
            sub-directory is created inside it.
        sample_dir (str): Directory with the audio files to compare.
        base_voice_path (str): Path of the reference audio file.
        need_similarity_output (bool): When true, the scored audio files are
            additionally exported via ``audio_similarity.copy_and_move`` into
            ``<similarity dir>/<base voice name>``.

    Returns:
        tuple: ``(similarity_list, similarity_file, similarity_file_dir)``.
        ``similarity_file_dir`` is ``None`` unless ``need_similarity_output``
        is true. If another analysis is already running the result is
        ``(None, None, None)``.

    Raises:
        Exception: If the child process exits with a non-zero status.
    """
    global p_similarity

    similarity_list = None
    similarity_file_dir = None

    similarity_dir = os.path.join(work_space_dir, 'similarity')
    os.makedirs(similarity_dir, exist_ok=True)

    base_voice_file_name = common.get_filename_without_extension(base_voice_path)
    similarity_file = os.path.join(similarity_dir, f'{base_voice_file_name}.txt')

    if p_similarity is not None:
        # An analysis is already in progress; report "no result".
        return similarity_list, None, None

    # Pass the arguments as a list (no shell) so that paths containing
    # spaces, quotes or shell metacharacters reach the script unchanged.
    # The original command quoted python_exec as one token, so it is
    # treated here as a plain executable path.
    cmd = [
        python_exec,
        'Ref_Audio_Selector/tool/speaker_verification/voice_similarity.py',
        '-r', base_voice_path,
        '-c', sample_dir,
        '-o', similarity_file,
    ]
    print(cmd)

    try:
        p_similarity = Popen(cmd)
        return_code = p_similarity.wait()
        if return_code != 0:
            # Do not parse a missing or stale result file from an earlier run.
            raise Exception(i18n("相似度分析失败"))

        # The list is always needed by callers (e.g. sampling); only the
        # export of the scored audio files is optional.
        similarity_list = audio_similarity.parse_similarity_file(similarity_file)

        if need_similarity_output:
            similarity_file_dir = os.path.join(similarity_dir, base_voice_file_name)
            audio_similarity.copy_and_move(similarity_file_dir, similarity_list)
    finally:
        # Always clear the busy flag, otherwise one failure would block
        # every later analysis.
        p_similarity = None

    return similarity_list, similarity_file, similarity_file_dir
|
||||||
|
|
||||||
|
|
||||||
# 基于一个基准音频,从参考音频目录中进行分段抽样
|
# 基于一个基准音频,从参考音频目录中进行分段抽样
|
||||||
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
||||||
text_subsection_num, text_sample_num, checkbox_similarity_output):
|
text_subsection_num, text_sample_num, checkbox_similarity_output):
|
||||||
@ -49,15 +87,16 @@ def sample(text_work_space_dir, text_sample_dir, text_base_voice_path,
|
|||||||
if text_sample_num is None or text_sample_num == '':
|
if text_sample_num is None or text_sample_num == '':
|
||||||
raise Exception(i18n("每段随机抽样个数不能为空"))
|
raise Exception(i18n("每段随机抽样个数不能为空"))
|
||||||
|
|
||||||
similarity_list = audio_similarity.start_similarity_analysis(text_work_space_dir, text_sample_dir,
|
similarity_list, _, _ = start_similarity_analysis(text_work_space_dir, text_sample_dir,
|
||||||
text_base_voice_path, checkbox_similarity_output)
|
text_base_voice_path, checkbox_similarity_output)
|
||||||
|
|
||||||
if similarity_list is None:
|
if similarity_list is None:
|
||||||
raise Exception(i18n("相似度分析失败"))
|
raise Exception(i18n("相似度分析失败"))
|
||||||
|
|
||||||
audio_similarity.sample(ref_audio_dir, similarity_list, text_subsection_num, text_sample_num)
|
audio_similarity.sample(ref_audio_dir, similarity_list, int(text_subsection_num), int(text_sample_num))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_sample_info = f"发生异常:{e}"
|
text_sample_info = f"发生异常:{e}"
|
||||||
ref_audio_dir = ''
|
ref_audio_dir = ''
|
||||||
text_model_inference_voice_dir = ref_audio_dir
|
text_model_inference_voice_dir = ref_audio_dir
|
||||||
@ -98,6 +137,7 @@ def model_inference(text_work_space_dir, text_model_inference_voice_dir, text_ur
|
|||||||
audio_inference.generate_audio_files(url_composer, text_list, ref_audio_manager.get_ref_audio_list(),
|
audio_inference.generate_audio_files(url_composer, text_list, ref_audio_manager.get_ref_audio_list(),
|
||||||
inference_dir)
|
inference_dir)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_model_inference_info = f"发生异常:{e}"
|
text_model_inference_info = f"发生异常:{e}"
|
||||||
text_asr_audio_dir = ''
|
text_asr_audio_dir = ''
|
||||||
return [text_model_inference_info, text_asr_audio_dir]
|
return [text_model_inference_info, text_asr_audio_dir]
|
||||||
@ -124,6 +164,7 @@ def asr(text_work_space_dir, text_asr_audio_dir, dropdown_asr_model,
|
|||||||
text_text_similarity_analysis_path = asr_file
|
text_text_similarity_analysis_path = asr_file
|
||||||
text_asr_info = f"asr成功:生成文件{asr_file}"
|
text_asr_info = f"asr成功:生成文件{asr_file}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_asr_info = f"发生异常:{e}"
|
text_asr_info = f"发生异常:{e}"
|
||||||
text_text_similarity_analysis_path = ''
|
text_text_similarity_analysis_path = ''
|
||||||
return [text_asr_info, text_text_similarity_analysis_path]
|
return [text_asr_info, text_text_similarity_analysis_path]
|
||||||
@ -140,6 +181,7 @@ def text_similarity_analysis(text_work_space_dir,
|
|||||||
raise Exception(i18n("asr生成的文件路径不能为空,请先完成上一步操作"))
|
raise Exception(i18n("asr生成的文件路径不能为空,请先完成上一步操作"))
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_text_similarity_analysis_info = f"发生异常:{e}"
|
text_text_similarity_analysis_info = f"发生异常:{e}"
|
||||||
return text_text_similarity_analysis_info
|
return text_text_similarity_analysis_info
|
||||||
|
|
||||||
@ -154,7 +196,7 @@ def similarity_audio_output(text_work_space_dir, text_base_audio_path,
|
|||||||
raise Exception(i18n("基准音频路径不能为空"))
|
raise Exception(i18n("基准音频路径不能为空"))
|
||||||
if text_compare_audio_dir is None or text_compare_audio_dir == '':
|
if text_compare_audio_dir is None or text_compare_audio_dir == '':
|
||||||
raise Exception(i18n("待分析的音频所在目录不能为空"))
|
raise Exception(i18n("待分析的音频所在目录不能为空"))
|
||||||
similarity_list, similarity_file, similarity_file_dir = audio_similarity.start_similarity_analysis(
|
similarity_list, similarity_file, similarity_file_dir = start_similarity_analysis(
|
||||||
text_work_space_dir, text_compare_audio_dir, text_base_audio_path, True)
|
text_work_space_dir, text_compare_audio_dir, text_base_audio_path, True)
|
||||||
|
|
||||||
if similarity_list is None:
|
if similarity_list is None:
|
||||||
@ -163,6 +205,7 @@ def similarity_audio_output(text_work_space_dir, text_base_audio_path,
|
|||||||
text_similarity_audio_output_info = f'相似度分析成功:生成目录{similarity_file_dir},文件{similarity_file}'
|
text_similarity_audio_output_info = f'相似度分析成功:生成目录{similarity_file_dir},文件{similarity_file}'
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_similarity_audio_output_info = f"发生异常:{e}"
|
text_similarity_audio_output_info = f"发生异常:{e}"
|
||||||
return text_similarity_audio_output_info
|
return text_similarity_audio_output_info
|
||||||
|
|
||||||
@ -179,6 +222,7 @@ def sync_ref_audio(text_work_space_dir, text_sync_ref_audio_dir,
|
|||||||
raise Exception(i18n("推理生成的音频目录不能为空"))
|
raise Exception(i18n("推理生成的音频目录不能为空"))
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_sync_ref_audio_info = f"发生异常:{e}"
|
text_sync_ref_audio_info = f"发生异常:{e}"
|
||||||
return text_sync_ref_audio_info
|
return text_sync_ref_audio_info
|
||||||
|
|
||||||
@ -194,8 +238,9 @@ def create_config(text_work_space_dir, text_template, text_sync_ref_audio_dir2):
|
|||||||
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
|
if text_sync_ref_audio_dir2 is None or text_sync_ref_audio_dir2 == '':
|
||||||
raise Exception(i18n("参考音频目录不能为空"))
|
raise Exception(i18n("参考音频目录不能为空"))
|
||||||
ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
|
ref_audio_manager = common.RefAudioListManager(text_sync_ref_audio_dir2)
|
||||||
audio_config.generate_audio_config(text_template, ref_audio_manager.get_ref_audio_list(), config_file)
|
audio_config.generate_audio_config(text_work_space_dir, text_template, ref_audio_manager.get_ref_audio_list(), config_file)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
traceback.print_exc()
|
||||||
text_create_config_info = f"发生异常:{e}"
|
text_create_config_info = f"发生异常:{e}"
|
||||||
return text_create_config_info
|
return text_create_config_info
|
||||||
|
|
||||||
@ -234,14 +279,14 @@ with gr.Blocks() as app:
|
|||||||
with gr.Accordion(label=i18n("第二步:基于参考音频和测试文本,执行批量推理"), open=False):
|
with gr.Accordion(label=i18n("第二步:基于参考音频和测试文本,执行批量推理"), open=False):
|
||||||
gr.Markdown(value=i18n("2.1:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步,"
|
gr.Markdown(value=i18n("2.1:配置推理服务参数信息,参考音频路径/文本和角色情绪二选一,如果是角色情绪,需要先执行第四步,"
|
||||||
"将参考音频打包配置到推理服务下,在推理前,请确认完整请求地址是否与正常使用时的一致,包括角色名称,尤其是文本分隔符是否正确"))
|
"将参考音频打包配置到推理服务下,在推理前,请确认完整请求地址是否与正常使用时的一致,包括角色名称,尤其是文本分隔符是否正确"))
|
||||||
text_model_inference_voice_dir = gr.Text(label=i18n("待推理的参考音频所在目录"), value="", interactive=False)
|
text_model_inference_voice_dir = gr.Text(label=i18n("待推理的参考音频所在目录"), value="", interactive=True)
|
||||||
text_url = gr.Text(label=i18n("请输入推理服务请求地址与参数"), value="")
|
text_url = gr.Text(label=i18n("请输入推理服务请求地址与参数"), value="")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
text_text = gr.Text(label=i18n("请输入文本参数名"), value="text")
|
text_text = gr.Text(label=i18n("请输入文本参数名"), value="text")
|
||||||
text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"), value="text")
|
text_ref_path = gr.Text(label=i18n("请输入参考音频路径参数名"), value="")
|
||||||
text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"), value="text")
|
text_ref_text = gr.Text(label=i18n("请输入参考音频文本参数名"), value="")
|
||||||
text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value="text")
|
text_emotion = gr.Text(label=i18n("请输入角色情绪参数名"), value="emotion")
|
||||||
text_whole_url = gr.Text(label=i18n("完整地址"), value="5555555555555555", interactive=False)
|
text_whole_url = gr.Text(label=i18n("完整地址"), value="", interactive=False)
|
||||||
text_url.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
text_url.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
||||||
[text_whole_url])
|
[text_whole_url])
|
||||||
text_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
text_text.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
||||||
@ -253,7 +298,8 @@ with gr.Blocks() as app:
|
|||||||
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
text_emotion.input(whole_url, [text_url, text_text, text_ref_path, text_ref_text, text_emotion],
|
||||||
[text_whole_url])
|
[text_whole_url])
|
||||||
gr.Markdown(value=i18n("2.2:配置待推理文本,一句一行,不要太多,10条即可"))
|
gr.Markdown(value=i18n("2.2:配置待推理文本,一句一行,不要太多,10条即可"))
|
||||||
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value="text")
|
default_test_content_path = 'Ref_Audio_Selector/tool/test_content/test_content.txt'
|
||||||
|
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
|
||||||
gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
|
gr.Markdown(value=i18n("2.3:启动推理服务,如果还没启动的话"))
|
||||||
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
@ -311,7 +357,7 @@ with gr.Blocks() as app:
|
|||||||
[text_work_space_dir, text_base_audio_path,
|
[text_work_space_dir, text_base_audio_path,
|
||||||
text_compare_audio_dir], [text_similarity_audio_output_info])
|
text_compare_audio_dir], [text_similarity_audio_output_info])
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=False)
|
text_sync_ref_audio_dir = gr.Text(label=i18n("参考音频路径"), value="", interactive=True)
|
||||||
text_sync_inference_audio_dir = gr.Text(label=i18n("被同步的推理音频路径"), value="", interactive=False)
|
text_sync_inference_audio_dir = gr.Text(label=i18n("被同步的推理音频路径"), value="", interactive=False)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
button_sync_ref_audio = gr.Button(i18n("将参考音频的删除情况,同步到推理音频目录"), variant="primary")
|
button_sync_ref_audio = gr.Button(i18n("将参考音频的删除情况,同步到推理音频目录"), variant="primary")
|
||||||
@ -320,10 +366,12 @@ with gr.Blocks() as app:
|
|||||||
text_sync_inference_audio_dir], [text_sync_ref_info])
|
text_sync_inference_audio_dir], [text_sync_ref_info])
|
||||||
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
|
with gr.Accordion("第四步:生成参考音频配置文本", open=False):
|
||||||
gr.Markdown(value=i18n("4.1:编辑模板"))
|
gr.Markdown(value=i18n("4.1:编辑模板"))
|
||||||
text_template_path = gr.Text(label=i18n("模板文件路径"), value="", interactive=False)
|
default_template_path = 'Ref_Audio_Selector/tool/config_template/ref_audio_template.txt'
|
||||||
text_template = gr.Text(label=i18n("模板内容"), value="text", lines=10)
|
default_template_content = common.read_file(default_template_path)
|
||||||
|
text_template_path = gr.Text(label=i18n("模板文件路径"), value=default_template_path, interactive=False)
|
||||||
|
text_template = gr.Text(label=i18n("模板内容"), value=default_template_content, lines=10)
|
||||||
gr.Markdown(value=i18n("4.2:生成配置"))
|
gr.Markdown(value=i18n("4.2:生成配置"))
|
||||||
text_sync_ref_audio_dir2 = gr.Text(label=i18n("参考音频路径"), value="", interactive=False)
|
text_sync_ref_audio_dir2 = gr.Text(label=i18n("参考音频路径"), value="", interactive=True)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
button_create_config = gr.Button(i18n("生成配置"), variant="primary")
|
button_create_config = gr.Button(i18n("生成配置"), variant="primary")
|
||||||
text_create_config_info = gr.Text(label=i18n("生成结果"), value="", interactive=False)
|
text_create_config_info = gr.Text(label=i18n("生成结果"), value="", interactive=False)
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
|
from tools import my_utils
|
||||||
|
|
||||||
|
|
||||||
def generate_audio_config(template_str, audio_list, output_file_path):
|
def generate_audio_config(work_space_dir, template_str, audio_list, output_file_path):
|
||||||
# 定义一个空字符串来存储最终要写入文件的内容
|
# 定义一个空字符串来存储最终要写入文件的内容
|
||||||
file_content = ""
|
file_content = ""
|
||||||
|
|
||||||
@ -11,8 +13,12 @@ def generate_audio_config(template_str, audio_list, output_file_path):
|
|||||||
ref_path = audio_info['ref_path']
|
ref_path = audio_info['ref_path']
|
||||||
ref_text = audio_info['ref_text']
|
ref_text = audio_info['ref_text']
|
||||||
|
|
||||||
|
relative_path = os.path.relpath(ref_path, work_space_dir)
|
||||||
|
if platform.system() == 'Windows':
|
||||||
|
relative_path = relative_path.replace('\\', '/')
|
||||||
|
|
||||||
# 使用字符串模板替换变量
|
# 使用字符串模板替换变量
|
||||||
formatted_line = template_str.replace('${emotion}', emotion).replace('${ref_path}', ref_path).replace(
|
formatted_line = template_str.replace('${emotion}', emotion).replace('${ref_path}', relative_path).replace(
|
||||||
'${ref_text}', ref_text)
|
'${ref_text}', ref_text)
|
||||||
|
|
||||||
# 将格式化后的行添加到内容中,使用逗号和换行符分隔
|
# 将格式化后的行添加到内容中,使用逗号和换行符分隔
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import urllib.parse
|
from pathlib import Path
|
||||||
|
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, quote
|
||||||
|
|
||||||
|
|
||||||
class URLComposer:
|
class URLComposer:
|
||||||
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
|
def __init__(self, base_url, emotion_param_name, text_param_name, ref_path_param_name, ref_text_param_name):
|
||||||
self.base_url = base_url
|
self.base_url = safe_encode_query_params(base_url)
|
||||||
self.emotion_param_name = emotion_param_name
|
self.emotion_param_name = emotion_param_name
|
||||||
self.text_param_name = text_param_name
|
self.text_param_name = text_param_name
|
||||||
self.ref_path_param_name = ref_path_param_name
|
self.ref_path_param_name = ref_path_param_name
|
||||||
@ -28,8 +29,8 @@ class URLComposer:
|
|||||||
if not self.emotion_param_name:
|
if not self.emotion_param_name:
|
||||||
raise ValueError("Emotion parameter name is not set.")
|
raise ValueError("Emotion parameter name is not set.")
|
||||||
params = {
|
params = {
|
||||||
self.text_param_name: urllib.parse.quote(text_value),
|
self.text_param_name: quote(text_value),
|
||||||
self.emotion_param_name: urllib.parse.quote(emotion_value),
|
self.emotion_param_name: quote(emotion_value),
|
||||||
}
|
}
|
||||||
return self._append_params_to_url(params)
|
return self._append_params_to_url(params)
|
||||||
|
|
||||||
@ -37,9 +38,9 @@ class URLComposer:
|
|||||||
if self.emotion_param_name:
|
if self.emotion_param_name:
|
||||||
raise ValueError("Cannot use reference parameters when emotion parameter is set.")
|
raise ValueError("Cannot use reference parameters when emotion parameter is set.")
|
||||||
params = {
|
params = {
|
||||||
self.text_param_name: urllib.parse.quote(text_value),
|
self.text_param_name: quote(text_value),
|
||||||
self.ref_path_param_name: urllib.parse.quote(ref_path_value),
|
self.ref_path_param_name: quote(ref_path_value),
|
||||||
self.ref_text_param_name: urllib.parse.quote(ref_text_value),
|
self.ref_text_param_name: quote(ref_text_value),
|
||||||
}
|
}
|
||||||
return self._append_params_to_url(params)
|
return self._append_params_to_url(params)
|
||||||
|
|
||||||
@ -51,16 +52,36 @@ class URLComposer:
|
|||||||
return url_with_params
|
return url_with_params
|
||||||
|
|
||||||
|
|
||||||
|
def safe_encode_query_params(original_url):
    """
    Return ``original_url`` with its query string percent-encoded exactly once.

    The query string is decoded with ``parse_qs`` and re-encoded with
    ``urlencode``, so raw non-ASCII values become percent-escapes while
    values that are already escaped stay unchanged. All other URL parts
    are kept as they are.

    Parameters:
        original_url (str): URL whose query parameters may contain unescaped
            characters.

    Returns:
        str: The URL with an encoded query string. Blank values (``a=``) and
        repeated parameters are preserved; values of a repeated parameter
        are emitted next to each other.
    """
    parsed_url = urlparse(original_url)

    # keep_blank_values keeps parameters such as "a=" that parse_qs would
    # otherwise drop.
    query_params = parse_qs(parsed_url.query, keep_blank_values=True)

    # parse_qs has already decoded keys and values, so urlencode must be the
    # only encoding step: quoting the values first would escape "%" a second
    # time ("%E8" -> "%25E8"). doseq=True emits every value of a repeated
    # parameter, and quote_via=quote encodes spaces as %20 instead of "+".
    new_query_string = urlencode(query_params, doseq=True, quote_via=quote)

    encoded_url = urlunparse(parsed_url._replace(query=new_query_string))

    print(encoded_url)
    return encoded_url
|
||||||
|
|
||||||
|
|
||||||
def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path):
|
def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path):
|
||||||
# Ensure the output directory exists
|
# Ensure the output directory exists
|
||||||
output_dir = Path(output_dir_path)
|
output_dir = os.path.abspath(output_dir_path)
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
# Create subdirectories for text and emotion categories
|
# Create subdirectories for text and emotion categories
|
||||||
text_subdir = os.path.join(output_dir, 'text')
|
text_subdir = os.path.join(output_dir, 'text')
|
||||||
text_subdir.mkdir(exist_ok=True)
|
os.makedirs(text_subdir, exist_ok=True)
|
||||||
emotion_subdir = os.path.join(output_dir, 'emotion')
|
emotion_subdir = os.path.join(output_dir, 'emotion')
|
||||||
emotion_subdir.mkdir(exist_ok=True)
|
os.makedirs(emotion_subdir, exist_ok=True)
|
||||||
|
|
||||||
for text, emotion in zip(text_list, emotion_list):
|
for text, emotion in zip(text_list, emotion_list):
|
||||||
# Generate audio byte stream using the create_audio function
|
# Generate audio byte stream using the create_audio function
|
||||||
@ -74,18 +95,18 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path)
|
|||||||
|
|
||||||
emotion_name = emotion['emotion']
|
emotion_name = emotion['emotion']
|
||||||
|
|
||||||
# Save audio files in both directories with the desired structure
|
text_subdir_text = os.path.join(text_subdir, text)
|
||||||
text_file_path = os.path.join(text_subdir, text, emotion_name, '.wav')
|
os.makedirs(text_subdir_text, exist_ok=True)
|
||||||
emotion_file_path = os.path.join(emotion_subdir, emotion_name, text, '.wav')
|
text_subdir_text_file_path = os.path.join(text_subdir_text, emotion_name + '.wav')
|
||||||
|
|
||||||
# Ensure intermediate directories for nested file paths exist
|
emotion_subdir_emotion = os.path.join(emotion_subdir, emotion_name)
|
||||||
text_file_path.parent.mkdir(parents=True, exist_ok=True)
|
os.makedirs(emotion_subdir_emotion, exist_ok=True)
|
||||||
emotion_file_path.parent.mkdir(parents=True, exist_ok=True)
|
emotion_subdir_emotion_file_path = os.path.join(emotion_subdir_emotion, text + '.wav')
|
||||||
|
|
||||||
# Write audio bytes to the respective files
|
# Write audio bytes to the respective files
|
||||||
with open(text_file_path, 'wb') as f:
|
with open(text_subdir_text_file_path, 'wb') as f:
|
||||||
f.write(audio_bytes)
|
f.write(audio_bytes)
|
||||||
with open(emotion_file_path, 'wb') as f:
|
with open(emotion_subdir_emotion_file_path, 'wb') as f:
|
||||||
f.write(audio_bytes)
|
f.write(audio_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from config import python_exec
|
import random
|
||||||
from subprocess import Popen
|
|
||||||
|
|
||||||
|
|
||||||
def convert_from_list(list_file, output_dir):
|
def convert_from_list(list_file, output_dir):
|
||||||
@ -70,7 +70,7 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
|
|||||||
sampled_subsection = similarity_list[start:start + num]
|
sampled_subsection = similarity_list[start:start + num]
|
||||||
|
|
||||||
# 创建并进入子目录
|
# 创建并进入子目录
|
||||||
subdir_name = f'subsection_{i + 1}'
|
subdir_name = f'emotion_{i + 1}'
|
||||||
subdir_path = os.path.join(output_audio_dir, subdir_name)
|
subdir_path = os.path.join(output_audio_dir, subdir_name)
|
||||||
os.makedirs(subdir_path, exist_ok=True)
|
os.makedirs(subdir_path, exist_ok=True)
|
||||||
|
|
||||||
@ -83,37 +83,6 @@ def sample(output_audio_dir, similarity_list, subsection_num, sample_num):
|
|||||||
print("Sampling completed.")
|
print("Sampling completed.")
|
||||||
|
|
||||||
|
|
||||||
def start_similarity_analysis(work_space_dir, sample_dir, base_voice_path, need_similarity_output):
|
|
||||||
similarity_list = None
|
|
||||||
|
|
||||||
similarity_dir = os.path.join(work_space_dir, 'similarity')
|
|
||||||
os.makedirs(similarity_dir, exist_ok=True)
|
|
||||||
|
|
||||||
base_voice_file_name = ref_audio_opt.get_filename_without_extension(base_voice_path)
|
|
||||||
similarity_file = os.path.join(similarity_dir, f'{base_voice_file_name}.txt')
|
|
||||||
|
|
||||||
global p_similarity
|
|
||||||
if (p_similarity == None):
|
|
||||||
cmd = f'"{python_exec}" tools/speaker_verification/voice_similarity.py '
|
|
||||||
cmd += f' -r "{base_voice_path}"'
|
|
||||||
cmd += f' -c "{sample_dir}"'
|
|
||||||
cmd += f' -o {similarity_file}'
|
|
||||||
|
|
||||||
print(cmd)
|
|
||||||
p_similarity = Popen(cmd, shell=True)
|
|
||||||
p_similarity.wait()
|
|
||||||
|
|
||||||
if need_similarity_output:
|
|
||||||
similarity_list = ref_audio_opt.parse_similarity_file(similarity_file)
|
|
||||||
similarity_file_dir = os.path.dirname(similarity_dir, base_voice_file_name)
|
|
||||||
ref_audio_opt.copy_and_move(similarity_file_dir, similarity_list)
|
|
||||||
|
|
||||||
p_similarity = None
|
|
||||||
return similarity_list, similarity_file, similarity_file_dir
|
|
||||||
else:
|
|
||||||
return similarity_list, None, None
|
|
||||||
|
|
||||||
|
|
||||||
def parse_similarity_file(file_path):
|
def parse_similarity_file(file_path):
|
||||||
"""
|
"""
|
||||||
解析指定文本文件,将其中的内容以元组形式存入列表。
|
解析指定文本文件,将其中的内容以元组形式存入列表。
|
||||||
@ -126,7 +95,7 @@ def parse_similarity_file(file_path):
|
|||||||
"""
|
"""
|
||||||
result_list = []
|
result_list = []
|
||||||
|
|
||||||
with open(file_path, 'r') as file:
|
with open(file_path, 'r', encoding='utf-8') as file:
|
||||||
for line in file:
|
for line in file:
|
||||||
# 去除行尾换行符并按'|'分割
|
# 去除行尾换行符并按'|'分割
|
||||||
score, filepath = line.strip().split('|')
|
score, filepath = line.strip().split('|')
|
||||||
@ -163,16 +132,6 @@ def copy_and_move(output_audio_directory, similarity_scores):
|
|||||||
print("已完成复制和重命名操作。")
|
print("已完成复制和重命名操作。")
|
||||||
|
|
||||||
|
|
||||||
def get_filename_without_extension(file_path):
|
if __name__ == '__main__':
|
||||||
"""
|
similarity_list = parse_similarity_file("D:/tt/similarity/啊,除了伊甸和樱,竟然还有其他人会提起我?.txt")
|
||||||
Given a file path string, returns the file name without its extension.
|
sample('D:/tt/similarity/output', similarity_list, 10, 4)
|
||||||
|
|
||||||
Parameters:
|
|
||||||
file_path (str): The full path to the file.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The file name without its extension.
|
|
||||||
"""
|
|
||||||
base_name = os.path.basename(file_path) # Get the base name (file name with extension)
|
|
||||||
file_name, file_extension = os.path.splitext(base_name) # Split the base name into file name and extension
|
|
||||||
return file_name # Return the file name without extension
|
|
@ -0,0 +1,5 @@
|
|||||||
|
"${emotion}": {
|
||||||
|
"ref_wav_path": "${ref_path}",
|
||||||
|
"prompt_text": "${ref_text}",
|
||||||
|
"prompt_language": "中文"
|
||||||
|
}
|
@ -4,7 +4,7 @@ import os
|
|||||||
from modelscope.pipelines import pipeline
|
from modelscope.pipelines import pipeline
|
||||||
sv_pipeline = pipeline(
|
sv_pipeline = pipeline(
|
||||||
task='speaker-verification',
|
task='speaker-verification',
|
||||||
model='/tools/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
|
model='Ref_Audio_Selector/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common',
|
||||||
model_revision='v1.0.0'
|
model_revision='v1.0.0'
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,6 +21,7 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
|
|||||||
'score': score,
|
'score': score,
|
||||||
'path': audio_path
|
'path': audio_path
|
||||||
})
|
})
|
||||||
|
print(f'similarity score: {score}, path: {audio_path}')
|
||||||
|
|
||||||
# Step 3: 根据相似度分数降序排列
|
# Step 3: 根据相似度分数降序排列
|
||||||
similarity_scores.sort(key=lambda x: x['score'], reverse=True)
|
similarity_scores.sort(key=lambda x: x['score'], reverse=True)
|
||||||
@ -57,8 +58,9 @@ def parse_arguments():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cmd = parse_arguments()
|
cmd = parse_arguments()
|
||||||
|
print(cmd)
|
||||||
compare_audio_and_generate_report(
|
compare_audio_and_generate_report(
|
||||||
reference_audio_path = cmd.reference_audio,
|
reference_audio_path = cmd.reference_audio,
|
||||||
comparison_dir = cmd.comparison_dir,
|
comparison_dir_path = cmd.comparison_dir,
|
||||||
output_file = cmd.output_file,
|
output_file_path = cmd.output_file,
|
||||||
)
|
)
|
12
Ref_Audio_Selector/tool/test_content/test_content.txt
Normal file
12
Ref_Audio_Selector/tool/test_content/test_content.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
你知道这不可能!
|
||||||
|
如果有人故意破坏呢?
|
||||||
|
也不可能!同时改变三颗卫星和一个地面观测站的数据?那这破坏也有些超自然了。
|
||||||
|
汪淼点点头,比起宇宙闪烁来,他宁愿接受这个超自然。但沙瑞山立刻抽走了他怀中这唯一的一根救命稻草。
|
||||||
|
要想最终证实这一切,其实很简单。宇宙背景辐射这样幅度的波动,已经大到我们能用肉眼觉察的程度。
|
||||||
|
你胡说什么?现在是你在违反常识了:背景辐射的波长是7厘米,比可见光大了七八个数量级,怎么能看到?
|
||||||
|
用特制眼镜。
|
||||||
|
特制眼镜?
|
||||||
|
是我们为首都天文馆做的一个科普小玩意儿。现在的技术,已经能将彭齐阿斯和威尔逊在四十多年前用于发现特制背景辐射的二十英尺的喇叭形天线做成眼镜大小,
|
||||||
|
并且在这个眼镜中设置一个转换系统,将接收到的背景辐射的波长压缩七个数量级,将7厘米波转换成红光。
|
||||||
|
这样,观众在夜里戴上这种眼镜,就能亲眼看到宇宙的特制背景辐射,现在,也能看到宇宙闪烁。
|
||||||
|
这东西现在哪儿?能告诉我吗
|
Loading…
x
Reference in New Issue
Block a user