mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-06 03:57:44 +08:00
添加非中文语言的asr操作
This commit is contained in:
parent
9264f7e38e
commit
6cb3c15448
@ -10,3 +10,4 @@
|
|||||||
并且在这个眼镜中设置一个转换系统,将接收到的背景辐射的波长压缩七个数量级,将7厘米波转换成红光。
|
并且在这个眼镜中设置一个转换系统,将接收到的背景辐射的波长压缩七个数量级,将7厘米波转换成红光。
|
||||||
这样,观众在夜里戴上这种眼镜,就能亲眼看到宇宙的特制背景辐射,现在,也能看到宇宙闪烁。
|
这样,观众在夜里戴上这种眼镜,就能亲眼看到宇宙的特制背景辐射,现在,也能看到宇宙闪烁。
|
||||||
这东西现在哪儿?能告诉我吗
|
这东西现在哪儿?能告诉我吗
|
||||||
|
希望各位猫猫给视频三连支持一下猫窝,十分感谢支持喵~
|
@ -515,6 +515,12 @@ def delete_ref_audio_below_boundary(ref_audio_path, text_text_similarity_result_
|
|||||||
text_delete_ref_audio_below_boundary_info = f"发生异常:{e}"
|
text_delete_ref_audio_below_boundary_info = f"发生异常:{e}"
|
||||||
return text_delete_ref_audio_below_boundary_info
|
return text_delete_ref_audio_below_boundary_info
|
||||||
|
|
||||||
|
def change_lang_choices(key):
    """Build the dropdown update for the languages of the selected ASR model.

    Args:
        key: Key into ``asr_dict`` identifying the chosen ASR model.

    Returns:
        A Gradio-style update dict whose choices are the model's language
        list and whose selected value is the first language in that list.
    """
    languages = asr_dict[key]['lang']
    return {
        "__type__": "update",
        "choices": languages,
        "value": languages[0],
    }
|
||||||
|
def change_size_choices(key):
    """Build the dropdown update for the model sizes of the selected ASR model.

    Args:
        key: Key into ``asr_dict`` identifying the chosen ASR model.

    Returns:
        A Gradio-style update dict whose choices are the model's size list.
        The currently selected value is not touched.
    """
    update = {"__type__": "update"}
    update["choices"] = asr_dict[key]['size']
    return update
|
||||||
|
|
||||||
|
|
||||||
def save_generate_audio_url(generate_audio_url):
|
def save_generate_audio_url(generate_audio_url):
|
||||||
@ -687,7 +693,10 @@ if __name__ == '__main__':
|
|||||||
text_emotion.blur(save_emotion_param, [text_emotion], [])
|
text_emotion.blur(save_emotion_param, [text_emotion], [])
|
||||||
gr.Markdown(value=i18n("2.3:配置待推理文本,一句一行,不要太多,10条即可"))
|
gr.Markdown(value=i18n("2.3:配置待推理文本,一句一行,不要太多,10条即可"))
|
||||||
default_test_content_path = params.default_test_text_path
|
default_test_content_path = params.default_test_text_path
|
||||||
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
|
with gr.Row():
|
||||||
|
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
|
||||||
|
button_open_test_content_file = gr.Button(i18n("打开待推理文本文件"), variant="primary")
|
||||||
|
button_open_test_content_file.click(open_file, [text_test_content], [])
|
||||||
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
|
||||||
slider_request_concurrency_num = gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3,
|
slider_request_concurrency_num = gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3,
|
||||||
interactive=True)
|
interactive=True)
|
||||||
@ -703,7 +712,7 @@ if __name__ == '__main__':
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
dropdown_asr_model = gr.Dropdown(
|
dropdown_asr_model = gr.Dropdown(
|
||||||
label=i18n("ASR 模型"),
|
label=i18n("ASR 模型"),
|
||||||
choices=[],
|
choices=list(asr_dict.keys()),
|
||||||
interactive=True,
|
interactive=True,
|
||||||
value="达摩 ASR (中文)"
|
value="达摩 ASR (中文)"
|
||||||
)
|
)
|
||||||
@ -719,6 +728,8 @@ if __name__ == '__main__':
|
|||||||
interactive=True,
|
interactive=True,
|
||||||
value="zh"
|
value="zh"
|
||||||
)
|
)
|
||||||
|
dropdown_asr_model.change(change_lang_choices, [dropdown_asr_model], [dropdown_asr_lang])
|
||||||
|
dropdown_asr_model.change(change_size_choices, [dropdown_asr_model], [dropdown_asr_size])
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
button_asr = gr.Button(i18n("启动asr"), variant="primary")
|
button_asr = gr.Button(i18n("启动asr"), variant="primary")
|
||||||
text_asr_info = gr.Text(label=i18n("asr结果"), value="", interactive=False)
|
text_asr_info = gr.Text(label=i18n("asr结果"), value="", interactive=False)
|
||||||
|
119
Ref_Audio_Selector/tool/asr/fasterwhisper_asr_multi_level_dir.py
Normal file
119
Ref_Audio_Selector/tool/asr/fasterwhisper_asr_multi_level_dir.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import traceback
|
||||||
|
import Ref_Audio_Selector.config_param.config_params as params
|
||||||
|
|
||||||
|
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
|
||||||
|
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from tools.asr.config import check_fw_local_models
|
||||||
|
from Ref_Audio_Selector.config_param.log_config import logger
|
||||||
|
|
||||||
|
# Language codes accepted by the ``--language`` command-line option.
# The final entry, "auto", asks the model to detect the language itself.
language_code_list = [
    "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo",
    "br", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es",
    "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha", "haw",
    "he", "hi", "hr", "ht", "hu", "hy", "id", "is", "it", "ja",
    "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln", "lo",
    "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt",
    "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt",
    "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so", "sq",
    "sr", "su", "sv", "sw", "ta", "te", "tg", "th", "tk", "tl",
    "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh", "yue",
    "auto",
]
|
||||||
|
|
||||||
|
|
||||||
|
def execute_asr_multi_level_dir(input_folder, output_folder, model_size, language, precision):
    """Transcribe every ``.wav`` file below *input_folder* with Faster Whisper.

    The input tree is walked recursively. Each WAV file contributes one line
    of the form ``file_path|parent_dir_name|LANGUAGE|text`` where
    ``parent_dir_name`` is the name of the directory that holds the file and
    ``LANGUAGE`` is the upper-cased language reported by the model. All lines
    are written to ``<output_folder>/<params.asr_filename>.list``.

    Args:
        input_folder: Root directory searched recursively for WAV files.
        output_folder: Directory for the result file; created if missing.
        model_size: Faster Whisper model name. A ``-local`` suffix selects the
            model stored under ``tools/asr/models/faster-whisper-<name>``.
        language: Language code handed to the model, or ``'auto'`` to let the
            model pick the most probable language.
        precision: Value passed to ``WhisperModel`` as ``compute_type``.

    Returns:
        The absolute path of the written list file, or ``None`` when loading
        the model or transcribing any file raised (the traceback is logged
        and processing stops without writing the list file).
    """
    local_suffix = '-local'
    if model_size.endswith(local_suffix):
        # Strip the marker and point at the bundled local model directory.
        model_size = model_size[:-len(local_suffix)]
        model_path = f'tools/asr/models/faster-whisper-{model_size}'
    else:
        model_path = model_size
    if language == 'auto':
        # Leaving the language unset lets the model output the most probable one.
        language = None
    logger.info(f"loading faster whisper model: {model_size} {model_path}")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    try:
        model = WhisperModel(model_path, device=device, compute_type=precision)
    except Exception:
        logger.error(traceback.format_exc())
        return None

    output = []

    # Recursively walk the input directory and all of its sub-directories.
    for root, _dirs, files in os.walk(input_folder):
        for file_name in sorted(files):
            # Only WAV files are processed.
            if not file_name.endswith(".wav"):
                continue
            try:
                # Join with the directory currently being walked, not the
                # top-level folder, so files in sub-directories resolve.
                file_path = os.path.join(root, file_name)
                original_text = os.path.basename(root)
                segments, info = model.transcribe(
                    audio=file_path,
                    beam_size=5,
                    vad_filter=True,
                    vad_parameters=dict(min_silence_duration_ms=700),
                    language=language)
                text = ''

                if info.language == "zh":
                    logger.info("检测为中文文本, 转 FunASR 处理")
                    # Imported lazily: non-Chinese runs never need to import
                    # (and download) the FunASR model.
                    from Ref_Audio_Selector.tool.asr.funasr_asr_multi_level_dir import only_asr
                    text = only_asr(file_path)

                if text == '':
                    # Use the Faster Whisper segments when FunASR was not used
                    # or produced nothing.
                    text = ''.join(segment.text for segment in segments)
                output.append(f"{file_path}|{original_text}|{info.language.upper()}|{text}")
            except Exception:
                logger.error(traceback.format_exc())
                return None

    os.makedirs(output_folder, exist_ok=True)
    output_file_path = os.path.abspath(f'{output_folder}/{params.asr_filename}.list')

    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write("\n".join(output))
        logger.info(f"ASR 任务完成->标注文件路径: {output_file_path}\n")
    return output_file_path
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the ASR pass once.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-i", "--input_folder",
        type=str,
        required=True,
        help="Path to the folder containing WAV files.",
    )
    arg_parser.add_argument(
        "-o", "--output_folder",
        type=str,
        required=True,
        help="Output folder to store transcriptions.",
    )
    arg_parser.add_argument(
        "-s", "--model_size",
        type=str,
        default='large-v3',
        choices=check_fw_local_models(),
        help="Model Size of Faster Whisper",
    )
    arg_parser.add_argument(
        "-l", "--language",
        type=str,
        default='ja',
        choices=language_code_list,
        help="Language of the audio files.",
    )
    arg_parser.add_argument(
        "-p", "--precision",
        type=str,
        default='float16',
        choices=['float16', 'float32'],
        help="fp16 or fp32",
    )

    cli_args = arg_parser.parse_args()
    # The option names match the function's keyword parameters one-to-one.
    output_file_path = execute_asr_multi_level_dir(**vars(cli_args))
|
@ -39,7 +39,6 @@ def only_asr(input_file):
|
|||||||
@timeit_decorator
|
@timeit_decorator
|
||||||
def execute_asr_multi_level_dir(input_folder, output_folder, model_size, language):
|
def execute_asr_multi_level_dir(input_folder, output_folder, model_size, language):
|
||||||
output = []
|
output = []
|
||||||
output_file_name = os.path.basename(input_folder)
|
|
||||||
# 递归遍历输入目录及所有子目录
|
# 递归遍历输入目录及所有子目录
|
||||||
for root, dirs, files in os.walk(input_folder):
|
for root, dirs, files in os.walk(input_folder):
|
||||||
for name in sorted(files):
|
for name in sorted(files):
|
||||||
@ -58,7 +57,7 @@ def execute_asr_multi_level_dir(input_folder, output_folder, model_size, languag
|
|||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
|
|
||||||
# 创建或打开指定的输出目录
|
# 创建或打开指定的输出目录
|
||||||
output_folder = output_folder or "output/asr_opt"
|
output_folder = output_folder
|
||||||
output_dir_abs = os.path.abspath(output_folder)
|
output_dir_abs = os.path.abspath(output_folder)
|
||||||
os.makedirs(output_dir_abs, exist_ok=True)
|
os.makedirs(output_dir_abs, exist_ok=True)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user