添加非中文语言的asr操作

This commit is contained in:
Downupanddownup 2024-04-28 15:20:10 +08:00
parent 9264f7e38e
commit 6cb3c15448
4 changed files with 135 additions and 5 deletions

View File

@ -9,4 +9,5 @@
是我们为首都天文馆做的一个科普小玩意儿。现在的技术,已经能将彭齐阿斯和威尔逊在四十多年前用于发现特制背景辐射的二十英尺的喇叭形天线做成眼镜大小,
并且在这个眼镜中设置一个转换系统将接收到的背景辐射的波长压缩七个数量级将7厘米波转换成红光。
这样,观众在夜里戴上这种眼镜,就能亲眼看到宇宙的特制背景辐射,现在,也能看到宇宙闪烁。
这东西现在哪儿?能告诉我吗
这东西现在哪儿?能告诉我吗
希望各位猫猫给视频三连支持一下猫窝,十分感谢支持喵~

View File

@ -515,6 +515,12 @@ def delete_ref_audio_below_boundary(ref_audio_path, text_text_similarity_result_
text_delete_ref_audio_below_boundary_info = f"发生异常:{e}"
return text_delete_ref_audio_below_boundary_info
def change_lang_choices(key):
    """Build the update payload listing the languages of the selected ASR model.

    Args:
        key: Key into ``asr_dict`` identifying the selected ASR model.

    Returns:
        An update dict whose ``choices`` are the model's ``'lang'`` entries and
        whose ``value`` is the first of those entries.
    """
    supported_langs = asr_dict[key]['lang']
    return {
        "__type__": "update",
        "choices": supported_langs,
        "value": supported_langs[0],
    }
def change_size_choices(key):
    """Build the update payload listing the model sizes of the selected ASR model.

    Args:
        key: Key into ``asr_dict`` identifying the selected ASR model.

    Returns:
        An update dict whose ``choices`` are the model's ``'size'`` entries.
    """
    # NOTE(review): unlike change_lang_choices, no "value" is sent, so the
    # currently selected size is left as it is -- confirm this is intended.
    available_sizes = asr_dict[key]['size']
    return {
        "__type__": "update",
        "choices": available_sizes,
    }
def save_generate_audio_url(generate_audio_url):
@ -687,7 +693,10 @@ if __name__ == '__main__':
text_emotion.blur(save_emotion_param, [text_emotion], [])
gr.Markdown(value=i18n("2.3配置待推理文本一句一行不要太多10条即可"))
default_test_content_path = params.default_test_text_path
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
with gr.Row():
text_test_content = gr.Text(label=i18n("请输入待推理文本路径"), value=default_test_content_path)
button_open_test_content_file = gr.Button(i18n("打开待推理文本文件"), variant="primary")
button_open_test_content_file.click(open_file, [text_test_content], [])
gr.Markdown(value=i18n("2.4:开始批量推理,这个过程比较耗时,可以去干点别的"))
slider_request_concurrency_num = gr.Slider(minimum=1, maximum=10, step=1, label=i18n("请输入请求并发数,会根据此数创建对应数量的子进程并行发起推理请求"), value=3,
interactive=True)
@ -703,7 +712,7 @@ if __name__ == '__main__':
with gr.Row():
dropdown_asr_model = gr.Dropdown(
label=i18n("ASR 模型"),
choices=[],
choices=list(asr_dict.keys()),
interactive=True,
value="达摩 ASR (中文)"
)
@ -719,6 +728,8 @@ if __name__ == '__main__':
interactive=True,
value="zh"
)
dropdown_asr_model.change(change_lang_choices, [dropdown_asr_model], [dropdown_asr_lang])
dropdown_asr_model.change(change_size_choices, [dropdown_asr_model], [dropdown_asr_size])
with gr.Row():
button_asr = gr.Button(i18n("启动asr"), variant="primary")
text_asr_info = gr.Text(label=i18n("asr结果"), value="", interactive=False)

View File

@ -0,0 +1,119 @@
import argparse
import os
import traceback
import Ref_Audio_Selector.config_param.config_params as params
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
from faster_whisper import WhisperModel
from tqdm import tqdm
from tools.asr.config import check_fw_local_models
from Ref_Audio_Selector.config_param.log_config import logger
# Values accepted by the -l/--language command-line option of this script.
# The final "auto" entry is not a language code: execute_asr_multi_level_dir
# replaces it with None before transcribing so that no language is forced.
language_code_list = [
"af", "am", "ar", "as", "az",
"ba", "be", "bg", "bn", "bo",
"br", "bs", "ca", "cs", "cy",
"da", "de", "el", "en", "es",
"et", "eu", "fa", "fi", "fo",
"fr", "gl", "gu", "ha", "haw",
"he", "hi", "hr", "ht", "hu",
"hy", "id", "is", "it", "ja",
"jw", "ka", "kk", "km", "kn",
"ko", "la", "lb", "ln", "lo",
"lt", "lv", "mg", "mi", "mk",
"ml", "mn", "mr", "ms", "mt",
"my", "ne", "nl", "nn", "no",
"oc", "pa", "pl", "ps", "pt",
"ro", "ru", "sa", "sd", "si",
"sk", "sl", "sn", "so", "sq",
"sr", "su", "sv", "sw", "ta",
"te", "tg", "th", "tk", "tl",
"tr", "tt", "uk", "ur", "uz",
"vi", "yi", "yo", "zh", "yue",
"auto"]
def execute_asr_multi_level_dir(input_folder, output_folder, model_size, language, precision):
    """Transcribe every ``.wav`` file below ``input_folder`` with Faster Whisper.

    The directory tree is walked recursively. For each audio file one line of
    the form ``<audio path>|<name of containing directory>|<LANGUAGE>|<text>``
    is collected, and all lines are written to
    ``<output_folder>/<params.asr_filename>.list``.

    Args:
        input_folder: Root directory searched recursively for ``.wav`` files.
        output_folder: Directory that receives the ``.list`` file; it is
            created when missing.
        model_size: Model identifier handed to ``WhisperModel``. A trailing
            ``-local`` selects ``tools/asr/models/faster-whisper-<name>``.
        language: Language code passed to ``transcribe``; ``'auto'`` is turned
            into ``None`` so that no language is forced.
        precision: Forwarded to ``WhisperModel`` as ``compute_type``.

    Returns:
        The absolute path of the written list file, or ``None`` when loading
        the model or transcribing a file raised (the traceback is logged).
    """
    local_suffix = '-local'
    if model_size.endswith(local_suffix):
        model_size = model_size[:-len(local_suffix)]
        model_path = f'tools/asr/models/faster-whisper-{model_size}'
    else:
        model_path = model_size

    if language == 'auto':
        # Leave the language unset so the model reports the most probable one.
        language = None

    # Format the message up front; extra positional arguments without
    # placeholders would not be rendered by the logger.
    logger.info(f"loading faster whisper model: {model_size} {model_path}")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    try:
        model = WhisperModel(model_path, device=device, compute_type=precision)
    except Exception:
        logger.error(traceback.format_exc())
        return None

    output = []
    # Walk the input directory and all of its sub-directories.
    for root, dirs, files in os.walk(input_folder):
        # The containing directory's name is stored as the second field.
        original_text = os.path.basename(root)
        for file_name in sorted(files):
            # Only files with a .wav extension are processed.
            if not file_name.endswith(".wav"):
                continue
            try:
                # Join with `root`, not `input_folder`: the file may live in
                # a nested sub-directory of the walk.
                file_path = os.path.join(root, file_name)
                segments, info = model.transcribe(
                    audio=file_path,
                    beam_size=5,
                    vad_filter=True,
                    vad_parameters=dict(min_silence_duration_ms=700),
                    language=language)
                text = ''

                if info.language == "zh":
                    logger.info("检测为中文文本, 转 FunASR 处理")
                    # Imported lazily so the FunASR module is only loaded
                    # when Chinese audio is actually encountered.
                    from Ref_Audio_Selector.tool.asr.funasr_asr_multi_level_dir import only_asr
                    text = only_asr(file_path)

                if text == '':
                    # Use the Whisper segments when FunASR was not used or
                    # returned nothing.
                    text = ''.join(segment.text for segment in segments)

                output.append(f"{file_path}|{original_text}|{info.language.upper()}|{text}")
            except Exception:
                # A failing file aborts the whole run, as before.
                logger.error(traceback.format_exc())
                return None

    os.makedirs(output_folder, exist_ok=True)
    output_file_path = os.path.abspath(
        os.path.join(output_folder, f'{params.asr_filename}.list'))

    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write("\n".join(output))
        logger.info(f"ASR 任务完成->标注文件路径: {output_file_path}\n")
    return output_file_path
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the ASR pass once.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-i", "--input_folder",
        type=str,
        required=True,
        help="Path to the folder containing WAV files.",
    )
    arg_parser.add_argument(
        "-o", "--output_folder",
        type=str,
        required=True,
        help="Output folder to store transcriptions.",
    )
    arg_parser.add_argument(
        "-s", "--model_size",
        type=str,
        default='large-v3',
        choices=check_fw_local_models(),
        help="Model Size of Faster Whisper",
    )
    arg_parser.add_argument(
        "-l", "--language",
        type=str,
        default='ja',
        choices=language_code_list,
        help="Language of the audio files.",
    )
    arg_parser.add_argument(
        "-p", "--precision",
        type=str,
        default='float16',
        choices=['float16', 'float32'],
        help="fp16 or fp32",
    )

    cli_args = arg_parser.parse_args()
    # The option names match the function's parameter names one-to-one, so the
    # parsed namespace can be expanded directly into keyword arguments.
    output_file_path = execute_asr_multi_level_dir(**vars(cli_args))

View File

@ -39,7 +39,6 @@ def only_asr(input_file):
@timeit_decorator
def execute_asr_multi_level_dir(input_folder, output_folder, model_size, language):
output = []
output_file_name = os.path.basename(input_folder)
# 递归遍历输入目录及所有子目录
for root, dirs, files in os.walk(input_folder):
for name in sorted(files):
@ -58,7 +57,7 @@ def execute_asr_multi_level_dir(input_folder, output_folder, model_size, languag
logger.error(traceback.format_exc())
# 创建或打开指定的输出目录
output_folder = output_folder or "output/asr_opt"
output_folder = output_folder
output_dir_abs = os.path.abspath(output_folder)
os.makedirs(output_dir_abs, exist_ok=True)