diff --git a/docs/cn/README.md b/docs/cn/README.md index 8d3ca49a..28dbd36f 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -204,6 +204,9 @@ python tools/uvr5/webui.py "" ```` python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision ```` + +--- + 这是使用命令行完成数据集的音频切分的方式 ```` python audio_slicer.py \ @@ -214,17 +217,38 @@ python audio_slicer.py \ --min_interval --hop_size ```` -这是使用命令行完成数据集ASR处理的方式(仅限中文) -```` -python tools/damo_asr/cmd-asr.py "" -```` -通过Faster_Whisper进行ASR处理(除中文之外的ASR标记) -(没有进度条,GPU性能可能会导致时间延迟) +--- + +这是使用命令行完成数据集ASR处理的方式 + +通过 FunASR 进行 ASR 处理 (仅中文) ```` -python ./tools/damo_asr/WhisperASR.py -i -o -f -l +python tools/asr/funasr_asr.py \ + -i "" \ + -o "Output folder" ```` -启用自定义列表保存路径 + +参数: +- `-i/--input_file_or_folder`: 输入音频文件/含有音频的文件夹路径. +- `-o/--output_folder`: 输出文件夹路径, 输出文件将以输入文件所在目录名称命名, 后缀为 `.list`. +- ~~`-s/--model_size`~~: 模型尺寸, 基于 Paraformer-Large, 其他输入目前无效. +- ~~`-l/--language`~~: 识别语言, 仅支持中文, 其他输入目前无效. +- ~~`-p/--precision`~~: 计算精度, 其他输入目前无效. + +通过 Faster_Whisper 进行 ASR 处理 (其他语言, 中文会自动转到 FunASR 进行处理) + +```` +python tools/asr/fasterwhisper_asr.py -i -o -s -l -p +```` +参数: +- `-i/--input_file_or_folder`: 输入音频文件/含有音频的文件夹路径. +- `-o/--output_folder`: 输出文件夹路径, 输出文件将以输入文件所在目录名称命名, 后缀为 `.list`. +- `-s/--model_size`: 模型尺寸, 可选值与本地是否存在模型相关, 默认为 `large-v3`. +- `-l/--language`: 识别语言, 默认为 `auto` 自动识别语言. +- `-p/--precision`: 计算精度, 可选 `fp16`, `fp32`, 使用 CPU 时会自动调整. + + ## 致谢 特别感谢以下项目和贡献者: diff --git a/tools/asr/config.py b/tools/asr/config.py index 8fe68388..b224125a 100644 --- a/tools/asr/config.py +++ b/tools/asr/config.py @@ -1,31 +1,114 @@ import os +from datetime import datetime -def check_fw_local_models(): - ''' - 启动时检查本地是否有 Faster Whisper 模型. 
class BaseASR:
    """Shared scaffolding for the ASR back-ends.

    Subclasses implement ``load_model`` and ``inference``.  This base class
    provides local model discovery and the batch driver that transcribes a
    file or a folder and writes the resulting ``.list`` annotation file.
    """

    def __init__(self):
        pass

    def check_local_model(self, model_name, model_file, cache_path):
        """Look for a model directory on disk.

        The project folder ``tools/asr/models`` is searched first; the
        download cache ``cache_path`` is only searched when the project
        folder yields nothing.

        Args:
            model_name: Directory name that must appear, followed by a path
                separator, somewhere in the path of the weight file.
            model_file: File name of the weight file to look for.
            cache_path: Root directory of the fallback cache.

        Returns:
            ``(model_path, flag)`` where ``model_path`` is the directory
            holding ``model_file`` and ``flag`` is ``'local'`` or
            ``'cache'``.  ``('', '')`` when nothing was found.
        """
        search_roots = (
            (os.path.normpath('tools/asr/models'), 'local'),
            (cache_path, 'cache'),
        )
        marker = model_name + os.sep
        for search_root, flag in search_roots:
            model_path = ''
            for root, _dirs, files in os.walk(search_root):
                if model_file in files and marker in os.path.join(root, model_file):
                    # No early exit: the last match in walk order wins.
                    model_path = root
            if model_path:
                return model_path, flag
        return '', ''

    def load_model(self, *args, **kwargs):
        """Load the model.  Must be implemented by subclasses."""
        raise NotImplementedError

    def inference(self, *args, **kwargs):
        """Transcribe one file and return ``(text, language)``.

        Must be implemented by subclasses.  The batch driver below calls it
        as ``inference(file_path, language)``.
        """
        raise NotImplementedError

    @staticmethod
    def _collect_input_files(input_file_or_folder):
        """Return ``(input_file_paths, output_file_name)`` for a file or folder."""
        # abspath drops a trailing separator and anchors bare relative names,
        # so 'raw/xxx/' and 'a.wav' still yield a non-empty directory name.
        target = os.path.abspath(input_file_or_folder)
        if os.path.isfile(target):
            return [target], os.path.basename(os.path.dirname(target))
        candidates = (os.path.join(target, name) for name in sorted(os.listdir(target)))
        # Sub-directories are skipped; only direct child files are transcribed.
        return [path for path in candidates if os.path.isfile(path)], os.path.basename(target)

    def inference_file_or_folder(self, input_file_or_folder, output_folder, language):
        """Transcribe a file or every file in a folder and save the result.

        Each successful transcription becomes one line of the form
        ``path|name|LANGUAGE|text`` in ``<output_folder>/<name>.list``, where
        ``name`` is the input folder name (or the parent folder name of a
        single input file).  If that file already exists, a timestamp is
        appended to the new file name instead of overwriting it.

        Args:
            input_file_or_folder: Audio file, or folder containing audio files.
            output_folder: Folder for the ``.list`` file; created if missing.
            language: Language code passed to ``inference``; ``'auto'`` is
                forwarded as ``None``.

        Returns:
            Absolute path of the written file, or ``None`` when no file
            produced a transcription.

        Raises:
            AssertionError: If ``input_file_or_folder`` does not exist.  The
                type matches the previous ``assert``, but the check now also
                runs under ``python -O`` and carries a message.
        """
        if not os.path.exists(input_file_or_folder):
            ASR_Logger.error('输入路径不存在.')
            raise AssertionError('输入路径不存在.')

        input_file_paths, output_file_name = self._collect_input_files(input_file_or_folder)

        os.makedirs(output_folder, exist_ok=True)

        if language == 'auto':
            # None lets the model pick the most probable language itself.
            language = None
        ASR_Logger.info("开始转写")
        result = []
        with logging_redirect_tqdm([ASR_Logger]):
            for file_path in tqdm(input_file_paths, desc="转写进度 ", dynamic_ncols=True):
                text, output_language = self.inference(file_path, language)
                if text and output_language:
                    result.append(f"{file_path}|{output_file_name}|{output_language.upper()}|{text}")
        if not result:
            ASR_Logger.error("没有转写结果, 放弃保存.")
            return None

        output_file_path = os.path.abspath(os.path.join(output_folder, f'{output_file_name}.list'))
        if os.path.exists(output_file_path):
            ASR_Logger.info('输出文件路径已存在, 文件名添加时间戳.')
            timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
            file_name, file_extension = os.path.splitext(output_file_path)
            output_file_path = f"{file_name}-{timestamp}{file_extension}"
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write("\n".join(result))
        ASR_Logger.info(f"任务完成->标注文件路径: {output_file_path}\n")
        return output_file_path
class FasterWhisperASR(BaseASR):
    """Faster Whisper back-end.

    Audio detected as Chinese is handed over to a lazily created ``FunASR``
    instance instead of being transcribed by Whisper.
    """

    # Suffixes that ``check_local_models`` appends to a model size when the
    # weights were found in the project folder or in the download cache.
    _LOCATION_SUFFIXES = ('-local', '-cache')

    def __init__(self, model_size, device="cuda", precision="float16"):
        """Discover local models and load the requested one.

        Args:
            model_size: One of the names returned by ``check_local_models``.
            device: Requested device; replaced by ``"cpu"`` without CUDA.
            precision: Compute type; replaced by ``"float32"`` without CUDA.

        Raises:
            AssertionError: If the model could not be loaded.  The type
                matches the previous ``assert``, but the check now also runs
                under ``python -O`` and carries a message.
        """
        if not torch.cuda.is_available():
            device, precision = "cpu", "float32"
        self.check_local_models()
        self.model = self.load_model(model_size, device, precision)
        if self.model is None:
            ASR_Logger.error('模型不存在')
            raise AssertionError('模型不存在')
        self.zh_model = None

    @classmethod
    def check_local_models(cls):
        """Find which Faster Whisper models are available on disk.

        Stores two class attributes: ``model_size_list`` (sizes, suffixed
        with ``-local`` or ``-cache`` when found) and ``model_path_dict``
        (list entry -> directory, ``''`` when not found).

        Returns:
            The updated ``model_size_list``.
        """
        cls.model_size_list = fw_model_size_list.copy()
        cls.model_path_dict = {}
        cache_path = os.path.normpath(os.path.expanduser("~/.cache/huggingface/hub/"))
        for i, size in enumerate(cls.model_size_list):
            # check_local_model never touches its first argument, so the class
            # stands in for the instance here.
            model_path, flag = super().check_local_model(
                cls,
                model_name=f"faster-whisper-{size}",
                model_file='model.bin',
                cache_path=cache_path)
            if flag:
                cls.model_size_list[i] = f"{size}-{flag}"
            cls.model_path_dict[cls.model_size_list[i]] = model_path
        return cls.model_size_list

    def load_model(self, model_size, device="cuda", precision="float16"):
        """Create the ``WhisperModel``.

        Args:
            model_size: Plain size (handed to ``WhisperModel`` as is) or a
                size suffixed with ``-local`` / ``-cache`` (loaded from the
                directory recorded in ``model_path_dict``).
            device: Device passed to ``WhisperModel``.
            precision: ``compute_type`` passed to ``WhisperModel``.

        Returns:
            The model, or ``None`` when loading failed (the failure is logged).
        """
        if model_size.endswith(self._LOCATION_SUFFIXES):
            model_path = self.model_path_dict[model_size]
            model_size = model_size.rsplit('-', 1)[0]  # drop the location suffix
            ASR_Logger.info(f"加载模型: 从 {model_path} 加载 faster-whisper-{model_size} 模型.")
            if 'huggingface' in model_path:
                ASR_Logger.warning(f"可将 {model_path} 移动到 tools/asr/models/ 文件夹下并重命名为 faster-whisper-{model_size}.")
        else:
            model_path = model_size
            # NOTE(review): huggingface_hub appears to read HF_ENDPOINT when it
            # is first imported - confirm the module-level assignment runs
            # before `faster_whisper` is imported, otherwise the mirror named
            # in this message may not be the endpoint actually used.
            ASR_Logger.warning(f"下载模型: 从 https://hf-mirror.com/Systran/faster-whisper-{model_size} 下载 faster-whisper-{model_size} 模型.")

        try:
            model = WhisperModel(model_path, device=device, compute_type=precision)
            if model.model.device != 'cpu':
                device_name = torch.cuda.get_device_name(model.model.device)
            else:
                device_name = 'CPU'
            ASR_Logger.info(f"运行设备: {device_name}, 设定精度: {precision}.")
            ASR_Logger.info("创建模型: Faster Whisper 完成.\n")
            return model
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # still terminate the process.
            ASR_Logger.info(traceback.format_exc())
            ASR_Logger.error(f"模型加载失败 or 下载失败, 可访问 https://hf-mirror.com/Systran/faster-whisper-{model_size} 自行下载, 并放置于 tools/asr/models/ 文件夹下")
            return None

    def inference(self, file_path, language='auto'):
        """Transcribe one audio file.

        Args:
            file_path: Path of the audio file.
            language: Language code; ``'auto'`` (or ``None``) lets the model
                detect the language.

        Returns:
            ``(text, language)`` with the language reported by Whisper, or
            ``('', '')`` when transcription failed.
        """
        try:
            if language == 'auto':
                language = None

            segments, info = self.model.transcribe(
                audio=file_path,
                beam_size=5,
                vad_filter=True,
                vad_parameters=dict(min_silence_duration_ms=700),
                language=language)

            if info.language == "zh":
                ASR_Logger.info("检测为中文文本, 转 FunASR 处理.")
                if self.zh_model is None:
                    self.zh_model = FunASR()
                text, _ = self.zh_model.inference(file_path)
            else:
                text = ''.join(segment.text for segment in segments)
            return text, info.language
        except Exception:
            # A bare except here also trapped KeyboardInterrupt, which made a
            # running batch impossible to interrupt.
            ASR_Logger.error(f"当前文件 {file_path} 转写失败, 可能不是有效的音频文件.")
            ASR_Logger.error(traceback.format_exc())
            return '', ''
"--precision", type=str, default='float16', choices=['float16','float32'], - help="fp16 or fp32") - + parser.add_argument("-p", "--precision", type=str, default='float16', + choices=['float16','float32'], help="fp16 or fp32") cmd = parser.parse_args() - output_file_path = execute_asr( - input_folder = cmd.input_folder, - output_folder = cmd.output_folder, - model_size = cmd.model_size, - language = cmd.language, - precision = cmd.precision, + ASR = FasterWhisperASR( + model_size = cmd.model_size, + precision = cmd.precision, ) + ASR.inference_file_or_folder( + input_file_or_folder = cmd.input_file_or_folder, + output_folder = cmd.output_folder, + language = cmd.language, + ) + diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index 6aa30381..6a0233a1 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -3,74 +3,114 @@ import argparse import os import traceback -from tqdm import tqdm +import torch from funasr import AutoModel -path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' -path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch' -path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch' -path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" -path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch" -path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" +from tools.asr.config import BaseASR +from tools.my_utils import ASR_Logger -model = AutoModel( - model = path_asr, - model_revision = "v2.0.4", - vad_model = path_vad, - vad_model_revision = "v2.0.4", - punc_model = path_punc, - punc_model_revision = "v2.0.4", -) +funasr_component = { + 'asr': { + 'name': 'Paraformer-Large', + 'size': 'speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch', + }, + 'vad': { + 
class FunASR(BaseASR):
    """FunASR back-end (ASR + VAD + punctuation models) for Chinese audio."""

    def __init__(self, model_size='large', device="cuda", precision="float16"):
        """Discover local models and load them.

        ``model_size``, ``device`` and ``precision`` are accepted so the
        constructor can be called like the other back-ends; none of them is
        used by this class.

        Raises:
            ValueError: If the models could not be loaded (from ``load_model``).
            AssertionError: If ``load_model`` returned ``None``.
        """
        self.check_local_models()
        self.model = self.load_model()
        if self.model is None:
            ASR_Logger.error('模型不存在')
            raise AssertionError('模型不存在')

    @classmethod
    def check_local_models(cls):
        """Resolve a load path for every FunASR component.

        For each entry of ``funasr_component`` a ``'path'`` key is set: the
        directory found on disk, or ``'iic/<model name>'`` when no local copy
        exists.  Note that the module-level ``funasr_component`` table itself
        is updated, not a copy.

        Returns:
            The updated component dictionary (also stored as
            ``cls.model_path_dict``).
        """
        cls.model_path_dict = funasr_component
        cache_path = os.path.normpath(os.path.expanduser("~/.cache/modelscope/hub/"))
        for dic in cls.model_path_dict.values():
            model_name = dic['size']
            # check_local_model never touches its first argument, so the class
            # stands in for the instance here.
            model_path, _ = super().check_local_model(
                cls,
                model_name=model_name,
                model_file='model.pt',
                cache_path=cache_path)
            # Without a local copy the path is the remote model identifier.
            dic['path'] = model_path if model_path else 'iic/' + model_name
        return cls.model_path_dict

    def load_model(self):
        """Create the FunASR ``AutoModel`` from the resolved component paths.

        Returns:
            The loaded model.

        Raises:
            ValueError: If loading or downloading failed.  The message is
                now the same text that is logged (it used to be ``None``).
        """
        try:
            for dic in self.model_path_dict.values():
                if os.path.exists(dic['path']):
                    ASR_Logger.info(f"加载模型: 从 {dic['path']} 加载 {dic['name']} 模型.")
                    if 'modelscope' in dic['path']:
                        ASR_Logger.warning(f"可将 {dic['path']} 移动到 tools/asr/models/ 文件夹下.")
                else:
                    ASR_Logger.warning(f"下载模型: 从 {dic['path']} 下载 {dic['name']} 模型.")
            model = AutoModel(
                model=self.model_path_dict['asr']['path'],
                model_revision="v2.0.4",
                vad_model=self.model_path_dict['vad']['path'],
                vad_model_revision="v2.0.4",
                punc_model=self.model_path_dict['punc']['path'],
                punc_model_revision="v2.0.4",
            )
            # Stop records from also reaching ancestor loggers, which printed
            # every message twice once the FunASR library was loaded.
            ASR_Logger.propagate = False

            if model.kwargs['device'] != 'cpu':
                device_name = torch.cuda.get_device_name(model.kwargs['device'])
            else:
                device_name = 'CPU'
            ASR_Logger.info(f"运行设备: {device_name}, 设定精度: --.")
            ASR_Logger.info("创建模型: FunASR 完成.\n")
            return model
        except Exception as err:
            ASR_Logger.error(traceback.format_exc())
            message = "模型加载失败 or 下载失败, 可访问 https://modelscope.cn/organization/iic 自行下载, 并放置于 tools/asr/models/ 文件夹下"
            ASR_Logger.error(message)
            # Logger.error() returns None, so the message has to be passed to
            # the exception explicitly.
            raise ValueError(message) from err

    def inference(self, file_path, language='zh'):
        """Transcribe one audio file.

        Args:
            file_path: Path of the audio file.
            language: Language tag echoed back to the caller.  A falsy value
                falls back to ``'zh'`` so the batch driver, which drops
                results without a language, keeps the transcript.

        Returns:
            ``(text, language)``, or ``('', '')`` when transcription failed.
        """
        try:
            text = self.model.generate(input=file_path)[0]["text"]
            return text, language or 'zh'
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C still stops a
            # running batch.
            ASR_Logger.error(f"当前文件 {file_path} 转写失败, 可能不是有效的音频文件.")
            ASR_Logger.error(traceback.format_exc())
            return '', ''
# ANSI escape sequences used to colour console log lines by level.
COLORS = {
    'WARNING' : '\033[33m',  # Yellow
    'ERROR'   : '\033[31m',  # Red
    'CRITICAL': '\033[35m',
    'RESET'   : '\033[0m',   # Reset color
}


class ColoredConsoleHandler(logging.StreamHandler):
    """Stream handler that wraps every formatted record in a level colour."""

    _BASE_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    # One formatter per level name, built on first use instead of on every
    # emitted record.
    _formatters = {}

    def format(self, record):
        """Format ``record`` with the colour of its level.

        Levels without an entry in ``COLORS`` get no colour prefix; the
        reset sequence is always appended.
        """
        formatter = self._formatters.get(record.levelname)
        if formatter is None:
            color = COLORS.get(record.levelname, '')
            formatter = logging.Formatter(color + self._BASE_FORMAT + COLORS['RESET'])
            self._formatters[record.levelname] = formatter
        return formatter.format(record)


class Tools_Logger():
    """Create or fetch a named logger with a coloured console handler."""

    _LEVEL_NAMES = ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG')

    def __init__(self, logger_name, log_level='info', log_path=None):
        """Configure the logger called ``logger_name``.

        Args:
            logger_name: Name passed to ``logging.getLogger``.
            log_level: Level name, case-insensitive.
            log_path: Accepted but currently ignored; no file handler is
                created.

        Raises:
            AssertionError: If ``log_level`` is not a valid level name.  The
                type matches the previous ``assert``, but the check now also
                runs under ``python -O``.
        """
        if not isinstance(log_level, str) or log_level.upper() not in self._LEVEL_NAMES:
            raise AssertionError(
                f"log_level must be one of {self._LEVEL_NAMES}, got {log_level!r}")
        log_level = log_level.upper()
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(log_level)

        # Look at this logger's own handlers only.  hasHandlers() also reports
        # handlers on ancestors, so a configured root logger used to prevent
        # the console handler from ever being attached.
        if not self.logger.handlers:
            ch = ColoredConsoleHandler()
            ch.setLevel(log_level)
            self.logger.addHandler(ch)

    def getLogger(self):
        """Return the configured ``logging.Logger``."""
        return self.logger


ASR_Logger = Tools_Logger('ASR').getLogger()
def change_lang_choices(key):
    """Return the update dict for the language dropdown of ASR model ``key``.

    The choices are the model's ``'lang'`` list from ``asr_dict`` and the
    first entry becomes the selected value.
    """
    languages = asr_dict[key]['lang']
    return {"__type__": "update", "choices": languages, "value": languages[0]}


def change_size_choices(key):
    """Return the update dict for the model-size dropdown of ASR model ``key``.

    For the Faster Whisper entry the sizes come from a fresh scan of the
    locally available models; every other entry uses its ``'size'`` list from
    ``asr_dict``.  The first choice becomes the selected value.
    """
    entry = asr_dict[key]
    if entry['name'] != 'fasterwhisper':
        choice = entry['size']
    else:
        # Imported on demand, only when the Faster Whisper entry is selected.
        from tools.asr.fasterwhisper_asr import FasterWhisperASR
        choice = FasterWhisperASR.check_local_models()
    return {"__type__": "update", "choices": choice, "value": choice[0]}