mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-07 07:02:57 +08:00
fix ASR
This commit is contained in:
parent
8ed4d157b3
commit
94e34568dd
@ -204,6 +204,9 @@ python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
````
|
||||
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
|
||||
````
|
||||
|
||||
---
|
||||
|
||||
这是使用命令行完成数据集的音频切分的方式
|
||||
````
|
||||
python audio_slicer.py \
|
||||
@ -214,17 +217,38 @@ python audio_slicer.py \
|
||||
--min_interval <shortest_time_gap_between_adjacent_subclips>
|
||||
--hop_size <step_size_for_computing_volume_curve>
|
||||
````
|
||||
这是使用命令行完成数据集ASR处理的方式(仅限中文)
|
||||
````
|
||||
python tools/damo_asr/cmd-asr.py "<Path to the directory containing input audio files>"
|
||||
````
|
||||
通过Faster_Whisper进行ASR处理(除中文之外的ASR标记)
|
||||
|
||||
(没有进度条,GPU性能可能会导致时间延迟)
|
||||
---
|
||||
|
||||
这是使用命令行完成数据集ASR处理的方式
|
||||
|
||||
通过 FunASR 进行 ASR 处理 (仅中文)
|
||||
````
|
||||
python ./tools/damo_asr/WhisperASR.py -i <input> -o <output> -f <file_name.list> -l <language>
|
||||
python tools/asr/funasr_asr.py \
|
||||
-i "<Path of single audio file or Path to the directory containing input audio files>" \
|
||||
-o "Output folder"
|
||||
````
|
||||
启用自定义列表保存路径
|
||||
|
||||
参数:
|
||||
- `-i/--input_file_or_folder`: 输入音频文件/含有音频的文件夹路径.
|
||||
- `-o/--output_folder`: 输出文件夹路径, 输出文件将以输入文件所在目录名称命名, 后缀为 `.list`.
|
||||
- ~~`-s/--model_size`~~: 模型尺寸, 基于 Paraformer-Large, 其他输入目前无效.
|
||||
- ~~`-l/--language`~~: 识别语言, 仅支持中文, 其他输入目前无效.
|
||||
- ~~`-p/--precision`~~: 计算精度, 其他输入目前无效.
|
||||
|
||||
通过 Faster_Whisper 进行 ASR 处理 (其他语言, 中文会自动转到 FunASR 进行处理)
|
||||
|
||||
````
|
||||
python tools/asr/fasterwhisper_asr.py -i <input_file_or_folder> -o <output_folder> -s <model_size> -l <language> -p <precision>
|
||||
````
|
||||
参数:
|
||||
- `-i/--input_file_or_folder`: 输入音频文件/含有音频的文件夹路径.
|
||||
- `-o/--output_folder`: 输出文件夹路径, 输出文件将以输入文件所在目录名称命名, 后缀为 `.list`.
|
||||
- `-s/--model_size`: 模型尺寸, 可选值与本地是否存在模型相关, 默认为 `large-v3`.
|
||||
- `-l/--language`: 识别语言, 默认为 `auto` 自动识别语言.
|
||||
- `-p/--precision`: 计算精度, 可选 `float16`, `float32`, 未检测到 CUDA 时会自动改用 CPU 和 `float32`.
|
||||
|
||||
|
||||
## 致谢
|
||||
|
||||
特别感谢以下项目和贡献者:
|
||||
|
@ -1,31 +1,114 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
def check_fw_local_models():
    """Return the Faster Whisper size names, marking those present on disk.

    A size whose folder ``tools/asr/models/faster-whisper-<size>`` exists
    (relative to the current working directory) is reported with a
    ``-local`` suffix; every other size is returned unchanged.
    """
    known_sizes = (
        "tiny", "tiny.en",
        "base", "base.en",
        "small", "small.en",
        "medium", "medium.en",
        "large", "large-v1",
        "large-v2", "large-v3",
    )
    return [
        name + '-local'
        if os.path.exists(f'tools/asr/models/faster-whisper-{name}')
        else name
        for name in known_sizes
    ]
|
||||
from tqdm import tqdm
|
||||
from tqdm.contrib.logging import logging_redirect_tqdm
|
||||
|
||||
from tools.my_utils import ASR_Logger
|
||||
|
||||
class BaseASR:
    """Shared scaffolding for the ASR back-ends.

    Subclasses implement ``load_model`` and ``inference``; this class
    provides on-disk model discovery and the driver that transcribes a
    single file or every file in a folder and writes a ``.list`` file.
    """

    def __init__(self):
        pass

    def check_local_model(self, model_name, model_file, cache_path):
        """Look for a model folder on disk.

        ``tools/asr/models`` (relative to the working directory) is walked
        first; ``cache_path`` is only walked when nothing was found there.
        A folder matches when it contains ``model_file`` and the path to
        that file contains ``model_name`` followed by a path separator.

        Returns:
            ``(model_path, flag)`` where ``flag`` is ``'local'``,
            ``'cache'``, or ``''`` (with an empty ``model_path``) when no
            folder matched.
        """
        search_roots = (
            (os.path.normpath('tools/asr/models'), 'local'),
            (cache_path, 'cache'),
        )
        for search_root, flag in search_roots:
            model_path = ''
            for root, _dirs, files in os.walk(search_root):
                if model_file in files and model_name + os.sep in os.path.join(root, model_file):
                    # Keep walking: the last matching folder wins.
                    model_path = root
            if model_path:
                return model_path, flag
        return '', ''

    def load_model(self, *args, **kwargs):
        """Load and return the back-end model. Must be overridden."""
        raise NotImplementedError

    def inference(self, file_path=None, language=None):
        """Transcribe one file and return ``(text, language)``. Must be overridden."""
        raise NotImplementedError

    def inference_file_or_folder(self, input_file_or_folder, output_folder, language):
        """Transcribe a file, or every file directly inside a folder, and save the result.

        Each successful transcription becomes one line
        ``<audio path>|<list name>|<LANGUAGE>|<text>`` in
        ``<output_folder>/<list name>.list``. The list name is the name of
        the input folder (or of the folder containing the input file). If
        the target file already exists, a timestamp is appended to the new
        file name instead of overwriting.

        Args:
            input_file_or_folder: Path of an audio file or of a folder of audio files.
            output_folder: Folder for the ``.list`` file; created when missing.
            language: Language code handed to ``inference``; ``'auto'`` is
                passed on as ``None``.

        Returns:
            Absolute path of the written list file, or ``None`` when no file
            produced a transcription.

        Raises:
            AssertionError: If ``input_file_or_folder`` does not exist.
        """
        if not os.path.exists(input_file_or_folder):
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O`` and the exception carries the message.
            message = '输入路径不存在.'
            ASR_Logger.error(message)
            raise AssertionError(message)

        if os.path.isfile(input_file_or_folder):
            # Resolve first so a bare relative file name still has a parent folder name.
            file_path = os.path.abspath(input_file_or_folder)
            input_file_paths = [file_path]
            output_file_name = os.path.basename(os.path.dirname(file_path))
        else:
            # abspath also strips a trailing separator, which would otherwise
            # make the basename (and so the list name) empty.
            input_folder = os.path.abspath(input_file_or_folder)
            candidates = (
                os.path.join(input_folder, name)
                for name in sorted(os.listdir(input_folder))
            )
            input_file_paths = [path for path in candidates if os.path.isfile(path)]
            output_file_name = os.path.basename(input_folder)

        os.makedirs(output_folder, exist_ok=True)

        if language == 'auto':
            # No language set: the model picks the most probable one itself.
            language = None

        result = []
        ASR_Logger.info("开始转写")
        with logging_redirect_tqdm([ASR_Logger]):
            for file_path in tqdm(input_file_paths, desc="转写进度 ", dynamic_ncols=True):
                text, output_language = self.inference(file_path, language)
                # Failed or empty transcriptions are left out of the list.
                if text and output_language:
                    result.append(f"{file_path}|{output_file_name}|{output_language.upper()}|{text}")

        if not result:
            ASR_Logger.error("没有转写结果, 放弃保存.")
            return

        output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list')
        if os.path.exists(output_file_path):
            ASR_Logger.info('输出文件路径已存在, 文件名添加时间戳.')
            timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
            file_name, file_extension = os.path.splitext(output_file_path)
            output_file_path = f"{file_name}-{timestamp}{file_extension}"

        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write("\n".join(result))
        ASR_Logger.info(f"任务完成->标注文件路径: {output_file_path}\n")
        return output_file_path
|
||||
|
||||
# Size names accepted for Faster Whisper models.
fw_model_size_list = [
    "tiny", "tiny.en",
    "base", "base.en",
    "small", "small.en",
    "medium", "medium.en",
    "large", "large-v1",
    "large-v2", "large-v3"]

# Registry of the available ASR back-ends, keyed by display name. Each entry
# carries the back-end identifier ('name'), its selectable languages ('lang'),
# its selectable model sizes ('size') and a script file name ('path').
# NOTE: 'size' appears exactly once per entry; a repeated key in a dict
# literal silently discards the earlier value.
asr_dict = {
    "达摩 ASR (中文)": {
        'name': 'funasr',
        'lang': ['zh'],
        'size': ['large'],
        'path': 'funasr_asr.py',
    },
    "Faster Whisper (多语种)": {
        'name': 'fasterwhisper',
        'lang': ['auto', 'zh', 'en', 'ja'],
        'size': fw_model_size_list,
        'path': 'fasterwhisper_asr.py'
    }
}
|
||||
|
||||
|
@ -1,16 +1,15 @@
|
||||
import argparse
|
||||
import os
|
||||
os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
|
||||
import traceback
|
||||
import requests
|
||||
from glob import glob
|
||||
|
||||
import torch
|
||||
from faster_whisper import WhisperModel
|
||||
from tqdm import tqdm
|
||||
|
||||
from tools.asr.config import check_fw_local_models
|
||||
from tools.asr.funasr_asr import only_asr
|
||||
from tools.asr.config import fw_model_size_list, BaseASR
|
||||
from tools.asr.funasr_asr import FunASR
|
||||
from tools.my_utils import ASR_Logger
|
||||
|
||||
os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
|
||||
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
||||
|
||||
language_code_list = [
|
||||
@ -36,72 +35,106 @@ language_code_list = [
|
||||
"vi", "yi", "yo", "zh", "yue",
|
||||
"auto"]
|
||||
|
||||
class FasterWhisperASR(BaseASR):
    """Faster Whisper transcription back-end.

    Audio detected as Chinese is handed to FunASR, which is created lazily
    the first time a Chinese file is encountered.
    """

    def __init__(self, model_size, device="cuda", precision="float16"):
        """Discover local models and load the requested one.

        Args:
            model_size: A plain size name (e.g. ``'large-v3'``) or one of the
                ``-local`` / ``-cache`` suffixed names returned by
                ``check_local_models``.
            device: Device to run on; ignored when CUDA is unavailable.
            precision: Compute type; ignored when CUDA is unavailable.

        Raises:
            AssertionError: If the model could not be loaded.
        """
        # Without CUDA the request is overridden to CPU / float32.
        if not torch.cuda.is_available():
            device, precision = "cpu", "float32"
        self.check_local_models()
        self.model = self.load_model(model_size, device, precision)
        if self.model is None:
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O`` and the exception carries the message.
            message = '模型不存在'
            ASR_Logger.error(message)
            raise AssertionError(message)
        self.zh_model = None

    @classmethod
    def check_local_models(cls):
        """Check which Faster Whisper models are already on disk.

        Sets ``cls.model_size_list`` and ``cls.model_path_dict``. A size
        whose ``faster-whisper-<size>`` folder (containing ``model.bin``)
        was found is listed as ``<size>-local`` or ``<size>-cache`` and
        mapped to that folder in ``model_path_dict``.

        Returns:
            The list of selectable size names.
        """
        cache_path = os.path.normpath(os.path.expanduser("~/.cache/huggingface/hub/"))
        cls.model_size_list = fw_model_size_list.copy()
        cls.model_path_dict = {}
        for i, size in enumerate(cls.model_size_list):
            # check_local_model reads no instance state, so the class
            # itself stands in for ``self``.
            model_path, flag = BaseASR.check_local_model(
                cls,
                model_name=f"faster-whisper-{size}",
                model_file='model.bin',
                cache_path=cache_path)
            if flag:
                cls.model_size_list[i] = f"{size}-{flag}"
                cls.model_path_dict[cls.model_size_list[i]] = model_path
        return cls.model_size_list

    def load_model(self, model_size, device="cuda", precision="float16"):
        """Create the ``WhisperModel``.

        A ``-local`` / ``-cache`` suffixed size is loaded from the folder
        recorded by ``check_local_models``; any other value is passed to
        ``WhisperModel`` as-is.

        Returns:
            The loaded model, or ``None`` when loading failed (the failure
            is logged).
        """
        if model_size.endswith(('-local', '-cache')):
            model_path = self.model_path_dict[model_size]
            model_size = model_size[:-6]  # both suffixes are six characters long
            ASR_Logger.info(f"加载模型: 从 {model_path} 加载 faster-whisper-{model_size} 模型.")
            if 'huggingface' in model_path:
                ASR_Logger.warning(f"可将 {model_path} 移动到 tools/asr/models/ 文件夹下并重命名为 faster-whisper-{model_size}.")
        else:
            model_path = model_size
            ASR_Logger.warning(f"下载模型: 从 https://hf-mirror.com/Systran/faster-whisper-{model_size} 下载 faster-whisper-{model_size} 模型.")

        try:
            model = WhisperModel(model_path, device=device, compute_type=precision)
            if model.model.device != 'cpu':
                device_name = torch.cuda.get_device_name(model.model.device)
            else:
                device_name = 'CPU'
            ASR_Logger.info(f"运行设备: {device_name}, 设定精度: {precision}.")
            ASR_Logger.info("创建模型: Faster Whisper 完成.\n")
            return model
        except Exception:
            # Deliberately best-effort: the caller turns the None into an error.
            ASR_Logger.info(traceback.format_exc())
            ASR_Logger.error(f"模型加载失败 or 下载失败, 可访问 https://hf-mirror.com/Systran/faster-whisper-{model_size} 自行下载, 并放置于 tools/asr/models/ 文件夹下")
            return None

    def inference(self, file_path, language='auto'):
        """Transcribe one audio file.

        Args:
            file_path: Path of the audio file.
            language: Language code, or ``'auto'`` / ``None`` to let the
                model detect it.

        Returns:
            ``(text, language)`` with the language reported by the model, or
            ``('', '')`` when transcription failed (the failure is logged).
        """
        try:
            if language == 'auto':
                language = None

            segments, info = self.model.transcribe(
                audio=file_path,
                beam_size=5,
                vad_filter=True,
                vad_parameters=dict(min_silence_duration_ms=700),
                language=language)

            if info.language == "zh":
                ASR_Logger.info("检测为中文文本, 转 FunASR 处理.")
                if self.zh_model is None:
                    self.zh_model = FunASR()
                text, _ = self.zh_model.inference(file_path)
            else:
                text = ''.join(segment.text for segment in segments)
            return text, info.language
        except Exception:
            ASR_Logger.error(f"当前文件 {file_path} 转写失败, 可能不是有效的音频文件.")
            ASR_Logger.error(traceback.format_exc())
            return '', ''
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: transcribe a file or folder with Faster Whisper.
    # Each option is registered exactly once; argparse rejects a second
    # registration of the same option string.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_file_or_folder", type=str, required=True,
                        help="Input audio file path or folder contain audio files.")
    parser.add_argument("-o", "--output_folder", type=str, required=True,
                        help="Output folder to store transcriptions.")
    parser.add_argument("-s", "--model_size", type=str, default='large-v3',
                        choices=FasterWhisperASR.check_local_models(),
                        help="Model Size of Faster Whisper")
    parser.add_argument("-l", "--language", type=str, default='auto',
                        choices=language_code_list,
                        help="Language of the audio files.")
    parser.add_argument("-p", "--precision", type=str, default='float16',
                        choices=['float16', 'float32'], help="fp16 or fp32")

    cmd = parser.parse_args()
    ASR = FasterWhisperASR(
        model_size=cmd.model_size,
        precision=cmd.precision,
    )
    ASR.inference_file_or_folder(
        input_file_or_folder=cmd.input_file_or_folder,
        output_folder=cmd.output_folder,
        language=cmd.language,
    )
|
||||
|
||||
|
@ -3,74 +3,114 @@
|
||||
import argparse
|
||||
import os
|
||||
import traceback
|
||||
from tqdm import tqdm
|
||||
|
||||
import torch
|
||||
from funasr import AutoModel
|
||||
|
||||
path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
||||
path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
||||
path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch'
|
||||
path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
|
||||
path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
||||
path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
||||
from tools.asr.config import BaseASR
|
||||
from tools.my_utils import ASR_Logger
|
||||
|
||||
model = AutoModel(
|
||||
model = path_asr,
|
||||
model_revision = "v2.0.4",
|
||||
vad_model = path_vad,
|
||||
vad_model_revision = "v2.0.4",
|
||||
punc_model = path_punc,
|
||||
punc_model_revision = "v2.0.4",
|
||||
)
|
||||
# The three FunASR pipeline components, keyed by a short code. 'name' is the
# display name used in log messages and 'size' is the model folder /
# repository name.
funasr_component = {
    code: {'name': display_name, 'size': model_id}
    for code, display_name, model_id in (
        ('asr', 'Paraformer-Large',
         'speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'),
        ('vad', 'FSMN-Monophone VAD',
         'speech_fsmn_vad_zh-cn-16k-common-pytorch'),
        ('punc', 'Controllable Time-delay Transformer',
         'punc_ct-transformer_zh-cn-common-vocab272727-pytorch'),
    )
}
|
||||
|
||||
def only_asr(input_file):
    """Transcribe a single audio file with the module-level ``model``.

    Args:
        input_file: Path of the audio file.

    Returns:
        The recognised text, or ``''`` when transcription failed (the
        traceback is printed).
    """
    try:
        return model.generate(input=input_file)[0]["text"]
    except Exception:
        # Best-effort by design, but let KeyboardInterrupt / SystemExit through.
        print(traceback.format_exc())
        return ''
|
||||
class FunASR(BaseASR):
    """FunASR (Paraformer-Large + VAD + punctuation) transcription back-end."""

    def __init__(self, model_size='large', device="cuda", precision="float16"):
        """Discover local models and build the FunASR pipeline.

        ``model_size``, ``device`` and ``precision`` are accepted so the
        constructor matches the other back-end, but they are not used here.

        Raises:
            ValueError: If the models could not be loaded or downloaded.
        """
        self.check_local_models()
        self.model = self.load_model()
        if self.model is None:
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O`` and the exception carries the message.
            message = '模型不存在'
            ASR_Logger.error(message)
            raise AssertionError(message)

    @classmethod
    def check_local_models(cls):
        """Resolve where each FunASR component should be loaded from.

        Sets ``cls.model_path_dict`` to a copy of ``funasr_component`` in
        which every entry gains a ``'path'``: the folder found on disk
        (containing ``model.pt``), or ``'iic/<model name>'`` when no local
        folder exists.

        Returns:
            The resulting ``model_path_dict``.
        """
        cache_path = os.path.normpath(os.path.expanduser("~/.cache/modelscope/hub/"))
        # Work on copies so the module-level component table is not mutated.
        model_path_dict = {code: dict(dic) for code, dic in funasr_component.items()}
        for dic in model_path_dict.values():
            model_name = dic['size']
            # check_local_model reads no instance state, so the class
            # itself stands in for ``self``.
            model_path, _flag = BaseASR.check_local_model(
                cls,
                model_name=model_name,
                model_file='model.pt',
                cache_path=cache_path,
            )
            # No local folder: fall back to the remote model identifier.
            dic['path'] = model_path if model_path else 'iic/' + model_name
        cls.model_path_dict = model_path_dict
        return cls.model_path_dict

    def load_model(self):
        """Build the FunASR ``AutoModel`` from the resolved component paths.

        Returns:
            The created model.

        Raises:
            ValueError: If loading or downloading any component failed.
        """
        try:
            for dic in self.model_path_dict.values():
                if os.path.exists(dic['path']):
                    ASR_Logger.info(f"加载模型: 从 {dic['path']} 加载 {dic['name']} 模型.")
                    if 'modelscope' in dic['path']:
                        ASR_Logger.warning(f"可将 {dic['path']} 移动到 tools/asr/models/ 文件夹下.")
                else:
                    ASR_Logger.warning(f"下载模型: 从 {dic['path']} 下载 {dic['name']} 模型.")

            model = AutoModel(
                model=self.model_path_dict['asr']['path'],
                model_revision="v2.0.4",
                vad_model=self.model_path_dict['vad']['path'],
                vad_model_revision="v2.0.4",
                punc_model=self.model_path_dict['punc']['path'],
                punc_model_revision="v2.0.4",
            )
            # Avoid duplicated log lines caused by the FunASR library.
            ASR_Logger.propagate = False

            if model.kwargs['device'] != 'cpu':
                device_name = torch.cuda.get_device_name(model.kwargs['device'])
            else:
                device_name = 'CPU'
            ASR_Logger.info(f"运行设备: {device_name}, 设定精度: --.")
            ASR_Logger.info("创建模型: FunASR 完成.\n")
            return model
        except Exception as err:
            ASR_Logger.error(traceback.format_exc())
            # Logger.error() returns None, so build the message first and
            # give the same text to both the log and the exception.
            message = "模型加载失败 or 下载失败, 可访问 https://modelscope.cn/organization/iic 自行下载, 并放置于 tools/asr/models/ 文件夹下"
            ASR_Logger.error(message)
            raise ValueError(message) from err

    def inference(self, file_path, language='zh'):
        """Transcribe one audio file.

        Args:
            file_path: Path of the audio file.
            language: Returned unchanged alongside the text.

        Returns:
            ``(text, language)``, or ``('', '')`` when transcription failed
            (the failure is logged).
        """
        try:
            text = self.model.generate(input=file_path)[0]["text"]
            return text, language
        except Exception:
            ASR_Logger.error(f"当前文件 {file_path} 转写失败, 可能不是有效的音频文件.")
            ASR_Logger.error(traceback.format_exc())
            return '', ''
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: transcribe a file or folder with FunASR.
    # Each option is registered exactly once; argparse rejects a second
    # registration of the same option string.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_file_or_folder", type=str, required=True,
                        help="Input audio file path or folder contain audio files.")
    parser.add_argument("-o", "--output_folder", type=str, required=True,
                        help="Output folder to store transcriptions.")
    parser.add_argument("-s", "--model_size", type=str, default='large',
                        help="Model Size of FunASR is Large")
    parser.add_argument("-l", "--language", type=str, default='zh', choices=['zh'],
                        help="Language of the audio files.")
    parser.add_argument("-p", "--precision", type=str, default='float16',
                        choices=['float16', 'float32'], help="fp16 or fp32")  # not wired in yet

    cmd = parser.parse_args()
    ASR = FunASR(
        model_size=cmd.model_size,
        precision=cmd.precision,
    )
    ASR.inference_file_or_folder(
        input_file_or_folder=cmd.input_file_or_folder,
        output_folder=cmd.output_folder,
        language=cmd.language,
    )
|
||||
|
@ -1,4 +1,8 @@
|
||||
import platform,os,traceback
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
import sys
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
|
||||
@ -29,3 +33,46 @@ def clean_path(path_str):
|
||||
if platform.system() == 'Windows':
|
||||
path_str = path_str.replace('/', '\\')
|
||||
return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||
|
||||
# ANSI escape sequences keyed by log level name; levels without an entry are
# printed uncoloured.
COLORS = {
    'WARNING' : '\033[33m',  # Yellow
    'ERROR'   : '\033[31m',  # Red
    'CRITICAL': '\033[35m',  # Magenta
    'RESET'   : '\033[0m',   # Reset color
}


class ColoredConsoleHandler(logging.StreamHandler):
    """Stream handler that colours each line according to the record's level."""

    _LINE_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    # One formatter per level name, built on first use and shared by all
    # handlers, instead of allocating a new Formatter for every record.
    _formatters = {}

    def emit(self, record):
        """Install the formatter matching the record's level, then emit it."""
        formatter = self._formatters.get(record.levelname)
        if formatter is None:
            color = COLORS.get(record.levelname, '')
            formatter = logging.Formatter(color + self._LINE_FORMAT + COLORS['RESET'])
            self._formatters[record.levelname] = formatter
        self.setFormatter(formatter)
        super().emit(record)


class Tools_Logger():
    """Thin wrapper that configures a named logger with a coloured console handler."""

    _LEVEL_NAMES = ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG')

    def __init__(self, logger_name, log_level='info', log_path=None):
        """Create or fetch the logger ``logger_name`` and set its level.

        Args:
            logger_name: Name passed to ``logging.getLogger``.
            log_level: Level name, case-insensitive; one of CRITICAL, ERROR,
                WARNING, INFO, DEBUG.
            log_path: Currently unused.

        Raises:
            AssertionError: If ``log_level`` is not a valid level name.
        """
        if not isinstance(log_level, str) or log_level.upper() not in self._LEVEL_NAMES:
            # Explicit raise instead of ``assert`` so the check survives ``python -O``.
            raise AssertionError(f"log_level must be one of {self._LEVEL_NAMES}, got {log_level!r}")
        log_level = log_level.upper()
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(log_level)

        # hasHandlers() also considers ancestor loggers, so the console
        # handler is only added when nothing at all would handle the records.
        if not self.logger.hasHandlers():
            ch = ColoredConsoleHandler()
            ch.setLevel(log_level)
            self.logger.addHandler(ch)

    def getLogger(self):
        """Return the configured ``logging.Logger``."""
        return self.logger


ASR_Logger = Tools_Logger('ASR').getLogger()
|
35
webui.py
35
webui.py
@ -660,6 +660,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
with gr.Row():
|
||||
if_uvr5 = gr.Checkbox(label=i18n("是否开启UVR5-WebUI"),show_label=True)
|
||||
uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
|
||||
|
||||
gr.Markdown(value=i18n("0b-语音切分工具"))
|
||||
with gr.Row():
|
||||
with gr.Row():
|
||||
@ -677,16 +678,17 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
|
||||
n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
|
||||
slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息"))
|
||||
gr.Markdown(value=i18n("0c-中文批量离线ASR工具"))
|
||||
|
||||
gr.Markdown(value=i18n("0c-离线批量语音识别工具"))
|
||||
with gr.Row():
|
||||
open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True)
|
||||
close_asr_button = gr.Button(i18n("终止ASR进程"), variant="primary",visible=False)
|
||||
with gr.Column():
|
||||
with gr.Column(scale=3):
|
||||
with gr.Row():
|
||||
asr_inp_dir = gr.Textbox(
|
||||
label=i18n("输入文件夹路径"),
|
||||
value="D:\\GPT-SoVITS\\raw\\xxx",
|
||||
interactive=True,
|
||||
label = i18n("输入音频文件/文件夹路径"),
|
||||
value = "D:\\GPT-SoVITS\\raw\\xxx",
|
||||
interactive = True,
|
||||
)
|
||||
asr_opt_dir = gr.Textbox(
|
||||
label = i18n("输出文件夹路径"),
|
||||
@ -695,32 +697,39 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
)
|
||||
with gr.Row():
|
||||
asr_model = gr.Dropdown(
|
||||
label = i18n("ASR 模型"),
|
||||
label = i18n("ASR 模型选择"),
|
||||
choices = list(asr_dict.keys()),
|
||||
interactive = True,
|
||||
value="达摩 ASR (中文)"
|
||||
value = "达摩 ASR (中文)",
|
||||
info = '右侧两项跟随此项更新'
|
||||
)
|
||||
asr_size = gr.Dropdown(
|
||||
label = i18n("ASR 模型尺寸"),
|
||||
choices = ["large"],
|
||||
interactive = True,
|
||||
value="large"
|
||||
value = "large",
|
||||
info = 'local (当前项目)/cache (本地缓存)/自动下载'
|
||||
)
|
||||
asr_lang = gr.Dropdown(
|
||||
label = i18n("ASR 语言设置"),
|
||||
choices = ["zh"],
|
||||
interactive = True,
|
||||
value="zh"
|
||||
value = "zh",
|
||||
info = '识别语言: 自动 (auto)/ 指定'
|
||||
)
|
||||
with gr.Row():
|
||||
asr_info = gr.Textbox(label=i18n("ASR进程输出信息"))
|
||||
asr_info = gr.Textbox(label=i18n("ASR 进程输出信息"))
|
||||
|
||||
def change_lang_choices(key):
    """Build the update for the language dropdown when the ASR model changes.

    The dropdown offers the languages registered for ``key`` in
    ``asr_dict`` and the first of them becomes the selected value.
    """
    languages = asr_dict[key]['lang']
    return {"__type__": "update", "choices": languages, "value": languages[0]}
|
||||
def change_size_choices(key):
    """Build the update for the model-size dropdown when the ASR model changes.

    For the Faster Whisper back-end the sizes come from
    ``FasterWhisperASR.check_local_models()``; otherwise they are the sizes
    registered for ``key`` in ``asr_dict``. The first size becomes the
    selected value.
    """
    if asr_dict[key]['name'] == 'fasterwhisper':
        # Imported here so the module is only loaded when this back-end is selected.
        from tools.asr.fasterwhisper_asr import FasterWhisperASR
        choice = FasterWhisperASR.check_local_models()
    else:
        choice = asr_dict[key]['size']
    return {"__type__": "update", "choices": choice, "value": choice[0]}
|
||||
asr_model.change(change_lang_choices, [asr_model], [asr_lang])
|
||||
asr_model.change(change_size_choices, [asr_model], [asr_size])
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user