Adjust-asr

This commit is contained in:
KamioRinn 2024-02-11 01:34:25 +08:00
parent 41041715a4
commit ccc7757880
3 changed files with 20 additions and 5 deletions

View File

@ -13,7 +13,7 @@ def check_fw_local_models():
"large-v2", "large-v3"]
for i, size in enumerate(model_size_list):
if os.path.exists(f'tools/asr/models/faster-whisper-{size}'):
model_size_list[i] = size + '(local)'
model_size_list[i] = size + '-local'
return model_size_list
asr_dict = {

View File

@ -8,6 +8,7 @@ from faster_whisper import WhisperModel
from tqdm import tqdm
from tools.asr.config import check_fw_local_models
from tools.asr.funasr_asr import only_asr
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
@ -35,8 +36,8 @@ language_code_list = [
"auto"]
def execute_asr(input_folder, output_folder, model_size, language,precision):
if 'local' in model_size:
model_size = model_size.split('(')[0]
if '-local' in model_size:
model_size = model_size[:-6]
model_path = f'tools/asr/models/faster-whisper-{model_size}'
else:
model_path = model_size
@ -63,8 +64,14 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):
vad_parameters = dict(min_silence_duration_ms=700),
language = language)
text = ''
for segment in segments:
text += segment.text
if info.language == "zh":
print("检测为中文文本,转funasr处理")
text = only_asr(file)
if text == '':
for segment in segments:
text += segment.text
output.append(f"{file}|{output_file_name}|{info.language.upper()}|{text}")
except:
return print(traceback.format_exc())

View File

@ -23,6 +23,14 @@ model = AutoModel(
punc_model_revision = "v2.0.4",
)
def only_asr(input_file):
    """Run recognition on one input with the module-level ``model``.

    Args:
        input_file: Value forwarded to ``model.generate`` as ``input``
            (presumably a path to an audio file; confirm against callers).

    Returns:
        The ``"text"`` field of the first result returned by
        ``model.generate``, or ``''`` if recognition raised an error.
        On failure the traceback is printed rather than propagated.
    """
    try:
        text = model.generate(input=input_file)[0]["text"]
    except Exception:
        # Best-effort: report the failure and hand back an empty string.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit still stop the program instead of being turned
        # into an empty transcript.
        text = ''
        print(traceback.format_exc())
    return text
def execute_asr(input_folder, output_folder, model_size, language):
input_file_names = os.listdir(input_folder)
input_file_names.sort()