remove glob

This commit is contained in:
starylan 2024-04-10 20:00:02 +08:00
parent 4e43f6097f
commit cab47dbea9

View File

@ -1,16 +1,14 @@
import argparse
import os
os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
import traceback
import requests
from glob import glob
import torch
import torch
from faster_whisper import WhisperModel
from tqdm import tqdm
from tools.asr.config import check_fw_local_models
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
language_code_list = [
@ -50,17 +48,18 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):
model = WhisperModel(model_path, device=device, compute_type=precision)
except:
return print(traceback.format_exc())
input_file_names = os.listdir(input_folder)
input_file_names.sort()
output = []
output_file_name = os.path.basename(input_folder)
output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list')
if not os.path.exists(output_folder):
os.makedirs(output_folder)
for file in tqdm(glob(os.path.join(input_folder, '**/*.wav'), recursive=True)):
for file_name in tqdm(input_file_names):
try:
file_path = os.path.join(input_folder, file_name)
segments, info = model.transcribe(
audio = file,
audio = file_path,
beam_size = 5,
vad_filter = True,
vad_parameters = dict(min_silence_duration_ms=700),
@ -68,18 +67,23 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):
text = ''
if info.language == "zh":
print("检测为中文文本,转funasr处理")
print("检测为中文文本, 转 FunASR 处理")
if("only_asr"not in globals()):
from tools.asr.funasr_asr import only_asr##如果用英文就不需要导入下载模型
text = only_asr(file)
from tools.asr.funasr_asr import \
only_asr # #如果用英文就不需要导入下载模型
text = only_asr(file_path)
if text == '':
for segment in segments:
text += segment.text
output.append(f"{file}|{output_file_name}|{info.language.upper()}|{text}")
output.append(f"{file_path}|{output_file_name}|{info.language.upper()}|{text}")
except:
return print(traceback.format_exc())
output_folder = output_folder or "output/asr_opt"
os.makedirs(output_folder, exist_ok=True)
output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list')
with open(output_file_path, "w", encoding="utf-8") as f:
f.write("\n".join(output))
print(f"ASR 任务完成->标注文件路径: {output_file_path}\n")