Update cmd-asr.py

改造为多进程提高转写预测效率
This commit is contained in:
刘悦 2024-01-20 19:47:25 +08:00 committed by GitHub
parent a7b64b4d7e
commit 9886213592
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,11 +3,17 @@
from modelscope.pipelines import pipeline from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.models import Model from modelscope.models import Model
import multiprocessing
import sys,os,traceback import sys,os,traceback
from threading import Lock
# Guards the appends to the shared .list output file in process_audio_file.
# NOTE(review): a threading.Lock is not shared between processes - each worker
# process ends up with its own independent copy, so confirm this actually
# serializes writes when used with multiprocessing.Pool.
lock = Lock()
# Number of worker processes used for the multiprocessing pool
processes = 2
dir=sys.argv[1] dir=sys.argv[1]
# opt_name=dir.split("\\")[-1].split("/")[-1] # opt_name=dir.split("\\")[-1].split("/")[-1]
opt_name=os.path.basename(dir) opt_name=os.path.basename(dir)
# FunAsr三语转写model # FunAsr三语转写model
lang2model = { lang2model = {
'zh': 'tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch', 'zh': 'tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
@ -24,15 +30,30 @@ inference_pipeline = pipeline(
punc_model='tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch', punc_model='tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
) )
opt=[]
for name in os.listdir(dir): def process_audio_file(dir,name,opt_name):
try: try:
text = inference_pipeline(audio_in="%s/%s"%(dir,name))["text"] text = inference_pipeline(audio_in="%s/%s" % (dir, name))["text"]
opt.append("%s/%s|%s|ZH|%s"%(dir,name,opt_name,text))
with lock:
with open(filename,"a",encoding="utf-8")as f:f.write("%s/%s|%s|ZH|%s\n" % (dir, name, opt_name, text))
except: except:
print(traceback.format_exc()) print(traceback.format_exc())
opt_dir="output/asr_opt"
os.makedirs(opt_dir,exist_ok=True)
with open("%s/%s.list"%(opt_dir,opt_name),"w",encoding="utf-8")as f:f.write("\n".join(opt))
def _init_worker(list_path, shared_lock):
    """Pool initializer: bind the output path and a cross-process lock.

    ``process_audio_file`` reads the module-level names ``filename`` and
    ``lock``. This function runs once inside every worker process and binds
    both names there, so the workers append to the right file and serialize
    their writes with a lock that is actually shared between processes.
    """
    global filename, lock
    filename = list_path
    lock = shared_lock


def run__process():  # main process
    """Transcribe every file in ``dir`` using a pool of worker processes.

    Creates ``output/asr_opt/<opt_name>.list`` (emptying any previous copy)
    and dispatches one ``process_audio_file`` call per directory entry to a
    ``multiprocessing.Pool`` of ``processes`` workers. Each worker appends
    its own result line to the list file.
    """
    opt_dir = "output/asr_opt"
    os.makedirs(opt_dir, exist_ok=True)
    list_path = "%s/%s.list" % (opt_dir, opt_name)

    # Start from an empty list file. os.remove() accepts no ``exist_ok``
    # argument (passing it raises TypeError), and truncating also guarantees
    # the file exists even when no audio file is transcribed successfully.
    with open(list_path, "w", encoding="utf-8"):
        pass

    tasks = [(dir, name, opt_name) for name in os.listdir(dir)]

    # The path and the lock are handed to the workers through the pool
    # initializer: a local variable of this function is invisible to them,
    # and a threading.Lock would not be shared across process boundaries.
    with multiprocessing.Pool(
        processes=processes,
        initializer=_init_worker,
        initargs=(list_path, multiprocessing.Lock()),
    ) as pool:
        pool.starmap(process_audio_file, tasks)
# Script entry point: only start the worker pool when run directly, not when
# this module is imported.
if __name__ == "__main__":
    run__process()