mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-07 23:48:48 +08:00
多进程优化转写效率,提高效率
多进程优化转写效率,提高效率
This commit is contained in:
parent
813cf96e50
commit
2bbc37e5e8
@ -2,33 +2,60 @@
|
|||||||
|
|
||||||
from modelscope.pipelines import pipeline
|
from modelscope.pipelines import pipeline
|
||||||
from modelscope.utils.constant import Tasks
|
from modelscope.utils.constant import Tasks
|
||||||
|
from modelscope.models import Model
|
||||||
|
import multiprocessing
|
||||||
import sys,os,traceback
|
import sys,os,traceback
|
||||||
|
from threading import Lock
|
||||||
|
lock = Lock()
|
||||||
|
|
||||||
|
# Number of worker processes used for transcription.
processes = 2

# Input directory holding the audio files to transcribe (first CLI argument).
# NOTE: the name shadows the builtin `dir`; it is kept because other code in
# this file reads it as a module-level global.
dir = sys.argv[1]
# Used both as the output list file name and as the second field of every
# output line.
opt_name = os.path.basename(dir)

# FunASR transcription models for the three supported languages.
lang2model = {
    'zh': 'tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    'ja': "tools/damo_asr/models/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline",
    "en": "tools/damo_asr/models/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline",
}


def _local_or_hub(local_path):
    """Return `local_path` if it exists on disk, else the "damo/<name>" model id.

    Hard-coding the local directory makes model loading fail on a checkout
    where the models have not been downloaded into tools/damo_asr/models;
    falling back to the hub id lets the library fetch them instead.
    """
    if os.path.exists(local_path):
        return local_path
    return "damo/" + os.path.basename(local_path)


path_asr = _local_or_hub(lang2model["zh"])
path_vad = _local_or_hub('tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch')
path_punc = _local_or_hub('tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch')

model = Model.from_pretrained(path_asr)

# NOTE(review): this runs at import time, so every process that imports this
# module builds its own pipeline.
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model=model,
    vad_model=path_vad,
    punc_model=path_punc,
)
|
||||||
|
|
||||||
opt=[]
|
|
||||||
for name in os.listdir(dir):
|
def process_audio_file(dir,filename,name,opt_name):
    """Transcribe one audio file and append its result line to the list file.

    Args:
        dir: Directory that contains the audio file.
        filename: Path of the list file the result line is appended to.
        name: File name of the audio inside `dir`.
        opt_name: Value written as the second `|`-separated field of the line.

    Any failure is reported by printing the traceback and is not re-raised,
    so one bad file does not abort the rest of the batch.

    NOTE: the append is guarded by the module-level `lock`; that only gives
    exclusion between worker processes if `lock` is a multiprocessing lock
    shared with this worker.
    """
    audio_path = "%s/%s" % (dir, name)
    try:
        text = inference_pipeline(audio_in=audio_path)["text"]
        line = "%s|%s|ZH|%s\n" % (audio_path, opt_name, text.strip())
        with lock:
            with open(filename, "a", encoding="utf-8") as f:
                f.write(line)
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still stop the worker instead of being swallowed.
        print(traceback.format_exc())
|
||||||
|
|
||||||
opt_dir="output/asr_opt"
|
|
||||||
os.makedirs(opt_dir,exist_ok=True)
|
def _init_worker(shared_lock):
    """Pool initializer: make `shared_lock` this worker's module-level `lock`."""
    global lock
    lock = shared_lock


def run__process():  # main process
    """Transcribe every entry of `dir` in a process pool.

    Creates output/asr_opt, removes any previous "<opt_name>.list" there, and
    dispatches one `process_audio_file` call per directory entry to a pool of
    `processes` workers; the workers append their lines to the list file.
    """
    opt_dir = "output/asr_opt"
    os.makedirs(opt_dir, exist_ok=True)
    filename = "%s/%s.list" % (opt_dir, opt_name)

    # Workers open the file in append mode, so a stale file must go first.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    tasks = [(dir, filename, name, opt_name) for name in os.listdir(dir)]

    # A threading.Lock is copied into each worker and therefore excludes
    # nothing across processes. A multiprocessing.Lock handed to the workers
    # at start-up is really shared, so appends cannot interleave.
    shared_lock = multiprocessing.Lock()
    with multiprocessing.Pool(
        processes=processes,
        initializer=_init_worker,
        initargs=(shared_lock,),
    ) as pool:
        pool.starmap(process_audio_file, tasks)


if __name__ == '__main__':
    run__process()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user