From 8a101474b5a4f913b4c94fca2e3ca87d0771bae3 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sat, 3 Aug 2024 00:44:25 +0800 Subject: [PATCH] funasr support cantonese funasr support cantonese --- tools/asr/config.py | 2 +- tools/asr/funasr_asr.py | 45 +++++++++++++++++++++++++---------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/tools/asr/config.py b/tools/asr/config.py index b2e59ea..4b0d37a 100644 --- a/tools/asr/config.py +++ b/tools/asr/config.py @@ -18,7 +18,7 @@ def check_fw_local_models(): asr_dict = { "达摩 ASR (中文)": { - 'lang': ['zh'], + 'lang': ['zh','yue'], 'size': ['large'], 'path': 'funasr_asr.py', 'precision': ['float32'] diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index ec78678..103a2b9 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -8,22 +8,6 @@ from tqdm import tqdm # version_checker.check_for_update = lambda: None from funasr import AutoModel -path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' -path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch' -path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch' -path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" -path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch" -path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" - - -model = AutoModel( - model = path_asr, - model_revision = "v2.0.4", - vad_model = path_vad, - vad_model_revision = "v2.0.4", - punc_model = path_punc, - punc_model_revision = "v2.0.4", -) def only_asr(input_file): try: @@ -42,6 +26,7 @@ def execute_asr(input_folder, output_folder, model_size, language): for file_name in tqdm(input_file_names): try: + print(file_name) file_path = os.path.join(input_folder, file_name) text = model.generate(input=file_path)[0]["text"] output.append(f"{file_path}|{output_file_name}|{language.upper()}|{text}") @@ -65,12 +50,38 @@ if __name__ == '__main__': help="Output folder to store transcriptions.") parser.add_argument("-s", "--model_size", type=str, default='large', help="Model Size of FunASR is Large") - parser.add_argument("-l", "--language", type=str, default='zh', choices=['zh'], + parser.add_argument("-l", "--language", type=str, default='zh', choices=['zh','yue'], help="Language of the audio files.") parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32'], help="fp16 or fp32")#还没接入 cmd = parser.parse_args() + + path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch' + path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch' + path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch" + path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" + vad_model_revision=punc_model_revision="v2.0.4" + + if(cmd.language=="zh"): + path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' + path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" + model_revision="v2.0.4" + else: + path_asr = 'tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online' + path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online" + model_revision="master" + path_vad=path_punc=vad_model_revision=punc_model_revision=None###友情提示:粤语带VAD识别可能会有少量shape不对报错的,但是不带VAD可以.不带vad只能分阶段单独加标点。不过标点模型对粤语效果真的不行… + + model = AutoModel( + model=path_asr, + model_revision=model_revision, + vad_model=path_vad, + vad_model_revision=vad_model_revision, + punc_model=path_punc, + punc_model_revision=punc_model_revision, + ) + execute_asr( input_folder = cmd.input_folder, output_folder = cmd.output_folder,