diff --git a/README.md b/README.md index d8f67e2f..79387d5c 100644 --- a/README.md +++ b/README.md @@ -147,15 +147,7 @@ Users in China region can download these two models by entering the links below - [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights) -For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`. - -For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint. - -Users in China region can download this model by entering the links below - -- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3) (clicking "Download a copy") - -- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3) (HuggingFace mirror site) +For Multilingual ASR, download models from [FunAudioLLM/SenseVoiceSmall](https://huggingface.co/FunAudioLLM/SenseVoiceSmall/tree/main) or [iic/SenseVoiceSmall](https://modelscope.cn/models/iic/SenseVoiceSmall/files) and place them in `tools/asr/models`. 
## Dataset Format @@ -216,16 +208,11 @@ python audio_slicer.py \ --min_interval --hop_size ``` -This is how dataset ASR processing is done using the command line(Only Chinese) +This is how dataset ASR processing is done using the command line ``` -python tools/asr/funasr_asr.py -i -o +python tools/asr/sensevoice.py -i -o -l -d ``` -ASR processing is performed through Faster_Whisper(ASR marking except Chinese) -(No progress bars, GPU performance may cause time delays) -``` -python ./tools/asr/fasterwhisper_asr.py -i -o -l -``` A custom list save path is enabled ## Credits @@ -252,8 +239,7 @@ Special thanks to the following projects and contributors: - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) -- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) -- [FunASR](https://github.com/alibaba-damo-academy/FunASR) +- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) ## Thanks to all contributors for their efforts diff --git a/docs/cn/README.md b/docs/cn/README.md index dcca243e..642bab80 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -147,14 +147,8 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker - [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights) -对于中文自动语音识别(附加),从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 中。 +对于多语言自动语音识别(附加),从 [FunAudioLLM/SenseVoiceSmall](https://huggingface.co/FunAudioLLM/SenseVoiceSmall/tree/main) 或 [iic/SenseVoiceSmall](https://modelscope.cn/models/iic/SenseVoiceSmall/files) 下载模型,并将它们放置在 `tools/asr/models` 中。 -对于英语与日语自动语音识别(附加),从 
[Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。 - -中国地区用户可以通过以下链接下载: -- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3)(点击“下载副本”) - -- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3)(Hugging Face镜像站) ## 数据集格式 @@ -216,16 +210,11 @@ python audio_slicer.py \ --min_interval --hop_size ```` -这是使用命令行完成数据集ASR处理的方式(仅限中文) +这是使用命令行完成数据集ASR处理的方式 ```` -python tools/asr/funasr_asr.py -i -o +python tools/asr/sensevoice.py -i -o -l -d ```` -通过Faster_Whisper进行ASR处理(除中文之外的ASR标记) -(没有进度条,GPU性能可能会导致时间延迟) -```` -python ./tools/asr/fasterwhisper_asr.py -i -o -l -```` 启用自定义列表保存路径 ## 致谢 @@ -252,8 +241,7 @@ python ./tools/asr/fasterwhisper_asr.py -i -o -l - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) -- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) -- [FunASR](https://github.com/alibaba-damo-academy/FunASR) +- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) ## 感谢所有贡献者的努力 diff --git a/requirements.txt b/requirements.txt index bf2e28a8..da600c58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ gradio_client==0.8.1 ffmpeg-python onnxruntime tqdm -funasr==1.0.0 +funasr==1.1.0 cn2an pypinyin pyopenjtalk @@ -24,5 +24,4 @@ psutil jieba_fast jieba LangSegment>=0.2.0 -Faster_Whisper wordsegment \ No newline at end of file diff --git a/webui.py b/webui.py index 0881e73f..0836e3d7 100644 --- a/webui.py +++ b/webui.py @@ -202,16 +202,17 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang): cmd += f' -i "{asr_inp_dir}"' cmd += f' -o "{asr_opt_dir}"' cmd += f' -l {asr_lang}' - - - yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + output_file_name = 
os.path.basename(asr_inp_dir) + output_folder = asr_opt_dir or "output/asr_opt" + output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list') + yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"} print(cmd) p_asr = Popen(cmd, shell=True) p_asr.wait() p_asr=None - yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False},{"__type__":"update","value":output_file_path} else: - yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} + yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"} # return None def close_asr(): @@ -732,7 +733,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: label = i18n("ASR 模型"), choices = ['SenseVoice'], interactive = True, - value="enseVoice" + value="SenseVoice" ) asr_size = gr.Dropdown( label = i18n("ASR 模型尺寸"), @@ -761,7 +762,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: label_info = gr.Textbox(label=i18n("打标工具进程输出信息")) if_label.change(change_label, [if_label,path_list], [label_info]) if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info]) - open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) + open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button,path_list]) close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) close_slicer_button.click(close_slice, [], 
[slicer_info,open_slicer_button,close_slicer_button]) @@ -869,3 +870,26 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: server_port=webui_port_main, quiet=True, ) + + + +def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision): + global p_asr + if(p_asr==None): + asr_inp_dir=my_utils.clean_path(asr_inp_dir) + cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}' + cmd += f' -i "{asr_inp_dir}"' + cmd += f' -o "{asr_opt_dir}"' + cmd += f' -s {asr_model_size}' + cmd += f' -l {asr_lang}' + cmd += f" -p {asr_precision}" + + yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"} + print(cmd) + p_asr = Popen(cmd, shell=True) + p_asr.wait() + p_asr=None + yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False}, + else: + yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"} + # return None \ No newline at end of file