.

2026-01-15 08:31:17 +08:00 · 2024-07-10 00:35:38 +08:00 · 2024-07-10 00:35:38 +08:00 · 3cfb6edad0
commit 3cfb6edad0
parent 18af05b5f4
4 changed files with 40 additions and 43 deletions
--- a/README.md
+++ b/README.md
@ -147,15 +147,7 @@ Users in China region can download these two models by entering the links below

 - [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights)

-For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
-
-For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint. 
-
-Users in China region can download this model by entering the links below
-
- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3) (clicking "Download a copy")
-
- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3) (HuggingFace mirror site)
+For Multilingual ASR, download models from [FunAudioLLM/SenseVoiceSmall](https://huggingface.co/FunAudioLLM/SenseVoiceSmall/tree/main) or [iic/SenseVoiceSmall](https://modelscope.cn/models/iic/SenseVoiceSmall/files) and place them in `tools/asr/models`.

 ## Dataset Format

@ -216,16 +208,11 @@ python audio_slicer.py \
    --min_interval <shortest_time_gap_between_adjacent_subclips> 
    --hop_size <step_size_for_computing_volume_curve>
 ```
-This is how dataset ASR processing is done using the command line(Only Chinese)
+This is how dataset ASR processing is done using the command line
 ```
-python tools/asr/funasr_asr.py -i <input> -o <output>
+python tools/asr/sensevoice.py -i <input> -o <output> -l <language> -d <device>
 ```
-ASR processing is performed through Faster_Whisper(ASR marking except Chinese)

-(No progress bars, GPU performance may cause time delays)
-```
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
-```
 A custom list save path is enabled

 ## Credits
@ -252,8 +239,7 @@ Special thanks to the following projects and contributors:
 - [SubFix](https://github.com/cronrpc/SubFix)
 - [FFmpeg](https://github.com/FFmpeg/FFmpeg)
 - [gradio](https://github.com/gradio-app/gradio)
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
- [FunASR](https://github.com/alibaba-damo-academy/FunASR)
+- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)
  
 ## Thanks to all contributors for their efforts

--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@ -147,14 +147,8 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker

 - [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights)

-对于中文自动语音识别（附加），从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型，并将它们放置在 `tools/asr/models` 中。
+对于多语言自动语音识别（附加），从 [FunAudioLLM/SenseVoiceSmall](https://huggingface.co/FunAudioLLM/SenseVoiceSmall/tree/main) 或 [iic/SenseVoiceSmall](https://modelscope.cn/models/iic/SenseVoiceSmall/files) 下载模型，并将它们放置在 `tools/asr/models` 中。

-对于英语与日语自动语音识别（附加）,从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型，并将它们放置在 `tools/asr/models` 中。 此外，[其他模型](https://huggingface.co/Systran)可能具有类似效果，但占用更小的磁盘空间。
-
-中国地区用户可以通过以下链接下载：
- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3)(点击“下载副本”)
-  
- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3)(Hugging Face镜像站)


 ## 数据集格式
@ -216,16 +210,11 @@ python audio_slicer.py \
    --min_interval <shortest_time_gap_between_adjacent_subclips> 
    --hop_size <step_size_for_computing_volume_curve>
 ````
-这是使用命令行完成数据集ASR处理的方式（仅限中文）
+这是使用命令行完成数据集ASR处理的方式
 ````
-python tools/asr/funasr_asr.py -i <input> -o <output>
+python tools/asr/sensevoice.py -i <input> -o <output> -l <language> -d <device>
 ````
-通过Faster_Whisper进行ASR处理（除中文之外的ASR标记）

-（没有进度条，GPU性能可能会导致时间延迟）
-````
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
-````
 启用自定义列表保存路径

 ## 致谢
@ -252,8 +241,7 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
 - [SubFix](https://github.com/cronrpc/SubFix)
 - [FFmpeg](https://github.com/FFmpeg/FFmpeg)
 - [gradio](https://github.com/gradio-app/gradio)
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
- [FunASR](https://github.com/alibaba-damo-academy/FunASR)
+- [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)

 ## 感谢所有贡献者的努力

--- a/requirements.txt
+++ b/requirements.txt
@ -9,7 +9,7 @@ gradio_client==0.8.1
 ffmpeg-python
 onnxruntime
 tqdm
-funasr==1.0.0
+funasr==1.1.0
 cn2an
 pypinyin
 pyopenjtalk
@ -24,5 +24,4 @@ psutil
 jieba_fast
 jieba
 LangSegment>=0.2.0
-Faster_Whisper
 wordsegment
--- a/webui.py
+++ b/webui.py
@ -202,16 +202,17 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
        cmd += f' -i "{asr_inp_dir}"'
        cmd += f' -o "{asr_opt_dir}"'
        cmd += f' -l {asr_lang}'
-  
-
-        yield "ASR任务开启：%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+        output_file_name = os.path.basename(asr_inp_dir)
+        output_folder = asr_opt_dir or "output/asr_opt"
+        output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list')
+        yield "ASR任务开启：%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"}
        print(cmd)
        p_asr = Popen(cmd, shell=True)
        p_asr.wait()
        p_asr=None
-        yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+        yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False},{"__type__":"update","value":output_file_path}
    else:
-        yield "已有正在进行的ASR任务，需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+        yield "已有正在进行的ASR任务，需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"}
        # return None

 def close_asr():
@ -732,7 +733,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                            label       = i18n("ASR 模型"),
                            choices     = ['SenseVoice'],
                            interactive = True,
-                            value="enseVoice"
+                            value="SenseVoice"
                        )
                        asr_size = gr.Dropdown(
                            label       = i18n("ASR 模型尺寸"),
@ -761,7 +762,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
            if_label.change(change_label, [if_label,path_list], [label_info])
            if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info])
-            open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button])
+            open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button,path_list])
            close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button])
            open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button])
            close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button])
@ -869,3 +870,26 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
        server_port=webui_port_main,
        quiet=True,
    )
+
+
+
+def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision):
+    """Launch the selected ASR script as a subprocess and report progress.
+
+    This is a generator. Every yield is a 4-tuple: a status message followed
+    by three component-update dicts. The first two dicts toggle ``visible``
+    in opposite directions; the third is an update with no changes.
+    NOTE(review): the component each dict targets is decided by the
+    ``.click(...)`` wiring, which must list four outputs -- confirm.
+
+    Args:
+        asr_inp_dir: Input directory; normalised with ``my_utils.clean_path``.
+        asr_opt_dir: Output directory, passed to the script as ``-o``.
+        asr_model: Key into ``asr_dict``; its ``"path"`` entry names the
+            script under ``tools/asr/``.
+        asr_model_size: Passed to the script as ``-s``.
+        asr_lang: Passed to the script as ``-l``.
+        asr_precision: Passed to the script as ``-p``.
+    """
+    global p_asr
+    if p_asr is None:
+        asr_inp_dir = my_utils.clean_path(asr_inp_dir)
+        cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}'
+        cmd += f' -i "{asr_inp_dir}"'
+        cmd += f' -o "{asr_opt_dir}"'
+        cmd += f' -s {asr_model_size}'
+        cmd += f' -l {asr_lang}'
+        cmd += f" -p {asr_precision}"
+
+        yield "ASR任务开启：%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"}
+        print(cmd)
+        # NOTE(review): the command is assembled by string interpolation and
+        # run with shell=True; consider an argument list if inputs can be untrusted.
+        p_asr = Popen(cmd, shell=True)
+        p_asr.wait()
+        # Clear the handle so a later call can start a new task.
+        p_asr = None
+        # Yield four values like the other branches; the original yielded only
+        # three here (trailing comma), leaving the last output without a value.
+        yield "ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False},{"__type__":"update"}
+    else:
+        yield "已有正在进行的ASR任务，需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True},{"__type__":"update"}