diff --git a/README.md b/README.md index 6f42aa61..0b0e2d44 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,8 @@ Unseen speakers few-shot fine-tuning demo: https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb +[教程中文版](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) [User guide (EN)](https://rentry.co/GPT-SoVITS-guide#/) + ## Installation For users in China region, you can [click here](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official) to use AutoDL Cloud Docker to experience the full functionality online. @@ -173,7 +175,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. - [ ] **High Priority:** - [x] Localization in Japanese and English. - - [ ] User guide. + - [x] User guide. - [x] Japanese and English dataset fine tune training. - [ ] **Features:** @@ -218,25 +220,34 @@ ASR processing is performed through Faster_Whisper(ASR marking except Chinese) python ./tools/damo_asr/WhisperASR.py -i -o -f -l ``` A custom list save path is enabled + ## Credits Special thanks to the following projects and contributors: +### Theoretical - [ar-vits](https://github.com/innnky/ar-vits) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [vits](https://github.com/jaywalnut310/vits) - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556) -- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [contentvec](https://github.com/auspicious3000/contentvec/) - [hifi-gan](https://github.com/jik876/hifi-gan) -- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) +### Pretrained Models +- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) +- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) +### Text Frontend for Inference +- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) +- [LangSegment](https://github.com/juntaosun/LangSegment) +### WebUI Tools - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [audio-slicer](https://github.com/openvpi/audio-slicer) - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) - +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [FunASR](https://github.com/alibaba-damo-academy/FunASR) + ## Thanks to all contributors for their efforts diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 8afd3514..625e4782 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -127,7 +127,7 @@ ### 20240221更新 -1-数据处理添加语音降噪选项 +1-数据处理添加语音降噪选项(降噪为只剩16k采样率,除非底噪很大先不急着用哦。) 2-中文日文前端处理优化 https://github.com/RVC-Boss/GPT-SoVITS/pull/559 https://github.com/RVC-Boss/GPT-SoVITS/pull/556 https://github.com/RVC-Boss/GPT-SoVITS/pull/532 https://github.com/RVC-Boss/GPT-SoVITS/pull/507 https://github.com/RVC-Boss/GPT-SoVITS/pull/509 @@ -135,9 +135,22 @@ 4-colab修复不开启公网url +### 20240306更新 + +1-推理加速50%(RTX3090+pytorch2.2.1+cu11.8+win10+py39 tested)https://github.com/RVC-Boss/GPT-SoVITS/pull/672 + +2-如果用faster whisper非中文ASR不再需要先下中文funasr模型 + +3-修复uvr5去混响模型 是否混响 反的 https://github.com/RVC-Boss/GPT-SoVITS/pull/610 + +4-faster whisper如果无cuda可用自动cpu推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/675 + +5-修改is_half的判断使在Mac上能正常CPU推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/573 + + todolist: -1-中文多音字推理优化 +1-中文多音字推理优化(有没有人来测试的,欢迎把测试结果写在pr评论区里) https://github.com/RVC-Boss/GPT-SoVITS/pull/488 diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index 5f49de70..f7b31aab 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -4,12 +4,12 @@ os.environ["HF_ENDPOINT"]="https://hf-mirror.com" import traceback import requests from glob import glob +import torch from faster_whisper import WhisperModel from tqdm import tqdm from tools.asr.config import check_fw_local_models -from tools.asr.funasr_asr import only_asr os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" @@ -45,8 +45,9 @@ def execute_asr(input_folder, output_folder, model_size, language,precision): if language == 'auto': language = None #不设置语种由模型自动输出概率最高的语种 print("loading faster whisper model:",model_size,model_path) + device = 'cuda' if torch.cuda.is_available() else 'cpu' try: - model = WhisperModel(model_path, device="cuda", compute_type=precision) + model = WhisperModel(model_path, device=device, compute_type=precision) except: return print(traceback.format_exc()) output = [] @@ -68,6 +69,8 @@ def execute_asr(input_folder, output_folder, model_size, language,precision): if info.language == "zh": print("检测为中文文本,转funasr处理") + if("only_asr"not in globals()): + from tools.asr.funasr_asr import only_asr##如果用英文就不需要导入下载模型 text = only_asr(file) if text == '':