diff --git a/Dockerfile b/Dockerfile index 74e282c..80cd9f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,9 +34,6 @@ RUN if [ "$IMAGE_TYPE" != "elite" ]; then \ fi -# Copy the rest of the application -COPY . /workspace - # Copy the rest of the application COPY . /workspace diff --git a/GPT_SoVITS/AR/data/dataset.py b/GPT_SoVITS/AR/data/dataset.py index 1a2ffef..54b9278 100644 --- a/GPT_SoVITS/AR/data/dataset.py +++ b/GPT_SoVITS/AR/data/dataset.py @@ -64,7 +64,7 @@ class Text2SemanticDataset(Dataset): # get dict self.path2 = phoneme_path # "%s/2-name2text.txt"%exp_dir#phoneme_path self.path3 = "%s/3-bert" % ( - os.path.basename(phoneme_path) + os.path.dirname(phoneme_path) ) # "%s/3-bert"%exp_dir#bert_dir self.path6 = semantic_path # "%s/6-name2semantic.tsv"%exp_dir#semantic_path assert os.path.exists(self.path2) diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py index 0059033..58a21ee 100644 --- a/GPT_SoVITS/module/models.py +++ b/GPT_SoVITS/module/models.py @@ -907,7 +907,7 @@ class SynthesizerTrn(nn.Module): ge = self.ref_enc(y * y_mask, y_mask) with autocast(enabled=False): - maybe_no_grad = torch.no_grad() if self.freeze_quantizer else contextlib.nullcontext + maybe_no_grad = torch.no_grad() if self.freeze_quantizer else contextlib.nullcontext() with maybe_no_grad: if self.freeze_quantizer: self.ssl_proj.eval() diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py index b241382..e01a63b 100644 --- a/GPT_SoVITS/prepare_datasets/1-get-text.py +++ b/GPT_SoVITS/prepare_datasets/1-get-text.py @@ -117,9 +117,12 @@ if os.path.exists(txt_path) == False: try: wav_name, spk_name, language, text = line.split("|") # todo.append([name,text,"zh"]) - todo.append( - [wav_name, text, language_v1_to_language_v2.get(language, language)] - ) + if language in language_v1_to_language_v2.keys(): + todo.append( + [wav_name, text, language_v1_to_language_v2.get(language, language)] + ) + else: + print(f"\033[33m[Waring] The {language = } of {wav_name} is not supported for training.\033[0m") except: print(line, traceback.format_exc()) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index 9a2f73c..61c933a 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -82,7 +82,7 @@ def name2go(wav_name,wav_path): tensor_wav16 = tensor_wav16.to(device) ssl=model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu()#torch.Size([1, 768, 215]) if np.isnan(ssl.detach().numpy()).sum()!= 0: - nan_fails.append(wav_name) + nan_fails.append((wav_name,wav_path)) print("nan filtered:%s"%wav_name) return wavfile.write( @@ -90,7 +90,7 @@ def name2go(wav_name,wav_path): 32000, tmp_audio32.astype("int16"), ) - my_save(ssl,hubert_path ) + my_save(ssl,hubert_path) with open(inp_text,"r",encoding="utf8")as f: lines=f.read().strip("\n").split("\n") @@ -113,8 +113,8 @@ for line in lines[int(i_part)::int(all_parts)]: if(len(nan_fails)>0 and is_half==True): is_half=False model=model.float() - for wav_name in nan_fails: + for wav in nan_fails: try: - name2go(wav_name) + name2go(wav[0],wav[1]) except: print(wav_name,traceback.format_exc()) diff --git a/GPT_SoVITS/text/english.py b/GPT_SoVITS/text/english.py index 68ce789..30fafb5 100644 --- a/GPT_SoVITS/text/english.py +++ b/GPT_SoVITS/text/english.py @@ -320,7 +320,7 @@ class en_G2p(G2p): # 尝试分离所有格 if re.match(r"^([a-z]+)('s)$", word): - phones = self.qryword(word[:-2]) + 
phones = self.qryword(word[:-2])[:] # P T K F TH HH 无声辅音结尾 's 发 ['S'] if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']: phones.extend(['S']) @@ -359,4 +359,4 @@ def g2p(text): if __name__ == "__main__": print(g2p("hello")) print(g2p(text_normalize("e.g. I used openai's AI tool to draw a picture."))) - print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder."))) \ No newline at end of file + print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder."))) diff --git a/README.md b/README.md index 1122516..d8f67e2 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,7 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.

[![Huggingface](https://img.shields.io/badge/🤗%20-Models%20Repo-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) [![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG) - -[**English**](./README.md) | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) +**English** | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) | [**Türkçe**](./docs/tr/README.md) @@ -52,11 +51,11 @@ _Note: numba==0.56.4 requires py<3.11_ ### Windows -If you are a Windows user (tested with win>=10), you can directly download the [pre-packaged distribution](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI. +If you are a Windows user (tested with win>=10), you can download [the 0206fix3 packedge](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) or [the 0217fix2 packedge](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI. -Users in China region can download [the 0217 package](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217) or [the 0306fix2 package](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2) by clicking the links and then selecting "Download a copy." +Users in China region can download [the 0206fix3 package](https://www.icloud.com.cn/iclouddrive/075NNKIRC2zqnWn-9rhD63WGA#GPT-SoVITS-beta0206fix3) or [the 0217fix2 package](https://www.icloud.com.cn/iclouddrive/091QHaIbZMDZYQg7IX3g2kCqg#GPT-SoVITS-beta0217fix2) by clicking the links and then selecting "Download a copy." (Log out if you encounter errors while downloading.) -_Note: The 0306fix2 version doubles the inference speed and fixes all issues with the no reference text mode._ +_Note: The inference speed of version 0206 is faster, while the inference quality of the new 0217 version is better. You can choose according to your needs._ ### Linux @@ -198,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. 
- [ ] better sovits base model (enhanced audio quality) - [ ] model mix -## (Optional) If you need, here will provide the command line operation mode +## (Additional) Method for running from the command line Use the command line to open the WebUI for UVR5 ``` python tools/uvr5/webui.py "" @@ -233,7 +232,7 @@ A custom list save path is enabled Special thanks to the following projects and contributors: -### Theoretical +### Theoretical Research - [ar-vits](https://github.com/innnky/ar-vits) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [vits](https://github.com/jaywalnut310/vits) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 625e478..36c1db4 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -147,10 +147,33 @@ 5-修改is_half的判断使在Mac上能正常CPU推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/573 +### 202403/202404/202405更新 + +2个重点 + +1-修复sovits训练未冻结vq的问题(可能造成效果下降) + +2-增加一个快速推理分支 + +以下都是小修补 + +1-修复无参考文本模式问题 + +2-优化中英文文本前端 + +3-api格式优化 + +4-cmd格式问题修复 + +5-训练数据处理阶段不支持的语言提示报错 + +6-nan自动转fp32阶段的hubert提取bug修复 todolist: 1-中文多音字推理优化(有没有人来测试的,欢迎把测试结果写在pr评论区里) https://github.com/RVC-Boss/GPT-SoVITS/pull/488 - +(v2底模训练已经合了,下个版本发布就要合了) + +2-正在尝试解决低音质参考音频导致音质较差的问题,v2再试试如果能解决就发了,节点暂定高考后吧 diff --git a/docs/cn/README.md b/docs/cn/README.md index 2c48cbc..dcca243 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -10,8 +10,9 @@ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb) [![License](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE) [![Huggingface](https://img.shields.io/badge/🤗%20-Models%20Repo-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) +[![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG) -[**English**](../../README.md) | [**中文简体**](./README.md) | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) +[**English**](../../README.md) | **中文简体** | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md) @@ -50,11 +51,11 @@ _注: numba==0.56.4 需要 python<3.11_ ### Windows -如果你是 Windows 用户(已在 win>=10 上测试),可以直接下载[预打包文件](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true),解压后双击 go-webui.bat 即可启动 GPT-SoVITS-WebUI。 +如果你是 Windows 用户(已在 win>=10 上测试),可以下载[0206fix3 整合包](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true)或[0217fix2 整合包](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true),解压后双击 go-webui.bat 即可启动 GPT-SoVITS-WebUI。 -中国地区用户可以通过点击链接并选择“下载副本”来下载[0217版本包](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217)或[0306fix2版本包](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2)。 +中国地区用户可以通过点击链接并选择“下载副本”来下载[0206fix3 整合包](https://www.icloud.com.cn/iclouddrive/075NNKIRC2zqnWn-9rhD63WGA#GPT-SoVITS-beta0206fix3)或[0217fix2 整合包](https://www.icloud.com.cn/iclouddrive/091QHaIbZMDZYQg7IX3g2kCqg#GPT-SoVITS-beta0217fix2)。(如果下载时遇到错误,请退出登录) -_注:0306fix2版本推理速度翻倍,节约生命。修复了无参考文本模式的所有问题。_ +_注:0206版本的推理速度更快,0217新版的推理效果更好,可按需选择_ ### Linux @@ -148,7 +149,7 @@ docker run --rm -it --gpus=all --env=is_half=False 
--volume=G:\GPT-SoVITS-Docker 对于中文自动语音识别(附加),从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 中。 -对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。 +对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。 中国地区用户可以通过以下链接下载: - [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3)(点击“下载副本”) @@ -184,7 +185,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. - [ ] 用户指南。 - [x] 日语和英语数据集微调训练。 -- [ ] **Features:** +- [ ] **功能:** - [ ] 零样本声音转换(5 秒)/ 少样本声音转换(1 分钟)。 - [ ] TTS 语速控制。 - [ ] 增强的 TTS 情感控制。 @@ -196,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. - [ ] 更好的 sovits 基础模型(增强的音频质量)。 - [ ] 模型混合。 -## (可选)命令行的操作方式 +## (附加)命令行运行方式 使用命令行打开UVR5的WebUI ```` python tools/uvr5/webui.py "" @@ -226,24 +227,33 @@ python tools/asr/funasr_asr.py -i -o python ./tools/asr/fasterwhisper_asr.py -i -o -l ```` 启用自定义列表保存路径 + ## 致谢 特别感谢以下项目和贡献者: +### 理论研究 - [ar-vits](https://github.com/innnky/ar-vits) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [vits](https://github.com/jaywalnut310/vits) - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556) -- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [contentvec](https://github.com/auspicious3000/contentvec/) - [hifi-gan](https://github.com/jik876/hifi-gan) -- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) +### 预训练模型 +- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) +- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) +### 推理用文本前端 +- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) +- [LangSegment](https://github.com/juntaosun/LangSegment) +### WebUI 工具 - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [audio-slicer](https://github.com/openvpi/audio-slicer) - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [FunASR](https://github.com/alibaba-damo-academy/FunASR) ## 感谢所有贡献者的努力 diff --git a/docs/ja/README.md b/docs/ja/README.md index a910f94..d7cd8b5 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -10,8 +10,9 @@ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb) [![License](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE) 
[![Huggingface](https://img.shields.io/badge/🤗%20-Models%20Repo-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) +[![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG) -[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](./README.md) | [**한국어**](../ko/README.md) +[**English**](../../README.md) | [**中文简体**](../cn/README.md) | **日本語** | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md) @@ -33,6 +34,8 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb +**ユーザーマニュアル: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)** + ## インストール ### テスト済みの環境 @@ -46,7 +49,9 @@ _注記: numba==0.56.4 は py<3.11 が必要です_ ### Windows -Windows ユーザーの場合(win>=10 でテスト済み)、[事前にパッケージ化されたディストリビューション](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true)を直接ダウンロードし、_go-webui.bat_ をダブルクリックして GPT-SoVITS-WebUI を起動することができます。 +Windows ユーザーの場合(win>=10 でテスト済み)、[0206fix3 パッケージ](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) または [0217fix2 パッケージ](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) をダウンロードして、解凍後に _go-webui.bat_ をダブルクリックするだけで GPT-SoVITS-WebUI を起動できます。 + +_注:0206バージョンの推論速度は速いですが、0217の新バージョンの推論品質は優れています。必要に応じて選択してください。_ ### Linux @@ -174,7 +179,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. - [ ] より良い sovits ベースモデル(音質向上) - [ ] モデルミックス -## (オプション) 必要に応じて、コマンドライン操作モードが提供されます。 +## (追加の) コマンドラインから実行する方法 コマンド ラインを使用して UVR5 の WebUI を開きます ``` python tools/uvr5/webui.py "" @@ -204,24 +209,33 @@ ASR処理はFaster_Whisperを通じて実行されます(中国語を除くASR python ./tools/asr/fasterwhisper_asr.py -i -o -l ``` カスタムリストの保存パスが有効になっています + ## クレジット -以下のプロジェクトとコントリビューターに感謝します: +特に以下のプロジェクトと貢献者に感謝します: +### 理論研究 - [ar-vits](https://github.com/innnky/ar-vits) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [vits](https://github.com/jaywalnut310/vits) - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556) -- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [contentvec](https://github.com/auspicious3000/contentvec/) - [hifi-gan](https://github.com/jik876/hifi-gan) -- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) +### 事前学習モデル +- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) +- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) +### 推論用テキストフロントエンド +- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) +- [LangSegment](https://github.com/juntaosun/LangSegment) +### WebUI ツール - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [audio-slicer](https://github.com/openvpi/audio-slicer) - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [FunASR](https://github.com/alibaba-damo-academy/FunASR) ## すべてのコントリビューターに感謝します diff --git 
a/docs/ko/README.md b/docs/ko/README.md index 57696f1..9e2bd1c 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -10,8 +10,9 @@ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb) [![License](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE) [![Huggingface](https://img.shields.io/badge/🤗%20-Models%20Repo-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) +[![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG) -[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | [**한국어**](./README.md) +[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | **한국어** | [**Türkçe**](../tr/README.md) @@ -33,6 +34,8 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb +**사용자 설명서: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)** + ## 설치 ### 테스트 통과 환경 @@ -46,7 +49,9 @@ _참고: numba==0.56.4 는 python<3.11 을 필요로 합니다._ ### Windows -Windows 사용자이며 (win>=10에서 테스트 완료) [미리 패키지된 배포판](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true)을 직접 다운로드하여 _go-webui.bat_을 더블클릭하면 GPT-SoVITS-WebUI를 시작할 수 있습니다. +Windows 사용자라면 (win>=10에서 테스트됨), [0206fix3 패키지](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) 또는 [0217fix2 패키지](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true)를 다운로드하고 압축을 풀어 _go-webui.bat_ 파일을 더블 클릭하면 GPT-SoVITS-WebUI를 시작할 수 있습니다. + +_참고: 0206 버전은 추론 속도가 더 빠르지만, 0217 새 버전은 추론 품질이 더 좋습니다. 필요에 따라 선택할 수 있습니다._ ### Linux @@ -178,7 +183,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. - [ ] 더 나은 sovits 기본 모델 (향상된 오디오 품질). - [ ] 모델 블렌딩. -## (선택 사항) 필요한 경우 여기에서 명령줄 작업 모드를 제공합니다. +## (추가적인) 명령줄에서 실행하는 방법 명령줄을 사용하여 UVR5용 WebUI 열기 ``` python tools/uvr5/webui.py "" @@ -208,24 +213,34 @@ ASR 처리는 Faster_Whisper(중국어를 제외한 ASR 마킹)를 통해 수행 python ./tools/asr/fasterwhisper_asr.py -i -o -l ``` 사용자 정의 목록 저장 경로가 활성화되었습니다. 
+ ## 감사의 말 -특별히 다음 프로젝트와 기여자에게 감사드립니다: +다음 프로젝트와 기여자들에게 특별히 감사드립니다: +### 이론 연구 - [ar-vits](https://github.com/innnky/ar-vits) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [vits](https://github.com/jaywalnut310/vits) - [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556) -- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [contentvec](https://github.com/auspicious3000/contentvec/) - [hifi-gan](https://github.com/jik876/hifi-gan) -- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) +### 사전 학습 모델 +- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) +- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) +### 추론용 텍스트 프론트엔드 +- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) +- [LangSegment](https://github.com/juntaosun/LangSegment) +### WebUI 도구 - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [audio-slicer](https://github.com/openvpi/audio-slicer) - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) +- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) +- [FunASR](https://github.com/alibaba-damo-academy/FunASR) + ## 모든 기여자들에게 감사드립니다 ;) diff --git a/docs/tr/README.md b/docs/tr/README.md index 0d54557..b9da738 100644 --- a/docs/tr/README.md +++ b/docs/tr/README.md @@ -12,8 +12,7 @@ Güçlü Birkaç Örnekli Ses Dönüştürme ve Metinden Konuşmaya Web Arayüz [![Huggingface](https://img.shields.io/badge/🤗%20-Models%20Repo-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) [![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/dnrgs5GHfG) - -[**İngilizce**](./README.md) | [**Çince (Basitleştirilmiş)**](./docs/cn/README.md) | [**Japonca**](./docs/ja/README.md) | [**Korece**](./docs/ko/README.md) +[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | **Türkçe** @@ -35,12 +34,10 @@ Görünmeyen konuşmacılar birkaç örnekli ince ayar demosu: https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb -**Kullanıcı kılavuzu: [Basitleştirilmiş Çince](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [İngilizce](https://rentry.co/GPT-SoVITS-guide#/)** +**Kullanıcı Kılavuzu: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)** ## Kurulum -Çin bölgesindeki kullanıcılar için, tam işlevselliği çevrimiçi olarak deneyimlemek üzere AutoDL Bulut Docker'ı kullanmak için [buraya tıklayabilirsiniz](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official). - ### Test Edilmiş Ortamlar - Python 3.9, PyTorch 2.0.1, CUDA 11 @@ -52,11 +49,9 @@ _Not: numba==0.56.4, py<3.11 gerektirir_ ### Windows -Bir Windows kullanıcısıysanız (win>=10 ile test edilmiştir), [önceden paketlenmiş dağıtımı](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) indirebilir ve GPT-SoVITS-WebUI'yi başlatmak için _go-webui.bat_ dosyasını çift tıklayabilirsiniz. 
+Eğer bir Windows kullanıcısıysanız (win>=10 ile test edilmiştir), [0206fix3 paketini](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) veya [0217fix2 paketini](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) indirip _go-webui.bat_ dosyasına çift tıklayarak GPT-SoVITS-WebUI'yi başlatabilirsiniz.
 
-Çin bölgesindeki kullanıcılar, aşağıdaki bağlantılara tıklayıp "Bir kopya indir"i seçerek [0217 paketini](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217) veya [0306fix2 paketini](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2) indirebilirler.
-
-_Not: 0306fix2 sürümü çıkarım hızını iki katına çıkarır ve referans metni olmayan moddaki tüm sorunları giderir._
+_Not: 0206 sürümünün çıkarım hızı daha hızlıdır, 0217 yeni sürümünün çıkarım kalitesi ise daha iyidir. İhtiyacınıza göre seçim yapabilirsiniz._
 
 ### Linux
 
@@ -198,7 +193,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
 - [ ] daha iyi sovits temel modeli (geliştirilmiş ses kalitesi)
 - [ ] model karışımı
 
-## (İsteğe Bağlı) İhtiyacınız varsa, burada komut satırı işlem modu sağlanacaktır
+## (Ekstra) Komut satırından çalıştırma yöntemi
 UVR5 için Web Arayüzünü açmak için komut satırını kullanın
 ```
 python tools/uvr5/webui.py ""
@@ -229,11 +224,11 @@ python ./tools/asr/fasterwhisper_asr.py -i -o <çıktı> -l
 ```
 Özel bir liste kaydetme yolu etkinleştirildi
 
-## Teşekkürler
+## Katkı Verenler
 
-Aşağıdaki projeler ve katkıda bulunanlara özel teşekkürler:
+Özellikle aşağıdaki projelere ve katkıda bulunanlara teşekkür ederiz:
 
-### Teorik
+### Teorik Araştırma
 - [ar-vits](https://github.com/innnky/ar-vits)
 - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
 - [vits](https://github.com/jaywalnut310/vits)
@@ -242,12 +237,12 @@ Aşağıdaki projeler ve katkıda bulunanlara özel teşekkürler:
 - [hifi-gan](https://github.com/jik876/hifi-gan)
 - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
 ### Önceden Eğitilmiş Modeller
-- [Çince Konuşma Ön Eğitimi](https://github.com/TencentGameMate/chinese_speech_pretrain)
-- [Çince-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
-### Çıkarım için Metin Ön Ucu
+- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
+- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
+### Tahmin İçin Metin Ön Ucu
 - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
 - [LangSegment](https://github.com/juntaosun/LangSegment)
-### Web Arayüzü Araçları
+### WebUI Araçları
 - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
 - [audio-slicer](https://github.com/openvpi/audio-slicer)
 - [SubFix](https://github.com/cronrpc/SubFix)
diff --git a/webui.py b/webui.py
index e1c36e1..c71c1ca 100644
--- a/webui.py
+++ b/webui.py
@@ -418,7 +418,10 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
         with open(path_text, "w", encoding="utf8") as f:
             f.write("\n".join(opt) + "\n")
         ps1a=[]
-        yield "文本进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+        if len("".join(opt)) > 0:
+            yield "文本进程成功", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
+        else:
+            yield "文本进程失败", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
     else:
         yield "已有正在进行的文本任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
@@ -583,7 +586,7 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
                     os.remove(txt_path)
             with open(path_text, "w",encoding="utf8") as f:
                 f.write("\n".join(opt) + "\n")
-
+            assert len("".join(opt)) > 0, "1Aa-文本获取进程失败"
             yield "进度:1a-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
             ps1abc=[]
             #############################1b