mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Merge branch 'main' into ref_audio_selector_tool
This commit is contained in:
commit
56d6ae6b3b
@ -34,9 +34,6 @@ RUN if [ "$IMAGE_TYPE" != "elite" ]; then \
|
||||
fi
|
||||
|
||||
|
||||
# Copy the rest of the application
|
||||
COPY . /workspace
|
||||
|
||||
# Copy the rest of the application
|
||||
COPY . /workspace
|
||||
|
||||
|
@ -64,7 +64,7 @@ class Text2SemanticDataset(Dataset):
|
||||
# get dict
|
||||
self.path2 = phoneme_path # "%s/2-name2text.txt"%exp_dir#phoneme_path
|
||||
self.path3 = "%s/3-bert" % (
|
||||
os.path.basename(phoneme_path)
|
||||
os.path.dirname(phoneme_path)
|
||||
) # "%s/3-bert"%exp_dir#bert_dir
|
||||
self.path6 = semantic_path # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
|
||||
assert os.path.exists(self.path2)
|
||||
|
@ -907,7 +907,7 @@ class SynthesizerTrn(nn.Module):
|
||||
ge = self.ref_enc(y * y_mask, y_mask)
|
||||
|
||||
with autocast(enabled=False):
|
||||
maybe_no_grad = torch.no_grad() if self.freeze_quantizer else contextlib.nullcontext
|
||||
maybe_no_grad = torch.no_grad() if self.freeze_quantizer else contextlib.nullcontext()
|
||||
with maybe_no_grad:
|
||||
if self.freeze_quantizer:
|
||||
self.ssl_proj.eval()
|
||||
|
@ -117,9 +117,12 @@ if os.path.exists(txt_path) == False:
|
||||
try:
|
||||
wav_name, spk_name, language, text = line.split("|")
|
||||
# todo.append([name,text,"zh"])
|
||||
todo.append(
|
||||
[wav_name, text, language_v1_to_language_v2.get(language, language)]
|
||||
)
|
||||
if language in language_v1_to_language_v2.keys():
|
||||
todo.append(
|
||||
[wav_name, text, language_v1_to_language_v2.get(language, language)]
|
||||
)
|
||||
else:
|
||||
print(f"\033[33m[Waring] The {language = } of {wav_name} is not supported for training.\033[0m")
|
||||
except:
|
||||
print(line, traceback.format_exc())
|
||||
|
||||
|
@ -82,7 +82,7 @@ def name2go(wav_name,wav_path):
|
||||
tensor_wav16 = tensor_wav16.to(device)
|
||||
ssl=model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu()#torch.Size([1, 768, 215])
|
||||
if np.isnan(ssl.detach().numpy()).sum()!= 0:
|
||||
nan_fails.append(wav_name)
|
||||
nan_fails.append((wav_name,wav_path))
|
||||
print("nan filtered:%s"%wav_name)
|
||||
return
|
||||
wavfile.write(
|
||||
@ -90,7 +90,7 @@ def name2go(wav_name,wav_path):
|
||||
32000,
|
||||
tmp_audio32.astype("int16"),
|
||||
)
|
||||
my_save(ssl,hubert_path )
|
||||
my_save(ssl,hubert_path)
|
||||
|
||||
with open(inp_text,"r",encoding="utf8")as f:
|
||||
lines=f.read().strip("\n").split("\n")
|
||||
@ -113,8 +113,8 @@ for line in lines[int(i_part)::int(all_parts)]:
|
||||
if(len(nan_fails)>0 and is_half==True):
|
||||
is_half=False
|
||||
model=model.float()
|
||||
for wav_name in nan_fails:
|
||||
for wav in nan_fails:
|
||||
try:
|
||||
name2go(wav_name)
|
||||
name2go(wav[0],wav[1])
|
||||
except:
|
||||
print(wav_name,traceback.format_exc())
|
||||
|
@ -320,7 +320,7 @@ class en_G2p(G2p):
|
||||
|
||||
# 尝试分离所有格
|
||||
if re.match(r"^([a-z]+)('s)$", word):
|
||||
phones = self.qryword(word[:-2])
|
||||
phones = self.qryword(word[:-2])[:]
|
||||
# P T K F TH HH 无声辅音结尾 's 发 ['S']
|
||||
if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
|
||||
phones.extend(['S'])
|
||||
@ -359,4 +359,4 @@ def g2p(text):
|
||||
if __name__ == "__main__":
|
||||
print(g2p("hello"))
|
||||
print(g2p(text_normalize("e.g. I used openai's AI tool to draw a picture.")))
|
||||
print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.")))
|
||||
print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.")))
|
||||
|
13
README.md
13
README.md
@ -12,8 +12,7 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
|
||||
[](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
|
||||
[](https://discord.gg/dnrgs5GHfG)
|
||||
|
||||
|
||||
[**English**](./README.md) | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md)
|
||||
**English** | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) | [**Türkçe**](./docs/tr/README.md)
|
||||
|
||||
</div>
|
||||
|
||||
@ -52,11 +51,11 @@ _Note: numba==0.56.4 requires py<3.11_
|
||||
|
||||
### Windows
|
||||
|
||||
If you are a Windows user (tested with win>=10), you can directly download the [pre-packaged distribution](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI.
|
||||
If you are a Windows user (tested with win>=10), you can download [the 0206fix3 package](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) or [the 0217fix2 package](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI.
|
||||
|
||||
Users in China region can download [the 0217 package](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217) or [the 0306fix2 package](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2) by clicking the links and then selecting "Download a copy."
|
||||
Users in the China region can download [the 0206fix3 package](https://www.icloud.com.cn/iclouddrive/075NNKIRC2zqnWn-9rhD63WGA#GPT-SoVITS-beta0206fix3) or [the 0217fix2 package](https://www.icloud.com.cn/iclouddrive/091QHaIbZMDZYQg7IX3g2kCqg#GPT-SoVITS-beta0217fix2) by clicking the links and then selecting "Download a copy." (Log out if you encounter errors while downloading.)
|
||||
|
||||
_Note: The 0306fix2 version doubles the inference speed and fixes all issues with the no reference text mode._
|
||||
_Note: The inference speed of version 0206 is faster, while the inference quality of the new 0217 version is better. You can choose according to your needs._
|
||||
|
||||
### Linux
|
||||
|
||||
@ -198,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] better sovits base model (enhanced audio quality)
|
||||
- [ ] model mix
|
||||
|
||||
## (Optional) If you need, here will provide the command line operation mode
|
||||
## (Additional) Method for running from the command line
|
||||
Use the command line to open the WebUI for UVR5
|
||||
```
|
||||
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
@ -233,7 +232,7 @@ A custom list save path is enabled
|
||||
|
||||
Special thanks to the following projects and contributors:
|
||||
|
||||
### Theoretical
|
||||
### Theoretical Research
|
||||
- [ar-vits](https://github.com/innnky/ar-vits)
|
||||
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
|
||||
- [vits](https://github.com/jaywalnut310/vits)
|
||||
|
@ -147,10 +147,33 @@
|
||||
|
||||
5-修改is_half的判断使在Mac上能正常CPU推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/573
|
||||
|
||||
### 202403/202404/202405更新
|
||||
|
||||
2个重点
|
||||
|
||||
1-修复sovits训练未冻结vq的问题(可能造成效果下降)
|
||||
|
||||
2-增加一个快速推理分支
|
||||
|
||||
以下都是小修补
|
||||
|
||||
1-修复无参考文本模式问题
|
||||
|
||||
2-优化中英文文本前端
|
||||
|
||||
3-api格式优化
|
||||
|
||||
4-cmd格式问题修复
|
||||
|
||||
5-训练数据处理阶段不支持的语言提示报错
|
||||
|
||||
6-nan自动转fp32阶段的hubert提取bug修复
|
||||
|
||||
todolist:
|
||||
|
||||
1-中文多音字推理优化(有没有人来测试的,欢迎把测试结果写在pr评论区里) https://github.com/RVC-Boss/GPT-SoVITS/pull/488
|
||||
|
||||
(v2底模训练已经合了,下个版本发布就要合了)
|
||||
|
||||
2-正在尝试解决低音质参考音频导致音质较差的问题,v2再试试如果能解决就发了,节点暂定高考后吧
|
||||
|
||||
|
||||
|
@ -10,8 +10,9 @@
|
||||
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
|
||||
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
|
||||
[](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
|
||||
[](https://discord.gg/dnrgs5GHfG)
|
||||
|
||||
[**English**](../../README.md) | [**中文简体**](./README.md) | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md)
|
||||
[**English**](../../README.md) | **中文简体** | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
|
||||
|
||||
</div>
|
||||
|
||||
@ -50,11 +51,11 @@ _注: numba==0.56.4 需要 python<3.11_
|
||||
|
||||
### Windows
|
||||
|
||||
如果你是 Windows 用户(已在 win>=10 上测试),可以直接下载[预打包文件](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true),解压后双击 go-webui.bat 即可启动 GPT-SoVITS-WebUI。
|
||||
如果你是 Windows 用户(已在 win>=10 上测试),可以下载[0206fix3 整合包](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true)或[0217fix2 整合包](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true),解压后双击 go-webui.bat 即可启动 GPT-SoVITS-WebUI。
|
||||
|
||||
中国地区用户可以通过点击链接并选择“下载副本”来下载[0217版本包](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217)或[0306fix2版本包](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2)。
|
||||
中国地区用户可以通过点击链接并选择“下载副本”来下载[0206fix3 整合包](https://www.icloud.com.cn/iclouddrive/075NNKIRC2zqnWn-9rhD63WGA#GPT-SoVITS-beta0206fix3)或[0217fix2 整合包](https://www.icloud.com.cn/iclouddrive/091QHaIbZMDZYQg7IX3g2kCqg#GPT-SoVITS-beta0217fix2)。(如果下载时遇到错误,请退出登录)
|
||||
|
||||
_注:0306fix2版本推理速度翻倍,节约生命。修复了无参考文本模式的所有问题。_
|
||||
_注:0206版本的推理速度更快,0217新版的推理效果更好,可按需选择_
|
||||
|
||||
### Linux
|
||||
|
||||
@ -148,7 +149,7 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
|
||||
|
||||
对于中文自动语音识别(附加),从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 中。
|
||||
|
||||
对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。
|
||||
对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。
|
||||
|
||||
中国地区用户可以通过以下链接下载:
|
||||
- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3)(点击“下载副本”)
|
||||
@ -184,7 +185,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] 用户指南。
|
||||
- [x] 日语和英语数据集微调训练。
|
||||
|
||||
- [ ] **Features:**
|
||||
- [ ] **功能:**
|
||||
- [ ] 零样本声音转换(5 秒)/ 少样本声音转换(1 分钟)。
|
||||
- [ ] TTS 语速控制。
|
||||
- [ ] 增强的 TTS 情感控制。
|
||||
@ -196,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] 更好的 sovits 基础模型(增强的音频质量)。
|
||||
- [ ] 模型混合。
|
||||
|
||||
## (可选)命令行的操作方式
|
||||
## (附加)命令行运行方式
|
||||
使用命令行打开UVR5的WebUI
|
||||
````
|
||||
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
@ -226,24 +227,33 @@ python tools/asr/funasr_asr.py -i <input> -o <output>
|
||||
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
|
||||
````
|
||||
启用自定义列表保存路径
|
||||
|
||||
## 致谢
|
||||
|
||||
特别感谢以下项目和贡献者:
|
||||
|
||||
### 理论研究
|
||||
- [ar-vits](https://github.com/innnky/ar-vits)
|
||||
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
|
||||
- [vits](https://github.com/jaywalnut310/vits)
|
||||
- [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556)
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [contentvec](https://github.com/auspicious3000/contentvec/)
|
||||
- [hifi-gan](https://github.com/jik876/hifi-gan)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
|
||||
### 预训练模型
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
### 推理用文本前端
|
||||
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
|
||||
- [LangSegment](https://github.com/juntaosun/LangSegment)
|
||||
### WebUI 工具
|
||||
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
|
||||
- [audio-slicer](https://github.com/openvpi/audio-slicer)
|
||||
- [SubFix](https://github.com/cronrpc/SubFix)
|
||||
- [FFmpeg](https://github.com/FFmpeg/FFmpeg)
|
||||
- [gradio](https://github.com/gradio-app/gradio)
|
||||
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
||||
- [FunASR](https://github.com/alibaba-damo-academy/FunASR)
|
||||
|
||||
## 感谢所有贡献者的努力
|
||||
|
||||
|
@ -10,8 +10,9 @@
|
||||
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
|
||||
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
|
||||
[](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
|
||||
[](https://discord.gg/dnrgs5GHfG)
|
||||
|
||||
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](./README.md) | [**한국어**](../ko/README.md)
|
||||
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | **日本語** | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
|
||||
|
||||
</div>
|
||||
|
||||
@ -33,6 +34,8 @@
|
||||
|
||||
https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
|
||||
|
||||
**ユーザーマニュアル: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
|
||||
|
||||
## インストール
|
||||
|
||||
### テスト済みの環境
|
||||
@ -46,7 +49,9 @@ _注記: numba==0.56.4 は py<3.11 が必要です_
|
||||
|
||||
### Windows
|
||||
|
||||
Windows ユーザーの場合(win>=10 でテスト済み)、[事前にパッケージ化されたディストリビューション](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true)を直接ダウンロードし、_go-webui.bat_ をダブルクリックして GPT-SoVITS-WebUI を起動することができます。
|
||||
Windows ユーザーの場合(win>=10 でテスト済み)、[0206fix3 パッケージ](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) または [0217fix2 パッケージ](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) をダウンロードして、解凍後に _go-webui.bat_ をダブルクリックするだけで GPT-SoVITS-WebUI を起動できます。
|
||||
|
||||
_注:0206バージョンの推論速度は速いですが、0217の新バージョンの推論品質は優れています。必要に応じて選択してください。_
|
||||
|
||||
### Linux
|
||||
|
||||
@ -174,7 +179,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] より良い sovits ベースモデル(音質向上)
|
||||
- [ ] モデルミックス
|
||||
|
||||
## (オプション) 必要に応じて、コマンドライン操作モードが提供されます。
|
||||
## (追加の) コマンドラインから実行する方法
|
||||
コマンド ラインを使用して UVR5 の WebUI を開きます
|
||||
```
|
||||
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
@ -204,24 +209,33 @@ ASR処理はFaster_Whisperを通じて実行されます(中国語を除くASR
|
||||
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
|
||||
```
|
||||
カスタムリストの保存パスが有効になっています
|
||||
|
||||
## クレジット
|
||||
|
||||
以下のプロジェクトとコントリビューターに感謝します:
|
||||
特に以下のプロジェクトと貢献者に感謝します:
|
||||
|
||||
### 理論研究
|
||||
- [ar-vits](https://github.com/innnky/ar-vits)
|
||||
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
|
||||
- [vits](https://github.com/jaywalnut310/vits)
|
||||
- [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556)
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [contentvec](https://github.com/auspicious3000/contentvec/)
|
||||
- [hifi-gan](https://github.com/jik876/hifi-gan)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
|
||||
### 事前学習モデル
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
### 推論用テキストフロントエンド
|
||||
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
|
||||
- [LangSegment](https://github.com/juntaosun/LangSegment)
|
||||
### WebUI ツール
|
||||
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
|
||||
- [audio-slicer](https://github.com/openvpi/audio-slicer)
|
||||
- [SubFix](https://github.com/cronrpc/SubFix)
|
||||
- [FFmpeg](https://github.com/FFmpeg/FFmpeg)
|
||||
- [gradio](https://github.com/gradio-app/gradio)
|
||||
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
||||
- [FunASR](https://github.com/alibaba-damo-academy/FunASR)
|
||||
|
||||
## すべてのコントリビューターに感謝します
|
||||
|
||||
|
@ -10,8 +10,9 @@
|
||||
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
|
||||
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
|
||||
[](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
|
||||
[](https://discord.gg/dnrgs5GHfG)
|
||||
|
||||
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | [**한국어**](./README.md)
|
||||
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | **한국어** | [**Türkçe**](../tr/README.md)
|
||||
|
||||
</div>
|
||||
|
||||
@ -33,6 +34,8 @@
|
||||
|
||||
https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
|
||||
|
||||
**사용자 설명서: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
|
||||
|
||||
## 설치
|
||||
|
||||
### 테스트 통과 환경
|
||||
@ -46,7 +49,9 @@ _참고: numba==0.56.4 는 python<3.11 을 필요로 합니다._
|
||||
|
||||
### Windows
|
||||
|
||||
Windows 사용자이며 (win>=10에서 테스트 완료) [미리 패키지된 배포판](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true)을 직접 다운로드하여 _go-webui.bat_을 더블클릭하면 GPT-SoVITS-WebUI를 시작할 수 있습니다.
|
||||
Windows 사용자라면 (win>=10에서 테스트됨), [0206fix3 패키지](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) 또는 [0217fix2 패키지](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true)를 다운로드하고 압축을 풀어 _go-webui.bat_ 파일을 더블 클릭하면 GPT-SoVITS-WebUI를 시작할 수 있습니다.
|
||||
|
||||
_참고: 0206 버전은 추론 속도가 더 빠르지만, 0217 새 버전은 추론 품질이 더 좋습니다. 필요에 따라 선택할 수 있습니다._
|
||||
|
||||
### Linux
|
||||
|
||||
@ -178,7 +183,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] 더 나은 sovits 기본 모델 (향상된 오디오 품질).
|
||||
- [ ] 모델 블렌딩.
|
||||
|
||||
## (선택 사항) 필요한 경우 여기에서 명령줄 작업 모드를 제공합니다.
|
||||
## (추가적인) 명령줄에서 실행하는 방법
|
||||
명령줄을 사용하여 UVR5용 WebUI 열기
|
||||
```
|
||||
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
@ -208,24 +213,34 @@ ASR 처리는 Faster_Whisper(중국어를 제외한 ASR 마킹)를 통해 수행
|
||||
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
|
||||
```
|
||||
사용자 정의 목록 저장 경로가 활성화되었습니다.
|
||||
|
||||
## 감사의 말
|
||||
|
||||
특별히 다음 프로젝트와 기여자에게 감사드립니다:
|
||||
다음 프로젝트와 기여자들에게 특별히 감사드립니다:
|
||||
|
||||
### 이론 연구
|
||||
- [ar-vits](https://github.com/innnky/ar-vits)
|
||||
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
|
||||
- [vits](https://github.com/jaywalnut310/vits)
|
||||
- [TransferTTS](https://github.com/hcy71o/TransferTTS/blob/master/models.py#L556)
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [contentvec](https://github.com/auspicious3000/contentvec/)
|
||||
- [hifi-gan](https://github.com/jik876/hifi-gan)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
|
||||
### 사전 학습 모델
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
### 추론용 텍스트 프론트엔드
|
||||
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
|
||||
- [LangSegment](https://github.com/juntaosun/LangSegment)
|
||||
### WebUI 도구
|
||||
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
|
||||
- [audio-slicer](https://github.com/openvpi/audio-slicer)
|
||||
- [SubFix](https://github.com/cronrpc/SubFix)
|
||||
- [FFmpeg](https://github.com/FFmpeg/FFmpeg)
|
||||
- [gradio](https://github.com/gradio-app/gradio)
|
||||
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
||||
- [FunASR](https://github.com/alibaba-damo-academy/FunASR)
|
||||
|
||||
|
||||
## 모든 기여자들에게 감사드립니다 ;)
|
||||
|
||||
|
@ -12,8 +12,7 @@ Güçlü Birkaç Örnekli Ses Dönüştürme ve Metinden Konuşmaya Web Arayüz
|
||||
[](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
|
||||
[](https://discord.gg/dnrgs5GHfG)
|
||||
|
||||
|
||||
[**İngilizce**](./README.md) | [**Çince (Basitleştirilmiş)**](./docs/cn/README.md) | [**Japonca**](./docs/ja/README.md) | [**Korece**](./docs/ko/README.md)
|
||||
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | **Türkçe**
|
||||
|
||||
</div>
|
||||
|
||||
@ -35,12 +34,10 @@ Görünmeyen konuşmacılar birkaç örnekli ince ayar demosu:
|
||||
|
||||
https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
|
||||
|
||||
**Kullanıcı kılavuzu: [Basitleştirilmiş Çince](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [İngilizce](https://rentry.co/GPT-SoVITS-guide#/)**
|
||||
**Kullanıcı Kılavuzu: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
|
||||
|
||||
## Kurulum
|
||||
|
||||
Çin bölgesindeki kullanıcılar için, tam işlevselliği çevrimiçi olarak deneyimlemek üzere AutoDL Bulut Docker'ı kullanmak için [buraya tıklayabilirsiniz](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official).
|
||||
|
||||
### Test Edilmiş Ortamlar
|
||||
|
||||
- Python 3.9, PyTorch 2.0.1, CUDA 11
|
||||
@ -52,11 +49,9 @@ _Not: numba==0.56.4, py<3.11 gerektirir_
|
||||
|
||||
### Windows
|
||||
|
||||
Bir Windows kullanıcısıysanız (win>=10 ile test edilmiştir), [önceden paketlenmiş dağıtımı](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) indirebilir ve GPT-SoVITS-WebUI'yi başlatmak için _go-webui.bat_ dosyasını çift tıklayabilirsiniz.
|
||||
Eğer bir Windows kullanıcısıysanız (win>=10 ile test edilmiştir), [0206fix3 paketini](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta-fast-inference-branch.7z?download=true) veya [0217fix2 paketini](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta0217fix2.7z?download=true) indirip _go-webui.bat_ dosyasına çift tıklayarak GPT-SoVITS-WebUI'yi başlatabilirsiniz.
|
||||
|
||||
Çin bölgesindeki kullanıcılar, aşağıdaki bağlantılara tıklayıp "Bir kopya indir"i seçerek [0217 paketini](https://www.icloud.com.cn/iclouddrive/061bfkcVJcBfsMfLF5R2XKdTQ#GPT-SoVITS-beta0217) veya [0306fix2 paketini](https://www.icloud.com.cn/iclouddrive/09aaTLf96aa92dbLe0fPNM5CQ#GPT-SoVITS-beta0306fix2) indirebilirler.
|
||||
|
||||
_Not: 0306fix2 sürümü çıkarım hızını iki katına çıkarır ve referans metni olmayan moddaki tüm sorunları giderir._
|
||||
_Not: 0206 sürümünün çıkarım hızı daha hızlıdır, 0217 yeni sürümünün çıkarım kalitesi ise daha iyidir. İhtiyacınıza göre seçim yapabilirsiniz._
|
||||
|
||||
### Linux
|
||||
|
||||
@ -198,7 +193,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
|
||||
- [ ] daha iyi sovits temel modeli (geliştirilmiş ses kalitesi)
|
||||
- [ ] model karışımı
|
||||
|
||||
## (İsteğe Bağlı) İhtiyacınız varsa, burada komut satırı işlem modu sağlanacaktır
|
||||
## (Ekstra) Komut satırından çalıştırma yöntemi
|
||||
UVR5 için Web Arayüzünü açmak için komut satırını kullanın
|
||||
```
|
||||
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
|
||||
@ -229,11 +224,11 @@ python ./tools/asr/fasterwhisper_asr.py -i <girdi> -o <çıktı> -l <dil>
|
||||
```
|
||||
Özel bir liste kaydetme yolu etkinleştirildi
|
||||
|
||||
## Teşekkürler
|
||||
## Katkı Verenler
|
||||
|
||||
Aşağıdaki projeler ve katkıda bulunanlara özel teşekkürler:
|
||||
Özellikle aşağıdaki projelere ve katkıda bulunanlara teşekkür ederiz:
|
||||
|
||||
### Teorik
|
||||
### Teorik Araştırma
|
||||
- [ar-vits](https://github.com/innnky/ar-vits)
|
||||
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
|
||||
- [vits](https://github.com/jaywalnut310/vits)
|
||||
@ -242,12 +237,12 @@ Aşağıdaki projeler ve katkıda bulunanlara özel teşekkürler:
|
||||
- [hifi-gan](https://github.com/jik876/hifi-gan)
|
||||
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
|
||||
### Önceden Eğitilmiş Modeller
|
||||
- [Çince Konuşma Ön Eğitimi](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [Çince-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
### Çıkarım için Metin Ön Ucu
|
||||
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
|
||||
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
|
||||
### Tahmin İçin Metin Ön Ucu
|
||||
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
|
||||
- [LangSegment](https://github.com/juntaosun/LangSegment)
|
||||
### Web Arayüzü Araçları
|
||||
### WebUI Araçları
|
||||
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
|
||||
- [audio-slicer](https://github.com/openvpi/audio-slicer)
|
||||
- [SubFix](https://github.com/cronrpc/SubFix)
|
||||
|
7
webui.py
7
webui.py
@ -418,7 +418,10 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
|
||||
with open(path_text, "w", encoding="utf8") as f:
|
||||
f.write("\n".join(opt) + "\n")
|
||||
ps1a=[]
|
||||
yield "文本进程结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
|
||||
if len("".join(opt)) > 0:
|
||||
yield "文本进程成功", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
|
||||
else:
|
||||
yield "文本进程失败", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
|
||||
else:
|
||||
yield "已有正在进行的文本任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
|
||||
|
||||
@ -583,7 +586,7 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
|
||||
os.remove(txt_path)
|
||||
with open(path_text, "w",encoding="utf8") as f:
|
||||
f.write("\n".join(opt) + "\n")
|
||||
|
||||
assert len("".join(opt)) > 0, "1Aa-文本获取进程失败"
|
||||
yield "进度:1a-done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
|
||||
ps1abc=[]
|
||||
#############################1b
|
||||
|
Loading…
x
Reference in New Issue
Block a user