Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git, synced 2025-12-19 19:47:03 +08:00

Merge branch 'main' of https://github.com/zZxztxZz/GPT-SoVITS-VoiceLoader

This commit is contained in: commit 9a7eabb3e5

Docker/damo.sha256 (new file, 3 lines)
@@ -0,0 +1,3 @@
+5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025  /workspace/tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb
+b3be75be477f0780277f3bae0fe489f48718f585f3a6e45d7dd1fbb1a4255fc5  /workspace/tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch/model.pb
+a5818bb9d933805a916eebe41eb41648f7f9caad30b4bd59d56f3ca135421916  /workspace/tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/model.pb
Docker/download.sh (new file, 11 lines)
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+set -Eeuo pipefail
+
+echo "Downloading models..."
+
+aria2c --disable-ipv6 --input-file /workspace/Docker/links.txt --dir /workspace --continue
+
+echo "Checking SHA256..."
+
+parallel --will-cite -a /workspace/Docker/links.sha256 "echo -n {} | sha256sum -c"
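Aside: the same integrity check can be done without GNU parallel. Below is a minimal stdlib-only sketch (not part of the commit), assuming the two-column `<sha256>  <path>` manifest format used above:

```python
import hashlib
import sys

def check_manifest(manifest="Docker/links.sha256"):
    """Recompute each file's SHA256 and compare it against the manifest line."""
    failed = []
    for line in open(manifest, encoding="utf8"):
        if not line.strip():
            continue
        digest, path = line.split(maxsplit=1)
        path = path.strip()
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
                h.update(chunk)
        ok = h.hexdigest() == digest
        print(f"{path}: {'OK' if ok else 'FAILED'}")
        if not ok:
            failed.append(path)
    if failed:
        sys.exit(1)

if __name__ == "__main__":
    check_manifest()
```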
Docker/links.sha256 (new file, 12 lines)
@@ -0,0 +1,12 @@
+b1c1e17e9c99547a89388f72048cd6e1b41b5a18b170e86a46dfde0324d63eb1  /workspace/GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
+fc579c1db3c1e21b721001cf99d7a584214280df19b002e200b630a34fa06eb8  /workspace/GPT_SoVITS/pretrained_models/s2D488k.pth
+020a014e1e01e550e510f2f61fae5e5f5b6aab40f15c22f1f12f724df507e835  /workspace/GPT_SoVITS/pretrained_models/s2G488k.pth
+24164f129c66499d1346e2aa55f183250c223161ec2770c0da3d3b08cf432d3c  /workspace/GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
+e53a693acc59ace251d143d068096ae0d7b79e4b1b503fa84c9dcf576448c1d8  /workspace/GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
+39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05  /workspace/tools/uvr5/uvr5_weights/HP2_all_vocals.pth
+45e6b65199e781b4a6542002699be9f19cd3d1cb7d1558bc2bfbcd84674dfe28  /workspace/tools/uvr5/uvr5_weights/HP3_all_vocals.pth
+5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee  /workspace/tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
+8c8fd1582f9aabc363e47af62ddb88df6cae7e064cae75bbf041a067a5e0aee2  /workspace/tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
+01376dd2a571bf3cb9cced680732726d2d732609d09216a610b0d110f133febe  /workspace/tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
+56aba59db3bcdd14a14464e62f3129698ecdea62eee0f003b9360923eb3ac79e  /workspace/tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
+233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80  /workspace/tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
Docker/links.txt (new file, 34 lines)
@@ -0,0 +1,34 @@
+# GPT-SoVITS models
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s1bert25hz-2kh-longer-epoch%3D68e-step%3D50232.ckpt
+  out=GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2D488k.pth
+  out=GPT_SoVITS/pretrained_models/s2D488k.pth
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2G488k.pth
+  out=GPT_SoVITS/pretrained_models/s2G488k.pth
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/config.json
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/preprocessor_config.json
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/preprocessor_config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/pytorch_model.bin
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/config.json
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/pytorch_model.bin
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/tokenizer.json
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json
+# UVR5
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
+  out=tools/uvr5/uvr5_weights/HP2_all_vocals.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth
+  out=tools/uvr5/uvr5_weights/HP3_all_vocals.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth
+  out=tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
+  out=tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
Dockerfile (new file, 53 lines)
@@ -0,0 +1,53 @@
+# Base CUDA image
+FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
+
+LABEL maintainer="breakstring@hotmail.com"
+LABEL version="dev-20240123.03"
+LABEL description="Docker image for GPT-SoVITS"
+
+
+# Install 3rd party apps
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=Etc/UTC
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && \
+    rm -rf /var/lib/apt/lists/* && \
+    git lfs install
+
+# Copy application
+WORKDIR /workspace
+COPY . /workspace
+
+# Download models
+RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh
+
+# Packages should be installed from requirements.txt, but because of funasr and modelscope issues, install the dependencies by hand below for now
+RUN pip install --no-cache-dir torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba psutil PyYAML
+# Pin the modelscope and funasr versions here; let them download the damo_asr models themselves later
+RUN pip install --no-cache-dir modelscope~=1.10.0 torchaudio sentencepiece funasr~=0.8.7
+
+# Disabled for now; let the container download these itself
+# Clone damo_asr
+#WORKDIR /workspace/tools/damo_asr/models
+#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \
+#    (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull)
+#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \
+#    (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull)
+#RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \
+#    (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull)
+
+#RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c"
+
+#WORKDIR /workspace
+
+EXPOSE 9870
+EXPOSE 9871
+EXPOSE 9872
+EXPOSE 9873
+EXPOSE 9874
+
+VOLUME /workspace/output
+VOLUME /workspace/logs
+VOLUME /workspace/SoVITS_weights
+
+CMD ["python", "webui.py"]
GPT_SoVITS/inference_webui.py
@@ -1,4 +1,5 @@
-import os
+import os,re
+import pdb

 gpt_path = os.environ.get(
     "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
@@ -42,8 +43,6 @@ if is_half == True:
 else:
     bert_model = bert_model.to(device)

-
-# bert_model=bert_model.to(device)
 def get_bert_feature(text, word2ph):
     with torch.no_grad():
         inputs = tokenizer(text, return_tensors="pt")
@@ -57,15 +56,8 @@ def get_bert_feature(text, word2ph):
         repeat_feature = res[i].repeat(word2ph[i], 1)
         phone_level_feature.append(repeat_feature)
     phone_level_feature = torch.cat(phone_level_feature, dim=0)
-    # if(is_half==True):phone_level_feature=phone_level_feature.half()
     return phone_level_feature.T


-n_semantic = 1024
-
-dict_s2=torch.load(sovits_path,map_location="cpu")
-hps=dict_s2["config"]
-
 class DictToAttrRecursive(dict):
     def __init__(self, input_dict):
         super().__init__(input_dict)
@@ -94,40 +86,48 @@ class DictToAttrRecursive(dict):
             raise AttributeError(f"Attribute {item} not found")


-hps = DictToAttrRecursive(hps)
-hps.model.semantic_frame_rate = "25hz"
-dict_s1 = torch.load(gpt_path, map_location="cpu")
-config = dict_s1["config"]
 ssl_model = cnhubert.get_model()
 if is_half == True:
     ssl_model = ssl_model.half().to(device)
 else:
     ssl_model = ssl_model.to(device)

-vq_model = SynthesizerTrn(
-    hps.data.filter_length // 2 + 1,
-    hps.train.segment_size // hps.data.hop_length,
-    n_speakers=hps.data.n_speakers,
-    **hps.model
-)
-if is_half == True:
-    vq_model = vq_model.half().to(device)
-else:
-    vq_model = vq_model.to(device)
-vq_model.eval()
-print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
-hz = 50
-max_sec = config["data"]["max_sec"]
-t2s_model = Text2SemanticLightningModule(config, "ojbk", is_train=False)
-t2s_model.load_state_dict(dict_s1["weight"])
-if is_half == True:
-    t2s_model = t2s_model.half()
-t2s_model = t2s_model.to(device)
-t2s_model.eval()
-total = sum([param.nelement() for param in t2s_model.parameters()])
-print("Number of parameter: %.2fM" % (total / 1e6))
+def change_sovits_weights(sovits_path):
+    global vq_model,hps
+    dict_s2=torch.load(sovits_path,map_location="cpu")
+    hps=dict_s2["config"]
+    hps = DictToAttrRecursive(hps)
+    hps.model.semantic_frame_rate = "25hz"
+    vq_model = SynthesizerTrn(
+        hps.data.filter_length // 2 + 1,
+        hps.train.segment_size // hps.data.hop_length,
+        n_speakers=hps.data.n_speakers,
+        **hps.model
+    )
+    del vq_model.enc_q
+    if is_half == True:
+        vq_model = vq_model.half().to(device)
+    else:
+        vq_model = vq_model.to(device)
+    vq_model.eval()
+    print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
+change_sovits_weights(sovits_path)
+
+def change_gpt_weights(gpt_path):
+    global hz,max_sec,t2s_model,config
+    hz = 50
+    dict_s1 = torch.load(gpt_path, map_location="cpu")
+    config = dict_s1["config"]
+    max_sec = config["data"]["max_sec"]
+    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
+    t2s_model.load_state_dict(dict_s1["weight"])
+    if is_half == True:
+        t2s_model = t2s_model.half()
+    t2s_model = t2s_model.to(device)
+    t2s_model.eval()
+    total = sum([param.nelement() for param in t2s_model.parameters()])
+    print("Number of parameter: %.2fM" % (total / 1e6))
+change_gpt_weights(gpt_path)


 def get_spepc(hps, filename):
     audio = load_audio(filename, int(hps.data.sampling_rate))
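The point of the refactor above: checkpoint loading moves into `change_sovits_weights`/`change_gpt_weights`, which rebind module-level globals (`vq_model`, `hps`, `t2s_model`, `config`), so the UI can hot-swap models at runtime instead of restarting the process. A stripped-down sketch of the pattern (`build_model` and the checkpoint layout here are hypothetical stand-ins, not the repo's API):

```python
import torch

model = None     # module-level handle that every UI callback reads
is_half = True   # mirrors the repo's global precision flag

def build_model(config):
    # hypothetical stand-in for SynthesizerTrn / Text2SemanticLightningModule
    return torch.nn.Linear(config["in"], config["out"])

def change_weights(ckpt_path: str):
    """Rebind the global so all later inference calls see the new weights."""
    global model
    state = torch.load(ckpt_path, map_location="cpu")  # load to CPU first
    new_model = build_model(state["config"])
    new_model.load_state_dict(state["weight"], strict=False)
    new_model.eval()
    model = new_model.half() if is_half else new_model  # the swap happens here
```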
@@ -325,14 +325,46 @@ def cut3(inp):
     inp = inp.strip("\n")
     return "\n".join(["%s。" % item for item in inp.strip("。").split("。")])

+def custom_sort_key(s):
+    # use a regular expression to split the string into numeric and non-numeric parts
+    parts = re.split('(\d+)', s)
+    # convert the numeric parts to integers and leave the non-numeric parts unchanged
+    parts = [int(part) if part.isdigit() else part for part in parts]
+    return parts
+
+def change_choices():
+    SoVITS_names, GPT_names = get_weights_names()
+    return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names,key=custom_sort_key), "__type__": "update"}
+
+pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth"
+pretrained_gpt_name="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
+SoVITS_weight_root="SoVITS_weights"
+GPT_weight_root="GPT_weights"
+os.makedirs(SoVITS_weight_root,exist_ok=True)
+os.makedirs(GPT_weight_root,exist_ok=True)
+def get_weights_names():
+    SoVITS_names = [pretrained_sovits_name]
+    for name in os.listdir(SoVITS_weight_root):
+        if name.endswith(".pth"):SoVITS_names.append("%s/%s"%(SoVITS_weight_root,name))
+    GPT_names = [pretrained_gpt_name]
+    for name in os.listdir(GPT_weight_root):
+        if name.endswith(".ckpt"): GPT_names.append("%s/%s"%(GPT_weight_root,name))
+    return SoVITS_names,GPT_names
+SoVITS_names,GPT_names = get_weights_names()

 with gr.Blocks(title="GPT-SoVITS WebUI") as app:
     gr.Markdown(
         value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
     )
-    # with gr.Tabs():
-    #     with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
     with gr.Group():
+        gr.Markdown(value=i18n("模型切换"))
+        with gr.Row():
+            GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path,interactive=True)
+            SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path,interactive=True)
+            refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
+            refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
+            SoVITS_dropdown.change(change_sovits_weights,[SoVITS_dropdown],[])
+            GPT_dropdown.change(change_gpt_weights,[GPT_dropdown],[])
         gr.Markdown(value=i18n("*请上传并填写参考信息"))
         with gr.Row():
             inp_ref = gr.Audio(label=i18n("请上传参考音频"), type="filepath")
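`custom_sort_key` in the hunk above implements a natural sort: file names compare by their embedded numbers instead of character by character, so `e10` no longer sorts before `e2`. A self-contained demonstration of the behaviour:

```python
import re

def custom_sort_key(s):
    # split into digit and non-digit runs: "model_e10.pth" -> ["model_e", "10", ".pth"]
    parts = re.split(r"(\d+)", s)
    # digit runs compare as integers, everything else as strings
    return [int(part) if part.isdigit() else part for part in parts]

names = ["model_e10.pth", "model_e2.pth", "model_e1.pth"]
print(sorted(names))                       # ['model_e1.pth', 'model_e10.pth', 'model_e2.pth']
print(sorted(names, key=custom_sort_key))  # ['model_e1.pth', 'model_e2.pth', 'model_e10.pth']
```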
GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -49,10 +49,13 @@ maxx=0.95
 alpha=0.5
 device="cuda:0"
 model=cnhubert.get_model()
+# is_half=False
 if(is_half==True):
     model=model.half().to(device)
 else:
     model = model.to(device)

+nan_fails=[]
 def name2go(wav_name):
     hubert_path="%s/%s.pt"%(hubert_dir,wav_name)
     if(os.path.exists(hubert_path)):return
@@ -60,25 +63,28 @@ def name2go(wav_name):
     tmp_audio = load_audio(wav_path, 32000)
     tmp_max = np.abs(tmp_audio).max()
     if tmp_max > 2.2:
-        print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max))
+        print("%s-filtered" % (wav_name, tmp_max))
         return
     tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha*32768)) + ((1 - alpha)*32768) * tmp_audio
+    tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha*1145.14)) + ((1 - alpha)*1145.14) * tmp_audio
     tmp_audio = librosa.resample(
-        tmp_audio32, orig_sr=32000, target_sr=16000
-    )
+        tmp_audio32b, orig_sr=32000, target_sr=16000
+    )# not a resampling problem
     tensor_wav16 = torch.from_numpy(tmp_audio)
     if (is_half == True):
         tensor_wav16=tensor_wav16.half().to(device)
     else:
         tensor_wav16 = tensor_wav16.to(device)
     ssl=model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu()#torch.Size([1, 768, 215])
-    if np.isnan(ssl.detach().numpy()).sum()!= 0:return
+    if np.isnan(ssl.detach().numpy()).sum()!= 0:
+        nan_fails.append(wav_name)
+        print("nan filtered:%s"%wav_name)
+        return
     wavfile.write(
         "%s/%s"%(wav32dir,wav_name),
         32000,
         tmp_audio32.astype("int16"),
     )
-    # torch.save(ssl,hubert_path )
     my_save(ssl,hubert_path )

 with open(inp_text,"r",encoding="utf8")as f:
@@ -92,3 +98,12 @@ for line in lines[int(i_part)::int(all_parts)]:
         name2go(wav_name)
     except:
         print(line,traceback.format_exc())
+
+if(len(nan_fails)>0 and is_half==True):
+    is_half=False
+    model=model.float()
+    for wav_name in nan_fails:
+        try:
+            name2go(wav_name)
+        except:
+            print(wav_name,traceback.format_exc())
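Background on why retrying in fp32 helps: float16 tops out at 65504, so large activations overflow to inf and turn into NaN downstream, while the same values are unremarkable in float32. A short demonstration:

```python
import torch

x = torch.tensor([70000.0], dtype=torch.float16)
print(x)                         # tensor([inf], dtype=torch.float16): 70000 > 65504
print(torch.isnan(x - x).any())  # tensor(True): inf - inf is NaN

y = torch.tensor([70000.0], dtype=torch.float32)
print(torch.isnan(y - y).any())  # tensor(False): fits comfortably in fp32
```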
GPT_SoVITS/text/chinese.py
@@ -18,7 +18,7 @@ pinyin_to_symbol_map = {
     for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
 }

-import jieba.posseg as psg
+import jieba_fast.posseg as psg


 rep_map = {
GPT_SoVITS/text/tone_sandhi.py
@@ -14,7 +14,7 @@
 from typing import List
 from typing import Tuple

-import jieba
+import jieba_fast as jieba
 from pypinyin import lazy_pinyin
 from pypinyin import Style
GPT_SoVITS/utils.py
@@ -18,7 +18,7 @@ logging.getLogger("matplotlib").setLevel(logging.ERROR)

 MATPLOTLIB_FLAG = False

-logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 logger = logging

@@ -310,13 +310,13 @@ def check_git_hash(model_dir):
 def get_logger(model_dir, filename="train.log"):
     global logger
     logger = logging.getLogger(os.path.basename(model_dir))
-    logger.setLevel(logging.WARNING)
+    logger.setLevel(logging.DEBUG)

     formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
     if not os.path.exists(model_dir):
         os.makedirs(model_dir)
     h = logging.FileHandler(os.path.join(model_dir, filename))
-    h.setLevel(logging.WARNING)
+    h.setLevel(logging.DEBUG)
     h.setFormatter(formatter)
     logger.addHandler(h)
     return logger
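Note that a record must clear both the logger's level and its handler's level before it is written, which is why the commit lowers `logger.setLevel` and `h.setLevel` together; changing only one of them would still suppress DEBUG output. A minimal illustration:

```python
import logging
import sys

logger = logging.getLogger("demo")
handler = logging.StreamHandler(sys.stdout)
logger.addHandler(handler)

logger.setLevel(logging.DEBUG)
handler.setLevel(logging.WARNING)
logger.debug("dropped: the handler is still at WARNING")

handler.setLevel(logging.DEBUG)
logger.debug("printed: both thresholds now pass")
```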
README.md (27 lines changed)
@@ -61,7 +61,7 @@ sudo apt-get install python3.9-distutils
 #### Pip Packages

 ```bash
-pip install torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba
+pip install torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba_fast
 ```

 #### Additional Requirements
@@ -107,6 +107,31 @@
 For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.

+### Using Docker
+
+#### docker-compose.yaml configuration
+
+1. Environment Variables:
+   - is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
+
+2. Volumes Configuration: the application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
+3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation.
+4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
+
+#### Running with docker compose
+```
+docker compose -f "docker-compose.yaml" up -d
+```
+
+#### Running with docker command
+
+As above, modify the corresponding parameters based on your actual situation, then run the following command:
+```
+docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
+```
+
 ## Dataset Format

 The TTS annotation .list file format:
config.py
@@ -1,4 +1,4 @@
-import sys
+import sys,os

 #model type name
 MODEL_TYPE_GPT = "GPT"

@@ -11,7 +11,7 @@ MODEL_FOLDER_PATH_SOVITS = "SoVITS_weights"
 # specified models for inference
 sovits_path = ""
 gpt_path = ""
-is_half = True
+is_half = eval(os.environ.get("is_half","True"))
 is_share=False

 cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
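`eval` on an environment variable works for the documented `--env=is_half=False` usage, but it executes arbitrary Python from the environment. A stricter parse (an alternative sketch, not what the commit ships) could look like:

```python
import os

def env_flag(name: str, default: bool = True) -> bool:
    """Parse a boolean-ish environment variable without eval()."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    value = raw.strip().lower()
    if value in ("1", "true", "yes"):
        return True
    if value in ("0", "false", "no"):
        return False
    raise ValueError(f"{name} must look like a boolean, got {raw!r}")

is_half = env_flag("is_half", True)
```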
docker-compose.yaml (new file, 31 lines)
@@ -0,0 +1,31 @@
+version: '3.8'
+
+services:
+  gpt-sovits:
+    image: breakstring/gpt-sovits:dev-20240123.03
+    container_name: gpt-sovits-container
+    environment:
+      - is_half=False
+    volumes:
+      - G:/GPT-SoVITS-DockerTest/output:/workspace/output
+      - G:/GPT-SoVITS-DockerTest/logs:/workspace/logs
+      - G:/GPT-SoVITS-DockerTest/SoVITS_weights:/workspace/SoVITS_weights
+      - G:/GPT-SoVITS-DockerTest/reference:/workspace/reference
+    working_dir: /workspace
+    ports:
+      - "9870:9870"
+      - "9871:9871"
+      - "9872:9872"
+      - "9873:9873"
+      - "9874:9874"
+    shm_size: 16G
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: "all"
+              capabilities: [gpu]
+    stdin_open: true
+    tty: true
+    restart: unless-stopped
docs/cn/Changelog_CN.md
@@ -10,7 +10,7 @@

 5 - Clean up cached audio and other files in the TEMP folder

-6 - Leave 0.3 s of silence at the end of the reference audio to reduce the problem of the synthesized audio containing the end of the reference audio
+6 - Greatly reduce the problem of the synthesized audio containing the end of the reference audio

 ### 20240122 Update
@@ -20,5 +20,12 @@

 3 - Audio path check: if a wrongly entered path is read, report that the path does not exist rather than an ffmpeg error.

-To be fixed: hubert extraction is more likely to produce NaN under half precision
+### 20240123 Update
+
+1 - Fixed hubert extraction producing NaN, which caused SoVITS/GPT training to raise ZeroDivisionError
+
+2 - Support quickly switching models in the inference UI
+
+3 - Improved the sorting logic for model files
+
+4 - Use jieba_fast instead of jieba for Chinese word segmentation
docs/cn/README.md
@@ -87,6 +87,32 @@ brew install ffmpeg

 Download [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) and place them in the GPT-SoVITS root directory.

+### Using Docker
+
+#### docker-compose.yaml configuration
+
+1. Environment variables:
+   - is_half: half-/full-precision control. If the content under the 4-cnhubert/5-wav32k directories is not generated correctly during the "SSL extracting" step, this is usually the cause; set it to True or False according to your actual situation.
+
+2. Volumes: the application root inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
+3. shm_size: the default memory available to Docker Desktop on Windows is too small, which causes abnormal operation; set it according to your own situation.
+4. Set the GPU-related items under the deploy section according to your system and actual circumstances.
+
+
+#### Running with docker compose
+```
+docker compose -f "docker-compose.yaml" up -d
+```
+
+#### Running with the docker command
+
+As above, modify the corresponding parameters according to your actual situation, then run the following command:
+```
+docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
+```
+
 ### Pretrained Models

|||||||
@ -93,6 +93,30 @@ brew install ffmpeg
|
|||||||
|
|
||||||
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
|
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
|
||||||
|
|
||||||
|
### Dockerの使用
|
||||||
|
|
||||||
|
#### docker-compose.yamlの設定
|
||||||
|
|
||||||
|
1. 環境変数:
|
||||||
|
- `is_half`:半精度/倍精度の制御。"SSL抽出"ステップ中に`4-cnhubert/5-wav32k`ディレクトリ内の内容が正しく生成されない場合、通常これが原因です。実際の状況に応じてTrueまたはFalseに調整してください。
|
||||||
|
|
||||||
|
2. ボリューム設定:コンテナ内のアプリケーションのルートディレクトリは`/workspace`に設定されます。デフォルトの`docker-compose.yaml`には、アップロード/ダウンロードの内容の実例がいくつか記載されています。
|
||||||
|
3. `shm_size`:WindowsのDocker Desktopのデフォルトの利用可能メモリが小さすぎるため、異常な動作を引き起こす可能性があります。状況に応じて適宜設定してください。
|
||||||
|
4. `deploy`セクションのGPUに関連する内容は、システムと実際の状況に応じて慎重に設定してください。
|
||||||
|
|
||||||
|
#### docker composeで実行する
|
||||||
|
```markdown
|
||||||
|
docker compose -f "docker-compose.yaml" up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
#### dockerコマンドで実行する
|
||||||
|
|
||||||
|
上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します:
|
||||||
|
```markdown
|
||||||
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### 事前訓練済みモデル
|
### 事前訓練済みモデル
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
requirements.txt
@@ -20,4 +20,4 @@ transformers
 chardet
 PyYAML
 psutil
-jieba
+jieba_fast
tools/subfix_webui.py
@@ -110,6 +110,7 @@ def b_submit_change(*text_list):

 def b_delete_audio(*checkbox_list):
     global g_data_json, g_index, g_max_json_index
+    b_save_file()
     change = False
     for i, checkbox in reversed(list(enumerate(checkbox_list))):
         if g_index + i < len(g_data_json):

@@ -121,8 +122,8 @@ def b_delete_audio(*checkbox_list):
     if g_index > g_max_json_index:
         g_index = g_max_json_index
     g_index = g_index if g_index >= 0 else 0
-    # if change:
-    #     b_save_file()
+    if change:
+        b_save_file()
     # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch)
     return {"value":g_index,"__type__":"update","maximum":(g_max_json_index if g_max_json_index>=0 else 0)},*b_change_index(g_index, g_batch)

@@ -172,6 +173,7 @@ def b_audio_split(audio_breakpoint, *checkbox_list):

 def b_merge_audio(interval_r, *checkbox_list):
     global g_data_json , g_max_json_index
+    b_save_file()
     checked_index = []
     audios_path = []
     audios_text = []
webui.py (15 lines changed)
@@ -1,4 +1,4 @@
-import os,shutil,sys,pdb
+import os,shutil,sys,pdb,re
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 import json,yaml,warnings,torch

@@ -85,9 +85,16 @@ os.makedirs(SoVITS_weight_root,exist_ok=True)
 os.makedirs(GPT_weight_root,exist_ok=True)
 SoVITS_names,GPT_names = get_weights_names()

+def custom_sort_key(s):
+    # use a regular expression to split the string into numeric and non-numeric parts
+    parts = re.split('(\d+)', s)
+    # convert the numeric parts to integers and leave the non-numeric parts unchanged
+    parts = [int(part) if part.isdigit() else part for part in parts]
+    return parts
+
 def change_choices():
     SoVITS_names, GPT_names = get_weights_names()
-    return {"choices": sorted(SoVITS_names), "__type__": "update"}, {"choices": sorted(GPT_names), "__type__": "update"}
+    return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names,key=custom_sort_key), "__type__": "update"}

 p_label=None
 p_uvr5=None

@@ -733,8 +740,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         with gr.TabItem(i18n("1C-推理")):
             gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。"))
             with gr.Row():
-                GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names),value=pretrained_gpt_name)
-                SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names),value=pretrained_sovits_name)
+                GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name,interactive=True)
+                SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name,interactive=True)
                 gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True)
                 refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
                 refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown])