mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Merge branch 'main' into inference_optin
This commit is contained in:
commit
013db82d7d
2
.gitignore
vendored
2
.gitignore
vendored
@ -10,3 +10,5 @@ reference
|
|||||||
GPT_weights
|
GPT_weights
|
||||||
SoVITS_weights
|
SoVITS_weights
|
||||||
TEMP
|
TEMP
|
||||||
|
|
||||||
|
|
||||||
|
11
Dockerfile
11
Dockerfile
@ -40,15 +40,6 @@ COPY . /workspace
|
|||||||
# Copy the rest of the application
|
# Copy the rest of the application
|
||||||
COPY . /workspace
|
COPY . /workspace
|
||||||
|
|
||||||
|
EXPOSE 9871 9872 9873 9874 9880
|
||||||
EXPOSE 9870
|
|
||||||
EXPOSE 9871
|
|
||||||
EXPOSE 9872
|
|
||||||
EXPOSE 9873
|
|
||||||
EXPOSE 9874
|
|
||||||
|
|
||||||
VOLUME /workspace/output
|
|
||||||
VOLUME /workspace/logs
|
|
||||||
VOLUME /workspace/SoVITS_weights
|
|
||||||
|
|
||||||
CMD ["python", "webui.py"]
|
CMD ["python", "webui.py"]
|
||||||
|
@ -41,7 +41,8 @@ class Text2SemanticDataModule(LightningDataModule):
|
|||||||
# pad_val=self.config['data']['pad_val'])
|
# pad_val=self.config['data']['pad_val'])
|
||||||
|
|
||||||
def train_dataloader(self):
|
def train_dataloader(self):
|
||||||
batch_size = max(min(self.config["train"]["batch_size"],len(self._train_dataset)//4),1)#防止不保存
|
batch_size=self.config["train"]["batch_size"]//2 if self.config["train"].get("if_dpo",False)==True else self.config["train"]["batch_size"]
|
||||||
|
batch_size = max(min(batch_size,len(self._train_dataset)//4),1)#防止不保存
|
||||||
sampler = DistributedBucketSampler(self._train_dataset, batch_size=batch_size)
|
sampler = DistributedBucketSampler(self._train_dataset, batch_size=batch_size)
|
||||||
return DataLoader(
|
return DataLoader(
|
||||||
self._train_dataset,
|
self._train_dataset,
|
||||||
|
@ -11,7 +11,6 @@ from AR.models.t2s_model import Text2SemanticDecoder
|
|||||||
from AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
from AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
||||||
from AR.modules.optim import ScaledAdam
|
from AR.modules.optim import ScaledAdam
|
||||||
|
|
||||||
|
|
||||||
class Text2SemanticLightningModule(LightningModule):
|
class Text2SemanticLightningModule(LightningModule):
|
||||||
def __init__(self, config, output_dir, is_train=True):
|
def __init__(self, config, output_dir, is_train=True):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@ -35,7 +34,8 @@ class Text2SemanticLightningModule(LightningModule):
|
|||||||
def training_step(self, batch: Dict, batch_idx: int):
|
def training_step(self, batch: Dict, batch_idx: int):
|
||||||
opt = self.optimizers()
|
opt = self.optimizers()
|
||||||
scheduler = self.lr_schedulers()
|
scheduler = self.lr_schedulers()
|
||||||
loss, acc = self.model.forward(
|
forward=self.model.forward if self.config["train"].get("if_dpo",False)==True else self.model.forward_old
|
||||||
|
loss, acc = forward(
|
||||||
batch["phoneme_ids"],
|
batch["phoneme_ids"],
|
||||||
batch["phoneme_ids_len"],
|
batch["phoneme_ids_len"],
|
||||||
batch["semantic_ids"],
|
batch["semantic_ids"],
|
||||||
|
@ -8,6 +8,9 @@ from AR.models.utils import (
|
|||||||
sample,
|
sample,
|
||||||
logits_to_probs,
|
logits_to_probs,
|
||||||
multinomial_sample_one_no_sync,
|
multinomial_sample_one_no_sync,
|
||||||
|
dpo_loss,
|
||||||
|
make_reject_y,
|
||||||
|
get_batch_logps
|
||||||
)
|
)
|
||||||
from AR.modules.embedding import SinePositionalEmbedding
|
from AR.modules.embedding import SinePositionalEmbedding
|
||||||
from AR.modules.embedding import TokenEmbedding
|
from AR.modules.embedding import TokenEmbedding
|
||||||
@ -85,11 +88,104 @@ class Text2SemanticDecoder(nn.Module):
|
|||||||
ignore_index=self.EOS,
|
ignore_index=self.EOS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def make_input_data(self, x, x_lens, y, y_lens, bert_feature):
    """Build the decoder input, additive attention mask and targets.

    The text ids ``x`` are embedded, combined with the projected
    ``bert_feature`` and position-encoded.  The semantic ids ``y`` are
    zeroed at padded positions, passed through ``pad_y_eos`` and then
    embedded and position-encoded.  Both parts are concatenated along
    dim 1 (text first).

    Returns:
        ``(xy_pos, xy_attn_mask, targets)`` where ``xy_attn_mask`` holds
        ``-inf`` at blocked positions and ``0`` elsewhere, repeated per
        attention head, and ``targets`` is the second value returned by
        ``pad_y_eos``.
    """
    # Text branch: token embedding + projected BERT feature + position.
    text_emb = self.ar_text_embedding(x)
    text_emb = text_emb + self.bert_proj(bert_feature.transpose(1, 2))
    text_emb = self.ar_text_position(text_emb)
    device = text_emb.device

    text_pad = make_pad_mask(x_lens)
    audio_pad = make_pad_mask(y_lens)
    audio_pad_int = audio_pad.type(torch.int64)
    # Zero the semantic ids wherever the padding mask is set.
    codes = y.type(torch.int64) * (1 - audio_pad_int)

    # Audio branch: decoder inputs / targets, then embedding + position.
    y_in, targets = self.pad_y_eos(codes, audio_pad_int, eos_id=self.EOS)
    x_len = x_lens.max()
    y_len = y_lens.max()
    audio_pos = self.ar_audio_position(self.ar_audio_embedding(y_in))

    # Rows for text queries: text columns open, audio columns blocked.
    text_rows = F.pad(
        torch.zeros((x_len, x_len), dtype=torch.bool, device=device),
        (0, y_len),
        value=True,
    )
    # Rows for audio queries: text columns open, strictly-future audio
    # columns blocked (upper triangle above the diagonal).
    future_only = torch.triu(
        torch.ones(y_len, y_len, dtype=torch.bool, device=device),
        diagonal=1,
    )
    audio_rows = F.pad(future_only, (x_len, 0), value=False)
    blocked = torch.concat([text_rows, audio_rows], dim=0)

    # Broadcast the per-sample key padding mask over every head and merge.
    batch = text_emb.shape[0]
    total_len = x_len + y_len
    key_padding = torch.concat([text_pad, audio_pad], dim=1)
    key_padding = (
        key_padding.view(batch, 1, 1, total_len)
        .expand(-1, self.num_head, -1, -1)
        .reshape(batch * self.num_head, 1, total_len)
    )
    blocked = blocked.logical_or(key_padding)

    # Convert the boolean mask into an additive one (-inf where blocked).
    additive_mask = torch.zeros_like(blocked, dtype=text_emb.dtype)
    additive_mask.masked_fill_(blocked, float("-inf"))

    # x and the complete y are fed to the model in a single pass.
    xy_pos = torch.concat([text_emb, audio_pos], dim=1)

    return xy_pos, additive_mask, targets
|
||||||
|
|
||||||
def forward(self, x, x_lens, y, y_lens, bert_feature):
    """Compute the training loss with an added DPO term.

    x: phoneme_ids
    y: semantic_ids

    A corrupted ("rejected") copy of ``y`` is produced by
    ``make_reject_y``; both the original and the rejected sequence are run
    through the decoder, and the summed cross-entropy on the original is
    added to a reference-free DPO loss (beta 0.2).

    Returns:
        ``(loss, acc)`` where ``acc`` is the Python number obtained from
        ``self.ar_accuracy_metric`` on the original sequence.
    """
    rejected_y, rejected_y_lens = make_reject_y(y, y_lens)
    text_len = x_lens.max()

    def run_decoder(semantic, semantic_lens):
        # One full pass: build inputs, decode, predict on the audio part.
        xy_pos, attn_mask, tgt = self.make_input_data(
            x, x_lens, semantic, semantic_lens, bert_feature
        )
        decoded, _ = self.h(
            (xy_pos, None),
            mask=attn_mask,
        )
        return self.ar_predict_layer(decoded[:, text_len:]), tgt

    # Chosen sequence first, rejected second (same order as before).
    logits, targets = run_decoder(y, y_lens)
    reject_logits, reject_targets = run_decoder(rejected_y, rejected_y_lens)

    # from feiteng: the more duration there is, the larger the gradient
    # update should be, so the cross-entropy uses sum reduction.
    class_first = logits.permute(0, 2, 1)
    ce_loss = F.cross_entropy(class_first, targets, reduction="sum")
    acc = self.ar_accuracy_metric(class_first.detach(), targets).item()

    chosen_logps, rejected_logps = get_batch_logps(
        logits, reject_logits, targets, reject_targets
    )
    preference_loss, _, _ = dpo_loss(
        chosen_logps, rejected_logps, 0, 0, 0.2, reference_free=True
    )

    return ce_loss + preference_loss, acc
|
||||||
|
|
||||||
|
def forward_old(self, x, x_lens, y, y_lens, bert_feature):
|
||||||
|
"""
|
||||||
|
x: phoneme_ids
|
||||||
|
y: semantic_ids
|
||||||
|
"""
|
||||||
x = self.ar_text_embedding(x)
|
x = self.ar_text_embedding(x)
|
||||||
x = x + self.bert_proj(bert_feature.transpose(1, 2))
|
x = x + self.bert_proj(bert_feature.transpose(1, 2))
|
||||||
x = self.ar_text_position(x)
|
x = self.ar_text_position(x)
|
||||||
@ -231,6 +327,7 @@ class Text2SemanticDecoder(nn.Module):
|
|||||||
prompts, ####参考音频token
|
prompts, ####参考音频token
|
||||||
bert_feature,
|
bert_feature,
|
||||||
top_k: int = -100,
|
top_k: int = -100,
|
||||||
|
top_p: int = 100,
|
||||||
early_stop_num: int = -1,
|
early_stop_num: int = -1,
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
):
|
):
|
||||||
@ -299,7 +396,7 @@ class Text2SemanticDecoder(nn.Module):
|
|||||||
if(idx==0):###第一次跑不能EOS否则没有了
|
if(idx==0):###第一次跑不能EOS否则没有了
|
||||||
logits = logits[:, :-1] ###刨除1024终止符号的概率
|
logits = logits[:, :-1] ###刨除1024终止符号的概率
|
||||||
samples = sample(
|
samples = sample(
|
||||||
logits[0], y, top_k=top_k, top_p=1.0, repetition_penalty=1.35
|
logits[0], y, top_k=top_k, top_p=top_p, repetition_penalty=1.35, temperature=temperature
|
||||||
)[0].unsqueeze(0)
|
)[0].unsqueeze(0)
|
||||||
# 本次生成的 semantic_ids 和之前的 y 构成新的 y
|
# 本次生成的 semantic_ids 和之前的 y 构成新的 y
|
||||||
# print(samples.shape)#[1,1]#第一个1是bs
|
# print(samples.shape)#[1,1]#第一个1是bs
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/model/utils.py\
|
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/model/utils.py\
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
def sequence_mask(length, max_length=None):
|
def sequence_mask(length, max_length=None):
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
@ -159,3 +159,70 @@ def sample(
|
|||||||
)
|
)
|
||||||
idx_next = multinomial_sample_one_no_sync(probs)
|
idx_next = multinomial_sample_one_no_sync(probs)
|
||||||
return idx_next, probs
|
return idx_next, probs
|
||||||
|
|
||||||
|
def dpo_loss(policy_chosen_logps: torch.FloatTensor,
             policy_rejected_logps: torch.FloatTensor,
             reference_chosen_logps: torch.FloatTensor,
             reference_rejected_logps: torch.FloatTensor,
             beta: float,
             reference_free: bool = False) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
    """Compute the DPO loss for a batch of chosen/rejected log-probabilities.

    Args:
        policy_chosen_logps: policy log-probabilities of the chosen samples.
        policy_rejected_logps: policy log-probabilities of the rejected samples.
        reference_chosen_logps: reference log-probabilities of the chosen
            samples.  May be ``None`` when ``reference_free`` is true.
        reference_rejected_logps: reference log-probabilities of the rejected
            samples.  May be ``None`` when ``reference_free`` is true.
        beta: scale applied to the log-ratio difference and to the rewards.
        reference_free: when true the reference log-ratio is taken as 0.

    Returns:
        ``(loss, chosen_rewards, rejected_rewards)``: the mean of
        ``-logsigmoid(beta * (pi_logratios - ref_logratios))`` and the
        detached, beta-scaled differences between policy and reference
        log-probabilities.

    Raises:
        TypeError: if a reference value is ``None`` while ``reference_free``
            is false.
    """
    pi_logratios = policy_chosen_logps - policy_rejected_logps

    if reference_free:
        # The reference model is not consulted for the loss; a missing
        # reference counts as 0 for the reported rewards.
        ref_chosen = 0 if reference_chosen_logps is None else reference_chosen_logps
        ref_rejected = 0 if reference_rejected_logps is None else reference_rejected_logps
        ref_logratios = 0
    else:
        if reference_chosen_logps is None or reference_rejected_logps is None:
            raise TypeError(
                "reference log-probabilities are required unless reference_free=True"
            )
        ref_chosen = reference_chosen_logps
        ref_rejected = reference_rejected_logps
        ref_logratios = ref_chosen - ref_rejected

    logits = pi_logratios - ref_logratios

    losses = -F.logsigmoid(beta * logits)
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen).detach()
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected).detach()

    return losses.mean(), chosen_rewards, rejected_rewards
|
||||||
|
|
||||||
|
def get_batch_logps(logits_target: torch.FloatTensor, logits_reject: torch.FloatTensor, labels_target: torch.LongTensor, labels_reject: torch.LongTensor, average_log_prob: bool = False) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
    """Reduce per-token label log-probabilities to one value per sequence.

    For each of the two (logits, labels) pairs, the log-softmax over the
    last dimension is gathered at the label indices (dim 2) and reduced
    over the last remaining dimension.

    Args:
        logits_target: logits for the chosen sequences.
        logits_reject: logits for the rejected sequences.
        labels_target: label indices for ``logits_target``.
        labels_reject: label indices for ``logits_reject``.
        average_log_prob: when true, return the mean per-token
            log-probability instead of the sum.

    Returns:
        ``(target_logps, reject_logps)``.

    NOTE(review): no padding mask is applied here, so every position of
    the label tensors contributes to the sum/mean -- confirm that this is
    intended for padded batches.
    """

    def _sequence_logps(logits, labels):
        per_token_logps = torch.gather(
            logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)
        ).squeeze(2)
        if average_log_prob:
            return per_token_logps.mean(-1)
        return per_token_logps.sum(-1)

    return (
        _sequence_logps(logits_target, labels_target),
        _sequence_logps(logits_reject, labels_reject),
    )
|
||||||
|
|
||||||
|
def make_reject_y(y_o, y_lens):
    """Build corrupted ("rejected") copies of a batch of token sequences.

    Each row of ``y_o`` is corrupted by one of two operations chosen at
    random: ``repeat_P`` duplicates a random span, ``lost_P`` deletes a
    random span.  The results are right-padded with zeros to a common
    length and stacked.

    Args:
        y_o: 2-D tensor whose rows are the source sequences.
        y_lens: per-row lengths; only its length (the batch size) and its
            device are used here.

    Returns:
        ``(reject_y, reject_y_lens)``: the stacked, zero-padded corrupted
        rows and a tensor with the length of each corrupted row before
        padding.

    NOTE(review): each row of ``y_o`` is corrupted at its full width, not
    truncated to ``y_lens[b]`` -- if ``y_o`` is padded, padding takes part
    in the span selection and in the reported lengths; confirm intent.
    """

    def _random_span(y):
        # Two sorted random cut points in [0, len(y)).
        bounds, _ = torch.randint(0, len(y), size=(2,)).sort()
        return bounds[0], bounds[1]

    def repeat_P(y):
        # Duplicate the span [start, end) in place.
        start, end = _random_span(y)
        span = y[start:end]
        return torch.cat([y[:start], span, span, y[end:]])

    def lost_P(y):
        # Drop the span [start, end).
        start, end = _random_span(y)
        return torch.cat([y[:start], y[end:]])

    corruptions = (repeat_P, lost_P)

    bs = len(y_lens)
    reject_y = []
    reject_y_lens = []
    for b in range(bs):
        # The upper bound of torch.randint is exclusive, so it must be
        # len(corruptions) for every corruption to be reachable.
        choice = int(torch.randint(0, len(corruptions), size=(1,))[0])
        row = y_o[b]
        # An empty row has no span to sample from; pass it through.
        new_y = row if len(row) == 0 else corruptions[choice](row)
        reject_y.append(new_y)
        reject_y_lens.append(len(new_y))

    max_length = max(reject_y_lens)
    padded = [
        torch.cat(
            [row, torch.zeros(max_length - len(row), dtype=y_o.dtype, device=y_o.device)],
            dim=0,
        )
        for row in reject_y
    ]

    reject_y = torch.stack(padded, dim=0)
    reject_y_lens = torch.tensor(reject_y_lens, device=y_lens.device)

    return reject_y, reject_y_lens
|
||||||
|
@ -245,7 +245,14 @@ def splite_en_inf(sentence, language):
|
|||||||
|
|
||||||
|
|
||||||
def clean_text_inf(text, language):
    """Clean ``text`` for inference, keeping only segments in ``language``.

    The ``"all_"`` prefix is removed from ``language``.  ``text`` is split
    with ``LangSegment.getTexts`` and only the segments whose ``"lang"``
    equals the resulting language are kept, each followed by one space.
    Runs of spaces are then collapsed before the text is handed to
    ``clean_text``.

    Returns:
        ``(phones, word2ph, norm_text)`` where ``phones`` has been passed
        through ``cleaned_text_to_sequence``.
    """
    language = language.replace("all_", "")
    formattext = "".join(
        segment["text"] + " "
        for segment in LangSegment.getTexts(text)
        if segment["lang"] == language
    )
    # Collapse runs of spaces.  The test must look for a *double* space:
    # testing for a single space would never become false.
    while "  " in formattext:
        formattext = formattext.replace("  ", " ")
    phones, word2ph, norm_text = clean_text(formattext, language)
    phones = cleaned_text_to_sequence(phones)
    return phones, word2ph, norm_text
|
||||||
|
|
||||||
@ -305,9 +312,8 @@ def nonen_get_bert_inf(text, language):
|
|||||||
print(langlist)
|
print(langlist)
|
||||||
bert_list = []
|
bert_list = []
|
||||||
for i in range(len(textlist)):
|
for i in range(len(textlist)):
|
||||||
text = textlist[i]
|
|
||||||
lang = langlist[i]
|
lang = langlist[i]
|
||||||
phones, word2ph, norm_text = clean_text_inf(text, lang)
|
phones, word2ph, norm_text = clean_text_inf(textlist[i], lang)
|
||||||
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
||||||
bert_list.append(bert)
|
bert_list.append(bert)
|
||||||
bert = torch.cat(bert_list, dim=1)
|
bert = torch.cat(bert_list, dim=1)
|
||||||
@ -359,7 +365,7 @@ def merge_short_text_in_array(texts, threshold):
|
|||||||
result[len(result) - 1] += text
|
result[len(result) - 1] += text
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切")):
|
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6):
|
||||||
t0 = ttime()
|
t0 = ttime()
|
||||||
prompt_language = dict_language[prompt_language]
|
prompt_language = dict_language[prompt_language]
|
||||||
text_language = dict_language[text_language]
|
text_language = dict_language[text_language]
|
||||||
@ -439,7 +445,9 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
|||||||
None,
|
None,
|
||||||
bert,
|
bert,
|
||||||
# prompt_phone_len=ph_offset,
|
# prompt_phone_len=ph_offset,
|
||||||
top_k=config["inference"]["top_k"],
|
top_k=top_k,
|
||||||
|
top_p=top_p,
|
||||||
|
temperature=temperature,
|
||||||
early_stop_num=hz * max_sec,
|
early_stop_num=hz * max_sec,
|
||||||
)
|
)
|
||||||
t3 = ttime()
|
t3 = ttime()
|
||||||
@ -616,6 +624,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
value=i18n("凑四句一切"),
|
value=i18n("凑四句一切"),
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
|
with gr.Row():
|
||||||
|
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
|
||||||
|
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
|
||||||
|
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
||||||
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
||||||
output = gr.Audio(label=i18n("输出的语音"))
|
output = gr.Audio(label=i18n("输出的语音"))
|
||||||
|
|
||||||
|
@ -33,13 +33,13 @@ from time import time as ttime
|
|||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
def my_save(fea, path):  # fix issue: torch.save doesn't support chinese path
    """Save ``fea`` with ``torch.save`` and move the result to ``path``.

    The object is first written to a temporary file in the current working
    directory, named from the current time and ``i_part``, and then moved
    to the directory and file name taken from ``path``.

    Args:
        fea: object handed to ``torch.save``.
        path: final destination of the saved file.
    """
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = "%s%s.pth" % (ttime(), i_part)
    try:
        torch.save(fea, tmp_path)
        # os.path.join keeps a bare file name relative; "%s/%s" would turn
        # an empty directory part into the filesystem root.
        shutil.move(tmp_path, os.path.join(target_dir, name))
    finally:
        # Only present if saving or moving failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
||||||
|
|
||||||
|
|
||||||
txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
|
txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
|
||||||
|
@ -35,7 +35,8 @@ import shutil
|
|||||||
def my_save(fea, path):  # fix issue: torch.save doesn't support chinese path
    """Save ``fea`` with ``torch.save`` and move the result to ``path``.

    The object is first written to a temporary file in the current working
    directory, named from the current time and ``i_part``, and then moved
    to the directory and file name taken from ``path``.

    Args:
        fea: object handed to ``torch.save``.
        path: final destination of the saved file.
    """
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = "%s%s.pth" % (ttime(), i_part)
    try:
        torch.save(fea, tmp_path)
        # os.path.join keeps a bare file name relative; "%s/%s" would turn
        # an empty directory part into the filesystem root.
        shutil.move(tmp_path, os.path.join(target_dir, name))
    finally:
        # Only present if saving or moving failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
||||||
|
|
||||||
|
@ -672,6 +672,7 @@ class ToneSandhi:
|
|||||||
and i + 1 < len(seg)
|
and i + 1 < len(seg)
|
||||||
and seg[i - 1][0] == seg[i + 1][0]
|
and seg[i - 1][0] == seg[i + 1][0]
|
||||||
and seg[i - 1][1] == "v"
|
and seg[i - 1][1] == "v"
|
||||||
|
and seg[i + 1][1] == "v"
|
||||||
):
|
):
|
||||||
new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
|
new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
|
||||||
else:
|
else:
|
||||||
|
@ -154,7 +154,7 @@ docker compose -f "docker-compose.yaml" up -d
|
|||||||
As above, modify the corresponding parameters based on your actual situation, then run the following command:
|
As above, modify the corresponding parameters based on your actual situation, then run the following command:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
||||||
```
|
```
|
||||||
|
|
||||||
## Dataset Format
|
## Dataset Format
|
||||||
|
@ -5,8 +5,10 @@ import torch
|
|||||||
# 推理用的指定模型
|
# 推理用的指定模型
|
||||||
sovits_path = ""
|
sovits_path = ""
|
||||||
gpt_path = ""
|
gpt_path = ""
|
||||||
is_half = eval(os.environ.get("is_half","True"))
|
is_half_str = os.environ.get("is_half", "True")
|
||||||
is_share=False
|
is_half = True if is_half_str.lower() == 'true' else False
|
||||||
|
is_share_str = os.environ.get("is_share","False")
|
||||||
|
is_share= True if is_share_str.lower() == 'true' else False
|
||||||
|
|
||||||
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
||||||
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
|
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
|
||||||
|
@ -2,10 +2,11 @@ version: '3.8'
|
|||||||
|
|
||||||
services:
|
services:
|
||||||
gpt-sovits:
|
gpt-sovits:
|
||||||
image: breakstring/gpt-sovits:xxxxx # please change the image name and tag base your environment
|
image: breakstring/gpt-sovits:latest # Please change the image name and tag based on your environment. If the tag contains the word 'elite', such as "latest-elite", the image does not include the necessary models (GPT-SoVITS, UVR5, Damo ASR, etc.); you will need to download them yourself and map them into the container.
|
||||||
container_name: gpt-sovits-container
|
container_name: gpt-sovits-container
|
||||||
environment:
|
environment:
|
||||||
- is_half=False
|
- is_half=False
|
||||||
|
- is_share=False
|
||||||
volumes:
|
volumes:
|
||||||
- ./output:/workspace/output
|
- ./output:/workspace/output
|
||||||
- ./logs:/workspace/logs
|
- ./logs:/workspace/logs
|
||||||
@ -13,7 +14,7 @@ services:
|
|||||||
- ./reference:/workspace/reference
|
- ./reference:/workspace/reference
|
||||||
working_dir: /workspace
|
working_dir: /workspace
|
||||||
ports:
|
ports:
|
||||||
- "9870:9870"
|
- "9880:9880"
|
||||||
- "9871:9871"
|
- "9871:9871"
|
||||||
- "9872:9872"
|
- "9872:9872"
|
||||||
- "9873:9873"
|
- "9873:9873"
|
||||||
|
@ -107,8 +107,21 @@
|
|||||||
|
|
||||||
1-GPT训练卡死(win10 1909)和https://github.com/RVC-Boss/GPT-SoVITS/issues/232 (系统语言繁体)GPT训练报错,[尝试修复](https://github.com/RVC-Boss/GPT-SoVITS/commit/59f35adad85815df27e9c6b33d420f5ebfd8376b)。
|
1-GPT训练卡死(win10 1909)和https://github.com/RVC-Boss/GPT-SoVITS/issues/232 (系统语言繁体)GPT训练报错,[尝试修复](https://github.com/RVC-Boss/GPT-SoVITS/commit/59f35adad85815df27e9c6b33d420f5ebfd8376b)。
|
||||||
|
|
||||||
|
### 20240212更新
|
||||||
|
|
||||||
|
1-faster whisper和funasr逻辑优化。faster whisper转镜像站下载,规避huggingface连不上的问题。
|
||||||
|
|
||||||
|
2-DPO Loss实验性训练选项开启,通过构造负样本训练缓解GPT重复漏字问题。推理界面公开几个推理参数。 https://github.com/RVC-Boss/GPT-SoVITS/pull/457
|
||||||
|
|
||||||
|
### 20240214更新
|
||||||
|
|
||||||
|
1-训练支持中文实验名(原来会报错)
|
||||||
|
|
||||||
|
2-DPO训练改为可勾选选项而非必须。如勾选batch size自动减半。修复推理界面新参数不传参的问题。
|
||||||
|
|
||||||
todolist:
|
todolist:
|
||||||
|
|
||||||
1-中文多音字推理优化
|
1-中文多音字推理优化
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,7 +138,7 @@ docker compose -f "docker-compose.yaml" up -d
|
|||||||
同上,根据您自己的实际情况修改对应的参数,然后运行如下命令:
|
同上,根据您自己的实际情况修改对应的参数,然后运行如下命令:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
||||||
```
|
```
|
||||||
|
|
||||||
### 预训练模型
|
### 预训练模型
|
||||||
|
@ -138,7 +138,7 @@ docker compose -f "docker-compose.yaml" up -d
|
|||||||
上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します:
|
上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します:
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
||||||
```
|
```
|
||||||
|
|
||||||
### 事前訓練済みモデル
|
### 事前訓練済みモデル
|
||||||
|
@ -141,7 +141,7 @@ docker compose -f "docker-compose.yaml" up -d
|
|||||||
위와 동일하게 실제 상황에 맞게 매개변수를 수정한 다음 다음 명령을 실행합니다:
|
위와 동일하게 실제 상황에 맞게 매개변수를 수정한 다음 다음 명령을 실행합니다:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
|
||||||
```
|
```
|
||||||
|
|
||||||
### 사전 훈련된 모델
|
### 사전 훈련된 모델
|
||||||
|
@ -8,8 +8,16 @@
|
|||||||
"是否开启UVR5-WebUI": "Activer UVR5-WebUI",
|
"是否开启UVR5-WebUI": "Activer UVR5-WebUI",
|
||||||
"UVR5进程输出信息": "Informations de processus UVR5",
|
"UVR5进程输出信息": "Informations de processus UVR5",
|
||||||
"0b-语音切分工具": "0b-Outil de découpage vocal",
|
"0b-语音切分工具": "0b-Outil de découpage vocal",
|
||||||
|
".list标注文件的路径": "Chemin du fichier d'annotation .list",
|
||||||
|
"GPT模型列表": "Liste des modèles GPT",
|
||||||
|
"SoVITS模型列表": "Liste des modèles SoVITS",
|
||||||
|
"填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "Répertoire où sont enregistrés les fichiers audio après la découpe ! Chemin complet du fichier audio à lire = ce répertoire - nom du fichier correspondant à la forme d'onde dans le fichier liste (pas le chemin complet).",
|
||||||
"音频自动切分输入路径,可文件可文件夹": "Chemin d'entrée automatique de découpage audio, peut être un fichier ou un dossier",
|
"音频自动切分输入路径,可文件可文件夹": "Chemin d'entrée automatique de découpage audio, peut être un fichier ou un dossier",
|
||||||
"切分后的子音频的输出根目录": "Répertoire racine de sortie des sous-audios après découpage",
|
"切分后的子音频的输出根目录": "Répertoire racine de sortie des sous-audios après découpage",
|
||||||
|
"怎么切": "Comment découper",
|
||||||
|
"不切": "Pas de découpe",
|
||||||
|
"凑四句一切": "Composez quatre phrases pour tout remplir",
|
||||||
|
"按英文句号.切": "Découpez par des points en anglais",
|
||||||
"threshold:音量小于这个值视作静音的备选切割点": "seuil: le volume inférieur à cette valeur est considéré comme un point de coupe silencieux alternatif",
|
"threshold:音量小于这个值视作静音的备选切割点": "seuil: le volume inférieur à cette valeur est considéré comme un point de coupe silencieux alternatif",
|
||||||
"min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: longueur minimale de chaque segment, si le premier segment est trop court, il est continué avec le segment suivant jusqu'à dépasser cette valeur",
|
"min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: longueur minimale de chaque segment, si le premier segment est trop court, il est continué avec le segment suivant jusqu'à dépasser cette valeur",
|
||||||
"min_interval:最短切割间隔": "min_interval: intervalle de coupe minimum",
|
"min_interval:最短切割间隔": "min_interval: intervalle de coupe minimum",
|
||||||
|
@ -13,7 +13,7 @@ def check_fw_local_models():
|
|||||||
"large-v2", "large-v3"]
|
"large-v2", "large-v3"]
|
||||||
for i, size in enumerate(model_size_list):
|
for i, size in enumerate(model_size_list):
|
||||||
if os.path.exists(f'tools/asr/models/faster-whisper-{size}'):
|
if os.path.exists(f'tools/asr/models/faster-whisper-{size}'):
|
||||||
model_size_list[i] = size + '(local)'
|
model_size_list[i] = size + '-local'
|
||||||
return model_size_list
|
return model_size_list
|
||||||
|
|
||||||
asr_dict = {
|
asr_dict = {
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
|
||||||
import traceback
|
import traceback
|
||||||
import requests
|
import requests
|
||||||
from glob import glob
|
from glob import glob
|
||||||
@ -8,6 +9,7 @@ from faster_whisper import WhisperModel
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from tools.asr.config import check_fw_local_models
|
from tools.asr.config import check_fw_local_models
|
||||||
|
from tools.asr.funasr_asr import only_asr
|
||||||
|
|
||||||
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
|
||||||
|
|
||||||
@ -35,8 +37,8 @@ language_code_list = [
|
|||||||
"auto"]
|
"auto"]
|
||||||
|
|
||||||
def execute_asr(input_folder, output_folder, model_size, language,precision):
|
def execute_asr(input_folder, output_folder, model_size, language,precision):
|
||||||
if 'local' in model_size:
|
if '-local' in model_size:
|
||||||
model_size = model_size.split('(')[0]
|
model_size = model_size[:-6]
|
||||||
model_path = f'tools/asr/models/faster-whisper-{model_size}'
|
model_path = f'tools/asr/models/faster-whisper-{model_size}'
|
||||||
else:
|
else:
|
||||||
model_path = model_size
|
model_path = model_size
|
||||||
@ -63,8 +65,14 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):
|
|||||||
vad_parameters = dict(min_silence_duration_ms=700),
|
vad_parameters = dict(min_silence_duration_ms=700),
|
||||||
language = language)
|
language = language)
|
||||||
text = ''
|
text = ''
|
||||||
for segment in segments:
|
|
||||||
text += segment.text
|
if info.language == "zh":
|
||||||
|
print("检测为中文文本,转funasr处理")
|
||||||
|
text = only_asr(file)
|
||||||
|
|
||||||
|
if text == '':
|
||||||
|
for segment in segments:
|
||||||
|
text += segment.text
|
||||||
output.append(f"{file}|{output_file_name}|{info.language.upper()}|{text}")
|
output.append(f"{file}|{output_file_name}|{info.language.upper()}|{text}")
|
||||||
except:
|
except:
|
||||||
return print(traceback.format_exc())
|
return print(traceback.format_exc())
|
||||||
@ -96,4 +104,4 @@ if __name__ == '__main__':
|
|||||||
model_size = cmd.model_size,
|
model_size = cmd.model_size,
|
||||||
language = cmd.language,
|
language = cmd.language,
|
||||||
precision = cmd.precision,
|
precision = cmd.precision,
|
||||||
)
|
)
|
||||||
|
@ -23,6 +23,14 @@ model = AutoModel(
|
|||||||
punc_model_revision = "v2.0.4",
|
punc_model_revision = "v2.0.4",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def only_asr(input_file):
    """Transcribe one audio file with the module-level ``model``.

    Args:
        input_file: value passed as ``input`` to ``model.generate``.

    Returns:
        The ``"text"`` field of the first result, or ``''`` if recognition
        raised an exception (the traceback is printed in that case).
    """
    try:
        text = model.generate(input=input_file)[0]["text"]
    except Exception:
        # Best effort: report the failure and return an empty transcript.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        text = ''
        print(traceback.format_exc())
    return text
|
||||||
|
|
||||||
def execute_asr(input_folder, output_folder, model_size, language):
|
def execute_asr(input_folder, output_folder, model_size, language):
|
||||||
input_file_names = os.listdir(input_folder)
|
input_file_names = os.listdir(input_folder)
|
||||||
input_file_names.sort()
|
input_file_names.sort()
|
||||||
@ -35,7 +43,7 @@ def execute_asr(input_folder, output_folder, model_size, language):
|
|||||||
text = model.generate(input="%s/%s"%(input_folder, name))[0]["text"]
|
text = model.generate(input="%s/%s"%(input_folder, name))[0]["text"]
|
||||||
output.append(f"{input_folder}/{name}|{output_file_name}|{language.upper()}|{text}")
|
output.append(f"{input_folder}/{name}|{output_file_name}|{language.upper()}|{text}")
|
||||||
except:
|
except:
|
||||||
return print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
|
|
||||||
output_folder = output_folder or "output/asr_opt"
|
output_folder = output_folder or "output/asr_opt"
|
||||||
os.makedirs(output_folder, exist_ok=True)
|
os.makedirs(output_folder, exist_ok=True)
|
||||||
@ -65,4 +73,4 @@ if __name__ == '__main__':
|
|||||||
output_folder = cmd.output_folder,
|
output_folder = cmd.output_folder,
|
||||||
model_size = cmd.model_size,
|
model_size = cmd.model_size,
|
||||||
language = cmd.language,
|
language = cmd.language,
|
||||||
)
|
)
|
||||||
|
6
webui.py
6
webui.py
@ -266,7 +266,7 @@ def close1Ba():
|
|||||||
return "已终止SoVITS训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
|
return "已终止SoVITS训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
|
||||||
|
|
||||||
p_train_GPT=None
|
p_train_GPT=None
|
||||||
def open1Bb(batch_size,total_epoch,exp_name,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1):
|
def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1):
|
||||||
global p_train_GPT
|
global p_train_GPT
|
||||||
if(p_train_GPT==None):
|
if(p_train_GPT==None):
|
||||||
with open("GPT_SoVITS/configs/s1longer.yaml")as f:
|
with open("GPT_SoVITS/configs/s1longer.yaml")as f:
|
||||||
@ -283,6 +283,7 @@ def open1Bb(batch_size,total_epoch,exp_name,if_save_latest,if_save_every_weights
|
|||||||
data["train"]["save_every_n_epoch"]=save_every_epoch
|
data["train"]["save_every_n_epoch"]=save_every_epoch
|
||||||
data["train"]["if_save_every_weights"]=if_save_every_weights
|
data["train"]["if_save_every_weights"]=if_save_every_weights
|
||||||
data["train"]["if_save_latest"]=if_save_latest
|
data["train"]["if_save_latest"]=if_save_latest
|
||||||
|
data["train"]["if_dpo"]=if_dpo
|
||||||
data["train"]["half_weights_save_dir"]=GPT_weight_root
|
data["train"]["half_weights_save_dir"]=GPT_weight_root
|
||||||
data["train"]["exp_name"]=exp_name
|
data["train"]["exp_name"]=exp_name
|
||||||
data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir
|
data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir
|
||||||
@ -807,6 +808,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
|
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
|
||||||
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
|
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
|
||||||
|
if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True)
|
||||||
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
|
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
|
||||||
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
|
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
|
||||||
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
|
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
|
||||||
@ -817,7 +819,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
|
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
|
||||||
button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close])
|
button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close])
|
||||||
button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close])
|
button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close])
|
||||||
button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close])
|
button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_dpo,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close])
|
||||||
button1Bb_close.click(close1Bb, [], [info1Bb,button1Bb_open,button1Bb_close])
|
button1Bb_close.click(close1Bb, [], [info1Bb,button1Bb_open,button1Bb_close])
|
||||||
with gr.TabItem(i18n("1C-推理")):
|
with gr.TabItem(i18n("1C-推理")):
|
||||||
gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。"))
|
gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user