mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-06 03:57:44 +08:00
* Update README * Optimize-English-G2P * docs: change akward expression * docs: update Changelog_KO.md * Fix CN punc in EN,add 's match * Adjust normalize and g2p logic * Update zh_CN.json * Update README (#827) Update README.md Update some outdated file paths and commands * 修复英文多音字,调整字典热加载,新增姓名匹配 (#869) * Fix homograph dict * Add JSON in dict * Adjust hot dict to hot reload * Add English name dict * Adjust get name dict logic * Make API Great Again (#894) * Add zh/jp/en mix * Optimize code readability and formatted output. * Try OGG streaming * Add stream mode arg * Add media type arg * Add cut punc arg * Eliminate punc risk * Update README (#895) * Update README * Update README * update README * update README * fix typo s/Licence /License (#904) * fix reformat cmd (#917) Co-authored-by: starylan <starylan@outlook.com> * Update README.md * Normalize chinese arithmetic operations (#947) * 改变训练和推理时的mask策略,以修复当batch_size>1时,产生的复读现象 * 同步main分支代码,增加“保持随机”选项 * 在colab中运行colab_webui.ipynb发生的uvr5模型缺失问题 (#968) 在colab中使用git下载uvr5模型时报错: fatal: destination path 'uvr5_weights' already exists and is not an empty directory. 
通过在下载前将原本从本仓库下载的uvr5_weights文件夹删除可以解决问题。 * [ASR] 修复FasterWhisper遍历输入路径失败 (#956) * remove glob * rename * reset mirror pos * 回退mask策略; 回退pad策略; 在T2SBlock中添加padding_mask,以减少pad的影响; 开放repetition_penalty参数,让用户自行调整重复惩罚的强度; 增加parallel_infer参数,用于开启或关闭并行推理,关闭时与0307版本保持一致; 在webui中增加“保持随机”选项; 同步main分支代码。 * 删除无用注释 --------- Co-authored-by: Lion <drain.daters.0p@icloud.com> Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Co-authored-by: KamioRinn <snowsdream@live.com> Co-authored-by: Pengoose <pengoose_dev@naver.com> Co-authored-by: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com> Co-authored-by: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Co-authored-by: KamioRinn <63162909+KamioRinn@users.noreply.github.com> Co-authored-by: Lion-Wu <130235128+Lion-Wu@users.noreply.github.com> Co-authored-by: digger yu <digger-yu@outlook.com> Co-authored-by: SapphireLab <36986837+SapphireLab@users.noreply.github.com> Co-authored-by: starylan <starylan@outlook.com> Co-authored-by: shadow01a <141255649+shadow01a@users.noreply.github.com>
142 lines
4.7 KiB
Python
142 lines
4.7 KiB
Python
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
|
# reference: https://github.com/lifeiteng/vall-e
|
|
import os, sys
|
|
|
|
# Append the current working directory to sys.path before the `AR.*` imports
# further down run — presumably so those packages resolve when the process is
# launched from the repository root (TODO confirm against the launch scripts).
now_dir = os.getcwd()
|
|
sys.path.append(now_dir)
|
|
from typing import Dict
|
|
|
|
import torch
|
|
from pytorch_lightning import LightningModule
|
|
from AR.models.t2s_model import Text2SemanticDecoder
|
|
from AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
|
from AR.modules.optim import ScaledAdam
|
|
|
|
class Text2SemanticLightningModule(LightningModule):
    """Lightning wrapper that trains a ``Text2SemanticDecoder``.

    When built for training, the module turns off Lightning's automatic
    optimization: ``training_step`` calls backward itself and drives the
    optimizer and the LR scheduler explicitly.
    """

    def __init__(self, config, output_dir, is_train=True):
        """Build the decoder and, for training, prepare weights and output dir.

        Args:
            config: Mapping-like configuration. Read here through
                ``config.get("pretrained_s1")``; other methods index
                ``config["train"]`` and ``config["optimizer"]``.
            output_dir: Base output location. It must support the ``/``
                operator and the result must provide ``mkdir`` (presumably a
                ``pathlib.Path`` — confirm against callers). Only touched
                when ``is_train`` is true.
            is_train: When true, load the optional pretrained weights, switch
                to manual optimization, save hyperparameters and create
                ``output_dir / "eval"``.
        """
        super().__init__()
        self.config = config
        self.top_k = 3
        self.model = Text2SemanticDecoder(config=config, top_k=self.top_k)

        pretrained_s1 = config.get("pretrained_s1")
        if not is_train:
            # Nothing below applies outside of training.
            return

        if pretrained_s1:
            # Weights are read from the checkpoint's "weight" entry (an
            # earlier revision of this code read "state_dict" instead). The
            # result of load_state_dict is printed as-is.
            # NOTE(review): torch.load unpickles the file, so `pretrained_s1`
            # should only ever point at a trusted checkpoint.
            print(
                self.load_state_dict(
                    torch.load(pretrained_s1, map_location="cpu")["weight"]
                )
            )

        self.automatic_optimization = False
        self.save_hyperparameters()
        self.eval_dir = output_dir / "eval"
        self.eval_dir.mkdir(parents=True, exist_ok=True)

    def training_step(self, batch: Dict, batch_idx: int):
        """Run forward/backward on one batch and log loss, LR and accuracy.

        Args:
            batch: Mapping providing the entries ``"phoneme_ids"``,
                ``"phoneme_ids_len"``, ``"semantic_ids"``,
                ``"semantic_ids_len"`` and ``"bert_feature"``.
            batch_idx: Index of the batch; decides when the optimizer and the
                scheduler are stepped.

        Returns:
            None.
        """
        optimizer = self.optimizers()
        lr_scheduler = self.lr_schedulers()

        # The comparison against True is kept on purpose: only a value equal
        # to True selects `forward`; anything else falls back to
        # `forward_old`.
        if self.config["train"].get("if_dpo", False) == True:  # noqa: E712
            compute_loss = self.model.forward
        else:
            compute_loss = self.model.forward_old

        batch_keys = (
            "phoneme_ids",
            "phoneme_ids_len",
            "semantic_ids",
            "semantic_ids_len",
            "bert_feature",
        )
        loss, acc = compute_loss(*[batch[key] for key in batch_keys])

        self.manual_backward(loss)

        # Backward runs on every batch but gradients are only applied and
        # cleared at batch_idx 4, 8, 12, ...
        # NOTE(review): batch 0 is excluded, so the first update covers
        # batches 0-4 (five) while later updates cover four — confirm that
        # this is intended before changing it.
        if batch_idx > 0 and batch_idx % 4 == 0:
            optimizer.step()
            optimizer.zero_grad()
            lr_scheduler.step()

        shared_log_opts = {"on_epoch": True, "prog_bar": True, "sync_dist": True}
        self.log("total_loss", loss, on_step=True, **shared_log_opts)
        self.log("lr", lr_scheduler.get_last_lr()[0], **shared_log_opts)
        self.log(f"top_{self.top_k}_acc", acc, on_step=True, **shared_log_opts)

    def validation_step(self, batch: Dict, batch_idx: int):
        """Validation is disabled: return immediately, ignoring the batch."""
        return

        # Disabled implementation, kept for reference:
        #
        # # get loss
        # loss, acc = self.model.forward(
        #     batch['phoneme_ids'], batch['phoneme_ids_len'],
        #     batch['semantic_ids'], batch['semantic_ids_len'],
        #     batch['bert_feature']
        # )
        #
        # self.log(
        #     "val_total_loss",
        #     loss,
        #     on_step=True,
        #     on_epoch=True,
        #     prog_bar=True,
        #     sync_dist=True)
        # self.log(
        #     f"val_top_{self.top_k}_acc",
        #     acc,
        #     on_step=True,
        #     on_epoch=True,
        #     prog_bar=True,
        #     sync_dist=True)
        #
        # # get infer output
        # semantic_len = batch['semantic_ids'].size(1)
        # prompt_len = min(int(semantic_len * 0.5), 150)
        # prompt = batch['semantic_ids'][:, :prompt_len]
        # pred_semantic = self.model.infer(batch['phoneme_ids'],
        #                                  batch['phoneme_ids_len'], prompt,
        #                                  batch['bert_feature']
        #                                  )
        # save_name = f'semantic_toks_{batch_idx}.pt'
        # save_path = os.path.join(self.eval_dir, save_name)
        # torch.save(pred_semantic.detach().cpu(), save_path)

    def configure_optimizers(self):
        """Create the ``ScaledAdam`` optimizer and its LR scheduler.

        The scheduler is a ``WarmupCosineLRSchedule`` whose ``init_lr``,
        ``peak_lr``, ``end_lr``, ``warmup_steps`` and ``total_steps`` come
        from ``config["optimizer"]`` (keys ``lr_init``, ``lr``, ``lr_end``,
        ``warmup_steps`` and ``decay_steps`` respectively).

        Returns:
            A dict with an ``"optimizer"`` entry and an ``"lr_scheduler"``
            entry; the latter is a dict holding the scheduler under
            ``"scheduler"``.
        """
        model_parameters = self.model.parameters()
        # The names are handed over as a list containing a single list of
        # parameter names.
        parameters_names = [
            [name for name, _ in self.model.named_parameters()]
        ]
        optimizer = ScaledAdam(
            model_parameters,
            lr=0.01,
            betas=(0.9, 0.95),
            clipping_scale=2.0,
            parameters_names=parameters_names,
            show_dominant_parameters=False,
            clipping_update_period=1000,
        )

        optimizer_cfg = self.config["optimizer"]
        lr_schedule = WarmupCosineLRSchedule(
            optimizer,
            init_lr=optimizer_cfg["lr_init"],
            peak_lr=optimizer_cfg["lr"],
            end_lr=optimizer_cfg["lr_end"],
            warmup_steps=optimizer_cfg["warmup_steps"],
            total_steps=optimizer_cfg["decay_steps"],
        )

        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": lr_schedule},
        }