From aa07216bba05f624c9aa50c04a9c6a66bf04c8be Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Sun, 23 Feb 2025 15:09:22 +0800
Subject: [PATCH] Fix wrong resume epoch detection that retrained one extra
 epoch on every resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the resume epoch being read incorrectly, which made every resume retrain
an epoch that had already finished.
---
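Note (editorial, not part of the commit): a minimal sketch of the off-by-one
this patch fixes, assuming the checkpoint records the epoch that had already
finished when it was saved. The helper names below are hypothetical; they only
mirror how epoch_str feeds the "for epoch in range(epoch_str,
hps.train.epochs + 1)" loop in these scripts.

    # Before the patch: epoch_str is the saved (already trained) epoch,
    # so the resumed loop repeats it.
    def resume_range_before(saved_epoch: int, total_epochs: int) -> range:
        return range(saved_epoch, total_epochs + 1)

    # After the patch: epoch_str += 1 makes the loop start at the next epoch.
    def resume_range_after(saved_epoch: int, total_epochs: int) -> range:
        return range(saved_epoch + 1, total_epochs + 1)

    assert list(resume_range_before(3, 5)) == [3, 4, 5]  # epoch 3 trained twice
    assert list(resume_range_after(3, 5)) == [4, 5]      # epoch 3 not repeated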
 GPT_SoVITS/s2_train.py         | 7 +++++--
 GPT_SoVITS/s2_train_v3.py      | 5 ++++-
 GPT_SoVITS/s2_train_v3_lora.py | 5 ++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/GPT_SoVITS/s2_train.py b/GPT_SoVITS/s2_train.py
index 5be43c9..2c7a426 100644
--- a/GPT_SoVITS/s2_train.py
+++ b/GPT_SoVITS/s2_train.py
@@ -205,6 +205,7 @@ def run(rank, n_gpus, hps):
             net_g,
             optim_g,
         )
+        epoch_str+=1
         global_step = (epoch_str - 1) * len(train_loader)
         # epoch_str = 1
         # global_step = 0
@@ -215,7 +216,7 @@ def run(rank, n_gpus, hps):
     if hps.train.pretrained_s2G != ""and hps.train.pretrained_s2G != None and os.path.exists(hps.train.pretrained_s2G):
         if rank == 0:
             logger.info("loaded pretrained %s" % hps.train.pretrained_s2G)
-        print(
+        print("loaded pretrained %s" % hps.train.pretrained_s2G,
             net_g.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
                 strict=False,
@@ -227,7 +228,7 @@ def run(rank, n_gpus, hps):
     if hps.train.pretrained_s2D != ""and hps.train.pretrained_s2D != None and os.path.exists(hps.train.pretrained_s2D):
         if rank == 0:
             logger.info("loaded pretrained %s" % hps.train.pretrained_s2D)
-        print(
+        print("loaded pretrained %s" % hps.train.pretrained_s2D,
             net_d.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2D, map_location="cpu")["weight"]
             ) if torch.cuda.is_available() else net_d.load_state_dict(
@@ -251,6 +252,7 @@ def run(rank, n_gpus, hps):
     scaler = GradScaler(enabled=hps.train.fp16_run)
 
     for epoch in range(epoch_str, hps.train.epochs + 1):
+        print("start training from epoch %s"%epoch)
         if rank == 0:
             train_and_evaluate(
                 rank,
@@ -280,6 +282,7 @@ def run(rank, n_gpus, hps):
         )
         scheduler_g.step()
         scheduler_d.step()
+    print("training done")
 
 
 def train_and_evaluate(
diff --git a/GPT_SoVITS/s2_train_v3.py b/GPT_SoVITS/s2_train_v3.py
index a5f7da7..1d8ff30 100644
--- a/GPT_SoVITS/s2_train_v3.py
+++ b/GPT_SoVITS/s2_train_v3.py
@@ -178,6 +178,7 @@ def run(rank, n_gpus, hps):
             net_g,
             optim_g,
         )
+        epoch_str+=1
         global_step = (epoch_str - 1) * len(train_loader)
         # epoch_str = 1
         # global_step = 0
@@ -188,7 +189,7 @@ def run(rank, n_gpus, hps):
     if hps.train.pretrained_s2G != ""and hps.train.pretrained_s2G != None and os.path.exists(hps.train.pretrained_s2G):
         if rank == 0:
             logger.info("loaded pretrained %s" % hps.train.pretrained_s2G)
-        print(
+        print("loaded pretrained %s" % hps.train.pretrained_s2G,
             net_g.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
                 strict=False,
@@ -225,6 +226,7 @@ def run(rank, n_gpus, hps):
     net_d=optim_d=scheduler_d=None
 
     for epoch in range(epoch_str, hps.train.epochs + 1):
+        print("start training from epoch %s"%epoch)
         if rank == 0:
             train_and_evaluate(
                 rank,
@@ -254,6 +256,7 @@ def run(rank, n_gpus, hps):
         )
         scheduler_g.step()
         # scheduler_d.step()
+    print("training done")
 
 
 def train_and_evaluate(
diff --git a/GPT_SoVITS/s2_train_v3_lora.py b/GPT_SoVITS/s2_train_v3_lora.py
index f10bde1..f2d5f83 100644
--- a/GPT_SoVITS/s2_train_v3_lora.py
+++ b/GPT_SoVITS/s2_train_v3_lora.py
@@ -161,6 +161,7 @@ def run(rank, n_gpus, hps):
             net_g,
             optim_g,
         )
+        epoch_str+=1
         global_step = (epoch_str - 1) * len(train_loader)
     except: # 如果首次不能加载,加载pretrain
         # traceback.print_exc()
@@ -170,7 +171,7 @@ def run(rank, n_gpus, hps):
     if hps.train.pretrained_s2G != ""and hps.train.pretrained_s2G != None and os.path.exists(hps.train.pretrained_s2G):
         if rank == 0:
             logger.info("loaded pretrained %s" % hps.train.pretrained_s2G)
-        print(
+        print("loaded pretrained %s" % hps.train.pretrained_s2G,
             net_g.load_state_dict(
                 torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
                 strict=False,
@@ -198,6 +199,7 @@ def run(rank, n_gpus, hps):
     net_d=optim_d=scheduler_d=None
 
     for epoch in range(epoch_str, hps.train.epochs + 1):
+        print("start training from epoch %s"%epoch)
         if rank == 0:
             train_and_evaluate(
                 rank,
@@ -226,6 +228,7 @@ def run(rank, n_gpus, hps):
             None,
         )
         scheduler_g.step()
+    print("training done")
 
 def train_and_evaluate(
     rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers
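Note (editorial, not part of the commit): with the bump applied, the unchanged
line "global_step = (epoch_str - 1) * len(train_loader)" now counts exactly
the optimizer steps taken during the completed epochs. A toy check with
hypothetical numbers, assuming one optimizer step per batch:

    saved_epoch = 3          # epoch recorded in the checkpoint (already trained)
    steps_per_epoch = 250    # stands in for len(train_loader)

    epoch_str = saved_epoch + 1                      # the patched "epoch_str += 1"
    global_step = (epoch_str - 1) * steps_per_epoch  # steps already taken
    assert global_step == 750                        # 3 finished epochs * 250 steps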