From 780383d5bd0d09a4f132b5ab1e80c04c9606b48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=9C=E4=BA=91?= Date: Sat, 18 Apr 2026 16:54:26 +0800 Subject: [PATCH] =?UTF-8?q?[codex]=20Improve=20Windows=20single-GPU=20v3?= =?UTF-8?q?=20LoRA=20training=20/=20=E6=94=B9=E8=BF=9B=20Windows=20?= =?UTF-8?q?=E5=8D=95=E5=8D=A1=20v3=20LoRA=20=E8=AE=AD=E7=BB=83=E6=B5=81?= =?UTF-8?q?=E7=A8=8B=20(#2767)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve Windows single-GPU v3 LoRA training * Drop unrelated checkpoint helper change from PR * Tighten PR scope to single-GPU training path fixes --- GPT_SoVITS/s2_train_v3_lora.py | 41 +++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/GPT_SoVITS/s2_train_v3_lora.py b/GPT_SoVITS/s2_train_v3_lora.py index ff62ccfe..f0b96fb8 100644 --- a/GPT_SoVITS/s2_train_v3_lora.py +++ b/GPT_SoVITS/s2_train_v3_lora.py @@ -55,6 +55,10 @@ def main(): n_gpus = torch.cuda.device_count() else: n_gpus = 1 + if n_gpus <= 1: + run(0, n_gpus, hps) + return + os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = str(randint(20000, 55555)) @@ -77,12 +81,14 @@ def run(rank, n_gpus, hps): writer = SummaryWriter(log_dir=hps.s2_ckpt_dir) writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval")) - dist.init_process_group( - backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl", - init_method="env://?use_libuv=False", - world_size=n_gpus, - rank=rank, - ) + use_ddp = n_gpus > 1 + if use_ddp: + dist.init_process_group( + backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl", + init_method="env://?use_libuv=False", + world_size=n_gpus, + rank=rank, + ) torch.manual_seed(hps.train.seed) if torch.cuda.is_available(): torch.cuda.set_device(rank) @@ -118,15 +124,20 @@ def run(rank, n_gpus, hps): shuffle=True, ) collate_fn = TextAudioSpeakerCollate() - train_loader = DataLoader( - train_dataset, - num_workers=5, + worker_count = 0 if os.name == "nt" and n_gpus <= 1 else min(2 if os.name == "nt" else 5, os.cpu_count() or 1) + loader_kwargs = dict( + num_workers=worker_count, shuffle=False, - pin_memory=True, + pin_memory=torch.cuda.is_available(), collate_fn=collate_fn, batch_sampler=train_sampler, - persistent_workers=True, - prefetch_factor=3, + ) + if worker_count > 0: + loader_kwargs["persistent_workers"] = True + loader_kwargs["prefetch_factor"] = 2 if os.name == "nt" else 3 + train_loader = DataLoader( + train_dataset, + **loader_kwargs, ) save_root = "%s/logs_s2_%s_lora_%s" % (hps.data.exp_dir, hps.model.version, hps.train.lora_rank) os.makedirs(save_root, exist_ok=True) @@ -156,7 +167,9 @@ def run(rank, n_gpus, hps): def model2cuda(net_g, rank): if torch.cuda.is_available(): - net_g = DDP(net_g.cuda(rank), device_ids=[rank], find_unused_parameters=True) + net_g = net_g.cuda(rank) + if use_ddp: + net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) else: net_g = net_g.to(device) return net_g @@ -242,6 +255,8 @@ def run(rank, n_gpus, hps): None, ) scheduler_g.step() + if use_ddp and dist.is_initialized(): + dist.destroy_process_group() print("training done")