# Patch summary (descriptive preamble; it precedes the first `diff --git`
# header and is not part of any hunk).
#
# Purpose: let training run on a single GPU without an initialized
# torch.distributed process group, and stop using DDP on Windows.
#
# 1) GPT_SoVITS/AR/data/bucket_sampler.py -- hunk inside
#    `class DistributedBucketSampler(Sampler[T_co])`, constructor body:
#    * Adds `_dist_ready`, which is true only when `dist.is_available()`,
#      `dist.is_initialized()` and `torch.cuda.is_available()` all hold.
#    * When `num_replicas` is None it becomes `dist.get_world_size()` if
#      `_dist_ready`, otherwise 1.
#    * When `rank` is None it becomes `dist.get_rank()` if `_dist_ready`,
#      otherwise 0.
#    * Removes both `RuntimeError("Requires distributed package to be
#      available")` raises; an unavailable distributed package now yields
#      the single-replica defaults instead of an exception.
#
# 2) GPT_SoVITS/s1_train.py -- hunk inside `def main(args)`, keyword
#    arguments of a call whose opening line is outside the hunk:
#    * `devices`: 1 when CUDA is unavailable; with CUDA, 1 on Windows and
#      -1 elsewhere (the removed line used -1 whenever CUDA was available).
#    * `strategy`: "auto" on Windows or when CUDA is unavailable; otherwise
#      `DDPStrategy(process_group_backend="nccl")`. The removed expression
#      selected the "gloo" backend on Windows.
#    * The context comment `###不要验证` reads "do not validate"; it is left
#      untranslated because context lines must match the target file.
#
# NOTE(review): this copy of the patch had its line breaks and indentation
# collapsed. The layout below is reconstructed: 8-space indentation inside
# both hunks, and `torch.cuda.set_device(rank)` nested under
# `if rank is None:` are assumptions -- confirm against the target files
# before applying. Hunk line counts (-36,14 +36,16 and -114,12 +114,18)
# are consistent with the reconstruction.
diff --git a/GPT_SoVITS/AR/data/bucket_sampler.py b/GPT_SoVITS/AR/data/bucket_sampler.py
index d8457334..8f0ecefa 100644
--- a/GPT_SoVITS/AR/data/bucket_sampler.py
+++ b/GPT_SoVITS/AR/data/bucket_sampler.py
@@ -36,14 +36,16 @@ class DistributedBucketSampler(Sampler[T_co]):
         drop_last: bool = False,
         batch_size: int = 32,
     ) -> None:
+        # Patched: support non-DDP single-GPU runs (Lightning strategy='auto' on
+        # Windows). When the distributed group isn't initialized, fall back to
+        # a single-replica configuration.
+        _dist_ready = (
+            dist.is_available() and dist.is_initialized() and torch.cuda.is_available()
+        )
         if num_replicas is None:
-            if not dist.is_available():
-                raise RuntimeError("Requires distributed package to be available")
-            num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
+            num_replicas = dist.get_world_size() if _dist_ready else 1
         if rank is None:
-            if not dist.is_available():
-                raise RuntimeError("Requires distributed package to be available")
-            rank = dist.get_rank() if torch.cuda.is_available() else 0
+            rank = dist.get_rank() if _dist_ready else 0
             if torch.cuda.is_available():
                 torch.cuda.set_device(rank)
         if rank >= num_replicas or rank < 0:
diff --git a/GPT_SoVITS/s1_train.py b/GPT_SoVITS/s1_train.py
index 1176f0bc..b98de600 100644
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@@ -114,12 +114,18 @@ def main(args):
         # val_check_interval=9999999999999999999999,###不要验证
         # check_val_every_n_epoch=None,
         limit_val_batches=0,
-        devices=-1 if torch.cuda.is_available() else 1,
+        # On Windows, force single-device (no DDP) — see strategy comment below.
+        # Non-Windows preserves original "all GPUs" behaviour.
+        devices=(1 if platform.system() == "Windows" else -1) if torch.cuda.is_available() else 1,
         benchmark=False,
         fast_dev_run=False,
-        strategy=DDPStrategy(process_group_backend="nccl" if platform.system() != "Windows" else "gloo")
-        if torch.cuda.is_available()
-        else "auto",
+        # On Windows, DDPStrategy with the gloo backend crashes with native
+        # access violations on Blackwell (sm_120) / CUDA 12.8. Lightning's
+        # "auto" strategy picks `single_device` for 1 GPU which avoids DDP
+        # entirely. Non-Windows behaviour is preserved (NCCL DDP).
+        strategy="auto"
+        if (platform.system() == "Windows" or not torch.cuda.is_available())
+        else DDPStrategy(process_group_backend="nccl"),
         precision=config["train"]["precision"],
         logger=logger,
         num_sanity_val_steps=0,