Merge 551d3dc2812a9f8ba8ac91cfb5ed61b7806eb530 into 08d627c3338173c3229286d8787060d6559fe0f8

2026-06-03 20:40:30 +08:00 · 2026-05-16 19:11:17 -07:00 · 2026-05-16 19:11:17 -07:00 · df29057d95
commit df29057d95
parent 08d627c333 551d3dc281
2 changed files with 18 additions and 10 deletions
--- a/GPT_SoVITS/AR/data/bucket_sampler.py
+++ b/GPT_SoVITS/AR/data/bucket_sampler.py
@ -36,14 +36,16 @@ class DistributedBucketSampler(Sampler[T_co]):
        drop_last: bool = False,
        batch_size: int = 32,
    ) -> None:
+        # Patched: support non-DDP single-GPU runs (Lightning strategy='auto' on
+        # Windows). When the distributed process group isn't initialized or CUDA
+        # is unavailable, fall back to a single replica (world size 1, rank 0).
+        _dist_ready = (
+            dist.is_available() and dist.is_initialized() and torch.cuda.is_available()
+        )
        if num_replicas is None:
-            if not dist.is_available():
-                raise RuntimeError("Requires distributed package to be available")
-            num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
+            num_replicas = dist.get_world_size() if _dist_ready else 1
        if rank is None:
-            if not dist.is_available():
-                raise RuntimeError("Requires distributed package to be available")
-            rank = dist.get_rank() if torch.cuda.is_available() else 0
+            rank = dist.get_rank() if _dist_ready else 0
            if torch.cuda.is_available():
                torch.cuda.set_device(rank)
        if rank >= num_replicas or rank < 0:
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@ -114,12 +114,18 @@ def main(args):
        # val_check_interval=9999999999999999999999,###不要验证
        # check_val_every_n_epoch=None,
        limit_val_batches=0,
-        devices=-1 if torch.cuda.is_available() else 1,
+        # On Windows, force single-device (no DDP) — see strategy comment below.
+        # Non-Windows preserves original "all GPUs" behaviour.
+        devices=(1 if platform.system() == "Windows" else -1) if torch.cuda.is_available() else 1,
        benchmark=False,
        fast_dev_run=False,
-        strategy=DDPStrategy(process_group_backend="nccl" if platform.system() != "Windows" else "gloo")
-        if torch.cuda.is_available()
-        else "auto",
+        # On Windows, DDPStrategy with the gloo backend crashes with native
+        # access violations on Blackwell (sm_120) / CUDA 12.8. Lightning's
+        # "auto" strategy picks `single_device` for 1 GPU which avoids DDP
+        # entirely. Non-Windows behaviour is preserved (NCCL DDP).
+        strategy="auto"
+        if (platform.system() == "Windows" or not torch.cuda.is_available())
+        else DDPStrategy(process_group_backend="nccl"),
        precision=config["train"]["precision"],
        logger=logger,
        num_sanity_val_steps=0,