mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-06-03 20:40:30 +08:00
Merge 551d3dc2812a9f8ba8ac91cfb5ed61b7806eb530 into 08d627c3338173c3229286d8787060d6559fe0f8
This commit is contained in:
commit
df29057d95
@ -36,14 +36,16 @@ class DistributedBucketSampler(Sampler[T_co]):
|
||||
drop_last: bool = False,
|
||||
batch_size: int = 32,
|
||||
) -> None:
|
||||
# Patched: support non-DDP single-GPU runs (Lightning strategy='auto' on
|
||||
# Windows). When the distributed group isn't initialized, fall back to
|
||||
# a single-replica configuration.
|
||||
_dist_ready = (
|
||||
dist.is_available() and dist.is_initialized() and torch.cuda.is_available()
|
||||
)
|
||||
if num_replicas is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError("Requires distributed package to be available")
|
||||
num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
|
||||
num_replicas = dist.get_world_size() if _dist_ready else 1
|
||||
if rank is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError("Requires distributed package to be available")
|
||||
rank = dist.get_rank() if torch.cuda.is_available() else 0
|
||||
rank = dist.get_rank() if _dist_ready else 0
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.set_device(rank)
|
||||
if rank >= num_replicas or rank < 0:
|
||||
|
||||
@ -114,12 +114,18 @@ def main(args):
|
||||
# val_check_interval=9999999999999999999999,  # do not run validation
|
||||
# check_val_every_n_epoch=None,
|
||||
limit_val_batches=0,
|
||||
devices=-1 if torch.cuda.is_available() else 1,
|
||||
# On Windows, force single-device (no DDP) — see strategy comment below.
|
||||
# Non-Windows preserves original "all GPUs" behaviour.
|
||||
devices=(1 if platform.system() == "Windows" else -1) if torch.cuda.is_available() else 1,
|
||||
benchmark=False,
|
||||
fast_dev_run=False,
|
||||
strategy=DDPStrategy(process_group_backend="nccl" if platform.system() != "Windows" else "gloo")
|
||||
if torch.cuda.is_available()
|
||||
else "auto",
|
||||
# On Windows, DDPStrategy with the gloo backend crashes with native
|
||||
# access violations on Blackwell (sm_120) / CUDA 12.8. Lightning's
|
||||
# "auto" strategy picks `single_device` for 1 GPU which avoids DDP
|
||||
# entirely. Non-Windows behaviour is preserved (NCCL DDP).
|
||||
strategy="auto"
|
||||
if (platform.system() == "Windows" or not torch.cuda.is_available())
|
||||
else DDPStrategy(process_group_backend="nccl"),
|
||||
precision=config["train"]["precision"],
|
||||
logger=logger,
|
||||
num_sanity_val_steps=0,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user