fix MCCL error on qy1

This commit is contained in:
KakaruHayate 2025-10-21 18:01:24 +08:00
parent 3a92c046f9
commit 2b64032cda
2 changed files with 2 additions and 2 deletions

View File

@@ -93,7 +93,7 @@ def run(rank, n_gpus, hps):
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
dist.init_process_group(
-backend = "mccl" if torch.musa.is_available() else ("gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"),
+backend = "mccl" if musa_ddp else ("gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"),
init_method="env://?use_libuv=False",
world_size=n_gpus,
rank=rank,

View File

@@ -91,7 +91,7 @@ def run(rank, n_gpus, hps):
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
dist.init_process_group(
-backend= "mccl" if torch.musa.is_available() else ("gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"),
+backend= "mccl" if musa_ddp else ("gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"),
init_method="env://?use_libuv=False",
world_size=n_gpus,
rank=rank,