mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-06-03 20:40:30 +08:00
fix: apply same Windows single-GPU gloo bypass to s2_train_v3 and s2_train_v3_lora
Extend the fix to the v3 and LoRA training scripts:
- s2_train_v3.py: skip dist.init_process_group() and wrap the model in DummyDDP instead of DDP on Windows with a single GPU
- s2_train_v3_lora.py: apply the same fix to the LoRA fine-tuning script
This commit is contained in:
parent
832e5b6160
commit
4820d5a101
@ -77,6 +77,7 @@ def run(rank, n_gpus, hps):
|
|||||||
writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
|
writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
|
||||||
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
|
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
|
||||||
|
|
||||||
|
if not (os.name == "nt" and n_gpus == 1):
|
||||||
dist.init_process_group(
|
dist.init_process_group(
|
||||||
backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
|
backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
|
||||||
init_method="env://?use_libuv=False",
|
init_method="env://?use_libuv=False",
|
||||||
@ -166,6 +167,16 @@ def run(rank, n_gpus, hps):
|
|||||||
# eps=hps.train.eps,
|
# eps=hps.train.eps,
|
||||||
# )
|
# )
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
if os.name == "nt" and n_gpus == 1:
|
||||||
|
class DummyDDP(torch.nn.Module):
|
||||||
|
def __init__(self, module):
|
||||||
|
super().__init__()
|
||||||
|
self.module = module
|
||||||
|
def forward(self, *args, **kwargs):
|
||||||
|
return self.module(*args, **kwargs)
|
||||||
|
net_g = DummyDDP(net_g)
|
||||||
|
# net_d = DummyDDP(net_d)
|
||||||
|
else:
|
||||||
net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
|
net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
|
||||||
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
|
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -77,6 +77,7 @@ def run(rank, n_gpus, hps):
|
|||||||
writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
|
writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
|
||||||
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
|
writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
|
||||||
|
|
||||||
|
if not (os.name == "nt" and n_gpus == 1):
|
||||||
dist.init_process_group(
|
dist.init_process_group(
|
||||||
backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
|
backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
|
||||||
init_method="env://?use_libuv=False",
|
init_method="env://?use_libuv=False",
|
||||||
@ -156,6 +157,15 @@ def run(rank, n_gpus, hps):
|
|||||||
|
|
||||||
def model2cuda(net_g, rank):
|
def model2cuda(net_g, rank):
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
if os.name == "nt" and n_gpus == 1:
|
||||||
|
class DummyDDP(torch.nn.Module):
|
||||||
|
def __init__(self, module):
|
||||||
|
super().__init__()
|
||||||
|
self.module = module
|
||||||
|
def forward(self, *args, **kwargs):
|
||||||
|
return self.module(*args, **kwargs)
|
||||||
|
net_g = DummyDDP(net_g.cuda(rank))
|
||||||
|
else:
|
||||||
net_g = DDP(net_g.cuda(rank), device_ids=[rank], find_unused_parameters=True)
|
net_g = DDP(net_g.cuda(rank), device_ids=[rank], find_unused_parameters=True)
|
||||||
else:
|
else:
|
||||||
net_g = net_g.to(device)
|
net_g = net_g.to(device)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user