diff --git a/GPT_SoVITS/AR/data/bucket_sampler.py b/GPT_SoVITS/AR/data/bucket_sampler.py
index 7d752db..647491f 100644
--- a/GPT_SoVITS/AR/data/bucket_sampler.py
+++ b/GPT_SoVITS/AR/data/bucket_sampler.py
@@ -41,12 +41,13 @@ class DistributedBucketSampler(Sampler[T_co]):
         if num_replicas is None:
             if not dist.is_available():
                 raise RuntimeError("Requires distributed package to be available")
-            num_replicas = dist.get_world_size()
+            num_replicas = dist.get_world_size() if torch.cuda.is_available() else 1
         if rank is None:
             if not dist.is_available():
                 raise RuntimeError("Requires distributed package to be available")
-            rank = dist.get_rank()
-        torch.cuda.set_device(rank)
+            rank = dist.get_rank() if torch.cuda.is_available() else 0
+        if torch.cuda.is_available():
+            torch.cuda.set_device(rank)
         if rank >= num_replicas or rank < 0:
             raise ValueError(
                 "Invalid rank {}, rank should be in the interval"
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 1d417b1..79e4a82 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -35,9 +35,11 @@ from my_utils import load_audio
 from tools.i18n.i18n import I18nAuto
 
 i18n = I18nAuto()
+os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # Make sure this is also set when the inference UI is launched directly.
+
 if torch.cuda.is_available():
     device = "cuda"
-elif torch.mps.is_available():
+elif torch.backends.mps.is_available():
     device = "mps"
 else:
     device = "cpu"
diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py
index 8579693..b4a145c 100644
--- a/GPT_SoVITS/prepare_datasets/1-get-text.py
+++ b/GPT_SoVITS/prepare_datasets/1-get-text.py
@@ -46,7 +46,7 @@ if os.path.exists(txt_path) == False:
     bert_dir = "%s/3-bert" % (opt_dir)
     os.makedirs(opt_dir, exist_ok=True)
     os.makedirs(bert_dir, exist_ok=True)
-    device = "cuda:0"
+    device = "cuda:0" if torch.cuda.is_available() else "mps"
     tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
     bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
     if is_half == True:
diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
index 71b48a9..31e8068 100644
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -47,7 +47,7 @@ os.makedirs(wav32dir,exist_ok=True)
 
 maxx=0.95
 alpha=0.5
-device="cuda:0"
+device="cuda:0" if torch.cuda.is_available() else "mps"
 model=cnhubert.get_model()
 # is_half=False
 if(is_half==True):
diff --git a/GPT_SoVITS/prepare_datasets/3-get-semantic.py b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
index 7cee6e4..69eea07 100644
--- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py
+++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
@@ -38,7 +38,7 @@ semantic_path = "%s/6-name2semantic-%s.tsv" % (opt_dir, i_part)
 
 if os.path.exists(semantic_path) == False:
     os.makedirs(opt_dir, exist_ok=True)
-    device = "cuda:0"
+    device = "cuda:0" if torch.cuda.is_available() else "mps"
     hps = utils.get_hparams_from_file(s2config_path)
     vq_model = SynthesizerTrn(
         hps.data.filter_length // 2 + 1,
diff --git a/GPT_SoVITS/s1_train.py b/GPT_SoVITS/s1_train.py
index db7b9a3..30c167e 100644
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@@ -116,7 +116,7 @@ def main(args):
         devices=-1,
         benchmark=False,
         fast_dev_run=False,
-        strategy = "auto" if torch.mps.is_available() else DDPStrategy(
+        strategy = "auto" if torch.backends.mps.is_available() else DDPStrategy(
             process_group_backend="nccl" if platform.system() != "Windows" else "gloo"
         ),  # MPS does not support multi-node training
         precision=config["train"]["precision"],
diff --git a/GPT_SoVITS/s2_train.py b/GPT_SoVITS/s2_train.py
index d2ec262..e6b64f6 100644
--- a/GPT_SoVITS/s2_train.py
+++ b/GPT_SoVITS/s2_train.py
@@ -44,9 +44,12 @@ global_step = 0
 
 
 def main():
     """Assume Single Node Multi GPUs Training Only"""
-    assert torch.cuda.is_available(), "CPU training is not allowed."
+    assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed."
 
-    n_gpus = torch.cuda.device_count()
+    if torch.backends.mps.is_available():
+        n_gpus = 1
+    else:
+        n_gpus = torch.cuda.device_count()
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = str(randint(20000, 55555))
@@ -70,13 +73,14 @@ def run(rank, n_gpus, hps):
         writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
 
     dist.init_process_group(
-        backend="gloo" if os.name == "nt" else "nccl",
+        backend = "gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl",
         init_method="env://",
         world_size=n_gpus,
         rank=rank,
     )
     torch.manual_seed(hps.train.seed)
-    torch.cuda.set_device(rank)
+    if torch.cuda.is_available():
+        torch.cuda.set_device(rank)
 
     train_dataset = TextAudioSpeakerLoader(hps.data)  ########
     train_sampler = DistributedBucketSampler(
@@ -128,9 +132,14 @@ def run(rank, n_gpus, hps):
         hps.train.segment_size // hps.data.hop_length,
         n_speakers=hps.data.n_speakers,
         **hps.model,
-    ).cuda(rank)
+    ).cuda(rank) if torch.cuda.is_available() else SynthesizerTrn(
+        hps.data.filter_length // 2 + 1,
+        hps.train.segment_size // hps.data.hop_length,
+        n_speakers=hps.data.n_speakers,
+        **hps.model,
+    ).to("mps")
 
-    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps")
     for name, param in net_g.named_parameters():
         if not param.requires_grad:
             print(name, "not requires_grad")
@@ -174,8 +183,12 @@ def run(rank, n_gpus, hps):
         betas=hps.train.betas,
         eps=hps.train.eps,
     )
-    net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
-    net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    if torch.cuda.is_available():
+        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
+        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    else:
+        net_g = net_g.to("mps")
+        net_d = net_d.to("mps")
 
     try:  # auto-resume if a checkpoint can be loaded
         _, _, _, epoch_str = utils.load_checkpoint(
@@ -205,6 +218,9 @@ def run(rank, n_gpus, hps):
             net_g.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
                 strict=False,
+            ) if torch.cuda.is_available() else net_g.load_state_dict(
+                torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
+                strict=False,
             )
         )  ## testing: do not load the optimizer
     if hps.train.pretrained_s2D != "":
@@ -213,6 +229,8 @@ def run(rank, n_gpus, hps):
         print(
             net_d.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2D, map_location="cpu")["weight"]
+            ) if torch.cuda.is_available() else net_d.load_state_dict(
+                torch.load(hps.train.pretrained_s2D, map_location="cpu")["weight"]
             )
         )
 
@@ -288,18 +306,26 @@ def train_and_evaluate(
         text,
         text_lengths,
     ) in tqdm(enumerate(train_loader)):
-        spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(
-            rank, non_blocking=True
-        )
-        y, y_lengths = y.cuda(rank, non_blocking=True), y_lengths.cuda(
-            rank, non_blocking=True
-        )
-        ssl = ssl.cuda(rank, non_blocking=True)
-        ssl.requires_grad = False
-        # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
-        text, text_lengths = text.cuda(rank, non_blocking=True), text_lengths.cuda(
-            rank, non_blocking=True
-        )
+        if torch.cuda.is_available():
+            spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(
+                rank, non_blocking=True
+            )
+            y, y_lengths = y.cuda(rank, non_blocking=True), y_lengths.cuda(
+                rank, non_blocking=True
+            )
+            ssl = ssl.cuda(rank, non_blocking=True)
+            ssl.requires_grad = False
+            # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
+            text, text_lengths = text.cuda(rank, non_blocking=True), text_lengths.cuda(
+                rank, non_blocking=True
+            )
+        else:
+            spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+            y, y_lengths = y.to("mps"), y_lengths.to("mps")
+            ssl = ssl.to("mps")
+            ssl.requires_grad = False
+            # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
+            text, text_lengths = text.to("mps"), text_lengths.to("mps")
 
         with autocast(enabled=hps.train.fp16_run):
             (
@@ -500,13 +526,21 @@ def evaluate(hps, generator, eval_loader, writer_eval):
             text_lengths,
         ) in enumerate(eval_loader):
             print(111)
-            spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
-            y, y_lengths = y.cuda(), y_lengths.cuda()
-            ssl = ssl.cuda()
-            text, text_lengths = text.cuda(), text_lengths.cuda()
+            if torch.cuda.is_available():
+                spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
+                y, y_lengths = y.cuda(), y_lengths.cuda()
+                ssl = ssl.cuda()
+                text, text_lengths = text.cuda(), text_lengths.cuda()
+            else:
+                spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+                y, y_lengths = y.to("mps"), y_lengths.to("mps")
+                ssl = ssl.to("mps")
+                text, text_lengths = text.to("mps"), text_lengths.to("mps")
             for test in [0, 1]:
                 y_hat, mask, *_ = generator.module.infer(
                     ssl, spec, spec_lengths, text, text_lengths, test=test
+                ) if torch.cuda.is_available() else generator.infer(
+                    ssl, spec, spec_lengths, text, text_lengths, test=test
                 )
                 y_hat_lengths = mask.sum([1, 2]).long() * hps.data.hop_length
 
diff --git a/config.py b/config.py
index c9124bf..897f53c 100644
--- a/config.py
+++ b/config.py
@@ -17,7 +17,7 @@ exp_root = "logs"
 python_exec = sys.executable or "python"
 if torch.cuda.is_available():
     infer_device = "cuda"
-elif torch.mps.is_available():
+elif torch.backends.mps.is_available():
     infer_device = "mps"
 else:
     infer_device = "cpu"
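Note (not part of the patch): every file above repeats the same device-selection cascade inline. For readers reusing the pattern elsewhere, a minimal sketch of the equivalent logic follows. The helper name pick_device is hypothetical; the only assumptions are PyTorch >= 1.12 (where torch.backends.mps was introduced) and that a CPU fallback is acceptable.

    import torch

    def pick_device() -> torch.device:
        # Same priority order the patch uses in config.py and inference_webui.py:
        # prefer CUDA, then Apple-Silicon MPS, then plain CPU.
        if torch.cuda.is_available():
            return torch.device("cuda")
        if torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    # Usage: allocate tensors or models on whatever accelerator is present.
    x = torch.zeros(8, device=pick_device())

By contrast, the prepare_datasets scripts fall back straight to "mps" whenever CUDA is absent, which assumes they only run on NVIDIA or Apple hardware.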