diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index ee09962..4fe8045 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -49,7 +49,7 @@ is_share = os.environ.get("is_share", "False") is_share = eval(is_share) if "_CUDA_VISIBLE_DEVICES" in os.environ: os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] -is_half = eval(os.environ.get("is_half", "True")) and not torch.backends.mps.is_available() +is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available() import gradio as gr from transformers import AutoModelForMaskedLM, AutoTokenizer import numpy as np @@ -69,7 +69,7 @@ from tools.i18n.i18n import I18nAuto i18n = I18nAuto() -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 if torch.cuda.is_available(): device = "cuda" diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py index 5873164..b241382 100644 --- a/GPT_SoVITS/prepare_datasets/1-get-text.py +++ b/GPT_SoVITS/prepare_datasets/1-get-text.py @@ -49,8 +49,8 @@ if os.path.exists(txt_path) == False: os.makedirs(bert_dir, exist_ok=True) if torch.cuda.is_available(): device = "cuda:0" - elif torch.backends.mps.is_available(): - device = "mps" + # elif torch.backends.mps.is_available(): + # device = "mps" else: device = "cpu" tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index 7607259..9a2f73c 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -50,8 +50,8 @@ maxx=0.95 alpha=0.5 if torch.cuda.is_available(): device = "cuda:0" -elif torch.backends.mps.is_available(): - device = "mps" +# elif torch.backends.mps.is_available(): +# device = "mps" else: device = "cpu" model=cnhubert.get_model() diff --git a/GPT_SoVITS/prepare_datasets/3-get-semantic.py b/GPT_SoVITS/prepare_datasets/3-get-semantic.py index 9ab56a4..3448a58 100644 --- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py +++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py @@ -40,8 +40,8 @@ if os.path.exists(semantic_path) == False: if torch.cuda.is_available(): device = "cuda" - elif torch.backends.mps.is_available(): - device = "mps" + # elif torch.backends.mps.is_available(): + # device = "mps" else: device = "cpu" hps = utils.get_hparams_from_file(s2config_path) diff --git a/GPT_SoVITS/s1_train.py b/GPT_SoVITS/s1_train.py index fb27354..43cfa19 100644 --- a/GPT_SoVITS/s1_train.py +++ b/GPT_SoVITS/s1_train.py @@ -118,16 +118,16 @@ def main(args): os.environ["MASTER_ADDR"]="localhost" trainer: Trainer = Trainer( max_epochs=config["train"]["epochs"], - accelerator="gpu", + accelerator="gpu" if torch.cuda.is_available() else "cpu", # val_check_interval=9999999999999999999999,###不要验证 # check_val_every_n_epoch=None, limit_val_batches=0, - devices=-1, + devices=-1 if torch.cuda.is_available() else 1, benchmark=False, fast_dev_run=False, - strategy = "auto" if torch.backends.mps.is_available() else DDPStrategy( + strategy = DDPStrategy( process_group_backend="nccl" if platform.system() != "Windows" else "gloo" - ), # mps 不支持多节点训练 + ) if torch.cuda.is_available() else "auto", precision=config["train"]["precision"], logger=logger, num_sanity_val_steps=0, diff --git a/GPT_SoVITS/s2_train.py b/GPT_SoVITS/s2_train.py index e6b64f6..4f0ca4c 100644 --- a/GPT_SoVITS/s2_train.py +++ b/GPT_SoVITS/s2_train.py @@ -41,15 +41,15 @@ torch.set_float32_matmul_precision("medium") # 最低精度但最快(也就 # from config import pretrained_s2G,pretrained_s2D global_step = 0 +device = "cpu" # cuda以外的设备,等mps优化后加入 + def main(): - """Assume Single Node Multi GPUs Training Only""" - assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed." - if torch.backends.mps.is_available(): - n_gpus = 1 - else: + if torch.cuda.is_available(): n_gpus = torch.cuda.device_count() + else: + n_gpus = 1 os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = str(randint(20000, 55555)) @@ -73,7 +73,7 @@ def run(rank, n_gpus, hps): writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval")) dist.init_process_group( - backend = "gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl", + backend = "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl", init_method="env://", world_size=n_gpus, rank=rank, @@ -137,9 +137,9 @@ def run(rank, n_gpus, hps): hps.train.segment_size // hps.data.hop_length, n_speakers=hps.data.n_speakers, **hps.model, - ).to("mps") + ).to(device) - net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps") + net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to(device) for name, param in net_g.named_parameters(): if not param.requires_grad: print(name, "not requires_grad") @@ -187,8 +187,8 @@ def run(rank, n_gpus, hps): net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) else: - net_g = net_g.to("mps") - net_d = net_d.to("mps") + net_g = net_g.to(device) + net_d = net_d.to(device) try: # 如果能加载自动resume _, _, _, epoch_str = utils.load_checkpoint( @@ -320,12 +320,12 @@ def train_and_evaluate( rank, non_blocking=True ) else: - spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps") - y, y_lengths = y.to("mps"), y_lengths.to("mps") - ssl = ssl.to("mps") + spec, spec_lengths = spec.to(device), spec_lengths.to(device) + y, y_lengths = y.to(device), y_lengths.to(device) + ssl = ssl.to(device) ssl.requires_grad = False # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True) - text, text_lengths = text.to("mps"), text_lengths.to("mps") + text, text_lengths = text.to(device), text_lengths.to(device) with autocast(enabled=hps.train.fp16_run): ( @@ -532,10 +532,10 @@ def evaluate(hps, generator, eval_loader, writer_eval): ssl = ssl.cuda() text, text_lengths = text.cuda(), text_lengths.cuda() else: - spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps") - y, y_lengths = y.to("mps"), y_lengths.to("mps") - ssl = ssl.to("mps") - text, text_lengths = text.to("mps"), text_lengths.to("mps") + spec, spec_lengths = spec.to(device), spec_lengths.to(device) + y, y_lengths = y.to(device), y_lengths.to(device) + ssl = ssl.to(device) + text, text_lengths = text.to(device), text_lengths.to(device) for test in [0, 1]: y_hat, mask, *_ = generator.module.infer( ssl, spec, spec_lengths, text, text_lengths, test=test diff --git a/api.py b/api.py index 754f076..34adfbe 100644 --- a/api.py +++ b/api.py @@ -13,7 +13,7 @@ `-dt` - `默认参考音频文本` `-dl` - `默认参考音频语种, "中文","英文","日文","zh","en","ja"` -`-d` - `推理设备, "cuda","cpu","mps"` +`-d` - `推理设备, "cuda","cpu"` `-a` - `绑定地址, 默认"127.0.0.1"` `-p` - `绑定端口, 默认9880, 可在 config.py 中指定` `-fp` - `覆盖 config.py 使用全精度` @@ -143,7 +143,7 @@ parser.add_argument("-dr", "--default_refer_path", type=str, default="", help=" parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本") parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种") -parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / mps") +parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu") parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0") parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880") parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度") @@ -482,9 +482,6 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language): wav.seek(0) torch.cuda.empty_cache() - if device == "mps": - print('executed torch.mps.empty_cache()') - torch.mps.empty_cache() return StreamingResponse(wav, media_type="audio/wav") diff --git a/webui.py b/webui.py index fc8680e..e1c36e1 100644 --- a/webui.py +++ b/webui.py @@ -55,7 +55,7 @@ from scipy.io import wavfile from tools.my_utils import load_audio from multiprocessing import cpu_count -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu n_cpu=cpu_count() @@ -73,18 +73,19 @@ if torch.cuda.is_available() or ngpu != 0: if_gpu_ok = True # 至少有一张能用的N卡 gpu_infos.append("%s\t%s" % (i, gpu_name)) mem.append(int(torch.cuda.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4)) -# 判断是否支持mps加速 -if torch.backends.mps.is_available(): - if_gpu_ok = True - gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) - mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 +# # 判断是否支持mps加速 +# if torch.backends.mps.is_available(): +# if_gpu_ok = True +# gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) +# mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 if if_gpu_ok and len(gpu_infos) > 0: gpu_info = "\n".join(gpu_infos) default_batch_size = min(mem) // 2 else: - gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") - default_batch_size = 1 + gpu_info = ("%s\t%s" % ("0", "CPU")) + gpu_infos.append("%s\t%s" % ("0", "CPU")) + default_batch_size = psutil.virtual_memory().total/ 1024 / 1024 / 1024 / 2 gpus = "-".join([i[0] for i in gpu_infos]) pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth"