From 1963eb01cc4333b3650678d3c19392d1a0b87ac6 Mon Sep 17 00:00:00 2001 From: Lion Date: Wed, 13 Mar 2024 22:09:24 +0800 Subject: [PATCH 1/2] support cpu training, use cpu training on mac --- GPT_SoVITS/inference_webui.py | 4 +-- GPT_SoVITS/prepare_datasets/1-get-text.py | 4 +-- .../prepare_datasets/2-get-hubert-wav32k.py | 4 +-- GPT_SoVITS/prepare_datasets/3-get-semantic.py | 4 +-- GPT_SoVITS/s1_train.py | 8 ++--- GPT_SoVITS/s2_train.py | 36 +++++++++---------- api.py | 7 ++-- webui.py | 17 ++++----- 8 files changed, 41 insertions(+), 43 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index ee09962..4fe8045 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -49,7 +49,7 @@ is_share = os.environ.get("is_share", "False") is_share = eval(is_share) if "_CUDA_VISIBLE_DEVICES" in os.environ: os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] -is_half = eval(os.environ.get("is_half", "True")) and not torch.backends.mps.is_available() +is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available() import gradio as gr from transformers import AutoModelForMaskedLM, AutoTokenizer import numpy as np @@ -69,7 +69,7 @@ from tools.i18n.i18n import I18nAuto i18n = I18nAuto() -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。 if torch.cuda.is_available(): device = "cuda" diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py index 5873164..b241382 100644 --- a/GPT_SoVITS/prepare_datasets/1-get-text.py +++ b/GPT_SoVITS/prepare_datasets/1-get-text.py @@ -49,8 +49,8 @@ if os.path.exists(txt_path) == False: os.makedirs(bert_dir, exist_ok=True) if torch.cuda.is_available(): device = "cuda:0" - elif torch.backends.mps.is_available(): - device = "mps" + # elif torch.backends.mps.is_available(): + # device = "mps" else: device = "cpu" tokenizer = 
AutoTokenizer.from_pretrained(bert_pretrained_dir) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index 7607259..9a2f73c 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -50,8 +50,8 @@ maxx=0.95 alpha=0.5 if torch.cuda.is_available(): device = "cuda:0" -elif torch.backends.mps.is_available(): - device = "mps" +# elif torch.backends.mps.is_available(): +# device = "mps" else: device = "cpu" model=cnhubert.get_model() diff --git a/GPT_SoVITS/prepare_datasets/3-get-semantic.py b/GPT_SoVITS/prepare_datasets/3-get-semantic.py index 9ab56a4..3448a58 100644 --- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py +++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py @@ -40,8 +40,8 @@ if os.path.exists(semantic_path) == False: if torch.cuda.is_available(): device = "cuda" - elif torch.backends.mps.is_available(): - device = "mps" + # elif torch.backends.mps.is_available(): + # device = "mps" else: device = "cpu" hps = utils.get_hparams_from_file(s2config_path) diff --git a/GPT_SoVITS/s1_train.py b/GPT_SoVITS/s1_train.py index fb27354..43cfa19 100644 --- a/GPT_SoVITS/s1_train.py +++ b/GPT_SoVITS/s1_train.py @@ -118,16 +118,16 @@ def main(args): os.environ["MASTER_ADDR"]="localhost" trainer: Trainer = Trainer( max_epochs=config["train"]["epochs"], - accelerator="gpu", + accelerator="gpu" if torch.cuda.is_available() else "cpu", # val_check_interval=9999999999999999999999,###不要验证 # check_val_every_n_epoch=None, limit_val_batches=0, - devices=-1, + devices=-1 if torch.cuda.is_available() else 1, benchmark=False, fast_dev_run=False, - strategy = "auto" if torch.backends.mps.is_available() else DDPStrategy( + strategy = DDPStrategy( process_group_backend="nccl" if platform.system() != "Windows" else "gloo" - ), # mps 不支持多节点训练 + ) if torch.cuda.is_available() else "auto", precision=config["train"]["precision"], logger=logger, 
num_sanity_val_steps=0, diff --git a/GPT_SoVITS/s2_train.py b/GPT_SoVITS/s2_train.py index e6b64f6..4f0ca4c 100644 --- a/GPT_SoVITS/s2_train.py +++ b/GPT_SoVITS/s2_train.py @@ -41,15 +41,15 @@ torch.set_float32_matmul_precision("medium") # 最低精度但最快(也就 # from config import pretrained_s2G,pretrained_s2D global_step = 0 +device = "cpu" # cuda以外的设备,等mps优化后加入 + def main(): - """Assume Single Node Multi GPUs Training Only""" - assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed." - if torch.backends.mps.is_available(): - n_gpus = 1 - else: + if torch.cuda.is_available(): n_gpus = torch.cuda.device_count() + else: + n_gpus = 1 os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = str(randint(20000, 55555)) @@ -73,7 +73,7 @@ def run(rank, n_gpus, hps): writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval")) dist.init_process_group( - backend = "gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl", + backend = "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl", init_method="env://", world_size=n_gpus, rank=rank, @@ -137,9 +137,9 @@ def run(rank, n_gpus, hps): hps.train.segment_size // hps.data.hop_length, n_speakers=hps.data.n_speakers, **hps.model, - ).to("mps") + ).to(device) - net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps") + net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to(device) for name, param in net_g.named_parameters(): if not param.requires_grad: print(name, "not requires_grad") @@ -187,8 +187,8 @@ def run(rank, n_gpus, hps): net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) else: - net_g = net_g.to("mps") - net_d = 
net_d.to("mps") + net_g = net_g.to(device) + net_d = net_d.to(device) try: # 如果能加载自动resume _, _, _, epoch_str = utils.load_checkpoint( @@ -320,12 +320,12 @@ def train_and_evaluate( rank, non_blocking=True ) else: - spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps") - y, y_lengths = y.to("mps"), y_lengths.to("mps") - ssl = ssl.to("mps") + spec, spec_lengths = spec.to(device), spec_lengths.to(device) + y, y_lengths = y.to(device), y_lengths.to(device) + ssl = ssl.to(device) ssl.requires_grad = False # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True) - text, text_lengths = text.to("mps"), text_lengths.to("mps") + text, text_lengths = text.to(device), text_lengths.to(device) with autocast(enabled=hps.train.fp16_run): ( @@ -532,10 +532,10 @@ def evaluate(hps, generator, eval_loader, writer_eval): ssl = ssl.cuda() text, text_lengths = text.cuda(), text_lengths.cuda() else: - spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps") - y, y_lengths = y.to("mps"), y_lengths.to("mps") - ssl = ssl.to("mps") - text, text_lengths = text.to("mps"), text_lengths.to("mps") + spec, spec_lengths = spec.to(device), spec_lengths.to(device) + y, y_lengths = y.to(device), y_lengths.to(device) + ssl = ssl.to(device) + text, text_lengths = text.to(device), text_lengths.to(device) for test in [0, 1]: y_hat, mask, *_ = generator.module.infer( ssl, spec, spec_lengths, text, text_lengths, test=test diff --git a/api.py b/api.py index 754f076..34adfbe 100644 --- a/api.py +++ b/api.py @@ -13,7 +13,7 @@ `-dt` - `默认参考音频文本` `-dl` - `默认参考音频语种, "中文","英文","日文","zh","en","ja"` -`-d` - `推理设备, "cuda","cpu","mps"` +`-d` - `推理设备, "cuda","cpu"` `-a` - `绑定地址, 默认"127.0.0.1"` `-p` - `绑定端口, 默认9880, 可在 config.py 中指定` `-fp` - `覆盖 config.py 使用全精度` @@ -143,7 +143,7 @@ parser.add_argument("-dr", "--default_refer_path", type=str, default="", help=" parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本") parser.add_argument("-dl", "--default_refer_language", 
type=str, default="", help="默认参考音频语种") -parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / mps") +parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu") parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0") parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880") parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度") @@ -482,9 +482,6 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language): wav.seek(0) torch.cuda.empty_cache() - if device == "mps": - print('executed torch.mps.empty_cache()') - torch.mps.empty_cache() return StreamingResponse(wav, media_type="audio/wav") diff --git a/webui.py b/webui.py index fc8680e..e1c36e1 100644 --- a/webui.py +++ b/webui.py @@ -55,7 +55,7 @@ from scipy.io import wavfile from tools.my_utils import load_audio from multiprocessing import cpu_count -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu +# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu n_cpu=cpu_count() @@ -73,18 +73,19 @@ if torch.cuda.is_available() or ngpu != 0: if_gpu_ok = True # 至少有一张能用的N卡 gpu_infos.append("%s\t%s" % (i, gpu_name)) mem.append(int(torch.cuda.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4)) -# 判断是否支持mps加速 -if torch.backends.mps.is_available(): - if_gpu_ok = True - gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) - mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 +# # 判断是否支持mps加速 +# if torch.backends.mps.is_available(): +# if_gpu_ok = True +# gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) +# mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 if if_gpu_ok and len(gpu_infos) > 0: gpu_info = "\n".join(gpu_infos) default_batch_size = min(mem) // 2 else: - gpu_info = 
i18n("很遗憾您这没有能用的显卡来支持您训练") - default_batch_size = 1 + gpu_info = ("%s\t%s" % ("0", "CPU")) + gpu_infos.append("%s\t%s" % ("0", "CPU")) + default_batch_size = psutil.virtual_memory().total/ 1024 / 1024 / 1024 / 2 gpus = "-".join([i[0] for i in gpu_infos]) pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth" From 7822f1961bc2c0fee71f6debe2ed0566a1423ed4 Mon Sep 17 00:00:00 2001 From: Lion Date: Wed, 13 Mar 2024 22:36:20 +0800 Subject: [PATCH 2/2] update README --- README.md | 13 ++----------- docs/cn/README.md | 13 +++---------- docs/ja/README.md | 11 +---------- docs/ko/README.md | 11 +---------- 4 files changed, 7 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 0b0e2d4..96f31b7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Unseen speakers few-shot fine-tuning demo: https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb -[教程中文版](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) [User guide (EN)](https://rentry.co/GPT-SoVITS-guide#/) +**User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)** ## Installation @@ -61,13 +61,7 @@ bash install.sh ### macOS -Only Macs that meet the following conditions can train models: - -- Mac computers with Apple silicon -- macOS 12.3 or later -- Xcode command-line tools installed by running `xcode-select --install` - -**All Macs can do inference with CPU, which has been demonstrated to outperform GPU inference.** +**Note: Models trained with GPUs on Macs are of significantly lower quality than those trained on other devices, so we temporarily use CPUs for training instead.** First make sure you have installed FFmpeg by running `brew install ffmpeg` or `conda install ffmpeg`, then install by using the following commands: @@ -75,12 +69,9 @@ First make sure you have installed FFmpeg by running `brew install ffmpeg` or `c conda create -n GPTSoVits python=3.9 
conda activate GPTSoVits -pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu pip install -r requirements.txt ``` -_Note: Training models will only work if you've installed PyTorch Nightly._ - ### Install Manually #### Install Dependences diff --git a/docs/cn/README.md b/docs/cn/README.md index 1f31ecc..161a12d 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -33,6 +33,8 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb +**用户手册: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)** + ## 安装 中国地区用户可[点击此处](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official)使用 AutoDL 云端镜像进行体验。 @@ -59,13 +61,7 @@ bash install.sh ### macOS -只有符合以下条件的 Mac 可以训练模型: - -- 搭载 Apple 芯片的 Mac -- 运行macOS 12.3 或更高版本 -- 已通过运行`xcode-select --install`安装 Xcode command-line tools - -**所有 Mac 都可使用 CPU 进行推理,且已测试性能优于 GPU。** +**注:在 Mac 上使用 GPU 训练的模型效果显著低于其他设备训练的模型,所以我们暂时使用CPU进行训练。** 首先确保你已通过运行 `brew install ffmpeg` 或 `conda install ffmpeg` 安装 FFmpeg,然后运行以下命令安装: @@ -73,12 +69,9 @@ bash install.sh conda create -n GPTSoVits python=3.9 conda activate GPTSoVits -pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu pip install -r requirements.txt ``` -_注:只有安装了Pytorch Nightly才可训练模型。_ - ### 手动安装 #### 安装依赖 diff --git a/docs/ja/README.md b/docs/ja/README.md index 88ed865..5d9b4de 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -57,13 +57,7 @@ bash install.sh ### macOS -モデルをトレーニングできるMacは、以下の条件を満たす必要があります: - -- Appleシリコンを搭載したMacコンピュータ -- macOS 12.3以降 -- `xcode-select --install`を実行してインストールされたXcodeコマンドラインツール - -**すべてのMacはCPUを使用して推論を行うことができ、GPU推論よりも優れていることが実証されています。** +**注:MacでGPUを使用して訓練されたモデルは、他のデバイスで訓練されたモデルと比較して著しく品質が低下するため、当面はCPUを使用して訓練します。** まず、`brew install ffmpeg`または`conda install ffmpeg`を実行してFFmpegをインストールしたことを確認してください。次に、以下のコマンドを使用してインストールします: @@ -71,12 +65,9 @@ bash install.sh conda 
create -n GPTSoVits python=3.9 conda activate GPTSoVits -pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu pip install -r requirements.txt ``` -_注:PyTorch Nightlyをインストールした場合にのみ、モデルのトレーニングが可能です。_ - ### 手動インストール #### 依存関係をインストールします diff --git a/docs/ko/README.md b/docs/ko/README.md index cc390b0..5fbff3b 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -57,13 +57,7 @@ bash install.sh ### macOS -다음 조건을 충족하는 Mac에서만 모델을 훈련할 수 있습니다: - -- Apple 실리콘을 탑재한 Mac -- macOS 12.3 이상 버전 -- `xcode-select --install`을 실행하여 Xcode 명령줄 도구가 설치됨 - -**모든 Mac은 CPU를 사용하여 추론할 수 있으며, GPU 추론보다 우수한 성능을 보여주었습니다.** +**주의: Mac에서 GPU로 훈련된 모델은 다른 장치에서 훈련된 모델에 비해 현저히 낮은 품질을 나타내므로, 우리는 일시적으로 CPU를 사용하여 훈련하고 있습니다.** 먼저 `brew install ffmpeg` 또는 `conda install ffmpeg`를 실행하여 FFmpeg가 설치되었는지 확인한 다음, 다음 명령어를 사용하여 설치하세요: @@ -71,12 +65,9 @@ bash install.sh conda create -n GPTSoVits python=3.9 conda activate GPTSoVits -pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu pip install -r requirements.txt ``` -_참고: PyTorch Nightly가 설치되어야만 모델을 훈련할 수 있습니다._ - ### 수동 설치 #### 의존성 설치