google-labs-jules[bot] d3b8f7e09e feat: Migrate from CUDA to XPU for Intel GPU support
This commit migrates the project from using NVIDIA CUDA to Intel XPU for GPU acceleration, based on the PyTorch 2.9 release.

Key changes include:
- Replaced `torch.cuda` with `torch.xpu` for device checks, memory management, and distributed training.
- Updated device strings from "cuda" to "xpu" across the codebase.
- Switched the distributed training backend from "nccl" to "ccl" for Intel GPUs.
- Disabled custom CUDA kernels in the `BigVGAN` module by setting `use_cuda_kernel=False`.
- Updated `requirements.txt` to include `torch==2.9` and `intel-extension-for-pytorch`.
- Modified CI/CD pipelines and build scripts to remove CUDA dependencies and build for an XPU target.
2025-11-10 13:09:27 +00:00

88 lines
2.6 KiB
Python

# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
# LICENSE is in incl_licenses directory.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import argparse
import json
import torch
import librosa
from utils import load_checkpoint
from meldataset import get_mel_spectrogram
from scipy.io.wavfile import write
from env import AttrDict
from meldataset import MAX_WAV_VALUE
from bigvgan import BigVGAN as Generator
# Turn off the cuDNN benchmark mode; this only sets a flag and is done once at import time.
torch.backends.cudnn.benchmark = False

# Module-level state that main() fills in before calling inference():
#   device - torch.device chosen at startup ("xpu" when available, otherwise "cpu")
#   h      - AttrDict built from the config.json that sits next to the checkpoint
device = None
h = None
def inference(a, h):
    """Vocode every entry of ``a.input_wavs_dir`` and write 16-bit WAV files.

    Each input is loaded as mono audio at ``h.sampling_rate``, converted to a
    mel spectrogram, passed through the generator, and saved in
    ``a.output_dir`` as ``<input stem>_generated.wav``. The path of every
    written file is printed.

    Args:
        a: Parsed arguments. Reads ``use_cuda_kernel``, ``checkpoint_file``,
            ``input_wavs_dir`` and ``output_dir``.
        h: Model configuration. Passed to the generator and read for
            ``sampling_rate``.

    Note:
        Relies on the module-level ``device`` for the model and the input
        tensors, so it must be set before this function is called.
    """
    generator = Generator(h, use_cuda_kernel=a.use_cuda_kernel).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g["generator"])

    filelist = os.listdir(a.input_wavs_dir)

    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()

    # Bounds used to saturate samples before the int16 cast (loop-invariant).
    int16_range = torch.iinfo(torch.int16)

    with torch.no_grad():
        for filename in filelist:
            # Load the ground truth audio and resample if necessary
            wav, _ = librosa.load(
                os.path.join(a.input_wavs_dir, filename),
                sr=h.sampling_rate,
                mono=True,
            )
            wav = torch.FloatTensor(wav).to(device)

            # Compute mel spectrogram from the ground truth audio
            x = get_mel_spectrogram(wav.unsqueeze(0), generator.h)

            y_g_hat = generator(x)

            audio = y_g_hat.squeeze() * MAX_WAV_VALUE
            # Saturate instead of letting out-of-range samples wrap around in
            # the int16 cast, which would be audible as clicks.
            audio = torch.clamp(audio, int16_range.min, int16_range.max)
            audio = audio.cpu().numpy().astype("int16")

            output_file = os.path.join(
                a.output_dir, os.path.splitext(filename)[0] + "_generated.wav"
            )
            write(output_file, h.sampling_rate, audio)
            print(output_file)
def main():
    """Command-line entry point for vocoder inference.

    Parses ``--input_wavs_dir``, ``--output_dir`` and the required
    ``--checkpoint_file``, loads ``config.json`` from the checkpoint's
    directory into the module-level ``h``, seeds the random number generators
    with ``h.seed``, stores the chosen device in the module-level ``device``
    ("xpu" when available, otherwise "cpu"), and then runs ``inference``.
    """
    global h, device

    print("Initializing Inference Process..")

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--input_wavs_dir", default="test_files")
    arg_parser.add_argument("--output_dir", default="generated_files")
    arg_parser.add_argument("--checkpoint_file", required=True)
    a = arg_parser.parse_args()

    # There is deliberately no --use_cuda_kernel flag: the custom CUDA kernels
    # are always disabled, so the attribute is pinned to False here.
    a.use_cuda_kernel = False

    # The model config is expected to live beside the checkpoint file.
    config_path = os.path.join(os.path.dirname(a.checkpoint_file), "config.json")
    with open(config_path) as config_fp:
        json_config = json.load(config_fp)
    h = AttrDict(json_config)

    torch.manual_seed(h.seed)

    xpu_available = torch.xpu.is_available()
    if xpu_available:
        torch.xpu.manual_seed(h.seed)
    device = torch.device("xpu" if xpu_available else "cpu")

    inference(a, h)
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()