mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-06-16 16:06:53 +08:00
* Docker Auto-Build Workflow * Rename * Update * Fix Bugs * Disable Progress Bar When workflows triggered * Fix Wget * Fix Bugs * Fix Bugs * Update Wget * Update Workflows * Accelerate Docker Image Building * Fix Install.sh * Add Skip-Check For Action Runner * Fix Dockerfile * . * . * . * . * Delete File in Runner * Add Sort * Delete More Files * Delete More * . * . * . * Add Pre-Commit Hook Update Docker * Add Code Spell Check * [pre-commit.ci] trigger * [pre-commit.ci] trigger * [pre-commit.ci] trigger * Fix Bugs * . * Disable Progress Bar and Logs while using GitHub Actions * . * . * Fix Bugs * update conda * fix bugs * Fix Bugs * fix bugs * . * . * Quiet Installation * fix bugs * . * fix bug * . * Fix pre-commit.ci and Docker * fix bugs * . * Update Docker & Pre-Commit * fix bugs * Update Req * Update Req * Update OpenCC * update precommit * . * Update .pre-commit-config.yaml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Docs and fix bugs * Fix \ * Fix MacOS * . * test * . * Add Tag Alias * . * fix bugs * fix bugs * make image smaller * update pre-commit config * . * . * fix bugs * use miniconda * Fix Wrong Path * . * debug * debug * revert * Fix Bugs * Update Docs, Add Dict Auto Download in install.sh * update docker_build * Update Docs for Install.sh * update docker docs about architecture * Add Xcode-Commandline-Tool Installation * Update Docs 1. Add Missing VC17 2. Modufied the Order of FFmpeg Installation and Requirements Installation 3. Remove Duplicate FFmpeg * Fix Wrong Cuda Version * Update TESTED ENV * Add PYTHONNOUSERSITE(-s) * Fix Wrapper * Update install.sh For Robustness * Ignore .git * Preload CUDNN For Ctranslate2 * Remove Gradio Warnings * Update Colab * Fix OpenCC Problems * Update Win DLL Strategy * Fix Onnxruntime-gpu NVRTC Error * Fix Path Problems * Add Windows Packages Workflow * WIP * WIP * WIP * WIP * WIP * WIP * . 
* WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * Fix Path * Fix Path * Enable Logging * Set 7-Zip compression level to maximum (-mx=9) * Use Multithread in ONNX Session * Fix Tag Bugs * Add Time * Add Time * Add Time * Compress More * Copy DLL to Solve VC Runtime DLL Missing Issues * Expose FFmpeg Errors, Copy Only Part of Visual C++ Runtime * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * WIP * WIP * WIP * Update build_windows_packages.ps1 * Update install.sh * Update build_windows_packages.ps1 * Update docker-publish.yaml * Update install.sh * Update Dockerfile * Update docker_build.sh * Update miniconda_install.sh * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update Colab-WebUI.ipynb * Update Colab-Inference.ipynb * Update docker-compose.yaml * 更新 build_windows_packages.ps1 * Update install.sh --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
168 lines
5.5 KiB
Python
168 lines
5.5 KiB
Python
import os
|
|
import random
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torch.utils.data
|
|
from tqdm import tqdm
|
|
|
|
from . import spec_utils
|
|
|
|
|
|
class VocalRemoverValidationSet(torch.utils.data.Dataset):
    """Validation dataset serving magnitude-spectrogram pairs from cached .npz patches.

    Each entry on disk stores a complex mixture spectrogram under key "X" and the
    matching instrumental spectrogram under key "y"; only their magnitudes are
    returned to the caller.
    """

    def __init__(self, patch_list):
        # Paths to pre-computed patch archives (see make_validation_set()).
        self.patch_list = patch_list

    def __len__(self):
        return len(self.patch_list)

    def __getitem__(self, idx):
        archive = np.load(self.patch_list[idx])
        mix_spec, inst_spec = archive["X"], archive["y"]
        # The network consumes magnitudes; phase is discarded here.
        return np.abs(mix_spec), np.abs(inst_spec)
|
|
|
|
|
|
def make_pair(mix_dir, inst_dir):
    """Pair mixture and instrumental audio files by sorted path order.

    Both directories are scanned for recognized audio extensions; the i-th
    sorted mixture path is zipped with the i-th sorted instrumental path.
    Files are matched purely by position, so both directories are expected to
    contain the same set of filenames.
    """
    audio_exts = {".wav", ".m4a", ".mp3", ".mp4", ".flac"}

    def _collect(directory):
        # Full paths of all audio files in `directory`, sorted for a stable pairing.
        return sorted(
            os.path.join(directory, fname)
            for fname in os.listdir(directory)
            if os.path.splitext(fname)[1] in audio_exts
        )

    return list(zip(_collect(mix_dir), _collect(inst_dir)))
|
|
|
|
|
|
def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
    """Split the dataset into training and validation (mixture, instrumental) pairs.

    Args:
        dataset_dir: Root directory of the dataset.
        split_mode: "random" (shuffle everything under mixtures/instruments and
            carve off `val_rate`) or "subdirs" (use training/ and validation/
            subdirectories as-is).
        val_rate: Fraction of pairs reserved for validation in "random" mode.
        val_filelist: Pre-selected validation pairs; in "random" mode a
            non-empty list pins validation to exactly these pairs.

    Returns:
        (train_filelist, val_filelist) as lists of path pairs.

    Raises:
        ValueError: if `val_filelist` is given in "subdirs" mode, or if
            `split_mode` is not one of the recognized modes.
    """
    if split_mode == "random":
        filelist = make_pair(
            os.path.join(dataset_dir, "mixtures"),
            os.path.join(dataset_dir, "instruments"),
        )

        random.shuffle(filelist)

        if len(val_filelist) == 0:
            val_size = int(len(filelist) * val_rate)
            # Slice with an explicit endpoint: the previous `filelist[:-val_size]`
            # produced an EMPTY training set whenever val_size == 0.
            split_at = len(filelist) - val_size
            train_filelist = filelist[:split_at]
            val_filelist = filelist[split_at:]
        else:
            # Keep every pair that is not already pinned to validation.
            # `val_filelist` entries may be lists (e.g. loaded from JSON), so
            # compare against the list form of each tuple pair.
            train_filelist = [pair for pair in filelist if list(pair) not in val_filelist]
    elif split_mode == "subdirs":
        if len(val_filelist) != 0:
            raise ValueError("The `val_filelist` option is not available in `subdirs` mode")

        train_filelist = make_pair(
            os.path.join(dataset_dir, "training/mixtures"),
            os.path.join(dataset_dir, "training/instruments"),
        )

        val_filelist = make_pair(
            os.path.join(dataset_dir, "validation/mixtures"),
            os.path.join(dataset_dir, "validation/instruments"),
        )
    else:
        # Fail fast with a clear message instead of hitting an
        # UnboundLocalError at the return statement below.
        raise ValueError("Unknown `split_mode`: {}".format(split_mode))

    return train_filelist, val_filelist
|
|
|
|
|
|
def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
    """Apply random data augmentation to mixture/instrumental spectrogram batches.

    Mutates X and y IN PLACE (and also returns them). For each sample, in order:
    aggressive vocal reduction, channel swap (p=0.5), mono downmix (p=0.02),
    instrumental-only substitution (p=0.02), and mixup with the next sample in
    the random permutation. The order of np.random calls is significant for
    reproducibility under a fixed seed.

    Args:
        X: batch of mixture spectrograms; indexed as X[idx, channel, ...]
           (the channel-reversal below implies axis 1 is the stereo channel).
        y: batch of instrumental spectrograms, same layout as X.
        reduction_rate: probability of applying vocal reduction to a sample.
        reduction_mask: mask argument forwarded to spec_utils.reduce_vocal_aggressively.
        mixup_rate: probability of mixing a sample with its successor in the permutation.
        mixup_alpha: Beta-distribution parameter for the mixup coefficient.

    Returns:
        The (mutated) pair (X, y).
    """
    perm = np.random.permutation(len(X))
    for i, idx in enumerate(tqdm(perm)):
        if np.random.uniform() < reduction_rate:
            # Suppress residual vocals in the instrumental target.
            y[idx] = spec_utils.reduce_vocal_aggressively(X[idx], y[idx], reduction_mask)

        if np.random.uniform() < 0.5:
            # swap stereo channels (reverse along the channel axis)
            X[idx] = X[idx, ::-1]
            y[idx] = y[idx, ::-1]
        if np.random.uniform() < 0.02:
            # mono downmix: channel mean broadcast back over both channels
            X[idx] = X[idx].mean(axis=0, keepdims=True)
            y[idx] = y[idx].mean(axis=0, keepdims=True)
        if np.random.uniform() < 0.02:
            # instrumental-only sample: make the "mixture" identical to the target
            X[idx] = y[idx]

        if np.random.uniform() < mixup_rate and i < len(perm) - 1:
            # Mixup with the next sample in the permutation; skipped for the
            # last element, which has no successor.
            lam = np.random.beta(mixup_alpha, mixup_alpha)
            X[idx] = lam * X[idx] + (1 - lam) * X[perm[i + 1]]
            y[idx] = lam * y[idx] + (1 - lam) * y[perm[i + 1]]

    return X, y
|
|
|
|
|
|
def make_padding(width, cropsize, offset):
    """Compute padding so a spectrogram of `width` frames tiles evenly into crops.

    The usable region of interest inside each crop is the crop minus an
    `offset`-frame margin on both sides; the right pad rounds the width up to a
    whole number of regions.

    Returns:
        (pad_left, pad_right, roi_size)
    """
    pad_left = offset
    roi_size = cropsize - 2 * offset
    if roi_size == 0:
        # No interior left after the margins: stride by the full crop instead.
        roi_size = cropsize
    pad_right = roi_size - (width % roi_size) + pad_left
    return pad_left, pad_right, roi_size
|
|
|
|
|
|
def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
    """Build in-memory training arrays by cropping random patches from each pair.

    Every (mixture, instrumental) file pair contributes `patches` random crops
    of `cropsize` frames, giving `patches * len(filelist)` samples in total.

    Returns:
        (X_dataset, y_dataset): complex64 arrays of shape
        (n_samples, 2, n_fft // 2 + 1, cropsize).
    """
    n_bins = n_fft // 2 + 1
    total = patches * len(filelist)

    X_dataset = np.zeros((total, 2, n_bins, cropsize), dtype=np.complex64)
    y_dataset = np.zeros((total, 2, n_bins, cropsize), dtype=np.complex64)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        # Jointly normalize both spectrograms by their shared peak magnitude.
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        pad_spec = ((0, 0), (0, 0), (l, r))
        X_pad = np.pad(X, pad_spec, mode="constant")
        y_pad = np.pad(y, pad_spec, mode="constant")

        # One randint call per file, exactly as before, so seeded runs match.
        starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
        for j, begin in enumerate(starts):
            slot = i * patches + j
            X_dataset[slot] = X_pad[:, :, begin : begin + cropsize]
            y_dataset[slot] = y_pad[:, :, begin : begin + cropsize]

    return X_dataset, y_dataset
|
|
|
|
|
|
def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
    """Slice each validation pair into fixed-size patches cached on disk.

    Patches are written once as .npz archives under a directory whose name
    encodes every parameter that affects patch content, so repeated runs with
    identical settings reuse the cache.

    Returns:
        A VocalRemoverValidationSet over the generated patch paths.
    """
    patch_dir = "cs{}_sr{}_hl{}_nf{}_of{}".format(cropsize, sr, hop_length, n_fft, offset)
    os.makedirs(patch_dir, exist_ok=True)

    patch_list = []
    for X_path, y_path in tqdm(filelist):
        basename = os.path.splitext(os.path.basename(X_path))[0]

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        # Jointly normalize both spectrograms by their shared peak magnitude.
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        pad_spec = ((0, 0), (0, 0), (l, r))
        X_pad = np.pad(X, pad_spec, mode="constant")
        y_pad = np.pad(y, pad_spec, mode="constant")

        # Number of roi_size strides needed to cover the unpadded width.
        num_patches = int(np.ceil(X.shape[2] / roi_size))
        for j in range(num_patches):
            outpath = os.path.join(patch_dir, "{}_p{}.npz".format(basename, j))
            start = j * roi_size
            if not os.path.exists(outpath):
                # Cache miss: persist this crop for future runs.
                np.savez(
                    outpath,
                    X=X_pad[:, :, start : start + cropsize],
                    y=y_pad[:, :, start : start + cropsize],
                )
            patch_list.append(outpath)

    return VocalRemoverValidationSet(patch_list)
|