Reformat scripts

This commit is contained in:
laubonghaudoi 2025-03-19 00:24:44 -07:00
parent 165882d64f
commit c18c9a9eed
4 changed files with 145 additions and 108 deletions

1
.gitignore vendored
View File

@ -18,3 +18,4 @@ TEMP
weight.json
ffmpeg*
ffprobe*
zoengjyutgaai*

View File

@ -1,6 +1,17 @@
# -*- coding: utf-8 -*-
import os
import os.path
import shutil
import traceback
from time import time as ttime
import torch
from text.cleaner import clean_text
from transformers import AutoModelForMaskedLM, AutoTokenizer
from tqdm import tqdm
from tools.my_utils import clean_path
inp_text = os.environ.get("inp_text")
inp_wav_dir = os.environ.get("inp_wav_dir")
@ -11,17 +22,8 @@ if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
version = os.environ.get('version', None)
import sys, numpy as np, traceback, pdb
import os.path
from glob import glob
from tqdm import tqdm
from text.cleaner import clean_text
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
from tools.my_utils import clean_path
# inp_text=sys.argv[1]
# inp_wav_dir=sys.argv[2]
@ -32,11 +34,8 @@ from tools.my_utils import clean_path
# opt_dir="/data/docker/liujing04/gpt-vits/fine_tune_dataset/%s"%exp_name
# bert_pretrained_dir="/data/docker/liujing04/bert-vits2/Bert-VITS2-master20231106/bert/chinese-roberta-wwm-ext-large"
from time import time as ttime
import shutil
def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
def my_save(fea, path): # fix issue: torch.save doesn't support chinese path
dir = os.path.dirname(path)
name = os.path.basename(path)
# tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
@ -56,11 +55,13 @@ if os.path.exists(txt_path) == False:
# device = "mps"
else:
device = "cpu"
if os.path.exists(bert_pretrained_dir):...
else:raise FileNotFoundError(bert_pretrained_dir)
if os.path.exists(bert_pretrained_dir):
...
else:
raise FileNotFoundError(bert_pretrained_dir)
tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
if is_half == True:
if is_half:
bert_model = bert_model.half().to(device)
else:
bert_model = bert_model.to(device)
@ -126,7 +127,7 @@ if os.path.exists(txt_path) == False:
"YUE": "yue",
"Yue": "yue",
}
for line in lines[int(i_part) :: int(all_parts)]:
for line in tqdm(lines[int(i_part):: int(all_parts)]):
try:
wav_name, spk_name, language, text = line.split("|")
# todo.append([name,text,"zh"])

View File

@ -1,6 +1,23 @@
# -*- coding: utf-8 -*-
"""
Step 2 of data preparation: Extract HuBERT features from the audio files,
and save the audio resampled to 32kHz.
"""
import os
import shutil
import sys
import traceback
from time import time as ttime
import librosa
import numpy as np
import torch
from feature_extractor import cnhubert
from scipy.io import wavfile
from tqdm import tqdm
from tools.my_utils import clean_path, load_audio
import sys,os
inp_text = os.environ.get("inp_text")
inp_wav_dir = os.environ.get("inp_wav_dir")
exp_name = os.environ.get("exp_name")
@ -8,18 +25,16 @@ i_part= os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
from feature_extractor import cnhubert
opt_dir = os.environ.get("opt_dir")
cnhubert.cnhubert_base_path = os.environ.get("cnhubert_base_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import pdb,traceback,numpy as np,logging
from scipy.io import wavfile
import librosa
now_dir = os.getcwd()
sys.path.append(now_dir)
from tools.my_utils import load_audio,clean_path
# from config import cnhubert_base_path
# cnhubert.cnhubert_base_path=cnhubert_base_path
@ -32,15 +47,14 @@ from tools.my_utils import load_audio,clean_path
# cnhubert.cnhubert_base_path=sys.argv[7]
# opt_dir="/data/docker/liujing04/gpt-vits/fine_tune_dataset/%s"%exp_name
from time import time as ttime
import shutil
def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
def my_save(fea, path): # fix issue: torch.save doesn't support chinese path
dir = os.path.dirname(path)
name = os.path.basename(path)
# tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
tmp_path="%s%s.pth"%(ttime(),i_part)
tmp_path = f"{ttime()}{i_part}.pth"
torch.save(fea, tmp_path)
shutil.move(tmp_path,"%s/%s"%(dir,name))
shutil.move(tmp_path, f"{dir}/{name}")
hubert_dir = "%s/4-cnhubert" % (opt_dir)
wav32dir = "%s/5-wav32k" % (opt_dir)
@ -56,6 +70,8 @@ if torch.cuda.is_available():
# device = "mps"
else:
device = "cpu"
model = cnhubert.get_model()
# is_half=False
if (is_half == True):
@ -64,47 +80,66 @@ else:
model = model.to(device)
nan_fails = []
def name2go(wav_name, wav_path):
"""
Extract HuBERT features from the audio file, and save the audio resampled to 32kHz.
"""
# Skip if the file already exists
hubert_path = "%s/%s.pt" % (hubert_dir, wav_name)
if(os.path.exists(hubert_path)):return
tmp_audio = load_audio(wav_path, 32000)
tmp_max = np.abs(tmp_audio).max()
if tmp_max > 2.2:
print("%s-filtered,%s" % (wav_name, tmp_max))
if (os.path.exists(hubert_path)):
return
# Load the audio file in 32kHz sampling rate
tmp_audio = load_audio(wav_path, 32000)
# Check the maximum amplitude of the audio file
tmp_max = np.abs(tmp_audio).max()
# Skip if the maximum amplitude is too high (volume is too loud)
if tmp_max > 2.2:
print(f"{wav_name}-filtered,{tmp_max}")
return
# Normalize the audio
tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha * 32768)) + ((1 - alpha) * 32768) * tmp_audio
tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha * 1145.14)) + ((1 - alpha) * 1145.14) * tmp_audio
tmp_audio = librosa.resample(
tmp_audio32b, orig_sr=32000, target_sr=16000
) # not a resampling issue
tensor_wav16 = torch.from_numpy(tmp_audio)
if (is_half == True):
# if half-precision is enabled, convert the tensor to half-precision
if is_half:
tensor_wav16 = tensor_wav16.half().to(device)
else:
tensor_wav16 = tensor_wav16.to(device)
# Extract HuBERT features from the audio file
ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1, 2).cpu() # torch.Size([1, 768, 215])
if np.isnan(ssl.detach().numpy()).sum() != 0:
nan_fails.append((wav_name, wav_path))
print("nan filtered:%s"%wav_name)
print(f"nan filtered:{wav_name}")
return
wavfile.write(
"%s/%s"%(wav32dir,wav_name),
f"{wav32dir}/{wav_name}",
32000,
tmp_audio32.astype("int16"),
)
my_save(ssl, hubert_path)
with open(inp_text, "r", encoding="utf8")as f:
lines = f.read().strip("\n").split("\n")
for line in lines[int(i_part)::int(all_parts)]:
for line in tqdm(lines[int(i_part)::int(all_parts)]):
try:
# wav_name,text=line.split("\t")
wav_name, spk_name, language, text = line.split("|")
wav_name = clean_path(wav_name)
if (inp_wav_dir != "" and inp_wav_dir != None):
if (inp_wav_dir != "" and inp_wav_dir is not None):
wav_name = os.path.basename(wav_name)
wav_path = "%s/%s"%(inp_wav_dir, wav_name)
wav_path = f"{inp_wav_dir}/{wav_name}"
else:
wav_path = wav_name
@ -113,7 +148,7 @@ for line in lines[int(i_part)::int(all_parts)]:
except:
print(line, traceback.format_exc())
if(len(nan_fails)>0 and is_half==True):
if (len(nan_fails) > 0 and is_half):
is_half = False
model = model.float()
for wav in nan_fails:

View File

@ -1,4 +1,12 @@
import logging
import os
import sys
import traceback
import torch
import utils
from tools.my_utils import clean_path
inp_text = os.environ.get("inp_text")
exp_name = os.environ.get("exp_name")
@ -10,8 +18,10 @@ opt_dir = os.environ.get("opt_dir")
pretrained_s2G = os.environ.get("pretrained_s2G")
s2config_path = os.environ.get("s2config_path")
if os.path.exists(pretrained_s2G):...
else:raise FileNotFoundError(pretrained_s2G)
if os.path.exists(pretrained_s2G):
...
else:
raise FileNotFoundError(pretrained_s2G)
# version=os.environ.get("version","v2")
size = os.path.getsize(pretrained_s2G)
if size < 82978 * 1024:
@ -24,24 +34,14 @@ elif size < 700 * 1024 * 1024:
version = "v2"
else:
version = "v3"
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import math, traceback
import multiprocessing
import sys, pdb
now_dir = os.getcwd()
sys.path.append(now_dir)
from random import shuffle
import torch.multiprocessing as mp
from glob import glob
from tqdm import tqdm
import logging, librosa, utils
if version != "v3":
from module.models import SynthesizerTrn
else:
from module.models import SynthesizerTrnV3 as SynthesizerTrn
from tools.my_utils import clean_path
logging.getLogger("numba").setLevel(logging.WARNING)
# from config import pretrained_s2G