Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git (synced 2025-09-29 08:49:59 +08:00)
chore: change to Python f-string syntax for higher readability
parent fdf794e31d · commit 44499eb5bd
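
This commit mechanically replaces printf-style "%" string formatting with f-strings across the files below; behavior is intended to be unchanged, only readability improves. A minimal before/after sketch with an illustrative value, not taken from the diff:

    now_dir = "/path/to/repo"           # illustrative
    old = "%s/GPT_SoVITS" % (now_dir)   # printf-style: template and value kept apart
    new = f"{now_dir}/GPT_SoVITS"       # f-string: expression interpolated inline
    assert old == new
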
@@ -61,11 +61,7 @@ class Text2SemanticDataset(Dataset):
         )
         # get dict
         self.path2 = phoneme_path  # "%s/2-name2text.txt"%exp_dir#phoneme_path
-        self.path3 = "%s/3-bert" % (
-            os.path.dirname(
-                phoneme_path,
-            )
-        )  # "%s/3-bert"%exp_dir#bert_dir
+        self.path3 = f"{os.path.dirname(phoneme_path)}/3-bert"  # "%s/3-bert"%exp_dir#bert_dir
         self.path6 = semantic_path  # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
         assert os.path.exists(self.path2)
         assert os.path.exists(self.path6)

@@ -219,7 +215,7 @@ class Text2SemanticDataset(Dataset):
         semantic_ids_len = len(semantic_ids)

         flag = 0
-        path_bert = "%s/%s.pt" % (self.path3, item_name)
+        path_bert = f"{self.path3}/{item_name}.pt"
         if os.path.exists(path_bert) == True:
             bert_feature = torch.load(path_bert, map_location="cpu")
         else:

@@ -26,5 +26,5 @@ def write_args(args, path):
     args_file.write(str(sys.argv))
     args_file.write("\n==> args:\n")
     for k, v in sorted(args_dict.items()):
-        args_file.write("  %s: %s\n" % (str(k), str(v)))
+        args_file.write(f"  {str(k)}: {str(v)}\n")
     args_file.close()

@@ -41,7 +41,7 @@ resample_transform_dict = {}

 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{str(device)}"
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)

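This function memoizes one torchaudio.transforms.Resample module per (source rate, target rate, device) triple, using the f-string as the cache key. A runnable usage sketch, with a fake tensor and torchaudio assumed installed:

    import torch
    import torchaudio

    resample_transform_dict = {}

    def resample(audio_tensor, sr0, sr1, device):
        # Build the resampling kernel once per (sr0, sr1, device), then reuse it.
        key = f"{sr0}-{sr1}-{str(device)}"
        if key not in resample_transform_dict:
            resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
        return resample_transform_dict[key](audio_tensor)

    wav32k = torch.randn(1, 32000)                  # one second of fake 32 kHz audio
    wav24k = resample(wav32k, 32000, 24000, "cpu")  # first call builds and caches
    wav24k = resample(wav32k, 32000, 24000, "cpu")  # second call is a cache hit
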
@@ -489,7 +489,7 @@ class TTS:
         path_sovits = self.configs.default_configs[model_version]["vits_weights_path"]

         if if_lora_v3 == True and os.path.exists(path_sovits) == False:
-            info = path_sovits + i18n("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
+            info = path_sovits + i18n(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重")
             raise FileExistsError(info)

         # dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)

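The message reads "SoVITS {model_version} base model is missing; the corresponding LoRA weights cannot be loaded." One caveat worth flagging: in both the old and the new form the string is interpolated before it reaches i18n(), so the lookup key varies with model_version and a dictionary-based translation table will not match it. A lookup-friendly alternative, offered here as an editorial sketch rather than part of the commit:

    # Keep the placeholder inside the translated template so the i18n key stays
    # constant, then substitute after the lookup.
    info = path_sovits + i18n("SoVITS %s 底模缺失,无法加载相应 LoRA 权重") % model_version
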
@@ -608,7 +608,7 @@ class TTS:
         self.empty_cache()

         self.vocoder = BigVGAN.from_pretrained(
-            "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+            f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
             use_cuda_kernel=False,
         )  # if True, RuntimeError: Ninja is required to load C++ extensions
         # remove weight norm in the model and set to eval mode

@@ -641,7 +641,7 @@ class TTS:
         )
         self.vocoder.remove_weight_norm()
         state_dict_g = torch.load(
-            "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+            f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
             map_location="cpu",
             weights_only=False,
         )

@@ -143,7 +143,7 @@ def cut2(inp):
 @register_method("cut3")
 def cut3(inp):
     inp = inp.strip("\n")
-    opts = ["%s" % item for item in inp.strip("。").split("。")]
+    opts = [f"{item}" for item in inp.strip("。").split("。")]
     opts = [item for item in opts if not set(item).issubset(punctuation)]
     return "\n".join(opts)

@@ -625,18 +625,7 @@ def fbank(
     # size (num_mel_bins, padded_window_size // 2)
     # print(num_mel_bins, padded_window_size, sample_frequency, low_freq, high_freq, vtln_low, vtln_high, vtln_warp)

-    cache_key = "%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % (
-        num_mel_bins,
-        padded_window_size,
-        sample_frequency,
-        low_freq,
-        high_freq,
-        vtln_low,
-        vtln_high,
-        vtln_warp,
-        device,
-        dtype,
-    )
+    cache_key = f"{num_mel_bins}-{padded_window_size}-{sample_frequency}-{low_freq}-{high_freq}-{vtln_low}-{vtln_high}-{vtln_warp}-{device}-{dtype}"
     if cache_key not in cache:
         mel_energies = get_mel_banks(
             num_mel_bins,

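Since every component here is hashable (torch.device and torch.dtype included), a plain tuple would serve equally well as the dict key and skips string formatting entirely; a sketch of that alternative, not what the commit does:

    # Tuple key variant: hashable, no formatting, no delimiter ambiguity.
    cache_key = (num_mel_bins, padded_window_size, sample_frequency, low_freq,
                 high_freq, vtln_low, vtln_high, vtln_warp, device, dtype)
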
@@ -505,7 +505,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan

     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode

@@ -533,7 +533,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), map_location="cpu"
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu"
     )
     print("loading vocoder", hifigan_model.load_state_dict(state_dict_g))
     if is_half == True:

@@ -1042,7 +1042,7 @@ def test_export(
     wav_gen = wav_gen[:, :, :wav_gen_length]

     audio = wav_gen[0][0].cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     sr = 24000
     soundfile.write(output, (audio * 32768).astype(np.int16), sr)

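A side note on this pair: logger.info("end bigvgan %s", value) stores the template and argument on the log record and merges them only when a handler actually emits the record, while the f-string interpolates before logging ever sees the message. Harmless for one timestamp, but worth remembering when converting hot-path log calls. A small self-contained illustration:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("demo")

    ts = "2025-01-01 00:00:00"           # illustrative timestamp
    logger.info("end bigvgan %s", ts)    # formatting deferred to the handler
    logger.info(f"end bigvgan {ts}")     # formatting happens up front
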
@@ -1115,7 +1115,7 @@ def test_export(
     wav_gen = torch.cat([wav_gen, zero_wav_torch], 0)

     audio = wav_gen.cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     soundfile.write(output, (audio * 32768).astype(np.int16), out_sr)

@@ -235,7 +235,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
     if if_lora_v3 == True and is_exist == False:
-        info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重")
+        info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 LoRA 权重")
         gr.Warning(info)
         raise FileExistsError(info)
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2

@@ -320,7 +320,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     vq_model = vq_model.to(device)
     vq_model.eval()
     if if_lora_v3 == False:
-        print("loading sovits_%s" % model_version, vq_model.load_state_dict(dict_s2["weight"], strict=False))
+        print(f"loading sovits_{model_version}", vq_model.load_state_dict(dict_s2["weight"], strict=False))
     else:
         path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
         print(

@@ -335,7 +335,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
             init_lora_weights=True,
         )
         vq_model.cfm = get_peft_model(vq_model.cfm, lora_config)
-        print("loading sovits_%s_lora%s" % (model_version, lora_rank))
+        print(f"loading sovits_{model_version}_lora{lora_rank}")
         vq_model.load_state_dict(dict_s2["weight"], strict=False)
         vq_model.cfm = vq_model.cfm.merge_and_unload()
         # torch.save(vq_model.state_dict(),"merge_win.pth")

@@ -442,7 +442,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan

     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode

@@ -472,7 +472,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
         map_location="cpu",
         weights_only=False,
     )

@@ -508,7 +508,7 @@ resample_transform_dict = {}

 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{str(device)}"
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)

@@ -1062,7 +1062,7 @@ def cut2(inp):

 def cut3(inp):
     inp = inp.strip("\n")
-    opts = ["%s" % item for item in inp.strip("。").split("。")]
+    opts = [f"{item}" for item in inp.strip("。").split("。")]
     opts = [item for item in opts if not set(item).issubset(punctuation)]
     return "\n".join(opts)

@@ -31,7 +31,7 @@ import torch

 now_dir = os.getcwd()
 sys.path.append(now_dir)
-sys.path.append("%s/GPT_SoVITS" % (now_dir))
+sys.path.append(f"{now_dir}/GPT_SoVITS")

 logging.getLogger("markdown_it").setLevel(logging.ERROR)
 logging.getLogger("urllib3").setLevel(logging.ERROR)

@@ -239,7 +239,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
     if if_lora_v3 == True and is_exist == False:
-        info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重")
+        info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 LoRA 权重")
         gr.Warning(info)
         raise FileExistsError(info)
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2

@@ -23,15 +23,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):

     def __init__(self, hparams, version=None, val=False):
         exp_dir = hparams.exp_dir
-        self.path2 = "%s/2-name2text.txt" % exp_dir
-        self.path4 = "%s/4-cnhubert" % exp_dir
-        self.path5 = "%s/5-wav32k" % exp_dir
+        self.path2 = f"{exp_dir}/2-name2text.txt"
+        self.path4 = f"{exp_dir}/4-cnhubert"
+        self.path5 = f"{exp_dir}/5-wav32k"
         assert os.path.exists(self.path2)
         assert os.path.exists(self.path4)
         assert os.path.exists(self.path5)
         self.is_v2Pro = version in {"v2Pro", "v2ProPlus"}
         if self.is_v2Pro:
-            self.path7 = "%s/7-sv_cn" % exp_dir
+            self.path7 = f"{exp_dir}/7-sv_cn"
             assert os.path.exists(self.path7)
         names4 = set([name[:-3] for name in list(os.listdir(self.path4))])  # 去除.pt后缀
         names5 = set(os.listdir(self.path5))

@@ -85,7 +85,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
                 skipped_phone += 1
                 continue

-            size = os.path.getsize("%s/%s" % (self.path5, audiopath))
+            size = os.path.getsize(f"{self.path5}/{audiopath}")
             duration = size / self.sampling_rate / 2

             if duration == 0:

@@ -110,9 +110,9 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
         audiopath, phoneme_ids = audiopath_sid_text
         text = torch.FloatTensor(phoneme_ids)
         try:
-            spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath))
+            spec, wav = self.get_audio(f"{self.path5}/{audiopath}")
             with torch.no_grad():
-                ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu")
+                ssl = torch.load(f"{self.path4}/{audiopath}.pt", map_location="cpu")
                 if ssl.shape[-1] != spec.shape[-1]:
                     typee = ssl.dtype
                     ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee)

@@ -46,7 +46,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
     global hann_window
     dtype_device = str(y.dtype) + "_" + str(y.device)
     # wnsize_dtype_device = str(win_size) + '_' + dtype_device
-    key = "%s-%s-%s-%s-%s" % (dtype_device, n_fft, sampling_rate, hop_size, win_size)
+    key = f"{dtype_device}-{n_fft}-{sampling_rate}-{hop_size}-{win_size}"
     # if wnsize_dtype_device not in hann_window:
     if key not in hann_window:
         # hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)

@@ -78,7 +78,7 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
     global mel_basis
     dtype_device = str(spec.dtype) + "_" + str(spec.device)
     # fmax_dtype_device = str(fmax) + '_' + dtype_device
-    key = "%s-%s-%s-%s-%s-%s" % (dtype_device, n_fft, num_mels, sampling_rate, fmin, fmax)
+    key = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{fmin}-{fmax}"
     # if fmax_dtype_device not in mel_basis:
     if key not in mel_basis:
         mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)

@@ -99,16 +99,7 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size,
     global mel_basis, hann_window
     dtype_device = str(y.dtype) + "_" + str(y.device)
     # fmax_dtype_device = str(fmax) + '_' + dtype_device
-    fmax_dtype_device = "%s-%s-%s-%s-%s-%s-%s-%s" % (
-        dtype_device,
-        n_fft,
-        num_mels,
-        sampling_rate,
-        hop_size,
-        win_size,
-        fmin,
-        fmax,
-    )
+    fmax_dtype_device = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{hop_size}-{win_size}-{fmin}-{fmax}"
     # wnsize_dtype_device = str(win_size) + '_' + dtype_device
     wnsize_dtype_device = fmax_dtype_device
     if fmax_dtype_device not in mel_basis:

@@ -12,9 +12,9 @@ i18n = I18nAuto()
 def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
     dir = os.path.dirname(path)
     name = os.path.basename(path)
-    tmp_path = "%s.pth" % (ttime())
+    tmp_path = f"{ttime()}.pth"
     torch.save(fea, tmp_path)
-    shutil.move(tmp_path, "%s/%s" % (dir, name))
+    shutil.move(tmp_path, f"{dir}/{name}")


 from io import BytesIO

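my_save exists to dodge torch.save failures on non-ASCII (e.g. Chinese) paths: it saves under an ASCII temp name in the working directory, then moves the file into place. A self-contained sketch of the same pattern; the payload and target path are illustrative:

    import os
    import shutil
    import torch
    from time import time as ttime

    def my_save(fea, path):
        # torch.save can fail on non-ASCII paths on some platforms, so write to
        # an ASCII temp name first and let shutil.move handle the final rename.
        dir = os.path.dirname(path)
        name = os.path.basename(path)
        tmp_path = f"{ttime()}.pth"  # e.g. "1727568599.123.pth" in the CWD
        torch.save(fea, tmp_path)
        shutil.move(tmp_path, f"{dir}/{name}")

    os.makedirs("输出", exist_ok=True)                    # "output" directory
    my_save({"weight": torch.zeros(3)}, "输出/模型.pth")  # "output/model.pth"
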
@@ -47,14 +47,14 @@ def savee(ckpt, name, epoch, steps, hps, model_version=None, lora_rank=None):
                 continue
             opt["weight"][key] = ckpt[key].half()
         opt["config"] = hps
-        opt["info"] = "%sepoch_%siteration" % (epoch, steps)
+        opt["info"] = f"{epoch}epoch_{steps}iteration"
         if lora_rank:
             opt["lora_rank"] = lora_rank
-            my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
+            my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
         elif model_version != None and "Pro" in model_version:
-            my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
+            my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
         else:
-            my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
+            my_save(opt, f"{hps.save_weight_dir}/{name}.pth")
         return "Success."
     except:
         return traceback.format_exc()

@@ -44,7 +44,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
             )
         except:
             traceback.print_exc()
-            print("error, %s is not in the checkpoint" % k)  # shape不对也会,比如text_embedding当cleaner修改时
+            print(f"error, {k} is not in the checkpoint")  # shape不对也会,比如text_embedding当cleaner修改时
             new_state_dict[k] = v
     if hasattr(model, "module"):
         model.module.load_state_dict(new_state_dict)

@@ -67,9 +67,9 @@ from time import time as ttime
 def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
     dir = os.path.dirname(path)
     name = os.path.basename(path)
-    tmp_path = "%s.pth" % (ttime())
+    tmp_path = f"{ttime()}.pth"
     torch.save(fea, tmp_path)
-    shutil.move(tmp_path, "%s/%s" % (dir, name))
+    shutil.move(tmp_path, f"{dir}/{name}")


 def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):

api.py (10 changed lines)

@@ -147,7 +147,7 @@ import sys

 now_dir = os.getcwd()
 sys.path.append(now_dir)
-sys.path.append("%s/GPT_SoVITS" % (now_dir))
+sys.path.append(f"{now_dir}/GPT_SoVITS")

 import signal
 from text.LangSegmenter import LangSegmenter

@@ -239,7 +239,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan

     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode

@@ -268,7 +268,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
         map_location="cpu",
         weights_only=False,
     )

@@ -292,7 +292,7 @@ resample_transform_dict = {}

 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{str(device)}"
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)

@@ -391,7 +391,7 @@ def get_sovits_weights(sovits_path):
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4

     if if_lora_v3 == True and is_exist == False:
-        logger.info("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
+        logger.info(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重")

     dict_s2 = load_sovits_new(sovits_path)
     hps = dict_s2["config"]

@@ -105,7 +105,7 @@ from typing import Generator

 now_dir = os.getcwd()
 sys.path.append(now_dir)
-sys.path.append("%s/GPT_SoVITS" % (now_dir))
+sys.path.append(f"{now_dir}/GPT_SoVITS")

 import argparse
 import subprocess

@@ -93,7 +93,7 @@ def get_weights_names():
             continue
         for name in os.listdir(path):
             if name.endswith(".pth"):
-                SoVITS_names.append("%s/%s" % (path, name))
+                SoVITS_names.append(f"{path}/{name}")
     if not SoVITS_names:
         SoVITS_names = [""]
     GPT_names = []

@@ -105,7 +105,7 @@ def get_weights_names():
             continue
         for name in os.listdir(path):
             if name.endswith(".ckpt"):
-                GPT_names.append("%s/%s" % (path, name))
+                GPT_names.append(f"{path}/{name}")
     SoVITS_names = sorted(SoVITS_names, key=custom_sort_key)
     GPT_names = sorted(GPT_names, key=custom_sort_key)
     if not GPT_names:

@@ -16,7 +16,7 @@ from models.model import APNet_BWE_Model
 class AP_BWE:
     def __init__(self, device, DictToAttrRecursive, checkpoint_file=None):
         if checkpoint_file == None:
-            checkpoint_file = "%s/24kto48k/g_24kto48k.zip" % (AP_BWE_main_dir_path)
+            checkpoint_file = f"{AP_BWE_main_dir_path}/24kto48k/g_24kto48k.zip"
         if os.path.exists(checkpoint_file) == False:
             raise FileNotFoundError
         config_file = os.path.join(os.path.split(checkpoint_file)[0], "config.json")

@@ -17,7 +17,7 @@ def execute_denoise(input_folder, output_folder):
     # print(list(os.listdir(input_folder).sort()))
     for name in tqdm(os.listdir(input_folder)):
         try:
-            ans("%s/%s" % (input_folder, name), output_path="%s/%s" % (output_folder, name))
+            ans(f"{input_folder}/{name}", output_path=f"{output_folder}/{name}")
         except:
             traceback.print_exc()

@@ -103,7 +103,7 @@ def check_details(path_list=None, is_train=False, is_dataset_processing=False):
         wav_name = clean_path(wav_name)
         if audio_path != "" and audio_path != None:
             wav_name = os.path.basename(wav_name)
-            wav_path = "%s/%s" % (audio_path, wav_name)
+            wav_path = f"{audio_path}/{wav_name}"
         else:
             wav_path = wav_name
         if os.path.exists(wav_path):

@@ -40,7 +40,7 @@ def slice(inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_
                 chunk /= tmp_max
             chunk = (chunk / tmp_max * (_max * alpha)) + (1 - alpha) * chunk
             wavfile.write(
-                "%s/%s_%010d_%010d.wav" % (opt_root, name, start, end),
+                f"{opt_root}/{name}_{start:010d}_{end:010d}.wav",
                 32000,
                 # chunk.astype(np.float32),
                 (chunk * 32767).astype(np.int16),

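The %010d width specifier carries over verbatim as the f-string format spec {start:010d}. A quick equivalence check with made-up values:

    start, end = 123, 45678
    old = "%s/%s_%010d_%010d.wav" % ("out", "clip", start, end)
    new = f"out/clip_{start:010d}_{end:010d}.wav"
    assert old == new  # both give "out/clip_0000000123_0000045678.wav"
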
@@ -219,7 +219,7 @@ def main():
         soundfile.write(
             os.path.join(
                 out,
-                "%s_%d.wav" % (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
+                f"{os.path.basename(args.audio).rsplit('.', maxsplit=1)[0]}_{i}.wav",
             ),
             chunk,
             sr,

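Note the quote switch inside the replacement line: the embedded rsplit('.', maxsplit=1) uses single quotes because, before Python 3.12, an f-string expression may not reuse the f-string's own quote character. A minimal illustration with an invented file name:

    import os

    audio = "ref voice.flac"  # illustrative
    i = 3
    print(f"{os.path.basename(audio).rsplit('.', maxsplit=1)[0]}_{i}.wav")  # ref voice_3.wav
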
@@ -92,19 +92,16 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                 need_reformat = 1
                 traceback.print_exc()
             if need_reformat == 1:
-                tmp_path = "%s/%s.reformatted.wav" % (
-                    os.path.join(os.environ["TEMP"]),
-                    os.path.basename(inp_path),
-                )
+                tmp_path = f"{os.path.join(os.environ['TEMP'])}/{os.path.basename(inp_path)}.reformatted.wav"
                 os.system(f'ffmpeg -i "{inp_path}" -vn -acodec pcm_s16le -ac 2 -ar 44100 "{tmp_path}" -y')
                 inp_path = tmp_path
             try:
                 if done == 0:
                     pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal, format0, is_hp3)
-                infos.append("%s->Success" % (os.path.basename(inp_path)))
+                infos.append(f"{os.path.basename(inp_path)}->Success")
                 yield "\n".join(infos)
             except:
-                infos.append("%s->%s" % (os.path.basename(inp_path), traceback.format_exc()))
+                infos.append(f"{os.path.basename(inp_path)}->{traceback.format_exc()}")
                 yield "\n".join(infos)
         except:
             infos.append(traceback.format_exc())

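os.path.join with a single argument returns it unchanged, so the interpolated expression is effectively just os.environ["TEMP"], which only exists on Windows. A portable variant would lean on tempfile; this is an editorial sketch, not what the code does:

    import os
    import tempfile

    inp_path = "mix.flac"  # illustrative input
    tmp_path = f"{tempfile.gettempdir()}/{os.path.basename(inp_path)}.reformatted.wav"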