mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-09-29 00:30:15 +08:00
feat: v1 and v2 both work for export
This commit is contained in:
parent
bc7fe01876
commit
403c5bf320
@ -206,7 +206,7 @@ class T2SModel(nn.Module):
|
||||
class VitsModel(nn.Module):
|
||||
def __init__(self, vits_path, version:str = 'v2'):
|
||||
super().__init__()
|
||||
dict_s2 = torch.load(vits_path, map_location="cpu")
|
||||
dict_s2 = torch.load(vits_path, map_location="cpu", weights_only=False)
|
||||
self.hps = dict_s2["config"]
|
||||
if dict_s2["weight"]["enc_p.text_embedding.weight"].shape[0] == 322:
|
||||
self.hps["model"]["version"] = "v1"
|
||||
@ -400,11 +400,11 @@ if __name__ == "__main__":
|
||||
except:
|
||||
pass
|
||||
|
||||
# gpt_path = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
|
||||
# vits_path = "GPT_SoVITS/pretrained_models/s2G488k.pth"
|
||||
# exp_path = "v1_export"
|
||||
# version = "v1"
|
||||
# export(vits_path, gpt_path, exp_path, version)
|
||||
gpt_path = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
|
||||
vits_path = "GPT_SoVITS/pretrained_models/s2G488k.pth"
|
||||
exp_path = "v1_export"
|
||||
version = "v1"
|
||||
export(vits_path, gpt_path, exp_path, version)
|
||||
|
||||
gpt_path = "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
|
||||
vits_path = "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth"
|
||||
|
@ -7,7 +7,7 @@ import torch
|
||||
from TTS_infer_pack.TextPreprocessor_onnx import TextPreprocessorOnnx
|
||||
|
||||
|
||||
MODEL_PATH = "onnx/v2proplus_export/v2proplus"
|
||||
MODEL_PATH = "onnx/v1_export/v1"
|
||||
|
||||
def audio_postprocess(
|
||||
audios,
|
||||
@ -56,7 +56,7 @@ def audio_preprocess(audio_path):
|
||||
|
||||
def preprocess_text(text:str):
|
||||
preprocessor = TextPreprocessorOnnx("playground/bert")
|
||||
[phones, bert_features, norm_text] = preprocessor.segment_and_extract_feature_for_text(text, 'all_zh', 'v2')
|
||||
[phones, bert_features, norm_text] = preprocessor.segment_and_extract_feature_for_text(text, 'all_zh', 'v1')
|
||||
phones = np.expand_dims(np.array(phones, dtype=np.int64), axis=0)
|
||||
return phones, bert_features.T.astype(np.float32)
|
||||
|
||||
@ -123,7 +123,7 @@ vtis = ort.InferenceSession(MODEL_PATH+"_export_vits.onnx")
|
||||
"input_text_phones": input_phones,
|
||||
"pred_semantic": pred_semantic,
|
||||
"spectrum": spectrum.astype(np.float32),
|
||||
"sv_emb": sv_emb.astype(np.float32)
|
||||
# "sv_emb": sv_emb.astype(np.float32)
|
||||
})
|
||||
|
||||
audio_postprocess([audio])
|
||||
|
Loading…
x
Reference in New Issue
Block a user