mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-04-29 21:00:42 +08:00
commit
60414d25a3
@ -99,7 +99,12 @@ class ZIP_File:
|
|||||||
fl.delete_dir(self.temp_write)
|
fl.delete_dir(self.temp_write)
|
||||||
POOL.remove(self.name)
|
POOL.remove(self.name)
|
||||||
|
|
||||||
def save_tensor(path: str, tensors: Union[torch.Tensor, list],name:str,MySet:set=set(),file_names:Union[str,list,None]=None,**info_save) -> None:
|
def save_tensor(path: str,
|
||||||
|
tensors: Union[torch.Tensor, list],
|
||||||
|
name:str,
|
||||||
|
MySet:set=set(),
|
||||||
|
file_names:Union[str,list,None]=None,
|
||||||
|
**info_save,) -> None:
|
||||||
if isinstance(tensors, torch.Tensor):
|
if isinstance(tensors, torch.Tensor):
|
||||||
tensors = [tensors]
|
tensors = [tensors]
|
||||||
if not file_names:
|
if not file_names:
|
||||||
@ -109,6 +114,7 @@ def save_tensor(path: str, tensors: Union[torch.Tensor, list],name:str,MySet:set
|
|||||||
else:
|
else:
|
||||||
files = file_names
|
files = file_names
|
||||||
|
|
||||||
|
print(f"length of tensors: {len(tensors)}, length of files: {len(files)}")
|
||||||
if len(tensors) != len(files):
|
if len(tensors) != len(files):
|
||||||
raise ValueError("The number of tensors and files must be the same.")
|
raise ValueError("The number of tensors and files must be the same.")
|
||||||
np_arrays = []
|
np_arrays = []
|
||||||
@ -128,7 +134,10 @@ def save_tensor(path: str, tensors: Union[torch.Tensor, list],name:str,MySet:set
|
|||||||
zf.close()
|
zf.close()
|
||||||
del zf
|
del zf
|
||||||
|
|
||||||
def load_tensor(path: str,name:str,find_func,MySet:set=set()) -> list[torch.Tensor]:
|
def load_tensor(path: str,
|
||||||
|
name:str,
|
||||||
|
find_func,
|
||||||
|
MySet:set=set(),) -> list[torch.Tensor]:
|
||||||
zf = ZIP_File(path, name, MySet=MySet)
|
zf = ZIP_File(path, name, MySet=MySet)
|
||||||
zf.release()
|
zf.release()
|
||||||
voice_path = find_func(zf,il)
|
voice_path = find_func(zf,il)
|
||||||
@ -141,3 +150,29 @@ def load_tensor(path: str,name:str,find_func,MySet:set=set()) -> list[torch.Tens
|
|||||||
zf.close()
|
zf.close()
|
||||||
del zf
|
del zf
|
||||||
return tensors
|
return tensors
|
||||||
|
|
||||||
|
def add_tensor(add:list[torch.Tensor],
               path: str,
               name:str,
               find_func,
               MySet:set=set(),
               file_names:Union[str,list,None]=None,
               **info_save,):
    """Append tensors to an existing archive by loading it, extending it and saving it again.

    Args:
        add: Tensors appended after the ones already stored in the archive.
        path: Archive location, forwarded to both ``load_tensor`` and ``save_tensor``.
        name: Name forwarded to both ``load_tensor`` and ``save_tensor``.
        find_func: Callback forwarded to ``load_tensor`` to locate the stored files.
        MySet: Forwarded unchanged to both calls.
            NOTE: the default set is created once at definition time and is
            therefore shared by every call that omits this argument.
        file_names: Forwarded to ``save_tensor``. When given it has to name the
            combined list (stored tensors plus ``add``), because ``save_tensor``
            raises ``ValueError`` if the tensor and file counts differ.
        **info_save: Extra keyword arguments forwarded to ``save_tensor``.
    """
    # Start from what the archive currently holds ...
    merged = load_tensor(path=path, name=name, find_func=find_func, MySet=MySet)
    # ... append the new tensors in place ...
    merged.extend(add)
    # ... and write the combined list back to the same location.
    save_tensor(path, merged, name, MySet=MySet, file_names=file_names, **info_save)
|
||||||
|
|
||||||
|
def __find_func__(zf,il):
    """Resolve the paths of the files listed in an archive's ``voice.json``.

    Args:
        zf: Object exposing ``get_file_path(name)``; it is expected to raise
            ``FileNotFoundError`` for entries it cannot provide.
        il: Object exposing ``load_info(path)`` that returns the parsed
            ``voice.json`` content, or ``None``.

    Returns:
        ``None`` when ``load_info`` returns ``None``; otherwise the list of
        paths for every ``access_list`` entry that could be located, in
        listing order. Entries that raise ``FileNotFoundError`` are skipped.
    """
    manifest = il.load_info(zf.get_file_path("voice.json"))
    if manifest is None:
        return None

    resolved = []
    for entry in manifest["access_list"]:
        try:
            located = zf.get_file_path(entry)
        except FileNotFoundError:
            # Listed in the manifest but not available: leave it out.
            continue
        resolved.append(located)
    return resolved
|
||||||
7
GPT_SoVITS/config.json
Normal file
7
GPT_SoVITS/config.json
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"running_on" : "local",
|
||||||
|
"Default":{
|
||||||
|
"GPT_Path": "不训练直接推v3底模!",
|
||||||
|
"SoVITS_Path": "不训练直接推v2ProPlus底模!"
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -24,6 +24,7 @@ class CNHubert(nn.Module):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
if base_path is None:
|
if base_path is None:
|
||||||
base_path = cnhubert_base_path
|
base_path = cnhubert_base_path
|
||||||
|
print(f"Loading CN-Hubert from \"{base_path}\"")
|
||||||
if os.path.exists(base_path):
|
if os.path.exists(base_path):
|
||||||
...
|
...
|
||||||
else:
|
else:
|
||||||
@ -69,6 +70,7 @@ class CNHubert(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
def get_model():
|
def get_model():
|
||||||
|
print("cnhubert_base_path:", cnhubert_base_path)
|
||||||
model = CNHubert()
|
model = CNHubert()
|
||||||
model.eval()
|
model.eval()
|
||||||
return model
|
return model
|
||||||
|
|||||||
@ -9,7 +9,12 @@
|
|||||||
import psutil
|
import psutil
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import uuid
|
||||||
|
from scipy.io.wavfile import write
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_my_dir():
|
def get_my_dir():
|
||||||
return os.path.dirname(os.path.abspath(__file__))
|
return os.path.dirname(os.path.abspath(__file__))
|
||||||
@ -23,6 +28,12 @@ def get_parent_dir(dir_path,depth=1):
|
|||||||
def merge_dir_txt2(*TXT):
|
def merge_dir_txt2(*TXT):
|
||||||
return Path(os.path.join(*TXT))
|
return Path(os.path.join(*TXT))
|
||||||
|
|
||||||
|
# Parse the config.json that sits next to this module.
with open(merge_dir_txt2(get_my_dir(), "config.json"), "r", encoding="utf-8") as f:
    config_json = json.load(f)

# Value of the "running_on" key; close_serv compares it against "local".
running_on = config_json["running_on"]
# Table of default model selections ("GPT_Path" / "SoVITS_Path").
Default = config_json["Default"]
|
||||||
|
|
||||||
ROOT_DIR = str(get_parent_dir(get_my_dir()))
|
ROOT_DIR = str(get_parent_dir(get_my_dir()))
|
||||||
sys.path.append(get_my_dir())
|
sys.path.append(get_my_dir())
|
||||||
import VoiceSave
|
import VoiceSave
|
||||||
@ -115,6 +126,7 @@ with open("./weight.json", "r", encoding="utf-8") as file:
|
|||||||
if isinstance(sovits_path, list):
|
if isinstance(sovits_path, list):
|
||||||
sovits_path = sovits_path[0]
|
sovits_path = sovits_path[0]
|
||||||
|
|
||||||
|
|
||||||
# print(2333333)
|
# print(2333333)
|
||||||
# print(os.environ["gpt_path"])
|
# print(os.environ["gpt_path"])
|
||||||
# print(gpt_path)
|
# print(gpt_path)
|
||||||
@ -141,7 +153,7 @@ import numpy as np
|
|||||||
from feature_extractor import cnhubert
|
from feature_extractor import cnhubert
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
|
|
||||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
cnhubert.cnhubert_base_path = merge_dir_txt2(ROOT_DIR, cnhubert_base_path)
|
||||||
|
|
||||||
import random
|
import random
|
||||||
|
|
||||||
@ -175,6 +187,12 @@ language = os.environ.get("language", "Auto")
|
|||||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
i18n = I18nAuto(language=language)
|
i18n = I18nAuto(language=language)
|
||||||
|
|
||||||
|
|
||||||
|
# No checkpoint selected: fall back to the defaults named in config.json,
# resolved through the name -> path tables and anchored at ROOT_DIR.
if gpt_path is None or gpt_path == "":
    gpt_path = str(merge_dir_txt2(ROOT_DIR, name2gpt_path[i18n(Default["GPT_Path"])]))
if sovits_path is None or sovits_path == "":
    sovits_path = str(merge_dir_txt2(ROOT_DIR, name2sovits_path[i18n(Default["SoVITS_Path"])]))
|
||||||
|
|
||||||
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
@ -205,8 +223,8 @@ dict_language_v2 = {
|
|||||||
}
|
}
|
||||||
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(bert_path)
|
tokenizer = AutoTokenizer.from_pretrained(str(merge_dir_txt2(ROOT_DIR,bert_path)))
|
||||||
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
|
bert_model = AutoModelForMaskedLM.from_pretrained(str(merge_dir_txt2(ROOT_DIR,bert_path)))
|
||||||
if is_half == True:
|
if is_half == True:
|
||||||
bert_model = bert_model.half().to(device)
|
bert_model = bert_model.half().to(device)
|
||||||
else:
|
else:
|
||||||
@ -419,6 +437,7 @@ except:
|
|||||||
|
|
||||||
|
|
||||||
def change_gpt_weights(gpt_path):
|
def change_gpt_weights(gpt_path):
|
||||||
|
print("gpt_path:", gpt_path)
|
||||||
if "!" in gpt_path or "!" in gpt_path:
|
if "!" in gpt_path or "!" in gpt_path:
|
||||||
gpt_path = name2gpt_path[gpt_path]
|
gpt_path = name2gpt_path[gpt_path]
|
||||||
global hz, max_sec, t2s_model, config
|
global hz, max_sec, t2s_model, config
|
||||||
@ -816,12 +835,21 @@ def get_tts_wav(
|
|||||||
SaveSvEmbName="sv_emb.voice",
|
SaveSvEmbName="sv_emb.voice",
|
||||||
SaveRefersName="refers.voice",
|
SaveRefersName="refers.voice",
|
||||||
|
|
||||||
|
SaveGE=False,
|
||||||
|
SaveGEName="ge.voice",
|
||||||
|
|
||||||
InjectSvEmb=False,
|
InjectSvEmb=False,
|
||||||
InjectRefers=False,
|
InjectRefers=False,
|
||||||
InjectSvEmbName="sv_emb.voice",
|
InjectSvEmbName="sv_emb.voice",
|
||||||
InjectRefersName="refers.voice",
|
InjectRefersName="refers.voice",
|
||||||
|
|
||||||
EnableAudioLoad=True,
|
EnableAudioLoad=True,
|
||||||
|
|
||||||
|
SaveOutputAsUndecoded=False,
|
||||||
|
SaveOutputAsUndecodedName="output.voice",
|
||||||
|
AddRandomSaltToSaveOutputAsUndecodedName=False,
|
||||||
|
|
||||||
|
ReturnWay = "yield", # "yield" or "return"
|
||||||
):
|
):
|
||||||
global cache
|
global cache
|
||||||
if ref_wav_path:
|
if ref_wav_path:
|
||||||
@ -1041,6 +1069,60 @@ def get_tts_wav(
|
|||||||
#print("注入后refers数量:", len(refers))
|
#print("注入后refers数量:", len(refers))
|
||||||
#print("注入后sv_emb数量:", len(sv_emb) if is_v2pro else "无sv_emb")
|
#print("注入后sv_emb数量:", len(sv_emb) if is_v2pro else "无sv_emb")
|
||||||
|
|
||||||
|
try:
|
||||||
|
ges = []
|
||||||
|
for i in range(len(refers)):
|
||||||
|
if is_v2pro:
|
||||||
|
ge_ = vq_model.ge_(refers[i],sv_emb[i])
|
||||||
|
else:
|
||||||
|
ge_ = vq_model.ge_(refers[i])
|
||||||
|
ges.append(ge_)
|
||||||
|
if SaveGE:
|
||||||
|
names = []
|
||||||
|
for i in ges:
|
||||||
|
names.append(_get_unique_name(str(i.shape))+".npy")
|
||||||
|
ge_path = merge_dir_txt2(ROOT_DIR,"output","ge_opt")
|
||||||
|
if not os.path.exists(ge_path):
|
||||||
|
os.makedirs(ge_path,exist_ok=True)
|
||||||
|
if not os.path.exists(SaveGEName):
|
||||||
|
_pth_ = str(merge_dir_txt2(ROOT_DIR,"output","ge_opt",SaveGEName))
|
||||||
|
else:
|
||||||
|
_pth_ = SaveGEName
|
||||||
|
VoiceSave.save_tensor(_pth_,ges,SaveGEName,file_names=names,access_list=names)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
if AddRandomSaltToSaveOutputAsUndecodedName:
|
||||||
|
ranA = uuid.uuid4()
|
||||||
|
ranB = uuid.uuid4()
|
||||||
|
SaveOutputAsUndecodedName = f"{SaveOutputAsUndecodedName}_{ranA}_{ranB}.voice"
|
||||||
|
try:
|
||||||
|
if SaveOutputAsUndecoded:
|
||||||
|
if is_v2pro:
|
||||||
|
z_p,mask,ge = vq_model.decode2(
|
||||||
|
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0),
|
||||||
|
refers, speed=speed, sv_emb=sv_emb)
|
||||||
|
else:
|
||||||
|
z_p,mask,ge = vq_model.decode2(
|
||||||
|
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0),
|
||||||
|
refers, speed=speed)
|
||||||
|
ret = [z_p.cpu().detach(),
|
||||||
|
mask.cpu().detach(),
|
||||||
|
ge.cpu().detach()]
|
||||||
|
names = [f"z_p_{str(ret[0].shape)}",
|
||||||
|
f"mask_{str(ret[1].shape)}",
|
||||||
|
f"ge_{str(ret[2].shape)}"]
|
||||||
|
undecoded_path = merge_dir_txt2(ROOT_DIR,"output","undecoded_opt")
|
||||||
|
if not os.path.exists(undecoded_path):
|
||||||
|
os.makedirs(undecoded_path,exist_ok=True)
|
||||||
|
if not os.path.exists(SaveOutputAsUndecodedName):
|
||||||
|
_pth_ = str(merge_dir_txt2(ROOT_DIR,"output","undecoded_opt",SaveOutputAsUndecodedName))
|
||||||
|
else:
|
||||||
|
_pth_ = SaveOutputAsUndecodedName
|
||||||
|
VoiceSave.save_tensor(_pth_,ret,SaveOutputAsUndecodedName,file_names=names,access_list=names)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
if is_v2pro:
|
if is_v2pro:
|
||||||
audio = vq_model.decode(
|
audio = vq_model.decode(
|
||||||
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed, sv_emb=sv_emb
|
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed, sv_emb=sv_emb
|
||||||
@ -1127,8 +1209,215 @@ def get_tts_wav(
|
|||||||
audio_opt /= max_audio
|
audio_opt /= max_audio
|
||||||
else:
|
else:
|
||||||
audio_opt = audio_opt.cpu().detach().numpy()
|
audio_opt = audio_opt.cpu().detach().numpy()
|
||||||
yield opt_sr, (audio_opt * 32767).astype(np.int16)
|
|
||||||
|
|
||||||
|
if ReturnWay == "yield":
|
||||||
|
yield opt_sr, (audio_opt * 32767).astype(np.int16)
|
||||||
|
else:
|
||||||
|
return opt_sr, (audio_opt * 32767).astype(np.int16)
|
||||||
|
|
||||||
|
def _tts_filename_fragment(text, max_len=40):
    """Turn synthesized text into a short fragment that is safe inside a file name."""
    # Keep letters/digits (including CJK), '-' and '_'; everything else
    # (spaces, newlines, path separators, punctuation) becomes '_'.
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text.strip())
    return cleaned[:max_len]


def batched_tts_wav(
    ref_wav_path,
    prompt_text,
    prompt_language,
    texts,
    text_language,
    how_to_cut=i18n("不切"),
    top_k=20,
    top_p=0.6,
    temperature=0.6,
    ref_free=False,
    speed=1,
    if_freeze=False,
    inp_refs=None,
    sample_steps=8,
    if_sr=False,
    pause_second=0.3,

    SaveSvEmb=False,
    SaveRefers=False,
    SaveSvEmbName="sv_emb.voice",
    SaveRefersName="refers.voice",

    SaveGE=False,
    SaveGEName="ge.voice",

    InjectSvEmb=False,
    InjectRefers=False,
    InjectSvEmbName="sv_emb.voice",
    InjectRefersName="refers.voice",

    EnableAudioLoad=True,

    SaveOutputAsUndecoded=False,
    SaveOutputAsUndecodedName="output.voice",
    AddRandomSaltToSaveOutputAsUndecodedName=False,

    ReturnWay = "yield", # "yield" or "return"
    ):
    """Synthesize every entry of ``texts`` and write one WAV file per entry.

    Each non-blank text is passed to ``get_tts_wav`` together with all other
    arguments (forwarded positionally and unchanged). The first item that
    generator yields, a ``(sample_rate, int16 samples)`` pair, is written to
    a fresh ``output/tts_output/batch_<uuid>`` directory under ``ROOT_DIR``.

    Args:
        texts: Iterable of strings to synthesize. ``None`` or whitespace-only
            entries are skipped with a UI warning.
        ReturnWay: ``"yield"`` yields the output directory. Because this
            function contains ``yield`` it is always a generator; with any
            other value the directory is only delivered as the generator's
            return value (``StopIteration.value``).
        (all other arguments): see ``get_tts_wav``.

    Yields:
        The directory (a ``Path``) that holds the generated WAV files.
    """
    SaveDir = merge_dir_txt2(ROOT_DIR,"output","tts_output",f"batch_{uuid.uuid4()}")
    os.makedirs(SaveDir,exist_ok=True)

    for line_no, text in enumerate(texts or [], start=1):
        if text is None or not text.strip():
            # Report the real 1-based position of the blank entry.
            gr.Warning(i18n(f"输入文本第{line_no}行中有空行,已跳过"))
            continue

        unparsed = list(get_tts_wav(
            ref_wav_path,
            prompt_text,
            prompt_language,
            text,
            text_language,
            how_to_cut,
            top_k,
            top_p,
            temperature,
            ref_free,
            speed,
            if_freeze,
            inp_refs,
            sample_steps,
            if_sr,
            pause_second,

            SaveSvEmb,
            SaveRefers,
            SaveSvEmbName,
            SaveRefersName,

            SaveGE,
            SaveGEName,

            InjectSvEmb,
            InjectRefers,
            InjectSvEmbName,
            InjectRefersName,

            EnableAudioLoad,

            SaveOutputAsUndecoded,
            SaveOutputAsUndecodedName,
            AddRandomSaltToSaveOutputAsUndecodedName,
            "yield",
        ))
        if not unparsed:
            # Nothing was synthesized for this entry; keep processing the rest
            # of the batch instead of failing on unparsed[0].
            gr.Warning(i18n(f"输入文本第{line_no}行合成失败,已跳过"))
            continue

        sample_rate, samples = unparsed[0]
        # The uuid keeps names unique; the text fragment only aids readability,
        # so it is sanitised and truncated before it reaches the file system.
        fragment = _tts_filename_fragment(text)
        wav_path = os.path.join(SaveDir,f"tts_output_{fragment}_{str(uuid.uuid4())}.wav")
        write(wav_path, sample_rate, samples)

    if ReturnWay == "yield":
        yield SaveDir
    else:
        return SaveDir
|
||||||
|
|
||||||
|
def read_tts_batch_file(file_path):
    """Return the non-blank lines of a UTF-8 text file.

    Lines are returned exactly as read, including their trailing line break;
    lines that are empty or whitespace-only are dropped.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return [row for row in f if row.strip() != ""]
|
||||||
|
|
||||||
|
def batch_tts(
    ref_wav_path,
    prompt_text,
    prompt_language,
    text_paths,
    text_language,
    how_to_cut=i18n("不切"),
    top_k=20,
    top_p=0.6,
    temperature=0.6,
    ref_free=False,
    speed=1,
    if_freeze=False,
    inp_refs=None,
    sample_steps=8,
    if_sr=False,
    pause_second=0.3,

    SaveSvEmb=False,
    SaveRefers=False,
    SaveSvEmbName="sv_emb.voice",
    SaveRefersName="refers.voice",

    SaveGE=False,
    SaveGEName="ge.voice",

    InjectSvEmb=False,
    InjectRefers=False,
    InjectSvEmbName="sv_emb.voice",
    InjectRefersName="refers.voice",

    EnableAudioLoad=True,

    SaveOutputAsUndecoded=False,
    SaveOutputAsUndecodedName="output.voice",
    AddRandomSaltToSaveOutputAsUndecodedName=False,

    ReturnWay = "yield", # "yield" or "return"
    ):
    """Synthesize every non-blank line of the given text files.

    The files are read with ``read_tts_batch_file``, their lines are
    concatenated in order and handed to ``batched_tts_wav`` together with all
    other arguments (forwarded positionally and unchanged).

    Args:
        text_paths: A path or a list of paths to UTF-8 text files, one text
            per line. ``None`` or an empty list triggers a UI warning and
            nothing is synthesized.
        ReturnWay: ``"yield"`` yields the result. Because this function
            contains ``yield`` it is always a generator; with any other value
            the result is only delivered as the generator's return value.
        (all other arguments): see ``batched_tts_wav``.

    Yields:
        A list with everything ``batched_tts_wav`` yielded (its output
        directory).
    """
    if not text_paths:
        # Nothing was uploaded; iterating None would raise a TypeError.
        gr.Warning(i18n("未选择批量合成的文本文件"))
        return
    if isinstance(text_paths, (str, os.PathLike)):
        # A single path must not be iterated character by character.
        text_paths = [text_paths]

    text_list = []
    for text_file in text_paths:
        text_list.extend(read_tts_batch_file(text_file))

    out = list(batched_tts_wav(
        ref_wav_path,
        prompt_text,
        prompt_language,
        text_list,
        text_language,
        how_to_cut,
        top_k,
        top_p,
        temperature,
        ref_free,
        speed,
        if_freeze,
        inp_refs,
        sample_steps,
        if_sr,
        pause_second,

        SaveSvEmb,
        SaveRefers,
        SaveSvEmbName,
        SaveRefersName,

        SaveGE,
        SaveGEName,

        InjectSvEmb,
        InjectRefers,
        InjectSvEmbName,
        InjectRefersName,

        EnableAudioLoad,

        SaveOutputAsUndecoded,
        SaveOutputAsUndecodedName,
        AddRandomSaltToSaveOutputAsUndecodedName,

        "yield"
    ))

    if ReturnWay == "yield":
        yield out
    else:
        return out
|
||||||
|
def close_serv():
    """Handle the "close server" button.

    When ``running_on`` is ``"local"`` this calls ``sys.exit(0)``; for any
    other value it only shows a UI warning that the feature is unavailable.
    """
    if running_on != "local":
        gr.Warning(i18n("服务器环境下该功能不可用"))
        return
    # NOTE(review): sys.exit raises SystemExit in the calling thread only; if
    # the UI framework runs this handler off the main thread the process may
    # keep running -- confirm this actually stops the server.
    sys.exit(0)
|
||||||
|
|
||||||
def split(todo_text):
|
def split(todo_text):
|
||||||
todo_text = todo_text.replace("……", "。").replace("——", ",")
|
todo_text = todo_text.replace("……", "。").replace("——", ",")
|
||||||
@ -1307,6 +1596,112 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5, scale=1)
|
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5, scale=1)
|
||||||
|
|
||||||
|
|
||||||
|
SaveSvEmb = gr.Checkbox(
|
||||||
|
label=i18n("保存参考音频的语义向量"),
|
||||||
|
interactive=True,
|
||||||
|
show_label=True,
|
||||||
|
value = False,
|
||||||
|
visible=False if model_version not in {"v2Pro","v2ProPlus"} else True
|
||||||
|
)
|
||||||
|
SaveRefers = gr.Checkbox(
|
||||||
|
label=i18n("保存参考音频的声纹特征"),
|
||||||
|
interactive=True,
|
||||||
|
show_label=True,
|
||||||
|
value = False,
|
||||||
|
visible=True
|
||||||
|
|
||||||
|
)
|
||||||
|
SaveSvEmbName = gr.Textbox(
|
||||||
|
label=i18n("保存的语义向量文件名,默认保存在output/sv_emb_opt目录下"),
|
||||||
|
value="sv_emb.voice",
|
||||||
|
interactive=True,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
SaveRefersName = gr.Textbox(
|
||||||
|
label=i18n("保存的声纹特征文件名,默认保存在output/refers_opt目录下"),
|
||||||
|
value="refers.voice",
|
||||||
|
interactive=True,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
InjectSvEmb = gr.Checkbox(
|
||||||
|
label=i18n("注入参考音频的语义向量"),
|
||||||
|
interactive=True,
|
||||||
|
show_label=True,
|
||||||
|
value = False,
|
||||||
|
visible=False if model_version not in {"v2Pro","v2ProPlus"} else True
|
||||||
|
)
|
||||||
|
InjectRefers = gr.Checkbox(
|
||||||
|
label=i18n("注入参考音频的声纹特征"),
|
||||||
|
interactive=True,
|
||||||
|
show_label=True,
|
||||||
|
value = False,
|
||||||
|
visible=True
|
||||||
|
)
|
||||||
|
|
||||||
|
InjectSvEmbName = gr.Textbox(
|
||||||
|
label=i18n("注入的语义向量文件名,默认保存在output/sv_emb_opt目录下"),
|
||||||
|
value="sv_emb.voice",
|
||||||
|
interactive=True,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
InjectRefersName = gr.Textbox(
|
||||||
|
label=i18n("注入的声纹特征文件名,默认保存在output/refers_opt目录下"),
|
||||||
|
value="refers.voice",
|
||||||
|
interactive=True,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
EnableAudioLoad = gr.Checkbox(
|
||||||
|
label=i18n("启用音频加载。开启后会加载参考音频"),
|
||||||
|
value=True,
|
||||||
|
interactive=True,
|
||||||
|
show_label=True,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
SaveGE = gr.Checkbox(
|
||||||
|
label = i18n("保存GE"),
|
||||||
|
value = True,
|
||||||
|
interactive = True,
|
||||||
|
show_label = True,
|
||||||
|
visible = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
SaveGEName = gr.Textbox(
|
||||||
|
label = i18n("保存的GE文件名,默认保存在output/ge_opt目录下"),
|
||||||
|
value = "ge.voice",
|
||||||
|
interactive = True,
|
||||||
|
show_label = True,
|
||||||
|
visible = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
SaveOutputAsUndecoded = gr.Checkbox(
|
||||||
|
label = i18n("保存未解码的输出"),
|
||||||
|
value = False,
|
||||||
|
interactive = True,
|
||||||
|
show_label = True,
|
||||||
|
visible = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
SaveOutputAsUndecodedName = gr.Textbox(
|
||||||
|
label = i18n("保存的未解码输出文件名,默认保存在output/undecoded_opt目录下"),
|
||||||
|
value = "output.voice",
|
||||||
|
interactive = True,
|
||||||
|
show_label = True,
|
||||||
|
visible = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
AddRandomSaltToSaveOutputAsUndecodedName = gr.Checkbox(
|
||||||
|
label = i18n("给未解码输出文件名添加随机盐,防止覆盖"),
|
||||||
|
value = False,
|
||||||
|
interactive = True,
|
||||||
|
show_label = True,
|
||||||
|
visible = True,
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Column(scale=14):
|
with gr.Column(scale=14):
|
||||||
prompt_language = gr.Dropdown(
|
prompt_language = gr.Dropdown(
|
||||||
label=i18n("参考音频的语种"),
|
label=i18n("参考音频的语种"),
|
||||||
@ -1329,6 +1724,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
|
|||||||
visible=False,
|
visible=False,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
sample_steps = (
|
sample_steps = (
|
||||||
gr.Radio(
|
gr.Radio(
|
||||||
label=i18n("采样步数,如果觉得电,提高试试,如果觉得慢,降低试试"),
|
label=i18n("采样步数,如果觉得电,提高试试,如果觉得慢,降低试试"),
|
||||||
@ -1351,6 +1747,25 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
|
|||||||
show_label=True,
|
show_label=True,
|
||||||
visible=False if model_version != "v3" else True,
|
visible=False if model_version != "v3" else True,
|
||||||
)
|
)
|
||||||
|
with gr.Row():
|
||||||
|
gr.Markdown(html_center(i18n("批量语音合成参数"), "h3"))
|
||||||
|
with gr.Column(scale=13):
|
||||||
|
txt_paths = gr.File(label=i18n("批量语音合成文本文件,每行一个文本"),
|
||||||
|
file_types=[".txt"],
|
||||||
|
interactive=True,
|
||||||
|
file_count="multiple",
|
||||||
|
scale=13)
|
||||||
|
with gr.Column(scale=7):
|
||||||
|
out = gr.File(label=i18n("批量合成输出的语音文件"),
|
||||||
|
file_types=[".wav"],
|
||||||
|
file_count="directory",)
|
||||||
|
start_batch_btn = gr.Button(i18n("开始批量合成"),
|
||||||
|
variant="primary",
|
||||||
|
size="lg",
|
||||||
|
interactive=True,
|
||||||
|
scale=25)
|
||||||
|
|
||||||
|
|
||||||
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"), "h3"))
|
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"), "h3"))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column(scale=13):
|
with gr.Column(scale=13):
|
||||||
@ -1415,6 +1830,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
|
|||||||
inference_button = gr.Button(value=i18n("合成语音"), variant="primary", size="lg", scale=25)
|
inference_button = gr.Button(value=i18n("合成语音"), variant="primary", size="lg", scale=25)
|
||||||
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
||||||
|
|
||||||
|
with gr.Row():
|
||||||
|
close_button = gr.Button(value=i18n("关闭服务器"), variant="danger", size="lg", scale=25)
|
||||||
|
|
||||||
|
close_button.click(close_serv)
|
||||||
|
|
||||||
inference_button.click(
|
inference_button.click(
|
||||||
get_tts_wav,
|
get_tts_wav,
|
||||||
[
|
[
|
||||||
@ -1434,9 +1854,71 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
|
|||||||
sample_steps,
|
sample_steps,
|
||||||
if_sr_Checkbox,
|
if_sr_Checkbox,
|
||||||
pause_second_slider,
|
pause_second_slider,
|
||||||
|
|
||||||
|
SaveSvEmb,
|
||||||
|
SaveRefers,
|
||||||
|
SaveSvEmbName,
|
||||||
|
SaveRefersName,
|
||||||
|
SaveGE,
|
||||||
|
SaveGEName,
|
||||||
|
InjectSvEmb,
|
||||||
|
InjectRefers,
|
||||||
|
InjectSvEmbName,
|
||||||
|
InjectRefersName,
|
||||||
|
EnableAudioLoad,
|
||||||
|
|
||||||
|
SaveOutputAsUndecoded,
|
||||||
|
SaveOutputAsUndecodedName,
|
||||||
|
AddRandomSaltToSaveOutputAsUndecodedName,
|
||||||
|
|
||||||
],
|
],
|
||||||
[output],
|
[output],
|
||||||
|
|
||||||
|
api_name="get_tts_wav",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
start_batch_btn.click(
|
||||||
|
batch_tts,
|
||||||
|
[
|
||||||
|
inp_ref,
|
||||||
|
prompt_text,
|
||||||
|
prompt_language,
|
||||||
|
txt_paths,
|
||||||
|
text_language,
|
||||||
|
how_to_cut,
|
||||||
|
top_k,
|
||||||
|
top_p,
|
||||||
|
temperature,
|
||||||
|
ref_text_free,
|
||||||
|
speed,
|
||||||
|
if_freeze,
|
||||||
|
inp_refs,
|
||||||
|
sample_steps,
|
||||||
|
if_sr_Checkbox,
|
||||||
|
pause_second_slider,
|
||||||
|
|
||||||
|
SaveSvEmb,
|
||||||
|
SaveRefers,
|
||||||
|
SaveSvEmbName,
|
||||||
|
SaveRefersName,
|
||||||
|
SaveGE,
|
||||||
|
SaveGEName,
|
||||||
|
InjectSvEmb,
|
||||||
|
InjectRefers,
|
||||||
|
InjectSvEmbName,
|
||||||
|
InjectRefersName,
|
||||||
|
EnableAudioLoad,
|
||||||
|
|
||||||
|
SaveOutputAsUndecoded,
|
||||||
|
SaveOutputAsUndecodedName,
|
||||||
|
AddRandomSaltToSaveOutputAsUndecodedName,
|
||||||
|
|
||||||
|
],
|
||||||
|
[out],
|
||||||
|
|
||||||
|
api_name="batch_tts",
|
||||||
|
)
|
||||||
|
|
||||||
SoVITS_dropdown.change(
|
SoVITS_dropdown.change(
|
||||||
change_sovits_weights,
|
change_sovits_weights,
|
||||||
[SoVITS_dropdown, prompt_language, text_language],
|
[SoVITS_dropdown, prompt_language, text_language],
|
||||||
|
|||||||
175
GPT_SoVITS/module/VoiceChange.py
Normal file
175
GPT_SoVITS/module/VoiceChange.py
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import numpy as np
|
||||||
|
import torchaudio
|
||||||
|
import math
|
||||||
|
from torchaudio.transforms import Resample
|
||||||
|
import VoiceSave
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
def get_train_set(voice_file_path):
    """Load the tensors stored in one or more voice archives.

    Args:
        voice_file_path: A single archive path (``str``, including ``str``
            subclasses) or an iterable of archive paths.

    Returns:
        A list with one entry per archive, each entry being whatever
        ``VoiceSave.load_tensor`` returned for it, in input order.
    """
    if isinstance(voice_file_path, str):
        voice_file_path = [voice_file_path]
    # Every load gets its own random name and a fresh set, so loads do not
    # share state with each other.
    return [
        VoiceSave.load_tensor(archive,
                              f"get_{uuid.uuid4()}",
                              find_func=VoiceSave.__find_func__,
                              MySet=set())
        for archive in voice_file_path
    ]
|
||||||
|
|
||||||
|
class MelSpectrogram(nn.Module):
    """Log-magnitude mel-spectrogram extractor configured from ``hps.data``.

    ``filter_length``, ``hop_length``, ``win_length`` and ``sampling_rate``
    are read from ``hps.data``; ``mel_fmin`` / ``mel_fmax`` are optional and
    default to ``0`` / ``None``. The number of mel bins is fixed at 192
    regardless of ``hps.data.n_mel_channels`` (which is only stored).
    """

    def __init__(self, hps):
        super().__init__()
        data_cfg = hps.data
        self.filter_length = data_cfg.filter_length
        self.hop_length = data_cfg.hop_length
        self.win_length = data_cfg.win_length
        self.sampling_rate = data_cfg.sampling_rate
        self.n_mel_channels = data_cfg.n_mel_channels
        self.mel_fmin = getattr(data_cfg, 'mel_fmin', 0)
        self.mel_fmax = getattr(data_cfg, 'mel_fmax', None)

        # Build the mel-spectrogram transform (magnitude spectrum, no centering).
        self.mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=self.sampling_rate,
            n_fft=self.filter_length,
            hop_length=self.hop_length,
            win_length=self.win_length,
            f_min=self.mel_fmin,
            f_max=self.mel_fmax,
            n_mels=192,  # self.n_mel_channels,
            window_fn=torch.hann_window,
            center=False,
            power=1.0,
        )

    def forward(self, audio):
        """Compute the log mel-spectrogram of mono audio.

        Args:
            audio: ``[B, 1, T]`` or ``[1, T]`` mono waveform.

        Returns:
            Log-scaled mel-spectrogram, ``[B, 192, T']``.
        """
        if audio.dim() == 2:
            audio = audio.unsqueeze(0)  # [1, T] -> [1, 1, T]

        # Drop the channel axis and extract the mel-spectrogram.
        mel_spec = self.mel_transform(audio.squeeze(1))

        # Log scaling; the floor of 1e-5 avoids log(0).
        return torch.log(mel_spec.clamp(min=1e-5))
|
||||||
|
|
||||||
|
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to batch-first sequences.

    Args:
        d_model: Feature dimension of the sequences (even or odd).
        max_seq_length: Largest sequence length that can be encoded.

    The table is registered as the non-trainable buffer ``pe`` with shape
    ``[1, max_seq_length, d_model]`` so it follows the module across devices.
    """

    def __init__(self, d_model, max_seq_length=5000):
        super(PositionalEncoding, self).__init__()
        # Build the table in a local variable: assigning it to ``self.pe``
        # first would make ``register_buffer('pe', ...)`` raise
        # KeyError("attribute 'pe' already exists").
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # even feature indices: sine
        # Odd feature indices: cosine. For an odd d_model there is one cosine
        # column fewer than sine columns, hence the slice.
        pe[:, 1::2] = torch.cos(position * div_term)[:, : d_model // 2]
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Add the encoding for positions ``0 .. x.size(1) - 1`` to ``x``.

        Args:
            x: Tensor of shape ``[batch, seq_len, d_model]``; positions are
                taken along dimension 1.

        Returns:
            ``x`` plus the positional encoding, same shape as ``x``.
        """
        return x + self.pe[:, :x.size(1)]
|
||||||
|
|
||||||
|
class Spliter(nn.Module):
    '''Transformer encoder that maps an audio file's mel-spectrogram plus a global embedding to ``z_p``.

    output: z_p shape: torch.Size([1, 192, x]), y_mask shape: torch.Size([1, 1, x]), ge shape: torch.Size([1, 1024, 1])
    '''

    def __init__(self,
                 hps,
                 ge,
                 device):
        """Build the projection layers and the Transformer encoder on ``device``.

        Args:
            hps: Hyper-parameters; ``hps.model.nhead``, ``hps.model.ffn_dim``
                and ``hps.model.num_layers`` configure the encoder, and
                ``hps.data`` is used for mel extraction.
            ge: Stored as ``self.ge``; ``forward`` uses its own ``ge`` argument.
            device: Device on which all sub-modules are placed.
        """
        super().__init__()
        self.hps = hps

        self.ge = ge
        self.device = device
        # TODO: feed mel_spec and ge into the Transformer model
        self.mel_dim = 192
        self.ge_dim = 1024
        self.transformer_dim = 512
        self.ge_proj = nn.Linear(self.ge_dim, self.transformer_dim).to(self.device)
        self.mel_proj = nn.Linear(self.mel_dim, self.transformer_dim).to(self.device)
        self.pos_encoder = PositionalEncoding(self.transformer_dim).to(self.device)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=self.transformer_dim,
                nhead=hps.model.nhead,
                dim_feedforward=hps.model.ffn_dim,
                batch_first=False,
                dropout=0.1
            ),
            num_layers=hps.model.num_layers
        ).to(self.device)

        self.out_proj = nn.Linear(self.transformer_dim, self.mel_dim).to(self.device)

    @torch.no_grad()
    def mel_(self,audio_path, hps, device, dtype):
        """Load an audio file and return its log mel-spectrogram.

        The audio is down-mixed to mono, resampled to
        ``hps.data.sampling_rate`` when needed, scaled down if its peak
        exceeds 1.0, and converted with ``MelSpectrogram``; the result is
        cast to ``dtype``. Runs without gradient tracking.
        """
        sr_target = int(hps.data.sampling_rate)
        audio, sr_origin = torchaudio.load(audio_path)
        if audio.shape[0] > 1:
            # Multi-channel input: average the channels to mono.
            audio = audio.mean(0, keepdim=True)
        if sr_origin != sr_target:
            resampler = Resample(sr_origin, sr_target).to(device)
            audio = resampler(audio.to(device))
        else:
            audio = audio.to(device)
        max_audio = audio.abs().max()
        if max_audio > 1.0:
            audio = audio / max_audio
        mel_extractor = MelSpectrogram(hps).to(device)
        mel_spec = mel_extractor(audio).to(dtype)
        return mel_spec

    def forward(self, audio_path, ge,device,dtype):
        """Encode one audio file conditioned on a global embedding.

        Args:
            audio_path: Path of the audio file to encode.
            ge: Global embedding, expected as ``[1, 1024, 1]``.
            device: Device for the intermediate tensors.
            dtype: dtype for the mel-spectrogram, ``ge`` and the mask.
                NOTE(review): the projection layers keep their construction
                dtype; presumably callers pass a matching dtype -- confirm.

        Returns:
            ``(z_p, y_mask, ge)``: ``z_p`` is ``[1, 192, T]``, ``y_mask`` is
            an all-ones ``[1, 1, T]`` mask and ``ge`` is the argument exactly
            as passed in.
        """
        ge_ = ge
        mel = self.mel_(audio_path, self.hps, device, dtype)

        # [1, 192, T] -> [T, 1, 192] (sequence-first, as the encoder expects)
        mel = mel.permute(2, 0, 1)
        # Project the mel frames to the Transformer width: [T, 1, 512]
        mel_feat = self.mel_proj(mel)

        # Global embedding: [1, 1024, 1] -> [1, 1024] -> [1, 1, 512]
        ge = ge.to(device, dtype=dtype)
        ge_squeeze = ge.squeeze(-1)  # [1, 1024]
        ge_feat = self.ge_proj(ge_squeeze).unsqueeze(0)  # [1, 1, 512]

        # Prepend the embedding token to the mel sequence: [T+1, 1, 512].
        # Kept local so the activation (and its graph) is not retained on the
        # module between calls.
        tokens = torch.cat([ge_feat, mel_feat], dim=0)
        # PositionalEncoding takes positions along dim 1, so present the
        # sequence batch-first for that step only; otherwise every time step
        # would receive the encoding of position 0.
        tokens = self.pos_encoder(tokens.transpose(0, 1)).transpose(0, 1)

        # Transformer encoding: [T+1, 1, 512]
        transformer_out = self.transformer(tokens)

        # Drop the leading embedding token, keep the mel positions: [T, 1, 512]
        mel_out = transformer_out[1:, :, :]
        # Project back to the mel width: [T, 1, 192]
        mel_out = self.out_proj(mel_out)
        # Rearrange to the target layout: [1, 192, T]
        z_p = mel_out.permute(1, 2, 0)

        # All-ones mask over the time steps: [1, 1, T]
        T = z_p.shape[-1]
        y_mask = torch.ones(1, 1, T, device=device, dtype=dtype)

        return z_p, y_mask, ge_
|
||||||
|
|
||||||
|
class SpliterDataset(torch.utils.data.Dataset):
    """Dataset over the samples that ``get_train_set`` builds from voice files."""

    def __init__(self, voice_file_paths):
        """Remember the input paths and materialize the samples once, up front."""
        self.voice_file_paths = voice_file_paths
        self.datas = get_train_set(voice_file_paths)

    def __len__(self):
        """Return the number of materialized samples."""
        samples = self.datas
        return len(samples)

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        samples = self.datas
        return samples[idx]
|
||||||
@ -25,6 +25,53 @@ import contextlib
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
import torchaudio
|
||||||
|
from torchaudio.transforms import Resample
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
def merge_dir_txt2(*TXT):
    """Join the given path components and return the result as a ``Path``."""
    joined = os.path.join(*TXT)
    return Path(joined)
|
||||||
|
|
||||||
|
def get_my_dir():
    """Return the absolute path of the directory that contains this module."""
    module_file = os.path.abspath(__file__)
    return os.path.dirname(module_file)
|
||||||
|
|
||||||
|
def get_parent_dir(dir_path, depth=1):
    """Return the ancestor of ``dir_path`` that lies ``depth`` levels up.

    ``depth=0`` returns ``dir_path`` itself (as a ``Path``). Climbing past the
    filesystem root simply stays at the root.
    """
    current = Path(dir_path)
    for _step in range(depth):
        current = current.parent
    return current
|
||||||
|
|
||||||
|
# Names already handed out (or discovered on disk) in this process.
POOL: set = set()


def _get_unique_name(name, MySet: set = None):
    """Reserve and return a name that is in neither ``POOL`` nor ``MySet``.

    ``name`` is returned unchanged when it is free. Otherwise the first free
    candidate among ``name_2``, ``name_3``, ... is used. The suffix is always
    attached to the original ``name``, so repeated collisions produce
    ``name_3`` rather than the compounded ``name_2_3``.

    Args:
        name: Preferred name.
        MySet: Optional extra collection of names that must be avoided.
            It is only read, never modified.

    Returns:
        The reserved name, which has been added to ``POOL``.
    """
    reserved = MySet if MySet is not None else ()
    candidate = name
    suffix = 1
    while candidate in POOL or candidate in reserved:
        suffix += 1
        candidate = f'{name}_{suffix}'
    POOL.add(candidate)
    return candidate
|
||||||
|
|
||||||
|
def find_func(zf, il):
    """Resolve the files listed in the archive's ``voice.json``.

    ``voice.json`` is located through ``zf.get_file_path`` and parsed with
    ``il.load_info``. Every name in its ``"access_list"`` entry is recorded in
    the module-level ``POOL`` and then resolved with ``zf.get_file_path``;
    names whose lookup raises ``FileNotFoundError`` are skipped.

    Returns:
        ``None`` when ``il.load_info`` returns ``None``, otherwise the list of
        resolved paths in ``"access_list"`` order.
    """
    manifest = il.load_info(zf.get_file_path("voice.json"))
    if manifest is None:
        return None

    names = manifest["access_list"]
    # POOL is only mutated, never rebound, so no ``global`` statement is needed.
    POOL.update(names)

    found = []
    for entry in names:
        try:
            located = zf.get_file_path(entry)
        except FileNotFoundError:
            continue
        found.append(located)
    return found
|
||||||
|
|
||||||
|
ROOT_DIR = str(get_parent_dir(get_my_dir()))
|
||||||
|
|
||||||
class StochasticDurationPredictor(nn.Module):
|
class StochasticDurationPredictor(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -153,7 +200,7 @@ class DurationPredictor(nn.Module):
|
|||||||
|
|
||||||
WINDOW = {}
|
WINDOW = {}
|
||||||
|
|
||||||
class TextEncoder(nn.Module):
|
class TextEncoder(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
out_channels,
|
out_channels,
|
||||||
@ -989,10 +1036,8 @@ class SynthesizerTrn(nn.Module):
|
|||||||
|
|
||||||
o = self.dec((z * y_mask)[:, :, :], g=ge)
|
o = self.dec((z * y_mask)[:, :, :], g=ge)
|
||||||
return o, y_mask, (z, z_p, m_p, logs_p)
|
return o, y_mask, (z, z_p, m_p, logs_p)
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def decode(self, codes, text, refer, noise_scale=0.5, speed=1, sv_emb=None):
|
def ge_(self, refer, sv_emb=None, InjectGE=False, GE=None, LoadGE=True):
|
||||||
def get_ge(refer, sv_emb):
|
def get_ge(refer, sv_emb):
|
||||||
ge = None
|
ge = None
|
||||||
if refer is not None:
|
if refer is not None:
|
||||||
@ -1006,16 +1051,36 @@ class SynthesizerTrn(nn.Module):
|
|||||||
sv_emb = self.sv_emb(sv_emb) # B*20480->B*512
|
sv_emb = self.sv_emb(sv_emb) # B*20480->B*512
|
||||||
ge += sv_emb.unsqueeze(-1)
|
ge += sv_emb.unsqueeze(-1)
|
||||||
ge = self.prelu(ge)
|
ge = self.prelu(ge)
|
||||||
|
print(f"ge.shape : {ge.shape}")
|
||||||
return ge
|
return ge
|
||||||
|
|
||||||
if type(refer) == list:
|
if LoadGE:
|
||||||
ges = []
|
if type(refer) == list:
|
||||||
for idx, _refer in enumerate(refer):
|
ges = []
|
||||||
ge = get_ge(_refer, sv_emb[idx] if self.is_v2pro else None)
|
for idx, _refer in enumerate(refer):
|
||||||
ges.append(ge)
|
ge = get_ge(_refer, sv_emb[idx] if self.is_v2pro else None)
|
||||||
ge = torch.stack(ges, 0).mean(0)
|
ges.append(ge)
|
||||||
|
ge = torch.stack(ges, 0).mean(0)
|
||||||
|
else:
|
||||||
|
ge = get_ge(refer, sv_emb)
|
||||||
else:
|
else:
|
||||||
ge = get_ge(refer, sv_emb)
|
if InjectGE:
|
||||||
|
if type(GE) == list:
|
||||||
|
GE = torch.stack(GE, 0).mean(0)
|
||||||
|
ge = GE
|
||||||
|
else:
|
||||||
|
raise ValueError("No GE stream provided!")
|
||||||
|
return ge
|
||||||
|
|
||||||
|
@torch.no_grad()
|
||||||
|
def decode(self, codes, text, refer, noise_scale=0.5, speed=1, sv_emb=None,
|
||||||
|
InjectGE=False,GE=None,LoadGE=True,
|
||||||
|
InjectZP=False,ZP=None,LoadZP=True,
|
||||||
|
OverWrite_Mask=False,Mask=None,
|
||||||
|
SaveGE=False,SaveZP=False,SaveMask=False,
|
||||||
|
GE_Name=None, ZP_Name=None, Mask_Name=None,
|
||||||
|
VoiceSave=None):
|
||||||
|
ge = self.ge_(refer, sv_emb, InjectGE, GE, LoadGE)
|
||||||
|
|
||||||
y_lengths = torch.LongTensor([codes.size(2) * 2]).to(codes.device)
|
y_lengths = torch.LongTensor([codes.size(2) * 2]).to(codes.device)
|
||||||
text_lengths = torch.LongTensor([text.size(-1)]).to(text.device)
|
text_lengths = torch.LongTensor([text.size(-1)]).to(text.device)
|
||||||
@ -1031,14 +1096,75 @@ class SynthesizerTrn(nn.Module):
|
|||||||
self.ge_to512(ge.transpose(2, 1)).transpose(2, 1) if self.is_v2pro else ge,
|
self.ge_to512(ge.transpose(2, 1)).transpose(2, 1) if self.is_v2pro else ge,
|
||||||
speed,
|
speed,
|
||||||
)
|
)
|
||||||
z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
|
|
||||||
|
|
||||||
|
if InjectZP:
|
||||||
|
if type(ZP) == list:
|
||||||
|
ZP = torch.stack(ZP, 0).mean(0)
|
||||||
|
else:
|
||||||
|
ZP = ZP
|
||||||
|
z_p = ZP
|
||||||
|
else:
|
||||||
|
if LoadZP:
|
||||||
|
z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
|
||||||
|
else:
|
||||||
|
raise ValueError("No z_p stream provided!")
|
||||||
|
|
||||||
|
if OverWrite_Mask:
|
||||||
|
if type(Mask) == list:
|
||||||
|
Mask = torch.stack(Mask, 0).mean(0)
|
||||||
|
if Mask is None:
|
||||||
|
raise ValueError("No mask stream provided!")
|
||||||
|
y_mask = Mask
|
||||||
|
print(f"z_p shape: {z_p.shape}, y_mask shape: {y_mask.shape}, ge shape: {ge.shape}")
|
||||||
z = self.flow(z_p, y_mask, g=ge, reverse=True)
|
z = self.flow(z_p, y_mask, g=ge, reverse=True)
|
||||||
|
|
||||||
o = self.dec((z * y_mask)[:, :, :], g=ge)
|
o = self.dec((z * y_mask)[:, :, :], g=ge)
|
||||||
return o
|
return o
|
||||||
|
|
||||||
|
|
||||||
|
@torch.no_grad()
def decode2(self, codes, text, refer, noise_scale=0.5, speed=1, sv_emb=None,
            InjectGE=False, GE=None, LoadGE=True,
            InjectZP=False, ZP=None, LoadZP=True,
            OverWrite_Mask=False, Mask=None,):
    """Return ``(z_p, y_mask, ge)`` without running the flow or the decoder.

    Args:
        codes: Semantic codes; ``codes.size(2) * 2`` is used as the target length.
        text: Text/phoneme ids; ``text.size(-1)`` is used as the text length.
        refer: Reference input forwarded to ``self.ge_``.
        noise_scale: Scale of the Gaussian noise added to ``m_p`` when ``z_p``
            is sampled.
        speed: Forwarded to ``self.enc_p``.
        sv_emb: Speaker-verification embedding forwarded to ``self.ge_``.
        InjectGE, GE, LoadGE: Forwarded unchanged to ``self.ge_``.
        InjectZP: When true, ``ZP`` is returned as ``z_p`` instead of sampling.
        ZP: Tensor, or list of tensors that are stacked and averaged.
        LoadZP: When ``InjectZP`` is false, allows sampling ``z_p`` from the
            prior encoder output.
        OverWrite_Mask: When true, ``Mask`` replaces the encoder's ``y_mask``.
        Mask: Tensor, or list of tensors that are stacked and averaged.

    Raises:
        ValueError: If ``InjectZP`` is set but ``ZP`` is ``None``, if neither
            ``InjectZP`` nor ``LoadZP`` is set, or if ``OverWrite_Mask`` is
            set but ``Mask`` is ``None``.
    """
    ge = self.ge_(refer, sv_emb, InjectGE, GE, LoadGE)

    y_lengths = torch.LongTensor([codes.size(2) * 2]).to(codes.device)
    text_lengths = torch.LongTensor([text.size(-1)]).to(text.device)

    quantized = self.quantizer.decode(codes)
    if self.semantic_frame_rate == "25hz":
        # Double the frame count so it matches y_lengths computed above.
        quantized = F.interpolate(quantized, size=int(quantized.shape[-1] * 2), mode="nearest")
    _, m_p, logs_p, y_mask, _, _ = self.enc_p(
        quantized,
        y_lengths,
        text,
        text_lengths,
        self.ge_to512(ge.transpose(2, 1)).transpose(2, 1) if self.is_v2pro else ge,
        speed,
    )

    if InjectZP:
        # Fail with the same error type as the other missing-stream paths
        # instead of an AttributeError on ``None`` further down.
        if ZP is None:
            raise ValueError("No z_p stream provided!")
        z_p = torch.stack(ZP, 0).mean(0) if isinstance(ZP, list) else ZP
    elif LoadZP:
        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
    else:
        raise ValueError("No z_p stream provided!")

    if OverWrite_Mask:
        if Mask is None:
            raise ValueError("No mask stream provided!")
        y_mask = torch.stack(Mask, 0).mean(0) if isinstance(Mask, list) else Mask
    print(f"z_p shape: {z_p.shape}, y_mask shape: {y_mask.shape}, ge shape: {ge.shape}")
    return z_p, y_mask, ge
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def decode_streaming(self, codes, text, refer, noise_scale=0.5, speed=1, sv_emb=None, result_length:int=None, overlap_frames:torch.Tensor=None, padding_length:int=None):
|
def decode_streaming(self, codes, text, refer, noise_scale=0.5, speed=1, sv_emb=None, result_length:int=None, overlap_frames:torch.Tensor=None, padding_length:int=None):
|
||||||
def get_ge(refer, sv_emb):
|
def get_ge(refer, sv_emb):
|
||||||
|
|||||||
@ -432,6 +432,8 @@ class ResidualCouplingLayer(nn.Module):
|
|||||||
self.post.bias.data.zero_()
|
self.post.bias.data.zero_()
|
||||||
|
|
||||||
def forward(self, x, x_mask, g=None, reverse=False):
|
def forward(self, x, x_mask, g=None, reverse=False):
|
||||||
|
|
||||||
|
print(f"x.shape: {x.shape}, x_mask.shape: {x_mask.shape}")
|
||||||
x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
|
x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
|
||||||
h = self.pre(x0) * x_mask
|
h = self.pre(x0) * x_mask
|
||||||
h = self.enc(h, x_mask, g=g)
|
h = self.enc(h, x_mask, g=g)
|
||||||
|
|||||||
@ -1,9 +1,10 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import torch
|
import torch
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.append(f"{os.getcwd()}/GPT_SoVITS/eres2net")
|
sys.path.append(f"{str(Path(os.path.dirname(os.path.abspath(__file__))).parent)}/GPT_SoVITS/eres2net")
|
||||||
sv_path = "GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt"
|
sv_path = f"{str(Path(os.path.dirname(os.path.abspath(__file__))).parent)}/GPT_SoVITS/pretrained_models/sv/pretrained_eres2netv2w24s4ep4.ckpt"
|
||||||
from ERes2NetV2 import ERes2NetV2
|
from ERes2NetV2 import ERes2NetV2
|
||||||
import kaldi as Kaldi
|
import kaldi as Kaldi
|
||||||
|
|
||||||
|
|||||||
@ -80,6 +80,15 @@ conda activate GPTSoVits
|
|||||||
pwsh -F install.ps1 --Device <CU126|CU128|CPU> --Source <HF|HF-Mirror|ModelScope> [--DownloadUVR5]
|
pwsh -F install.ps1 --Device <CU126|CU128|CPU> --Source <HF|HF-Mirror|ModelScope> [--DownloadUVR5]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If install.ps1 fails, you can try again or run the following commands:
|
||||||
|
|
||||||
|
```pwsh
|
||||||
|
conda create -n GPTSoVits python=3.10
|
||||||
|
conda activate GPTSoVits
|
||||||
|
inst.bat
|
||||||
|
pwsh -F inst2.ps1 --Device <CU126|CU128|CPU> --Source <HF|HF-Mirror|ModelScope> [--DownloadUVR5]
|
||||||
|
```
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
3
conda-go-webui.bat
Normal file
3
conda-go-webui.bat
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
rem Launch the web UI inside the conda environment named by the first argument.
chcp 65001
cd /d %~dp0
rem Activate first, then start Python as a separate step. A pipe would run
rem both sides in their own cmd instances, so the activation would never reach
rem the python process. "call" is needed because conda is a batch script and
rem control must return here afterwards.
call conda activate %1
python -I webui.py zh_CN
|
||||||
@ -14,7 +14,7 @@ def merge_dir_txt2(*TXT):
|
|||||||
config_json_location = merge_dir_txt2(current_dir,"config.json")
|
config_json_location = merge_dir_txt2(current_dir,"config.json")
|
||||||
with open(str(config_json_location),"r") as f:
|
with open(str(config_json_location),"r") as f:
|
||||||
__info__ = f.read()
|
__info__ = f.read()
|
||||||
|
__info__ = json.loads(__info__)
|
||||||
i18n = I18nAuto(language=os.environ.get("language", "Auto"))
|
i18n = I18nAuto(language=os.environ.get("language", "Auto"))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
3
inst.bat
Normal file
3
inst.bat
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
rem Install the native tools required before the Python dependencies.
chcp 65001
rem conda is a batch script: without "call", control is handed over to it and
rem never returns, so the second install would not run.
call conda install -y -c conda-forge ffmpeg
call conda install -y -c conda-forge cmake
|
||||||
209
inst2.ps1
Normal file
209
inst2.ps1
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
# Script parameters:
#   -Device        PyTorch build to install (CUDA 12.6, CUDA 12.8 or CPU).
#   -Source        Host the model archives are downloaded from.
#   -DownloadUVR5  Also download the UVR5 weights.
Param (
    [Parameter(Mandatory=$true)][ValidateSet("CU126", "CU128", "CPU")][string]$Device,
    [Parameter(Mandatory=$true)][ValidateSet("HF", "HF-Mirror", "ModelScope")][string]$Source,
    [switch]$DownloadUVR5
)

# Turn every error into a terminating one so the trap below sees it.
$global:ErrorActionPreference = 'Stop'

# Report any terminating error through Write-ErrorLog, which exits the script.
trap {
    Write-ErrorLog $_
}
|
||||||
|
|
||||||
|
# Print a formatted report for a terminating error (message, failing command
# line, script location and call stack), then exit the script with code 1.
function Write-ErrorLog {
    param (
        [System.Management.Automation.ErrorRecord]$ErrorRecord
    )

    Write-Host "`n[ERROR] Command failed:" -ForegroundColor Red
    if ($ErrorRecord.Exception.Message) {
        Write-Host "Message:" -ForegroundColor Red
        $ErrorRecord.Exception.Message -split "`n" | ForEach-Object {
            Write-Host " $_"
        }
    }

    Write-Host "Command:" -ForegroundColor Red -NoNewline
    # Strip line breaks before printing. The method calls must be evaluated as
    # an expression: written directly as a Write-Host argument, the
    # ".Replace(...)" text is parsed in argument mode and never invoked.
    $commandLine = " $($ErrorRecord.InvocationInfo.Line)".Replace("`r", "").Replace("`n", "")
    Write-Host $commandLine
    Write-Host "Location:" -ForegroundColor Red -NoNewline
    Write-Host " $($ErrorRecord.InvocationInfo.ScriptName):$($ErrorRecord.InvocationInfo.ScriptLineNumber)"
    Write-Host "Call Stack:" -ForegroundColor DarkRed
    $ErrorRecord.ScriptStackTrace -split "`n" | ForEach-Object {
        Write-Host " $_" -ForegroundColor DarkRed
    }

    exit 1
}
|
||||||
|
|
||||||
|
# Print "[INFO]:" in green followed by the message on the same line.
function Write-Info($msg) {
    $tag = @{
        Object          = "[INFO]:"
        ForegroundColor = "Green"
        NoNewline       = $true
    }
    Write-Host @tag
    Write-Host " $msg"
}
|
||||||
|
# Print "[SUCCESS]:" in blue followed by the message on the same line.
function Write-Success($msg) {
    $tag = @{
        Object          = "[SUCCESS]:"
        ForegroundColor = "Blue"
        NoNewline       = $true
    }
    Write-Host @tag
    Write-Host " $msg"
}
|
||||||
|
|
||||||
|
# Run "pip install" with the given arguments. On a non-zero exit code the
# captured output is echoed (error records in red) and re-thrown as a single
# exception whose message is the joined output.
function Invoke-Pip {
    param (
        [Parameter(ValueFromRemainingArguments = $true)]
        [string[]]$Args
    )

    $output = & pip install @Args 2>&1
    if ($LASTEXITCODE -eq 0) {
        return
    }

    Write-Host "Pip Install $Args Failed" -ForegroundColor Red
    # Echo each captured line and collect its text for the exception message.
    $errorMessages = foreach ($item in $output) {
        if ($item -is [System.Management.Automation.ErrorRecord]) {
            $msg = $item.Exception.Message
            Write-Host "$msg" -ForegroundColor Red
            $msg
        }
        else {
            Write-Host $item
            $item
        }
    }
    throw [System.Exception]::new(($errorMessages -join "`n"))
}
|
||||||
|
|
||||||
|
# Download $Uri with Invoke-WebRequest, writing to $OutFile when one is given.
# On failure the URL is printed and the original error is re-thrown.
function Invoke-Download {
    param (
        [Parameter(Mandatory = $true)]
        [string]$Uri,

        [Parameter()]
        [string]$OutFile
    )

    $request = @{
        Uri         = $Uri
        ErrorAction = 'Stop'
    }
    if ($OutFile) {
        $request["OutFile"] = $OutFile
    }

    try {
        # The response object is discarded; only the side effect matters.
        Invoke-WebRequest @request | Out-Null
    } catch {
        Write-Host "Failed to download:" -ForegroundColor Red
        Write-Host " $Uri"
        throw
    }
}
|
||||||
|
|
||||||
|
# Extract $ZipPath into $DestPath (overwriting existing files), then delete
# the archive.
function Invoke-Unzip {
    param($ZipPath, $DestPath)
    $extract = @{
        Path            = $ZipPath
        DestinationPath = $DestPath
        Force           = $true
    }
    Expand-Archive @extract
    Remove-Item $ZipPath -Force
}
|
||||||
|
|
||||||
|
# Switch the console to UTF-8 and work relative to the script's own folder.
chcp 65001
Set-Location $PSScriptRoot

# Pick the download host. Every archive lives under the same base URL for a
# given source, so only the base differs per branch.
$BaseURL = ""
switch ($Source) {
    "HF" {
        Write-Info "Download Model From HuggingFace"
        $BaseURL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main"
    }
    "HF-Mirror" {
        Write-Info "Download Model From HuggingFace-Mirror"
        $BaseURL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main"
    }
    "ModelScope" {
        Write-Info "Download Model From ModelScope"
        $BaseURL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master"
    }
}

# -Source is mandatory and restricted by ValidateSet, so one branch above
# always runs; the guard keeps the URLs empty otherwise.
$PretrainedURL = ""
$G2PWURL = ""
$UVR5URL = ""
$NLTKURL = ""
$OpenJTalkURL = ""
if ($BaseURL) {
    $PretrainedURL = "$BaseURL/pretrained_models.zip"
    $G2PWURL = "$BaseURL/G2PWModel.zip"
    $UVR5URL = "$BaseURL/uvr5_weights.zip"
    $NLTKURL = "$BaseURL/nltk_data.zip"
    $OpenJTalkURL = "$BaseURL/open_jtalk_dic_utf_8-1.11.tar.gz"
}
|
||||||
|
|
||||||
|
# Pretrained models: skipped when the "sv" sub-folder is already present.
if (Test-Path "GPT_SoVITS/pretrained_models/sv") {
    Write-Info "Pretrained Model Exists"
    Write-Info "Skip Downloading Pretrained Models"
} else {
    Write-Info "Downloading Pretrained Models..."
    Invoke-Download -Uri $PretrainedURL -OutFile "pretrained_models.zip"
    Invoke-Unzip "pretrained_models.zip" "GPT_SoVITS"
    Write-Success "Pretrained Models Downloaded"
}


# G2PW model: skipped when its folder is already present.
if (Test-Path "GPT_SoVITS/text/G2PWModel") {
    Write-Info "G2PWModel Exists"
    Write-Info "Skip Downloading G2PWModel"
} else {
    Write-Info "Downloading G2PWModel..."
    Invoke-Download -Uri $G2PWURL -OutFile "G2PWModel.zip"
    Invoke-Unzip "G2PWModel.zip" "GPT_SoVITS/text"
    Write-Success "G2PWModel Downloaded"
}

# UVR5 weights are optional and only handled when -DownloadUVR5 is given.
if ($DownloadUVR5) {
    if (Test-Path "tools/uvr5/uvr5_weights") {
        Write-Info "UVR5 Models Exists"
        Write-Info "Skip Downloading UVR5 Models"
    } else {
        Write-Info "Downloading UVR5 Models..."
        Invoke-Download -Uri $UVR5URL -OutFile "uvr5_weights.zip"
        Invoke-Unzip "uvr5_weights.zip" "tools/uvr5"
        Write-Success "UVR5 Models Downloaded"
    }
}
|
||||||
|
|
||||||
|
# Install the PyTorch build that matches the requested device.
switch ($Device) {
    "CU128" {
        Write-Info "Installing PyTorch For CUDA 12.8..."
        Invoke-Pip torch --index-url "https://download.pytorch.org/whl/cu128"
    }
    "CU126" {
        Write-Info "Installing PyTorch For CUDA 12.6..."
        Invoke-Pip torch --index-url "https://download.pytorch.org/whl/cu126"
    }
    "CPU" {
        Write-Info "Installing PyTorch For CPU..."
        Invoke-Pip torch --index-url "https://download.pytorch.org/whl/cpu"
    }
}
Write-Success "PyTorch Installed"

# extra-req.txt is installed with --no-deps; requirements.txt follows normally.
Write-Info "Installing Python Dependencies From requirements.txt..."
Invoke-Pip -r extra-req.txt --no-deps
Invoke-Pip -r requirements.txt
Write-Success "Python Dependencies Installed"

# NLTK data is unpacked into the active Python prefix.
Write-Info "Downloading NLTK Data..."
Invoke-Download -Uri $NLTKURL -OutFile "nltk_data.zip"
Invoke-Unzip "nltk_data.zip" (python -c "import sys; print(sys.prefix)").Trim()

# The Open JTalk dictionary is unpacked next to the installed pyopenjtalk package.
Write-Info "Downloading Open JTalk Dict..."
Invoke-Download -Uri $OpenJTalkURL -OutFile "open_jtalk_dic_utf_8-1.11.tar.gz"
$target = (python -c "import os, pyopenjtalk; print(os.path.dirname(pyopenjtalk.__file__))").Trim()
tar -xzf open_jtalk_dic_utf_8-1.11.tar.gz -C $target
# A failing native command does not necessarily raise a PowerShell error, so
# check the exit code explicitly (as Invoke-Pip does) before reporting success.
if ($LASTEXITCODE -ne 0) {
    throw "Failed to extract open_jtalk_dic_utf_8-1.11.tar.gz (tar exit code $LASTEXITCODE)"
}
Remove-Item "open_jtalk_dic_utf_8-1.11.tar.gz" -Force
Write-Success "Open JTalk Dic Downloaded"

Write-Success "Installation Completed"
|
||||||
@ -52,7 +52,7 @@ function Invoke-Conda {
|
|||||||
[string[]]$Args
|
[string[]]$Args
|
||||||
)
|
)
|
||||||
|
|
||||||
$output = & conda install -y -q -c conda-forge @Args 2>&1
|
$output = & conda install -y -c conda-forge @Args 2>&1
|
||||||
$exitCode = $LASTEXITCODE
|
$exitCode = $LASTEXITCODE
|
||||||
|
|
||||||
if ($exitCode -ne 0) {
|
if ($exitCode -ne 0) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user