Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git (synced 2025-06-29 00:59:16 +08:00)

Compare commits: dd9145c58a ... 7d84c9685a (7 commits)
7d84c9685a
ee4a466f79
b65ea9181e
c0ce55a132
13573a1b06
fef65d40fe
09f28ae518
@@ -3,3 +3,44 @@ from modelscope import snapshot_download
 model_dir = snapshot_download('damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',revision="v2.0.4")
 model_dir = snapshot_download('damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',revision="v2.0.4")
 model_dir = snapshot_download('damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',revision="v2.0.4")
+
+import nltk
+
+nltk.download('averaged_perceptron_tagger_eng')
+
+# Download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip, unzip and rename it to G2PWModel, then place it in GPT_SoVITS/text.
+
+import os
+import zipfile
+import shutil
+import requests
+
+# Get the path of the current file
+current_file_path = os.path.abspath(__file__)
+current_dir = os.path.dirname(current_file_path)
+
+# Define the download URL and target paths
+url = 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip'
+download_path = os.path.join(current_dir, 'G2PWModel_1.1.zip')
+target_dir = os.path.join(current_dir, '../GPT_SoVITS/text/')
+
+# Download the archive
+response = requests.get(url)
+with open(download_path, 'wb') as file:
+    file.write(response.content)
+
+# Extract the archive
+with zipfile.ZipFile(download_path, 'r') as zip_ref:
+    zip_ref.extractall(current_dir)
+
+# Rename the extracted folder
+os.rename(os.path.join(current_dir, 'G2PWModel_1.1'), os.path.join(current_dir, 'G2PWModel'))
+
+# Move the folder to the target directory
+if not os.path.exists(target_dir):
+    os.makedirs(target_dir)
+shutil.move(os.path.join(current_dir, 'G2PWModel'), target_dir)
+
+# Clean up the temporary archive
+os.remove(download_path)
+
+print("Download G2PWModel successfully")
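The download step above buffers the whole archive in memory with a bare requests.get and never checks the HTTP status. As a hedged alternative (a sketch only, not part of the commit; the download_file name is made up here), the same fetch could be streamed and verified:

import requests


def download_file(url: str, dest: str, chunk_size: int = 1 << 20) -> None:
    # Stream the archive to disk instead of holding it all in memory,
    # and fail loudly if the server answers with an error status.
    with requests.get(url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        with open(dest, "wb") as fh:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                fh.write(chunk)

Calling download_file(url, download_path) would then stand in for the requests.get / file.write pair shown in the diff.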
@@ -17,6 +17,12 @@ https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-la
out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/tokenizer.json
out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json
https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
out=GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/gsv-v2final-pretrained/s2D2333k.pth
out=GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2D2333k.pth
https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/gsv-v2final-pretrained/s2G2333k.pth
out=GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
# UVR5
https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
out=tools/uvr5/uvr5_weights/HP2_all_vocals.pth
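The URL lines paired with out= lines above match aria2c's input-file format (a download URL followed by an out= option giving the target path), so the list is presumably meant to be fed to something like aria2c -i <list-file>. As a rough fallback in plain Python (a sketch; the parse_manifest/fetch_all names and the links.txt filename are invented for illustration), such a list could be consumed like this:

import os
from typing import Iterator, Tuple

import requests


def parse_manifest(path: str) -> Iterator[Tuple[str, str]]:
    # Yield (url, output_path) pairs from an aria2c-style list:
    # a URL line followed by an "out=..." line; '#' lines are comments.
    url = None
    with open(path, encoding="utf-8") as fh:
        for raw in fh:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("out=") and url is not None:
                yield url, line[len("out="):]
                url = None
            else:
                url = line


def fetch_all(manifest: str) -> None:
    # Download every entry to its declared output path, streaming to disk.
    for url, out_path in parse_manifest(manifest):
        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
        with requests.get(url, stream=True, timeout=60) as resp:
            resp.raise_for_status()
            with open(out_path, "wb") as fh:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    fh.write(chunk)


if __name__ == "__main__":
    fetch_all("links.txt")  # hypothetical filename for the list shown above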
@@ -12,33 +12,33 @@ import torch


 def multi_head_attention_forward_patched(
-    query: Tensor,
-    key: Tensor,
-    value: Tensor,
-    embed_dim_to_check: int,
-    num_heads: int,
-    in_proj_weight: Optional[Tensor],
-    in_proj_bias: Optional[Tensor],
-    bias_k: Optional[Tensor],
-    bias_v: Optional[Tensor],
-    add_zero_attn: bool,
+    query,
+    key,
+    value,
+    embed_dim_to_check,
+    num_heads,
+    in_proj_weight,
+    in_proj_bias,
+    bias_k,
+    bias_v,
+    add_zero_attn,
     dropout_p: float,
-    out_proj_weight: Tensor,
-    out_proj_bias: Optional[Tensor],
-    training: bool = True,
-    key_padding_mask: Optional[Tensor] = None,
-    need_weights: bool = True,
-    attn_mask: Optional[Tensor] = None,
-    use_separate_proj_weight: bool = False,
-    q_proj_weight: Optional[Tensor] = None,
-    k_proj_weight: Optional[Tensor] = None,
-    v_proj_weight: Optional[Tensor] = None,
-    static_k: Optional[Tensor] = None,
-    static_v: Optional[Tensor] = None,
-    average_attn_weights: bool = True,
-    is_causal: bool = False,
-) -> Tuple[Tensor, Optional[Tensor]]:
+    out_proj_weight,
+    out_proj_bias,
+    training = True,
+    key_padding_mask = None,
+    need_weights = True,
+    attn_mask = None,
+    use_separate_proj_weight = False,
+    q_proj_weight = None,
+    k_proj_weight = None,
+    v_proj_weight = None,
+    static_k = None,
+    static_v = None,
+    average_attn_weights = True,
+    is_causal = False,
+    cache=None,
+):
     r"""
     Args:
         query, key, value: map a query and a set of key-value pairs to an output.
@@ -1,5 +1,6 @@
 import os, sys
+import threading

 from tqdm import tqdm
 now_dir = os.getcwd()
@@ -54,6 +55,7 @@ class TextPreprocessor:
         self.bert_model = bert_model
         self.tokenizer = tokenizer
         self.device = device
+        self.bert_lock = threading.RLock()

     def preprocess(self, text:str, lang:str, text_split_method:str, version:str="v2")->List[Dict]:
         print(f'############ {i18n("切分文本")} ############')
@@ -117,70 +119,71 @@ class TextPreprocessor:
         return self.get_phones_and_bert(text, language, version)

     def get_phones_and_bert(self, text:str, language:str, version:str, final:bool=False):
-        if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
-            # language = language.replace("all_","")
-            formattext = text
-            while "  " in formattext:
-                formattext = formattext.replace("  ", " ")
-            if language == "all_zh":
-                if re.search(r'[A-Za-z]', formattext):
-                    formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
-                    formattext = chinese.mix_text_normalize(formattext)
-                    return self.get_phones_and_bert(formattext,"zh",version)
-                else:
-                    phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
-                    bert = self.get_bert_feature(norm_text, word2ph).to(self.device)
-            elif language == "all_yue" and re.search(r'[A-Za-z]', formattext):
-                formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
-                formattext = chinese.mix_text_normalize(formattext)
-                return self.get_phones_and_bert(formattext,"yue",version)
-            else:
-                phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
-                bert = torch.zeros(
-                    (1024, len(phones)),
-                    dtype=torch.float32,
-                ).to(self.device)
-        elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
-            textlist=[]
-            langlist=[]
-            if language == "auto":
-                for tmp in LangSegmenter.getTexts(text):
-                    langlist.append(tmp["lang"])
-                    textlist.append(tmp["text"])
-            elif language == "auto_yue":
-                for tmp in LangSegmenter.getTexts(text):
-                    if tmp["lang"] == "zh":
-                        tmp["lang"] = "yue"
-                    langlist.append(tmp["lang"])
-                    textlist.append(tmp["text"])
-            else:
-                for tmp in LangSegmenter.getTexts(text):
-                    if tmp["lang"] == "en":
-                        langlist.append(tmp["lang"])
-                    else:
-                        # Chinese/Japanese/Korean Han characters cannot be told apart, so follow the user-specified language
-                        langlist.append(language)
-                    textlist.append(tmp["text"])
-            # print(textlist)
-            # print(langlist)
-            phones_list = []
-            bert_list = []
-            norm_text_list = []
-            for i in range(len(textlist)):
-                lang = langlist[i]
-                phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
-                bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
-                phones_list.append(phones)
-                norm_text_list.append(norm_text)
-                bert_list.append(bert)
-            bert = torch.cat(bert_list, dim=1)
-            phones = sum(phones_list, [])
-            norm_text = ''.join(norm_text_list)
+        with self.bert_lock:
+            if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
+                # language = language.replace("all_","")
+                formattext = text
+                while "  " in formattext:
+                    formattext = formattext.replace("  ", " ")
+                if language == "all_zh":
+                    if re.search(r'[A-Za-z]', formattext):
+                        formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
+                        formattext = chinese.mix_text_normalize(formattext)
+                        return self.get_phones_and_bert(formattext,"zh",version)
+                    else:
+                        phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
+                        bert = self.get_bert_feature(norm_text, word2ph).to(self.device)
+                elif language == "all_yue" and re.search(r'[A-Za-z]', formattext):
+                    formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
+                    formattext = chinese.mix_text_normalize(formattext)
+                    return self.get_phones_and_bert(formattext,"yue",version)
+                else:
+                    phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
+                    bert = torch.zeros(
+                        (1024, len(phones)),
+                        dtype=torch.float32,
+                    ).to(self.device)
+            elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
+                textlist=[]
+                langlist=[]
+                if language == "auto":
+                    for tmp in LangSegmenter.getTexts(text):
+                        langlist.append(tmp["lang"])
+                        textlist.append(tmp["text"])
+                elif language == "auto_yue":
+                    for tmp in LangSegmenter.getTexts(text):
+                        if tmp["lang"] == "zh":
+                            tmp["lang"] = "yue"
+                        langlist.append(tmp["lang"])
+                        textlist.append(tmp["text"])
+                else:
+                    for tmp in LangSegmenter.getTexts(text):
+                        if tmp["lang"] == "en":
+                            langlist.append(tmp["lang"])
+                        else:
+                            # Chinese/Japanese/Korean Han characters cannot be told apart, so follow the user-specified language
+                            langlist.append(language)
+                        textlist.append(tmp["text"])
+                # print(textlist)
+                # print(langlist)
+                phones_list = []
+                bert_list = []
+                norm_text_list = []
+                for i in range(len(textlist)):
+                    lang = langlist[i]
+                    phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
+                    bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
+                    phones_list.append(phones)
+                    norm_text_list.append(norm_text)
+                    bert_list.append(bert)
+                bert = torch.cat(bert_list, dim=1)
+                phones = sum(phones_list, [])
+                norm_text = ''.join(norm_text_list)

-        if not final and len(phones) < 6:
-            return self.get_phones_and_bert("." + text,language,version,final=True)
+            if not final and len(phones) < 6:
+                return self.get_phones_and_bert("." + text,language,version,final=True)

-        return phones, bert, norm_text
+            return phones, bert, norm_text


     def get_bert_feature(self, text:str, word2ph:list)->torch.Tensor:
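The only functional change in this hunk is that get_phones_and_bert now runs inside self.bert_lock, serializing access to the shared BERT model when several threads preprocess text at once. An RLock (rather than a plain Lock) matters because the method calls itself recursively, for example through the "." + text retry when too few phones are produced. A stripped-down sketch of that pattern, with illustrative names rather than the repo's real classes:

import threading


class SharedFeatureExtractor:
    """Illustrative stand-in for a preprocessor that owns one shared model."""

    def __init__(self, model):
        self.model = model
        # RLock, not Lock: the guarded method may re-enter itself from the same thread.
        self._lock = threading.RLock()

    def extract(self, text):
        with self._lock:
            # Re-entrant retry, mirroring the "." + text fallback above;
            # this would deadlock under a plain Lock but is fine with an RLock.
            if len(text) < 3:
                return self.extract("." + text)
            return self.model(text)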
@@ -81,7 +81,7 @@ if os.path.exists(semantic_path) == False:
     # utils.load_checkpoint(pretrained_s2G, vq_model, None, True)
     print(
         vq_model.load_state_dict(
-            torch.load(pretrained_s2G, map_location="cpu")["weight"], strict=False
+            torch.load(pretrained_s2G, map_location="cpu", weights_only=False)["weight"], strict=False
         )
     )
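For context on the torch.load change: recent PyTorch releases flipped the default of weights_only to True, which refuses to unpickle arbitrary Python objects inside a checkpoint; passing weights_only=False restores the permissive behaviour and is only appropriate for checkpoints you trust. A minimal sketch, with a hypothetical checkpoint path:

import torch

ckpt_path = "pretrained_s2G.pth"  # hypothetical path, for illustration only

# Strict mode (the newer default): only tensors and basic containers may be unpickled.
# state = torch.load(ckpt_path, map_location="cpu", weights_only=True)

# Permissive mode, as in the diff above -- needed when the checkpoint pickles
# arbitrary Python objects, and safe only for files you trust.
state = torch.load(ckpt_path, map_location="cpu", weights_only=False)
print(list(state.keys())[:5])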
install.sh
@@ -2,8 +2,13 @@

-# 安装构建工具
+# Install build tools
+echo "Installing GCC..."
 conda install -c conda-forge gcc=14
+
+echo "Installing G++..."
 conda install -c conda-forge gxx
+
+echo "Installing ffmpeg and cmake..."
 conda install ffmpeg cmake

 # 设置编译环境
@@ -12,10 +17,60 @@ export CMAKE_MAKE_PROGRAM="$CONDA_PREFIX/bin/cmake"
 export CC="$CONDA_PREFIX/bin/gcc"
 export CXX="$CONDA_PREFIX/bin/g++"

-conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 pytorch-cuda=11.8 -c pytorch -c nvidia
+echo "Checking for CUDA installation..."
+if command -v nvidia-smi &> /dev/null; then
+    USE_CUDA=true
+    echo "CUDA found."
+else
+    echo "CUDA not found."
+    USE_CUDA=false
+fi
+
+
+if [ "$USE_CUDA" = false ]; then
+    echo "Checking for ROCm installation..."
+    if [ -d "/opt/rocm" ]; then
+        USE_ROCM=true
+        echo "ROCm found."
+        if grep -qi "microsoft" /proc/version; then
+            echo "You are running WSL."
+            IS_WSL=true
+        else
+            echo "You are NOT running WSL."
+            IS_WSL=false
+        fi
+    else
+        echo "ROCm not found."
+        USE_ROCM=false
+    fi
+fi
+
+if [ "$USE_CUDA" = true ]; then
+    echo "Installing PyTorch with CUDA support..."
+    conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 pytorch-cuda=11.8 -c pytorch -c nvidia
+elif [ "$USE_ROCM" = true ] ; then
+    echo "Installing PyTorch with ROCm support..."
+    pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/rocm6.2
+else
+    echo "Installing PyTorch for CPU..."
+    conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 cpuonly -c pytorch
+fi
+
+
+echo "Installing Python dependencies from requirements.txt..."

-# 刷新环境
+# Refresh environment
 hash -r
 pip install -r requirements.txt
+
+if [ "$USE_ROCM" = true ] && [ "$IS_WSL" = true ] ; then
+    echo "Update to WSL compatible runtime lib..."
+    location=`pip show torch | grep Location | awk -F ": " '{print $2}'`
+    cd ${location}/torch/lib/
+    rm libhsa-runtime64.so*
+    cp /opt/rocm/lib/libhsa-runtime64.so.1.2 libhsa-runtime64.so
+fi
+
+echo "Installation completed successfully!"
@@ -32,7 +32,7 @@ def clean_path(path_str:str):
     if path_str.endswith(('\\','/')):
        return clean_path(path_str[0:-1])
    path_str = path_str.replace('/', os.sep).replace('\\', os.sep)
-    return path_str.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")
+    return path_str.strip(" \'\n\"\u202a")#path_str.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")


 def check_for_existance(file_list:list=None,is_train=False,is_dataset_processing=False):
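The clean_path change above swaps a chain of single-purpose strip calls for one strip with a character set. The difference is that strip(chars) keeps removing any of the listed characters from both ends in a single pass, while the chained form only peels them off in a fixed order. A small self-contained illustration (the path below is made up):

# Mixed quote/space/newline padding around a pasted path.
raw = ' "\n /tmp/audio.wav\n" '

chained = raw.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")
charset = raw.strip(" '\n\"\u202a")

print(repr(chained))  # '\n /tmp/audio.wav\n' -- some padding survives the fixed strip order
print(repr(charset))  # '/tmp/audio.wav' -- every listed character is removed from both ends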