feat: update import path

Signed-off-by: Guoxian Dai <guoxian@peta-mini1.lan>
Author: Guoxian Dai
Date:   2024-11-26 18:44:13 -08:00
Commit: b51a5d6c37
Parent: a70e1ad30c

29 changed files with 101 additions and 81 deletions
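
In short, every in-repo import is rewritten from the bare, directory-relative form to a package-qualified form rooted at GPT_SoVITS, presumably so the modules resolve with only the repository root on sys.path. An illustrative before/after pair, taken verbatim from the diff below:

# Before: resolves only when the GPT_SoVITS/ directory itself is on sys.path
from AR.models.t2s_model import Text2SemanticDecoder

# After: resolves when the repository root is on sys.path,
# treating GPT_SoVITS as an ordinary Python package
from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder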

.gitignore vendored
View File

@@ -13,6 +13,6 @@ SoVITS_weights
 GPT_weights_v2
 SoVITS_weights_v2
 TEMP
-weight.json
+#weight.json
 ffmpeg*
 ffprobe*

View File

@@ -8,9 +8,9 @@ from typing import Dict
 import torch
 from pytorch_lightning import LightningModule
-from AR.models.t2s_model import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder
+from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
+from GPT_SoVITS.AR.modules.optim import ScaledAdam
 class Text2SemanticLightningModule(LightningModule):
     def __init__(self, config, output_dir, is_train=True):

View File

@@ -5,8 +5,8 @@ from typing import List, Optional
 import torch
 from tqdm import tqdm
-from AR.models.utils import make_pad_mask
-from AR.models.utils import (
+from GPT_SoVITS.AR.models.utils import make_pad_mask
+from GPT_SoVITS.AR.models.utils import (
     topk_sampling,
     sample,
     logits_to_probs,
@@ -15,11 +15,11 @@ from AR.models.utils import (
     make_reject_y,
     get_batch_logps
 )
-from AR.modules.embedding import SinePositionalEmbedding
-from AR.modules.embedding import TokenEmbedding
-from AR.modules.transformer import LayerNorm
-from AR.modules.transformer import TransformerEncoder
-from AR.modules.transformer import TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding
+from GPT_SoVITS.AR.modules.embedding import TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer import LayerNorm
+from GPT_SoVITS.AR.modules.transformer import TransformerEncoder
+from GPT_SoVITS.AR.modules.transformer import TransformerEncoderLayer
 from torch import nn
 from torch.nn import functional as F
 from torchmetrics.classification import MulticlassAccuracy

View File

@@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
 from torch.nn.parameter import Parameter
 from torch.nn import functional as F
-from AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
+from GPT_SoVITS.AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
 F.multi_head_attention_forward = multi_head_attention_forward_patched

View File

@@ -10,8 +10,8 @@ from typing import Tuple
 from typing import Union
 import torch
-from AR.modules.activation import MultiheadAttention
-from AR.modules.scaling import BalancedDoubleSwish
+from GPT_SoVITS.AR.modules.activation import MultiheadAttention
+from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
 from torch import nn
 from torch import Tensor
 from torch.nn import functional as F

View File

@@ -21,8 +21,8 @@ from feature_extractor.cnhubert import CNHubert
 from module.models import SynthesizerTrn
 import librosa
 from time import time as ttime
-from tools.i18n.i18n import I18nAuto, scan_language_list
-from tools.my_utils import load_audio
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
+from GPT_SoVITS.tools.my_utils import load_audio
 from module.mel_processing import spectrogram_torch
 from TTS_infer_pack.text_segmentation_method import splits
 from TTS_infer_pack.TextPreprocessor import TextPreprocessor

View File

@@ -15,7 +15,7 @@ from text import cleaned_text_to_sequence
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 from TTS_infer_pack.text_segmentation_method import split_big_text, splits, get_method as get_seg_method
-from tools.i18n.i18n import I18nAuto, scan_language_list
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
 language=os.environ.get("language","Auto")
 language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language

View File

@@ -16,7 +16,7 @@ from transformers import (
     HubertModel,
 )
-import utils
+import GPT_SoVITS.utils
 import torch.nn as nn
 cnhubert_base_path = None
@@ -102,9 +102,10 @@ def get_content(hmodel, wav_16k_tensor):
 if __name__ == "__main__":
+    from GPT_SoVITS.utils import load_wav_to_torch_and_resample
     model = get_model()
     src_path = "/Users/Shared/原音频2.wav"
-    wav_16k_tensor = utils.load_wav_to_torch_and_resample(src_path, 16000)
+    wav_16k_tensor = load_wav_to_torch_and_resample(src_path, 16000)
     model = model
     wav_16k_tensor = wav_16k_tensor
     feats = get_content(model, wav_16k_tensor)

View File

@@ -2,9 +2,15 @@ import argparse
 import os
 import soundfile as sf
-from tools.i18n.i18n import I18nAuto
+import GPT_SoVITS
+import sys
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto
 from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
 i18n = I18nAuto()
 def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
@@ -18,6 +24,7 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
     # Change model weights
     change_gpt_weights(gpt_path=GPT_model_path)
     change_sovits_weights(sovits_path=SoVITS_model_path)
     # Synthesize audio
@@ -29,6 +36,8 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
     result_list = list(synthesis_result)
+    if not os.path.isdir(output_path):
+        os.makedirs(output_path, exist_ok=True)
     if result_list:
         last_sampling_rate, last_audio_data = result_list[-1]
         output_wav_path = os.path.join(output_path, "output.wav")

View File

@@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushB
 from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox
 import soundfile as sf
-from tools.i18n.i18n import I18nAuto
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto
 i18n = I18nAuto()
 from inference_webui import gpt_path, sovits_path, change_gpt_weights, change_sovits_weights, get_tts_wav

View File

@@ -27,8 +27,8 @@ try:
 except:...
 version=os.environ.get("version","v2")
-pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
-pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
+pretrained_sovits_name=["pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "pretrained_models/s2G488k.pth"]
+pretrained_gpt_name=["pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
 _ =[[],[]]
 for i in range(2):
@@ -39,13 +39,18 @@ for i in range(2):
 pretrained_gpt_name,pretrained_sovits_name = _
+weight_json_file_path = os.path.join(
+    os.path.dirname(__file__),
+    "weight.json"
+)
-if os.path.exists(f"./weight.json"):
+print("weight_json_file_path: ", weight_json_file_path)
+if os.path.exists(weight_json_file_path):
     pass
 else:
-    with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
+    with open(weight_json_file_path, 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
-with open(f"./weight.json", 'r', encoding="utf-8") as file:
+with open(weight_json_file_path, 'r', encoding="utf-8") as file:
     weight_data = file.read()
     weight_data=json.loads(weight_data)
 gpt_path = os.environ.get(
@@ -62,11 +67,12 @@ with open(f"./weight.json", 'r', encoding="utf-8") as file:
 # )
 # sovits_path = os.environ.get("sovits_path", pretrained_sovits_name)
 cnhubert_base_path = os.environ.get(
-    "cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base"
+    "cnhubert_base_path", "pretrained_models/chinese-hubert-base"
 )
 bert_path = os.environ.get(
-    "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
+    "bert_path", "pretrained_models/chinese-roberta-wwm-ext-large"
 )
 infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
 infer_ttswebui = int(infer_ttswebui)
 is_share = os.environ.get("is_share", "False")
@@ -79,18 +85,18 @@ import gradio as gr
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
 import librosa
-from feature_extractor import cnhubert
+from GPT_SoVITS.feature_extractor import cnhubert
 cnhubert.cnhubert_base_path = cnhubert_base_path
-from module.models import SynthesizerTrn
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
-from text import cleaned_text_to_sequence
-from text.cleaner import clean_text
+from GPT_SoVITS.module.models import SynthesizerTrn
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.text import cleaned_text_to_sequence
+from GPT_SoVITS.text.cleaner import clean_text
 from time import time as ttime
-from module.mel_processing import spectrogram_torch
-from tools.my_utils import load_audio
-from tools.i18n.i18n import I18nAuto, scan_language_list
+from GPT_SoVITS.module.mel_processing import spectrogram_torch
+from GPT_SoVITS.tools.my_utils import load_audio
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
 language=os.environ.get("language","Auto")
 language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
@@ -126,6 +132,7 @@ dict_language_v2 = {
 }
 dict_language = dict_language_v1 if version =='v1' else dict_language_v2
 tokenizer = AutoTokenizer.from_pretrained(bert_path)
 bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
 if is_half == True:
@@ -187,6 +194,9 @@ else:
 def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
     global vq_model, hps, version, dict_language
+    print("sovits_path: ", sovits_path)
+    print("os.path.isfile(sovits_path) = ", os.path.isfile(sovits_path))
     dict_s2 = torch.load(sovits_path, map_location="cpu")
     hps = dict_s2["config"]
     hps = DictToAttrRecursive(hps)
@@ -212,11 +222,11 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
     vq_model.eval()
     print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
     dict_language = dict_language_v1 if version =='v1' else dict_language_v2
-    with open("./weight.json")as f:
+    with open(weight_json_file_path)as f:
         data=f.read()
         data=json.loads(data)
         data["SoVITS"][version]=sovits_path
-    with open("./weight.json","w")as f:f.write(json.dumps(data))
+    with open(weight_json_file_path,"w")as f:f.write(json.dumps(data))
     if prompt_language is not None and text_language is not None:
         if prompt_language in list(dict_language.keys()):
             prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
@@ -249,11 +259,11 @@ def change_gpt_weights(gpt_path):
     t2s_model.eval()
     total = sum([param.nelement() for param in t2s_model.parameters()])
     print("Number of parameter: %.2fM" % (total / 1e6))
-    with open("./weight.json")as f:
+    with open(weight_json_file_path)as f:
         data=f.read()
         data=json.loads(data)
         data["GPT"][version]=gpt_path
-    with open("./weight.json","w")as f:f.write(json.dumps(data))
+    with open(weight_json_file_path,"w")as f:f.write(json.dumps(data))
 change_gpt_weights(gpt_path)
@@ -303,7 +313,7 @@ def get_first(text):
     text = re.split(pattern, text)[0].strip()
     return text
-from text import chinese
+from GPT_SoVITS.text import chinese
 def get_phones_and_bert(text,language,version,final=False):
     if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
         language = language.replace("all_","")

View File

@@ -46,7 +46,7 @@ version=os.environ.get("version","v2")
 import gradio as gr
 from TTS_infer_pack.TTS import TTS, TTS_Config
 from TTS_infer_pack.text_segmentation_method import get_method
-from tools.i18n.i18n import I18nAuto, scan_language_list
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
 language=os.environ.get("language","Auto")
 language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language

View File

@@ -3,8 +3,8 @@ import torch
 from torch import nn
 from torch.nn import functional as F
-from module import commons
-from module.modules import LayerNorm
+from GPT_SoVITS.module import commons
+from GPT_SoVITS.module.modules import LayerNorm
 class Encoder(nn.Module):

View File

@@ -17,7 +17,7 @@ from functools import lru_cache
 import requests
 from scipy.io import wavfile
 from io import BytesIO
-from tools.my_utils import load_audio
+from GPT_SoVITS.tools.my_utils import load_audio
 version = os.environ.get('version',None)
 # ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
 class TextAudioSpeakerLoader(torch.utils.data.Dataset):

View File

@@ -9,18 +9,18 @@ import torch
 from torch import nn
 from torch.nn import functional as F
-from module import commons
-from module import modules
-from module import attentions
+from GPT_SoVITS.module import commons
+from GPT_SoVITS.module import modules
+from GPT_SoVITS.module import attentions
 from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
 from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
-from module.commons import init_weights, get_padding
-from module.mrte_model import MRTE
-from module.quantize import ResidualVectorQuantizer
+from GPT_SoVITS.module.commons import init_weights, get_padding
+from GPT_SoVITS.module.mrte_model import MRTE
+from GPT_SoVITS.module.quantize import ResidualVectorQuantizer
 # from text import symbols
-from text import symbols as symbols_v1
-from text import symbols2 as symbols_v2
+from GPT_SoVITS.text import symbols as symbols_v1
+from GPT_SoVITS.text import symbols2 as symbols_v2
 from torch.cuda.amp import autocast
 import contextlib

View File

@@ -7,9 +7,9 @@ from torch.nn import functional as F
 from torch.nn import Conv1d
 from torch.nn.utils import weight_norm, remove_weight_norm
-from module import commons
-from module.commons import init_weights, get_padding
-from module.transforms import piecewise_rational_quadratic_transform
+from GPT_SoVITS.module import commons
+from GPT_SoVITS.module.commons import init_weights, get_padding
+from GPT_SoVITS.module.transforms import piecewise_rational_quadratic_transform
 import torch.distributions as D

View File

@@ -3,7 +3,7 @@
 import torch
 from torch import nn
 from torch.nn.utils import remove_weight_norm, weight_norm
-from module.attentions import MultiHeadAttention
+from GPT_SoVITS.module.attentions import MultiHeadAttention
 class MRTE(nn.Module):

View File

@@ -13,7 +13,7 @@ import typing as tp
 import torch
 from torch import nn
-from module.core_vq import ResidualVectorQuantization
+from GPT_SoVITS.module.core_vq import ResidualVectorQuantization
 @dataclass

View File

@@ -10,7 +10,7 @@ cnhubert.cnhubert_base_path = cnhubert_base_path
 ssl_model = cnhubert.get_model()
 from text import cleaned_text_to_sequence
 import soundfile
-from tools.my_utils import load_audio
+from GPT_SoVITS.tools.my_utils import load_audio
 import os
 import json

View File

@@ -21,7 +21,7 @@ from tqdm import tqdm
 from text.cleaner import clean_text
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
-from tools.my_utils import clean_path
+from GPT_SoVITS.tools.my_utils import clean_path
 # inp_text=sys.argv[1]
 # inp_wav_dir=sys.argv[2]

View File

@@ -19,7 +19,7 @@ from scipy.io import wavfile
 import librosa
 now_dir = os.getcwd()
 sys.path.append(now_dir)
-from tools.my_utils import load_audio,clean_path
+from GPT_SoVITS.tools.my_utils import load_audio,clean_path
 # from config import cnhubert_base_path
 # cnhubert.cnhubert_base_path=cnhubert_base_path

View File

@@ -24,7 +24,7 @@ from glob import glob
 from tqdm import tqdm
 import logging, librosa, utils
 from module.models import SynthesizerTrn
-from tools.my_utils import clean_path
+from GPT_SoVITS.tools.my_utils import clean_path
 logging.getLogger("numba").setLevel(logging.WARNING)
 # from config import pretrained_s2G

View File

@@ -3,7 +3,7 @@ from collections import OrderedDict
 from time import time as ttime
 import shutil,os
 import torch
-from tools.i18n.i18n import I18nAuto
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto
 i18n = I18nAuto()

View File

@@ -4,8 +4,8 @@ import os
 # else:
 # from text.symbols2 import symbols
-from text import symbols as symbols_v1
-from text import symbols2 as symbols_v2
+from GPT_SoVITS.text import symbols as symbols_v1
+from GPT_SoVITS.text import symbols2 as symbols_v2
 _symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)}
 _symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)}

View File

@@ -5,9 +5,9 @@ import re
 import cn2an
 from pypinyin import lazy_pinyin, Style
-from text.symbols import punctuation
-from text.tone_sandhi import ToneSandhi
-from text.zh_normalization.text_normlization import TextNormalizer
+from GPT_SoVITS.text.symbols import punctuation
+from GPT_SoVITS.text.tone_sandhi import ToneSandhi
+from GPT_SoVITS.text.zh_normalization.text_normlization import TextNormalizer
 normalizer = lambda x: cn2an.transform(x, "an2cn")

View File

@@ -6,9 +6,9 @@ import cn2an
 from pypinyin import lazy_pinyin, Style
 from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
-from text.symbols import punctuation
-from text.tone_sandhi import ToneSandhi
-from text.zh_normalization.text_normlization import TextNormalizer
+from GPT_SoVITS.text.symbols import punctuation
+from GPT_SoVITS.text.tone_sandhi import ToneSandhi
+from GPT_SoVITS.text.zh_normalization.text_normlization import TextNormalizer
 normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -25,9 +25,9 @@ import jieba_fast.posseg as psg
 is_g2pw = True#True if is_g2pw_str.lower() == 'true' else False
 if is_g2pw:
     print("当前使用g2pw进行拼音推理")
-    from text.g2pw import G2PWPinyin, correct_pronunciation
+    from GPT_SoVITS.text.g2pw import G2PWPinyin, correct_pronunciation
     parent_directory = os.path.dirname(current_file_path)
-    g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True)
+    g2pw = G2PWPinyin(model_dir=f"{current_file_path}/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True)
 rep_map = {
     "：": ",",

View File

@@ -1,4 +1,4 @@
-from text import cleaned_text_to_sequence
+from GPT_SoVITS.text import cleaned_text_to_sequence
 import os
 # if os.environ.get("version","v1")=="v1":
 # from text import chinese
@@ -7,8 +7,8 @@ import os
 # from text import chinese2 as chinese
 # from text.symbols2 import symbols
-from text import symbols as symbols_v1
-from text import symbols2 as symbols_v2
+from GPT_SoVITS.text import symbols as symbols_v1
+from GPT_SoVITS.text import symbols2 as symbols_v2
 special = [
     # ("%", "zh", "SP"),
@@ -33,7 +33,7 @@ def clean_text(text, language, version=None):
     for special_s, special_l, target_symbol in special:
         if special_s in text and language == special_l:
             return clean_special(text, language, special_s, target_symbol, version)
-    language_module = __import__("text."+language_module_map[language],fromlist=[language_module_map[language]])
+    language_module = __import__("GPT_SoVITS.text."+language_module_map[language],fromlist=[language_module_map[language]])
     if hasattr(language_module,"text_normalize"):
         norm_text = language_module.text_normalize(text)
     else:

View File

@@ -1 +1 @@
-from text.g2pw.g2pw import *
+from GPT_SoVITS.text.g2pw.g2pw import *

View File

@@ -11,4 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from text.zh_normalization.text_normlization import *
+from GPT_SoVITS.text.zh_normalization.text_normlization import *
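
Note on resolving the new import paths: the package-qualified imports above only work when the directory that contains GPT_SoVITS/ is on sys.path. A minimal sketch of one way a caller could satisfy that, assuming it is launched from the repository root (the same sys.path.append(now_dir) idiom already visible in one of the hunks above); the entry point shown here is an illustration, not part of this commit:

import os
import sys

# Assumption: the current working directory is the repository root,
# so GPT_SoVITS/ is visible as a top-level package.
sys.path.insert(0, os.getcwd())

from GPT_SoVITS.tools.i18n.i18n import I18nAuto  # one of the rewritten import paths
i18n = I18nAuto()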