diff --git a/.gitignore b/.gitignore index e86640a6..464d79f9 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,6 @@ SoVITS_weights GPT_weights_v2 SoVITS_weights_v2 TEMP -weight.json +#weight.json ffmpeg* -ffprobe* \ No newline at end of file +ffprobe* diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module.py b/GPT_SoVITS/AR/models/t2s_lightning_module.py index 2dd3f392..c45e2a72 100644 --- a/GPT_SoVITS/AR/models/t2s_lightning_module.py +++ b/GPT_SoVITS/AR/models/t2s_lightning_module.py @@ -8,9 +8,9 @@ from typing import Dict import torch from pytorch_lightning import LightningModule -from AR.models.t2s_model import Text2SemanticDecoder -from AR.modules.lr_schedulers import WarmupCosineLRSchedule -from AR.modules.optim import ScaledAdam +from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder +from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule +from GPT_SoVITS.AR.modules.optim import ScaledAdam class Text2SemanticLightningModule(LightningModule): def __init__(self, config, output_dir, is_train=True): diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index fb528914..541b68d9 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -5,8 +5,8 @@ from typing import List, Optional import torch from tqdm import tqdm -from AR.models.utils import make_pad_mask -from AR.models.utils import ( +from GPT_SoVITS.AR.models.utils import make_pad_mask +from GPT_SoVITS.AR.models.utils import ( topk_sampling, sample, logits_to_probs, @@ -15,11 +15,11 @@ from AR.models.utils import ( make_reject_y, get_batch_logps ) -from AR.modules.embedding import SinePositionalEmbedding -from AR.modules.embedding import TokenEmbedding -from AR.modules.transformer import LayerNorm -from AR.modules.transformer import TransformerEncoder -from AR.modules.transformer import TransformerEncoderLayer +from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding +from GPT_SoVITS.AR.modules.embedding import TokenEmbedding +from GPT_SoVITS.AR.modules.transformer import LayerNorm +from GPT_SoVITS.AR.modules.transformer import TransformerEncoder +from GPT_SoVITS.AR.modules.transformer import TransformerEncoderLayer from torch import nn from torch.nn import functional as F from torchmetrics.classification import MulticlassAccuracy diff --git a/GPT_SoVITS/AR/modules/activation.py b/GPT_SoVITS/AR/modules/activation.py index 5ca888b5..92f57056 100644 --- a/GPT_SoVITS/AR/modules/activation.py +++ b/GPT_SoVITS/AR/modules/activation.py @@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear from torch.nn.parameter import Parameter from torch.nn import functional as F -from AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched +from GPT_SoVITS.AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched F.multi_head_attention_forward = multi_head_attention_forward_patched diff --git a/GPT_SoVITS/AR/modules/transformer.py b/GPT_SoVITS/AR/modules/transformer.py index 7921f48e..2ddff82d 100644 --- a/GPT_SoVITS/AR/modules/transformer.py +++ b/GPT_SoVITS/AR/modules/transformer.py @@ -10,8 +10,8 @@ from typing import Tuple from typing import Union import torch -from AR.modules.activation import MultiheadAttention -from AR.modules.scaling import BalancedDoubleSwish +from GPT_SoVITS.AR.modules.activation import MultiheadAttention +from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish from torch import nn from torch import Tensor from torch.nn import functional as F diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py index a1eeb28c..c7675467 100644 --- a/GPT_SoVITS/TTS_infer_pack/TTS.py +++ b/GPT_SoVITS/TTS_infer_pack/TTS.py @@ -21,8 +21,8 @@ from feature_extractor.cnhubert import CNHubert from module.models import SynthesizerTrn import librosa from time import time as ttime -from tools.i18n.i18n import I18nAuto, scan_language_list -from tools.my_utils import load_audio +from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list +from GPT_SoVITS.tools.my_utils import load_audio from module.mel_processing import spectrogram_torch from TTS_infer_pack.text_segmentation_method import splits from TTS_infer_pack.TextPreprocessor import TextPreprocessor diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py index b90bd929..0263af1f 100644 --- a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py +++ b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py @@ -15,7 +15,7 @@ from text import cleaned_text_to_sequence from transformers import AutoModelForMaskedLM, AutoTokenizer from TTS_infer_pack.text_segmentation_method import split_big_text, splits, get_method as get_seg_method -from tools.i18n.i18n import I18nAuto, scan_language_list +from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list language=os.environ.get("language","Auto") language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language diff --git a/GPT_SoVITS/feature_extractor/cnhubert.py b/GPT_SoVITS/feature_extractor/cnhubert.py index 013e462b..9ab07a7a 100644 --- a/GPT_SoVITS/feature_extractor/cnhubert.py +++ b/GPT_SoVITS/feature_extractor/cnhubert.py @@ -16,7 +16,7 @@ from transformers import ( HubertModel, ) -import utils +import GPT_SoVITS.utils import torch.nn as nn cnhubert_base_path = None @@ -102,9 +102,10 @@ def get_content(hmodel, wav_16k_tensor): if __name__ == "__main__": + from GPT_SoVITS.utils import load_wav_to_torch_and_resample model = get_model() src_path = "/Users/Shared/原音频2.wav" - wav_16k_tensor = utils.load_wav_to_torch_and_resample(src_path, 16000) + wav_16k_tensor = load_wav_to_torch_and_resample(src_path, 16000) model = model wav_16k_tensor = wav_16k_tensor feats = get_content(model, wav_16k_tensor) diff --git a/GPT_SoVITS/inference_cli.py b/GPT_SoVITS/inference_cli.py index bd987aaf..36c11a67 100644 --- a/GPT_SoVITS/inference_cli.py +++ b/GPT_SoVITS/inference_cli.py @@ -2,9 +2,15 @@ import argparse import os import soundfile as sf -from tools.i18n.i18n import I18nAuto +import GPT_SoVITS +import sys + +from GPT_SoVITS.tools.i18n.i18n import I18nAuto from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav + + + i18n = I18nAuto() def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path): @@ -18,6 +24,7 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, # Change model weights change_gpt_weights(gpt_path=GPT_model_path) + change_sovits_weights(sovits_path=SoVITS_model_path) # Synthesize audio @@ -29,6 +36,8 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, result_list = list(synthesis_result) + if not os.path.isdir(output_path): + os.makedirs(output_path, exist_ok=True) if result_list: last_sampling_rate, last_audio_data = result_list[-1] output_wav_path = os.path.join(output_path, "output.wav") diff --git a/GPT_SoVITS/inference_gui.py b/GPT_SoVITS/inference_gui.py index 2059155d..9d890edb 100644 --- a/GPT_SoVITS/inference_gui.py +++ b/GPT_SoVITS/inference_gui.py @@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushB from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox import soundfile as sf -from tools.i18n.i18n import I18nAuto +from GPT_SoVITS.tools.i18n.i18n import I18nAuto i18n = I18nAuto() from inference_webui import gpt_path, sovits_path, change_gpt_weights, change_sovits_weights, get_tts_wav diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 5aff4ae5..c9f90d58 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -27,8 +27,8 @@ try: except:... version=os.environ.get("version","v2") -pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"] -pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"] +pretrained_sovits_name=["pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "pretrained_models/s2G488k.pth"] +pretrained_gpt_name=["pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"] _ =[[],[]] for i in range(2): @@ -38,14 +38,19 @@ for i in range(2): _[-1].append(pretrained_sovits_name[i]) pretrained_gpt_name,pretrained_sovits_name = _ - -if os.path.exists(f"./weight.json"): +weight_json_file_path = os.path.join( + os.path.dirname(__file__), + "weight.json" +) + +print("weight_json_file_path: ", weight_json_file_path) +if os.path.exists(weight_json_file_path): pass else: - with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file) + with open(weight_json_file_path, 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file) -with open(f"./weight.json", 'r', encoding="utf-8") as file: +with open(weight_json_file_path, 'r', encoding="utf-8") as file: weight_data = file.read() weight_data=json.loads(weight_data) gpt_path = os.environ.get( @@ -62,11 +67,12 @@ with open(f"./weight.json", 'r', encoding="utf-8") as file: # ) # sovits_path = os.environ.get("sovits_path", pretrained_sovits_name) cnhubert_base_path = os.environ.get( - "cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base" + "cnhubert_base_path", "pretrained_models/chinese-hubert-base" ) bert_path = os.environ.get( - "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" + "bert_path", "pretrained_models/chinese-roberta-wwm-ext-large" ) + infer_ttswebui = os.environ.get("infer_ttswebui", 9872) infer_ttswebui = int(infer_ttswebui) is_share = os.environ.get("is_share", "False") @@ -79,18 +85,18 @@ import gradio as gr from transformers import AutoModelForMaskedLM, AutoTokenizer import numpy as np import librosa -from feature_extractor import cnhubert +from GPT_SoVITS.feature_extractor import cnhubert cnhubert.cnhubert_base_path = cnhubert_base_path -from module.models import SynthesizerTrn -from AR.models.t2s_lightning_module import Text2SemanticLightningModule -from text import cleaned_text_to_sequence -from text.cleaner import clean_text +from GPT_SoVITS.module.models import SynthesizerTrn +from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule +from GPT_SoVITS.text import cleaned_text_to_sequence +from GPT_SoVITS.text.cleaner import clean_text from time import time as ttime -from module.mel_processing import spectrogram_torch -from tools.my_utils import load_audio -from tools.i18n.i18n import I18nAuto, scan_language_list +from GPT_SoVITS.module.mel_processing import spectrogram_torch +from GPT_SoVITS.tools.my_utils import load_audio +from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list language=os.environ.get("language","Auto") language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language @@ -126,6 +132,7 @@ dict_language_v2 = { } dict_language = dict_language_v1 if version =='v1' else dict_language_v2 + tokenizer = AutoTokenizer.from_pretrained(bert_path) bert_model = AutoModelForMaskedLM.from_pretrained(bert_path) if is_half == True: @@ -187,6 +194,9 @@ else: def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): global vq_model, hps, version, dict_language + print("sovits_path: ", sovits_path) + print("os.path.isfile(sovits_path) = ", os.path.isfile(sovits_path)) + dict_s2 = torch.load(sovits_path, map_location="cpu") hps = dict_s2["config"] hps = DictToAttrRecursive(hps) @@ -212,11 +222,11 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): vq_model.eval() print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) dict_language = dict_language_v1 if version =='v1' else dict_language_v2 - with open("./weight.json")as f: + with open(weight_json_file_path)as f: data=f.read() data=json.loads(data) data["SoVITS"][version]=sovits_path - with open("./weight.json","w")as f:f.write(json.dumps(data)) + with open(weight_json_file_path,"w")as f:f.write(json.dumps(data)) if prompt_language is not None and text_language is not None: if prompt_language in list(dict_language.keys()): prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language} @@ -249,11 +259,11 @@ def change_gpt_weights(gpt_path): t2s_model.eval() total = sum([param.nelement() for param in t2s_model.parameters()]) print("Number of parameter: %.2fM" % (total / 1e6)) - with open("./weight.json")as f: + with open(weight_json_file_path)as f: data=f.read() data=json.loads(data) data["GPT"][version]=gpt_path - with open("./weight.json","w")as f:f.write(json.dumps(data)) + with open(weight_json_file_path,"w")as f:f.write(json.dumps(data)) change_gpt_weights(gpt_path) @@ -303,7 +313,7 @@ def get_first(text): text = re.split(pattern, text)[0].strip() return text -from text import chinese +from GPT_SoVITS.text import chinese def get_phones_and_bert(text,language,version,final=False): if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}: language = language.replace("all_","") diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py index dcc2bcf9..072b2f40 100644 --- a/GPT_SoVITS/inference_webui_fast.py +++ b/GPT_SoVITS/inference_webui_fast.py @@ -46,7 +46,7 @@ version=os.environ.get("version","v2") import gradio as gr from TTS_infer_pack.TTS import TTS, TTS_Config from TTS_infer_pack.text_segmentation_method import get_method -from tools.i18n.i18n import I18nAuto, scan_language_list +from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list language=os.environ.get("language","Auto") language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language diff --git a/GPT_SoVITS/module/attentions.py b/GPT_SoVITS/module/attentions.py index a2e9e515..12b5620f 100644 --- a/GPT_SoVITS/module/attentions.py +++ b/GPT_SoVITS/module/attentions.py @@ -3,8 +3,8 @@ import torch from torch import nn from torch.nn import functional as F -from module import commons -from module.modules import LayerNorm +from GPT_SoVITS.module import commons +from GPT_SoVITS.module.modules import LayerNorm class Encoder(nn.Module): diff --git a/GPT_SoVITS/module/data_utils.py b/GPT_SoVITS/module/data_utils.py index bfa29e87..8536a043 100644 --- a/GPT_SoVITS/module/data_utils.py +++ b/GPT_SoVITS/module/data_utils.py @@ -17,7 +17,7 @@ from functools import lru_cache import requests from scipy.io import wavfile from io import BytesIO -from tools.my_utils import load_audio +from GPT_SoVITS.tools.my_utils import load_audio version = os.environ.get('version',None) # ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79) class TextAudioSpeakerLoader(torch.utils.data.Dataset): diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py index 968c4cbf..55822699 100644 --- a/GPT_SoVITS/module/models.py +++ b/GPT_SoVITS/module/models.py @@ -9,18 +9,18 @@ import torch from torch import nn from torch.nn import functional as F -from module import commons -from module import modules -from module import attentions +from GPT_SoVITS.module import commons +from GPT_SoVITS.module import modules +from GPT_SoVITS.module import attentions from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm -from module.commons import init_weights, get_padding -from module.mrte_model import MRTE -from module.quantize import ResidualVectorQuantizer +from GPT_SoVITS.module.commons import init_weights, get_padding +from GPT_SoVITS.module.mrte_model import MRTE +from GPT_SoVITS.module.quantize import ResidualVectorQuantizer # from text import symbols -from text import symbols as symbols_v1 -from text import symbols2 as symbols_v2 +from GPT_SoVITS.text import symbols as symbols_v1 +from GPT_SoVITS.text import symbols2 as symbols_v2 from torch.cuda.amp import autocast import contextlib diff --git a/GPT_SoVITS/module/modules.py b/GPT_SoVITS/module/modules.py index f4447455..03034042 100644 --- a/GPT_SoVITS/module/modules.py +++ b/GPT_SoVITS/module/modules.py @@ -7,9 +7,9 @@ from torch.nn import functional as F from torch.nn import Conv1d from torch.nn.utils import weight_norm, remove_weight_norm -from module import commons -from module.commons import init_weights, get_padding -from module.transforms import piecewise_rational_quadratic_transform +from GPT_SoVITS.module import commons +from GPT_SoVITS.module.commons import init_weights, get_padding +from GPT_SoVITS.module.transforms import piecewise_rational_quadratic_transform import torch.distributions as D diff --git a/GPT_SoVITS/module/mrte_model.py b/GPT_SoVITS/module/mrte_model.py index b0cd242c..68abdf9a 100644 --- a/GPT_SoVITS/module/mrte_model.py +++ b/GPT_SoVITS/module/mrte_model.py @@ -3,7 +3,7 @@ import torch from torch import nn from torch.nn.utils import remove_weight_norm, weight_norm -from module.attentions import MultiHeadAttention +from GPT_SoVITS.module.attentions import MultiHeadAttention class MRTE(nn.Module): diff --git a/GPT_SoVITS/module/quantize.py b/GPT_SoVITS/module/quantize.py index f9a5c632..434ca1ab 100644 --- a/GPT_SoVITS/module/quantize.py +++ b/GPT_SoVITS/module/quantize.py @@ -13,7 +13,7 @@ import typing as tp import torch from torch import nn -from module.core_vq import ResidualVectorQuantization +from GPT_SoVITS.module.core_vq import ResidualVectorQuantization @dataclass diff --git a/GPT_SoVITS/onnx_export.py b/GPT_SoVITS/onnx_export.py index 43aac19a..3e9796e3 100644 --- a/GPT_SoVITS/onnx_export.py +++ b/GPT_SoVITS/onnx_export.py @@ -10,7 +10,7 @@ cnhubert.cnhubert_base_path = cnhubert_base_path ssl_model = cnhubert.get_model() from text import cleaned_text_to_sequence import soundfile -from tools.my_utils import load_audio +from GPT_SoVITS.tools.my_utils import load_audio import os import json diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py index bdeacc7b..4aec1524 100644 --- a/GPT_SoVITS/prepare_datasets/1-get-text.py +++ b/GPT_SoVITS/prepare_datasets/1-get-text.py @@ -21,7 +21,7 @@ from tqdm import tqdm from text.cleaner import clean_text from transformers import AutoModelForMaskedLM, AutoTokenizer import numpy as np -from tools.my_utils import clean_path +from GPT_SoVITS.tools.my_utils import clean_path # inp_text=sys.argv[1] # inp_wav_dir=sys.argv[2] diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index 27b61f27..c5673ffe 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -19,7 +19,7 @@ from scipy.io import wavfile import librosa now_dir = os.getcwd() sys.path.append(now_dir) -from tools.my_utils import load_audio,clean_path +from GPT_SoVITS.tools.my_utils import load_audio,clean_path # from config import cnhubert_base_path # cnhubert.cnhubert_base_path=cnhubert_base_path diff --git a/GPT_SoVITS/prepare_datasets/3-get-semantic.py b/GPT_SoVITS/prepare_datasets/3-get-semantic.py index a29a6629..711febbc 100644 --- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py +++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py @@ -24,7 +24,7 @@ from glob import glob from tqdm import tqdm import logging, librosa, utils from module.models import SynthesizerTrn -from tools.my_utils import clean_path +from GPT_SoVITS.tools.my_utils import clean_path logging.getLogger("numba").setLevel(logging.WARNING) # from config import pretrained_s2G diff --git a/GPT_SoVITS/process_ckpt.py b/GPT_SoVITS/process_ckpt.py index 3a436f10..c76b5639 100644 --- a/GPT_SoVITS/process_ckpt.py +++ b/GPT_SoVITS/process_ckpt.py @@ -3,7 +3,7 @@ from collections import OrderedDict from time import time as ttime import shutil,os import torch -from tools.i18n.i18n import I18nAuto +from GPT_SoVITS.tools.i18n.i18n import I18nAuto i18n = I18nAuto() diff --git a/GPT_SoVITS/text/__init__.py b/GPT_SoVITS/text/__init__.py index 2791d7ab..5dd83b48 100644 --- a/GPT_SoVITS/text/__init__.py +++ b/GPT_SoVITS/text/__init__.py @@ -4,8 +4,8 @@ import os # else: # from text.symbols2 import symbols -from text import symbols as symbols_v1 -from text import symbols2 as symbols_v2 +from GPT_SoVITS.text import symbols as symbols_v1 +from GPT_SoVITS.text import symbols2 as symbols_v2 _symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)} _symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)} diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py index 2255c6e1..3e50a39a 100644 --- a/GPT_SoVITS/text/chinese.py +++ b/GPT_SoVITS/text/chinese.py @@ -5,9 +5,9 @@ import re import cn2an from pypinyin import lazy_pinyin, Style -from text.symbols import punctuation -from text.tone_sandhi import ToneSandhi -from text.zh_normalization.text_normlization import TextNormalizer +from GPT_SoVITS.text.symbols import punctuation +from GPT_SoVITS.text.tone_sandhi import ToneSandhi +from GPT_SoVITS.text.zh_normalization.text_normlization import TextNormalizer normalizer = lambda x: cn2an.transform(x, "an2cn") diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index f716b410..de8791d1 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -6,9 +6,9 @@ import cn2an from pypinyin import lazy_pinyin, Style from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals -from text.symbols import punctuation -from text.tone_sandhi import ToneSandhi -from text.zh_normalization.text_normlization import TextNormalizer +from GPT_SoVITS.text.symbols import punctuation +from GPT_SoVITS.text.tone_sandhi import ToneSandhi +from GPT_SoVITS.text.zh_normalization.text_normlization import TextNormalizer normalizer = lambda x: cn2an.transform(x, "an2cn") @@ -25,9 +25,9 @@ import jieba_fast.posseg as psg is_g2pw = True#True if is_g2pw_str.lower() == 'true' else False if is_g2pw: print("当前使用g2pw进行拼音推理") - from text.g2pw import G2PWPinyin, correct_pronunciation + from GPT_SoVITS.text.g2pw import G2PWPinyin, correct_pronunciation parent_directory = os.path.dirname(current_file_path) - g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True) + g2pw = G2PWPinyin(model_dir=f"{current_file_path}/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True) rep_map = { ":": ",", diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py index 98535f27..cd35a6ec 100644 --- a/GPT_SoVITS/text/cleaner.py +++ b/GPT_SoVITS/text/cleaner.py @@ -1,4 +1,4 @@ -from text import cleaned_text_to_sequence +from GPT_SoVITS.text import cleaned_text_to_sequence import os # if os.environ.get("version","v1")=="v1": # from text import chinese @@ -7,8 +7,8 @@ import os # from text import chinese2 as chinese # from text.symbols2 import symbols -from text import symbols as symbols_v1 -from text import symbols2 as symbols_v2 +from GPT_SoVITS.text import symbols as symbols_v1 +from GPT_SoVITS.text import symbols2 as symbols_v2 special = [ # ("%", "zh", "SP"), @@ -33,7 +33,7 @@ def clean_text(text, language, version=None): for special_s, special_l, target_symbol in special: if special_s in text and language == special_l: return clean_special(text, language, special_s, target_symbol, version) - language_module = __import__("text."+language_module_map[language],fromlist=[language_module_map[language]]) + language_module = __import__("GPT_SoVITS.text."+language_module_map[language],fromlist=[language_module_map[language]]) if hasattr(language_module,"text_normalize"): norm_text = language_module.text_normalize(text) else: diff --git a/GPT_SoVITS/text/g2pw/__init__.py b/GPT_SoVITS/text/g2pw/__init__.py index d7126755..3436e58b 100644 --- a/GPT_SoVITS/text/g2pw/__init__.py +++ b/GPT_SoVITS/text/g2pw/__init__.py @@ -1 +1 @@ -from text.g2pw.g2pw import * \ No newline at end of file +from GPT_SoVITS.text.g2pw.g2pw import * \ No newline at end of file diff --git a/GPT_SoVITS/text/zh_normalization/__init__.py b/GPT_SoVITS/text/zh_normalization/__init__.py index 46b367a6..ad201c81 100644 --- a/GPT_SoVITS/text/zh_normalization/__init__.py +++ b/GPT_SoVITS/text/zh_normalization/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from text.zh_normalization.text_normlization import * +from GPT_SoVITS.text.zh_normalization.text_normlization import *