diff --git a/GPT_SoVITS/AR/data/data_module.py b/GPT_SoVITS/AR/data/data_module.py
index cb947959..9dd4addd 100644
--- a/GPT_SoVITS/AR/data/data_module.py
+++ b/GPT_SoVITS/AR/data/data_module.py
@@ -1,8 +1,8 @@
 # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/data_module.py
 # reference: https://github.com/lifeiteng/vall-e
 from pytorch_lightning import LightningDataModule
-from AR.data.bucket_sampler import DistributedBucketSampler
-from AR.data.dataset import Text2SemanticDataset
+from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
+from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset
 from torch.utils.data import DataLoader
 
 
diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module.py b/GPT_SoVITS/AR/models/t2s_lightning_module.py
index 2dd3f392..c45e2a72 100644
--- a/GPT_SoVITS/AR/models/t2s_lightning_module.py
+++ b/GPT_SoVITS/AR/models/t2s_lightning_module.py
@@ -8,9 +8,9 @@ from typing import Dict
 import torch
 from pytorch_lightning import LightningModule
-from AR.models.t2s_model import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder
+from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
+from GPT_SoVITS.AR.modules.optim import ScaledAdam
 
 
 class Text2SemanticLightningModule(LightningModule):
     def __init__(self, config, output_dir, is_train=True):
diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py b/GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py
index 487edb01..04db0713 100644
--- a/GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py
+++ b/GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py
@@ -8,9 +8,9 @@ from typing import Dict
 import torch
 from pytorch_lightning import LightningModule
 
-from AR.models.t2s_model_onnx import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from GPT_SoVITS.AR.models.t2s_model_onnx import Text2SemanticDecoder
+from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
+from GPT_SoVITS.AR.modules.optim import ScaledAdam
 
 
 class Text2SemanticLightningModule(LightningModule):
diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py
index fb528914..541b68d9 100644
--- a/GPT_SoVITS/AR/models/t2s_model.py
+++ b/GPT_SoVITS/AR/models/t2s_model.py
@@ -5,8 +5,8 @@
 from typing import List, Optional
 import torch
 from tqdm import tqdm
-from AR.models.utils import make_pad_mask
-from AR.models.utils import (
+from GPT_SoVITS.AR.models.utils import make_pad_mask
+from GPT_SoVITS.AR.models.utils import (
     topk_sampling,
     sample,
     logits_to_probs,
@@ -15,11 +15,11 @@ from AR.models.utils import (
     make_reject_y,
     get_batch_logps
 )
-from AR.modules.embedding import SinePositionalEmbedding
-from AR.modules.embedding import TokenEmbedding
-from AR.modules.transformer import LayerNorm
-from AR.modules.transformer import TransformerEncoder
-from AR.modules.transformer import TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding
+from GPT_SoVITS.AR.modules.embedding import TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer import LayerNorm
+from GPT_SoVITS.AR.modules.transformer import TransformerEncoder
+from GPT_SoVITS.AR.modules.transformer import TransformerEncoderLayer
 from torch import nn
 from torch.nn import functional as F
 from torchmetrics.classification import MulticlassAccuracy
diff --git a/GPT_SoVITS/AR/models/t2s_model_onnx.py b/GPT_SoVITS/AR/models/t2s_model_onnx.py
index 7834297d..83c8cda4 100644
--- a/GPT_SoVITS/AR/models/t2s_model_onnx.py
+++ b/GPT_SoVITS/AR/models/t2s_model_onnx.py
@@ -3,11 +3,11 @@
 import torch
 from tqdm import tqdm
 
-from AR.modules.embedding_onnx import SinePositionalEmbedding
-from AR.modules.embedding_onnx import TokenEmbedding
-from AR.modules.transformer_onnx import LayerNorm
-from AR.modules.transformer_onnx import TransformerEncoder
-from AR.modules.transformer_onnx import TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding_onnx import SinePositionalEmbedding
+from GPT_SoVITS.AR.modules.embedding_onnx import TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer_onnx import LayerNorm
+from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoder
+from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoderLayer
 from torch import nn
 from torch.nn import functional as F
 from torchmetrics.classification import MulticlassAccuracy
diff --git a/GPT_SoVITS/AR/modules/activation.py b/GPT_SoVITS/AR/modules/activation.py
index 5ca888b5..5e9b0293 100644
--- a/GPT_SoVITS/AR/modules/activation.py
+++ b/GPT_SoVITS/AR/modules/activation.py
@@ -12,7 +12,7 @@
 from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
 from torch.nn.parameter import Parameter
 from torch.nn import functional as F
 
-from AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
+from GPT_SoVITS.AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
 
 F.multi_head_attention_forward = multi_head_attention_forward_patched
diff --git a/GPT_SoVITS/AR/modules/activation_onnx.py b/GPT_SoVITS/AR/modules/activation_onnx.py
index b54acd99..92d89ca4 100644
--- a/GPT_SoVITS/AR/modules/activation_onnx.py
+++ b/GPT_SoVITS/AR/modules/activation_onnx.py
@@ -12,7 +12,7 @@
 from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
 from torch.nn.parameter import Parameter
 from torch.nn import functional as F
-from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
+from GPT_SoVITS.AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
 
 
 class MultiheadAttention(Module):
diff --git a/GPT_SoVITS/AR/modules/transformer.py b/GPT_SoVITS/AR/modules/transformer.py
index 7921f48e..2ddff82d 100644
--- a/GPT_SoVITS/AR/modules/transformer.py
+++ b/GPT_SoVITS/AR/modules/transformer.py
@@ -10,8 +10,8 @@
 from typing import Tuple
 from typing import Union
 import torch
-from AR.modules.activation import MultiheadAttention
-from AR.modules.scaling import BalancedDoubleSwish
+from GPT_SoVITS.AR.modules.activation import MultiheadAttention
+from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
 from torch import nn
 from torch import Tensor
 from torch.nn import functional as F
diff --git a/GPT_SoVITS/AR/modules/transformer_onnx.py b/GPT_SoVITS/AR/modules/transformer_onnx.py
index a3f68b43..fd9853a0 100644
--- a/GPT_SoVITS/AR/modules/transformer_onnx.py
+++ b/GPT_SoVITS/AR/modules/transformer_onnx.py
@@ -10,8 +10,8 @@
 from typing import Tuple
 from typing import Union
 import torch
-from AR.modules.activation_onnx import MultiheadAttention
-from AR.modules.scaling import BalancedDoubleSwish
+from GPT_SoVITS.AR.modules.activation_onnx import MultiheadAttention
+from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
 from torch import nn
 from torch import Tensor
 from torch.nn import functional as F
diff --git a/GPT_SoVITS/AR/text_processing/phonemizer.py b/GPT_SoVITS/AR/text_processing/phonemizer.py
index 9c5f58fb..7a33ba9d 100644
--- a/GPT_SoVITS/AR/text_processing/phonemizer.py
+++ b/GPT_SoVITS/AR/text_processing/phonemizer.py
@@ -9,7 +9,7 @@
 import regex
 from gruut import sentences
 from gruut.const import Sentence
 from gruut.const import Word
-from AR.text_processing.symbols import SYMBOL_TO_ID
+from GPT_SoVITS.AR.text_processing.symbols import SYMBOL_TO_ID
 
 class GruutPhonemizer:
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index b7e7b468..fe5f7055 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -25,15 +25,15 @@
 from typing import Generator, List, Tuple, Union
 from tqdm import tqdm
 
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
-from feature_extractor.cnhubert import CNHubert
-from module.models import SynthesizerTrn
-from module.mel_processing import spectrogram_torch
-from tools.i18n.i18n import I18nAuto, scan_language_list
-from tools.my_utils import load_audio
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.feature_extractor.cnhubert import CNHubert
+from GPT_SoVITS.module.models import SynthesizerTrn
+from GPT_SoVITS.module.mel_processing import spectrogram_torch
+from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
+from GPT_SoVITS.tools.my_utils import load_audio
 from transformers import AutoModelForMaskedLM, AutoTokenizer
-from TTS_infer_pack.text_segmentation_method import splits
-from TTS_infer_pack.TextPreprocessor import TextPreprocessor
+from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import splits
+from GPT_SoVITS.TTS_infer_pack.TextPreprocessor import TextPreprocessor
 
 language=os.environ.get("language","Auto")
 language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
diff --git a/GPT_SoVITS/export_torch_script.py b/GPT_SoVITS/export_torch_script.py
index f7bef133..5be41e19 100644
--- a/GPT_SoVITS/export_torch_script.py
+++ b/GPT_SoVITS/export_torch_script.py
@@ -13,7 +13,7 @@
 from torch.nn import functional as F
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 from feature_extractor import cnhubert
 
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
 from module.models_onnx import SynthesizerTrn
 from inference_webui import get_phones_and_bert
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 5aff4ae5..69b30294 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -84,7 +84,7 @@
 from feature_extractor import cnhubert
 cnhubert.cnhubert_base_path = cnhubert_base_path
 from module.models import SynthesizerTrn
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
 from time import time as ttime
diff --git a/GPT_SoVITS/onnx_export.py b/GPT_SoVITS/onnx_export.py
index 43aac19a..3fdc6918 100644
--- a/GPT_SoVITS/onnx_export.py
+++ b/GPT_SoVITS/onnx_export.py
@@ -1,5 +1,5 @@
 from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2
-from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
+from GPT_SoVITS.AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
 import torch
 import torchaudio
 from torch import nn
diff --git a/GPT_SoVITS/s1_train.py b/GPT_SoVITS/s1_train.py
index 898ca549..bfd8fb22 100644
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@@ -14,14 +14,14 @@ from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.loggers import TensorBoardLogger  # WandbLogger
 from pytorch_lightning.strategies import DDPStrategy
-from AR.data.data_module import Text2SemanticDataModule
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
-from AR.utils.io import load_yaml_config
+from GPT_SoVITS.AR.data.data_module import Text2SemanticDataModule
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.AR.utils.io import load_yaml_config
 
 logging.getLogger("numba").setLevel(logging.WARNING)
 logging.getLogger("matplotlib").setLevel(logging.WARNING)
 torch.set_float32_matmul_precision("high")
-from AR.utils import get_newest_ckpt
+from GPT_SoVITS.AR.utils import get_newest_ckpt
 
 from collections import OrderedDict
 from time import time as ttime
diff --git a/api.py b/api.py
index c5f7024f..2ebc5310 100644
--- a/api.py
+++ b/api.py
@@ -163,7 +163,7 @@ import numpy as np
 from feature_extractor import cnhubert
 from io import BytesIO
 from module.models import SynthesizerTrn
-from AR.models.t2s_lightning_module import Text2SemanticLightningModule
+from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
 from module.mel_processing import spectrogram_torch
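
The pattern throughout the diff is uniform: bare imports such as `AR.*`, `TTS_infer_pack.*`, `feature_extractor.*`, `module.*`, and `tools.*` become fully qualified under the `GPT_SoVITS` package. A minimal sketch of what that buys callers follows; it is an illustration rather than part of the changeset, and the checkout path in it is a hypothetical placeholder, assuming `GPT_SoVITS` is importable as a package from the repository root.

# Illustration only -- not part of the diff above; the path is an assumption.
import sys

sys.path.insert(0, "/path/to/GPT-SoVITS")  # hypothetical checkout root

# With the qualified imports in this diff, the package path resolves from the
# repository root alone:
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule

# The old spelling also required GPT_SoVITS/ itself to be on sys.path:
# sys.path.insert(0, "/path/to/GPT-SoVITS/GPT_SoVITS")
# from AR.models.t2s_lightning_module import Text2SemanticLightningModule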