mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-09-21 19:50:09 +08:00
change imports
This commit is contained in:
parent
8292d7bfc2
commit
c5490bb2a2
@ -1,8 +1,8 @@
|
||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/data_module.py
|
||||
# reference: https://github.com/lifeiteng/vall-e
|
||||
from pytorch_lightning import LightningDataModule
|
||||
from AR.data.bucket_sampler import DistributedBucketSampler
|
||||
from AR.data.dataset import Text2SemanticDataset
|
||||
from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
|
||||
from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
|
@ -8,9 +8,9 @@ from typing import Dict
|
||||
|
||||
import torch
|
||||
from pytorch_lightning import LightningModule
|
||||
from AR.models.t2s_model import Text2SemanticDecoder
|
||||
from AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
||||
from AR.modules.optim import ScaledAdam
|
||||
from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder
|
||||
from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
||||
from GPT_SoVITS.AR.modules.optim import ScaledAdam
|
||||
|
||||
class Text2SemanticLightningModule(LightningModule):
|
||||
def __init__(self, config, output_dir, is_train=True):
|
||||
|
@ -8,9 +8,9 @@ from typing import Dict
|
||||
|
||||
import torch
|
||||
from pytorch_lightning import LightningModule
|
||||
from AR.models.t2s_model_onnx import Text2SemanticDecoder
|
||||
from AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
||||
from AR.modules.optim import ScaledAdam
|
||||
from GPT_SoVITS.AR.models.t2s_model_onnx import Text2SemanticDecoder
|
||||
from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
|
||||
from GPT_SoVITS.AR.modules.optim import ScaledAdam
|
||||
|
||||
|
||||
class Text2SemanticLightningModule(LightningModule):
|
||||
|
@ -5,8 +5,8 @@ from typing import List, Optional
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from AR.models.utils import make_pad_mask
|
||||
from AR.models.utils import (
|
||||
from GPT_SoVITS.AR.models.utils import make_pad_mask
|
||||
from GPT_SoVITS.AR.models.utils import (
|
||||
topk_sampling,
|
||||
sample,
|
||||
logits_to_probs,
|
||||
@ -15,11 +15,11 @@ from AR.models.utils import (
|
||||
make_reject_y,
|
||||
get_batch_logps
|
||||
)
|
||||
from AR.modules.embedding import SinePositionalEmbedding
|
||||
from AR.modules.embedding import TokenEmbedding
|
||||
from AR.modules.transformer import LayerNorm
|
||||
from AR.modules.transformer import TransformerEncoder
|
||||
from AR.modules.transformer import TransformerEncoderLayer
|
||||
from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding
|
||||
from GPT_SoVITS.AR.modules.embedding import TokenEmbedding
|
||||
from GPT_SoVITS.AR.modules.transformer import LayerNorm
|
||||
from GPT_SoVITS.AR.modules.transformer import TransformerEncoder
|
||||
from GPT_SoVITS.AR.modules.transformer import TransformerEncoderLayer
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torchmetrics.classification import MulticlassAccuracy
|
||||
|
@ -3,11 +3,11 @@
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from AR.modules.embedding_onnx import SinePositionalEmbedding
|
||||
from AR.modules.embedding_onnx import TokenEmbedding
|
||||
from AR.modules.transformer_onnx import LayerNorm
|
||||
from AR.modules.transformer_onnx import TransformerEncoder
|
||||
from AR.modules.transformer_onnx import TransformerEncoderLayer
|
||||
from GPT_SoVITS.AR.modules.embedding_onnx import SinePositionalEmbedding
|
||||
from GPT_SoVITS.AR.modules.embedding_onnx import TokenEmbedding
|
||||
from GPT_SoVITS.AR.modules.transformer_onnx import LayerNorm
|
||||
from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoder
|
||||
from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoderLayer
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torchmetrics.classification import MulticlassAccuracy
|
||||
|
@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from torch.nn import functional as F
|
||||
from AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
|
||||
from GPT_SoVITS.AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
|
||||
|
||||
F.multi_head_attention_forward = multi_head_attention_forward_patched
|
||||
|
||||
@ -152,14 +152,14 @@ class MultiheadAttention(Module):
|
||||
self.in_proj_linear = linear1_cls(
|
||||
embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs
|
||||
)
|
||||
self.in_proj_weight = self.in_proj_linear.weight
|
||||
self.in_proj_weight = self.in_proj_lineGPT_SoVITS.AR.weight
|
||||
|
||||
self.register_parameter("q_proj_weight", None)
|
||||
self.register_parameter("k_proj_weight", None)
|
||||
self.register_parameter("v_proj_weight", None)
|
||||
|
||||
if bias:
|
||||
self.in_proj_bias = self.in_proj_linear.bias
|
||||
self.in_proj_bias = self.in_proj_lineGPT_SoVITS.AR.bias
|
||||
else:
|
||||
self.register_parameter("in_proj_bias", None)
|
||||
|
||||
|
@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from torch.nn import functional as F
|
||||
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
|
||||
from GPT_SoVITS.AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
|
||||
|
||||
|
||||
class MultiheadAttention(Module):
|
||||
@ -95,14 +95,14 @@ class MultiheadAttention(Module):
|
||||
self.in_proj_linear = linear1_cls(
|
||||
embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs
|
||||
)
|
||||
self.in_proj_weight = self.in_proj_linear.weight
|
||||
self.in_proj_weight = self.in_proj_lineGPT_SoVITS.AR.weight
|
||||
|
||||
self.register_parameter("q_proj_weight", None)
|
||||
self.register_parameter("k_proj_weight", None)
|
||||
self.register_parameter("v_proj_weight", None)
|
||||
|
||||
if bias:
|
||||
self.in_proj_bias = self.in_proj_linear.bias
|
||||
self.in_proj_bias = self.in_proj_lineGPT_SoVITS.AR.bias
|
||||
else:
|
||||
self.register_parameter("in_proj_bias", None)
|
||||
|
||||
|
@ -10,8 +10,8 @@ from typing import Tuple
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
from AR.modules.activation import MultiheadAttention
|
||||
from AR.modules.scaling import BalancedDoubleSwish
|
||||
from GPT_SoVITS.AR.modules.activation import MultiheadAttention
|
||||
from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
|
||||
from torch import nn
|
||||
from torch import Tensor
|
||||
from torch.nn import functional as F
|
||||
|
@ -10,8 +10,8 @@ from typing import Tuple
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
from AR.modules.activation_onnx import MultiheadAttention
|
||||
from AR.modules.scaling import BalancedDoubleSwish
|
||||
from GPT_SoVITS.AR.modules.activation_onnx import MultiheadAttention
|
||||
from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
|
||||
from torch import nn
|
||||
from torch import Tensor
|
||||
from torch.nn import functional as F
|
||||
|
@ -9,7 +9,7 @@ import regex
|
||||
from gruut import sentences
|
||||
from gruut.const import Sentence
|
||||
from gruut.const import Word
|
||||
from AR.text_processing.symbols import SYMBOL_TO_ID
|
||||
from GPT_SoVITS.AR.text_processing.symbols import SYMBOL_TO_ID
|
||||
|
||||
|
||||
class GruutPhonemizer:
|
||||
|
@ -25,15 +25,15 @@ from typing import Generator, List, Tuple, Union
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from feature_extractor.cnhubert import CNHubert
|
||||
from module.models import SynthesizerTrn
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from tools.my_utils import load_audio
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.feature_extractor.cnhubert import CNHubert
|
||||
from GPT_SoVITS.module.models import SynthesizerTrn
|
||||
from GPT_SoVITS.module.mel_processing import spectrogram_torch
|
||||
from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from GPT_SoVITS.tools.my_utils import load_audio
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
from TTS_infer_pack.text_segmentation_method import splits
|
||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||
from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import splits
|
||||
from GPT_SoVITS.TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||
|
||||
language=os.environ.get("language","Auto")
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
|
@ -13,7 +13,7 @@ from torch.nn import functional as F
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
from feature_extractor import cnhubert
|
||||
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from module.models_onnx import SynthesizerTrn
|
||||
|
||||
from inference_webui import get_phones_and_bert
|
||||
|
@ -299,7 +299,7 @@ class GPTSoVITSGUI(QMainWindow):
|
||||
|
||||
result = "Audio saved to " + output_wav_path
|
||||
|
||||
self.status_bar.showMessage("合成完成!输出路径:" + output_wav_path, 5000)
|
||||
self.status_bGPT_SoVITS.AR.showMessage("合成完成!输出路径:" + output_wav_path, 5000)
|
||||
self.output_text.append("处理结果:\n" + result)
|
||||
|
||||
|
||||
|
@ -84,7 +84,7 @@ from feature_extractor import cnhubert
|
||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||
|
||||
from module.models import SynthesizerTrn
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from time import time as ttime
|
||||
|
@ -365,7 +365,7 @@ class MultiHeadAttention(nn.Module):
|
||||
def _attention_bias_proximal(self, length):
|
||||
"""Bias for self-attention to encourage attention to close positions.
|
||||
Args:
|
||||
length: an integer scalar.
|
||||
length: an integer scalar.
|
||||
Returns:
|
||||
a Tensor with shape [1, 1, length, length]
|
||||
"""
|
||||
|
@ -303,7 +303,7 @@ class MultiHeadAttention(nn.Module):
|
||||
def _attention_bias_proximal(self, length):
|
||||
"""Bias for self-attention to encourage attention to close positions.
|
||||
Args:
|
||||
length: an integer scalar.
|
||||
length: an integer scalar.
|
||||
Returns:
|
||||
a Tensor with shape [1, 1, length, length]
|
||||
"""
|
||||
|
@ -1,5 +1,5 @@
|
||||
from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2
|
||||
from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
|
||||
import torch
|
||||
import torchaudio
|
||||
from torch import nn
|
||||
|
@ -14,14 +14,14 @@ from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
from pytorch_lightning.loggers import TensorBoardLogger # WandbLogger
|
||||
from pytorch_lightning.strategies import DDPStrategy
|
||||
from AR.data.data_module import Text2SemanticDataModule
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from AR.utils.io import load_yaml_config
|
||||
from GPT_SoVITS.AR.data.data_module import Text2SemanticDataModule
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.AR.utils.io import load_yaml_config
|
||||
|
||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
||||
torch.set_float32_matmul_precision("high")
|
||||
from AR.utils import get_newest_ckpt
|
||||
from GPT_SoVITS.AR.utils import get_newest_ckpt
|
||||
|
||||
from collections import OrderedDict
|
||||
from time import time as ttime
|
||||
|
@ -106,7 +106,7 @@
|
||||
"开启一键三连": "Ativar um clique",
|
||||
"开启打标WebUI": "Abrir Labeling WebUI",
|
||||
"开启文本获取": "Ativar obtenção de texto",
|
||||
"开启无参考文本模式。不填参考文本亦相当于开启。": "Ativar o modo sem texto de referência. Não preencher o texto de referência também equivale a ativar.",
|
||||
"开启无参考文本模式。不填参考文本亦相当于开启。": "Ativar o modo sem texto de referência. Não preencher o texto de referência também equivale a ativar.",
|
||||
"开启离线批量ASR": "Ativar ASR offline em lote",
|
||||
"开启语义token提取": "Ativar extração de token semântico",
|
||||
"开启语音切割": "Ativar corte de voz",
|
||||
|
@ -62,7 +62,7 @@ class BsRoformer_Loader:
|
||||
length_init = mix.shape[-1]
|
||||
|
||||
progress_bar = tqdm(total=length_init // step + 1)
|
||||
progress_bar.set_description("Processing")
|
||||
progress_bGPT_SoVITS.AR.set_description("Processing")
|
||||
|
||||
# Do pad from the beginning and end to account floating window results better
|
||||
if length_init > 2 * border and (border > 0):
|
||||
@ -102,7 +102,7 @@ class BsRoformer_Loader:
|
||||
batch_data.append(part)
|
||||
batch_locations.append((i, length))
|
||||
i += step
|
||||
progress_bar.update(1)
|
||||
progress_bGPT_SoVITS.AR.update(1)
|
||||
|
||||
if len(batch_data) >= batch_size or (i >= mix.shape[1]):
|
||||
arr = torch.stack(batch_data, dim=0)
|
||||
@ -131,7 +131,7 @@ class BsRoformer_Loader:
|
||||
# Remove pad
|
||||
estimated_sources = estimated_sources[..., border:-border]
|
||||
|
||||
progress_bar.close()
|
||||
progress_bGPT_SoVITS.AR.close()
|
||||
|
||||
return {k: v for k, v in zip(['vocals', 'other'], estimated_sources)}
|
||||
|
||||
|
@ -143,7 +143,7 @@ class Predictor:
|
||||
def demix_base(self, mixes, margin_size):
|
||||
chunked_sources = []
|
||||
progress_bar = tqdm(total=len(mixes))
|
||||
progress_bar.set_description("Processing")
|
||||
progress_bGPT_SoVITS.AR.set_description("Processing")
|
||||
for mix in mixes:
|
||||
cmix = mixes[mix]
|
||||
sources = []
|
||||
@ -188,12 +188,12 @@ class Predictor:
|
||||
end = None
|
||||
sources.append(tar_signal[:, start:end])
|
||||
|
||||
progress_bar.update(1)
|
||||
progress_bGPT_SoVITS.AR.update(1)
|
||||
|
||||
chunked_sources.append(sources)
|
||||
_sources = np.concatenate(chunked_sources, axis=-1)
|
||||
# del self.model
|
||||
progress_bar.close()
|
||||
progress_bGPT_SoVITS.AR.close()
|
||||
return _sources
|
||||
|
||||
def prediction(self, m, vocal_root, others_root, format):
|
||||
|
2
api.py
2
api.py
@ -163,7 +163,7 @@ import numpy as np
|
||||
from feature_extractor import cnhubert
|
||||
from io import BytesIO
|
||||
from module.models import SynthesizerTrn
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from module.mel_processing import spectrogram_torch
|
||||
|
Loading…
x
Reference in New Issue
Block a user