change imports

This commit is contained in:
Jarod Mica 2024-12-23 01:52:08 -08:00
parent 8292d7bfc2
commit c5490bb2a2
22 changed files with 57 additions and 57 deletions

View File

@ -1,8 +1,8 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/data_module.py # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/data_module.py
# reference: https://github.com/lifeiteng/vall-e # reference: https://github.com/lifeiteng/vall-e
from pytorch_lightning import LightningDataModule from pytorch_lightning import LightningDataModule
from AR.data.bucket_sampler import DistributedBucketSampler from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
from AR.data.dataset import Text2SemanticDataset from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset
from torch.utils.data import DataLoader from torch.utils.data import DataLoader

View File

@ -8,9 +8,9 @@ from typing import Dict
import torch import torch
from pytorch_lightning import LightningModule from pytorch_lightning import LightningModule
from AR.models.t2s_model import Text2SemanticDecoder from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder
from AR.modules.lr_schedulers import WarmupCosineLRSchedule from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
from AR.modules.optim import ScaledAdam from GPT_SoVITS.AR.modules.optim import ScaledAdam
class Text2SemanticLightningModule(LightningModule): class Text2SemanticLightningModule(LightningModule):
def __init__(self, config, output_dir, is_train=True): def __init__(self, config, output_dir, is_train=True):

View File

@ -8,9 +8,9 @@ from typing import Dict
import torch import torch
from pytorch_lightning import LightningModule from pytorch_lightning import LightningModule
from AR.models.t2s_model_onnx import Text2SemanticDecoder from GPT_SoVITS.AR.models.t2s_model_onnx import Text2SemanticDecoder
from AR.modules.lr_schedulers import WarmupCosineLRSchedule from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
from AR.modules.optim import ScaledAdam from GPT_SoVITS.AR.modules.optim import ScaledAdam
class Text2SemanticLightningModule(LightningModule): class Text2SemanticLightningModule(LightningModule):

View File

@ -5,8 +5,8 @@ from typing import List, Optional
import torch import torch
from tqdm import tqdm from tqdm import tqdm
from AR.models.utils import make_pad_mask from GPT_SoVITS.AR.models.utils import make_pad_mask
from AR.models.utils import ( from GPT_SoVITS.AR.models.utils import (
topk_sampling, topk_sampling,
sample, sample,
logits_to_probs, logits_to_probs,
@ -15,11 +15,11 @@ from AR.models.utils import (
make_reject_y, make_reject_y,
get_batch_logps get_batch_logps
) )
from AR.modules.embedding import SinePositionalEmbedding from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding
from AR.modules.embedding import TokenEmbedding from GPT_SoVITS.AR.modules.embedding import TokenEmbedding
from AR.modules.transformer import LayerNorm from GPT_SoVITS.AR.modules.transformer import LayerNorm
from AR.modules.transformer import TransformerEncoder from GPT_SoVITS.AR.modules.transformer import TransformerEncoder
from AR.modules.transformer import TransformerEncoderLayer from GPT_SoVITS.AR.modules.transformer import TransformerEncoderLayer
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from torchmetrics.classification import MulticlassAccuracy from torchmetrics.classification import MulticlassAccuracy

View File

@ -3,11 +3,11 @@
import torch import torch
from tqdm import tqdm from tqdm import tqdm
from AR.modules.embedding_onnx import SinePositionalEmbedding from GPT_SoVITS.AR.modules.embedding_onnx import SinePositionalEmbedding
from AR.modules.embedding_onnx import TokenEmbedding from GPT_SoVITS.AR.modules.embedding_onnx import TokenEmbedding
from AR.modules.transformer_onnx import LayerNorm from GPT_SoVITS.AR.modules.transformer_onnx import LayerNorm
from AR.modules.transformer_onnx import TransformerEncoder from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoder
from AR.modules.transformer_onnx import TransformerEncoderLayer from GPT_SoVITS.AR.modules.transformer_onnx import TransformerEncoderLayer
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from torchmetrics.classification import MulticlassAccuracy from torchmetrics.classification import MulticlassAccuracy

View File

@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from torch.nn import functional as F from torch.nn import functional as F
from AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched from GPT_SoVITS.AR.modules.patched_mha_with_cache import multi_head_attention_forward_patched
F.multi_head_attention_forward = multi_head_attention_forward_patched F.multi_head_attention_forward = multi_head_attention_forward_patched
@ -152,14 +152,14 @@ class MultiheadAttention(Module):
self.in_proj_linear = linear1_cls( self.in_proj_linear = linear1_cls(
embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs
) )
self.in_proj_weight = self.in_proj_linear.weight self.in_proj_weight = self.in_proj_linear.weight
self.register_parameter("q_proj_weight", None) self.register_parameter("q_proj_weight", None)
self.register_parameter("k_proj_weight", None) self.register_parameter("k_proj_weight", None)
self.register_parameter("v_proj_weight", None) self.register_parameter("v_proj_weight", None)
if bias: if bias:
self.in_proj_bias = self.in_proj_linear.bias self.in_proj_bias = self.in_proj_linear.bias
else: else:
self.register_parameter("in_proj_bias", None) self.register_parameter("in_proj_bias", None)

View File

@ -12,7 +12,7 @@ from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from torch.nn import functional as F from torch.nn import functional as F
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched from GPT_SoVITS.AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
class MultiheadAttention(Module): class MultiheadAttention(Module):
@ -95,14 +95,14 @@ class MultiheadAttention(Module):
self.in_proj_linear = linear1_cls( self.in_proj_linear = linear1_cls(
embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs embed_dim, 3 * embed_dim, bias=bias, **factory_kwargs
) )
self.in_proj_weight = self.in_proj_linear.weight self.in_proj_weight = self.in_proj_linear.weight
self.register_parameter("q_proj_weight", None) self.register_parameter("q_proj_weight", None)
self.register_parameter("k_proj_weight", None) self.register_parameter("k_proj_weight", None)
self.register_parameter("v_proj_weight", None) self.register_parameter("v_proj_weight", None)
if bias: if bias:
self.in_proj_bias = self.in_proj_linear.bias self.in_proj_bias = self.in_proj_linear.bias
else: else:
self.register_parameter("in_proj_bias", None) self.register_parameter("in_proj_bias", None)

View File

@ -10,8 +10,8 @@ from typing import Tuple
from typing import Union from typing import Union
import torch import torch
from AR.modules.activation import MultiheadAttention from GPT_SoVITS.AR.modules.activation import MultiheadAttention
from AR.modules.scaling import BalancedDoubleSwish from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
from torch import nn from torch import nn
from torch import Tensor from torch import Tensor
from torch.nn import functional as F from torch.nn import functional as F

View File

@ -10,8 +10,8 @@ from typing import Tuple
from typing import Union from typing import Union
import torch import torch
from AR.modules.activation_onnx import MultiheadAttention from GPT_SoVITS.AR.modules.activation_onnx import MultiheadAttention
from AR.modules.scaling import BalancedDoubleSwish from GPT_SoVITS.AR.modules.scaling import BalancedDoubleSwish
from torch import nn from torch import nn
from torch import Tensor from torch import Tensor
from torch.nn import functional as F from torch.nn import functional as F

View File

@ -9,7 +9,7 @@ import regex
from gruut import sentences from gruut import sentences
from gruut.const import Sentence from gruut.const import Sentence
from gruut.const import Word from gruut.const import Word
from AR.text_processing.symbols import SYMBOL_TO_ID from GPT_SoVITS.AR.text_processing.symbols import SYMBOL_TO_ID
class GruutPhonemizer: class GruutPhonemizer:

View File

@ -25,15 +25,15 @@ from typing import Generator, List, Tuple, Union
from tqdm import tqdm from tqdm import tqdm
from AR.models.t2s_lightning_module import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
from feature_extractor.cnhubert import CNHubert from GPT_SoVITS.feature_extractor.cnhubert import CNHubert
from module.models import SynthesizerTrn from GPT_SoVITS.module.models import SynthesizerTrn
from module.mel_processing import spectrogram_torch from GPT_SoVITS.module.mel_processing import spectrogram_torch
from tools.i18n.i18n import I18nAuto, scan_language_list from GPT_SoVITS.tools.i18n.i18n import I18nAuto, scan_language_list
from tools.my_utils import load_audio from GPT_SoVITS.tools.my_utils import load_audio
from transformers import AutoModelForMaskedLM, AutoTokenizer from transformers import AutoModelForMaskedLM, AutoTokenizer
from TTS_infer_pack.text_segmentation_method import splits from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import splits
from TTS_infer_pack.TextPreprocessor import TextPreprocessor from GPT_SoVITS.TTS_infer_pack.TextPreprocessor import TextPreprocessor
language=os.environ.get("language","Auto") language=os.environ.get("language","Auto")
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language

View File

@ -13,7 +13,7 @@ from torch.nn import functional as F
from transformers import AutoModelForMaskedLM, AutoTokenizer from transformers import AutoModelForMaskedLM, AutoTokenizer
from feature_extractor import cnhubert from feature_extractor import cnhubert
from AR.models.t2s_lightning_module import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
from module.models_onnx import SynthesizerTrn from module.models_onnx import SynthesizerTrn
from inference_webui import get_phones_and_bert from inference_webui import get_phones_and_bert

View File

@ -299,7 +299,7 @@ class GPTSoVITSGUI(QMainWindow):
result = "Audio saved to " + output_wav_path result = "Audio saved to " + output_wav_path
self.status_bar.showMessage("合成完成!输出路径:" + output_wav_path, 5000) self.status_bar.showMessage("合成完成!输出路径:" + output_wav_path, 5000)
self.output_text.append("处理结果:\n" + result) self.output_text.append("处理结果:\n" + result)

View File

@ -84,7 +84,7 @@ from feature_extractor import cnhubert
cnhubert.cnhubert_base_path = cnhubert_base_path cnhubert.cnhubert_base_path = cnhubert_base_path
from module.models import SynthesizerTrn from module.models import SynthesizerTrn
from AR.models.t2s_lightning_module import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence from text import cleaned_text_to_sequence
from text.cleaner import clean_text from text.cleaner import clean_text
from time import time as ttime from time import time as ttime

View File

@ -365,7 +365,7 @@ class MultiHeadAttention(nn.Module):
def _attention_bias_proximal(self, length): def _attention_bias_proximal(self, length):
"""Bias for self-attention to encourage attention to close positions. """Bias for self-attention to encourage attention to close positions.
Args: Args:
length: an integer scalar. length: an integer scalar.
Returns: Returns:
a Tensor with shape [1, 1, length, length] a Tensor with shape [1, 1, length, length]
""" """

View File

@ -303,7 +303,7 @@ class MultiHeadAttention(nn.Module):
def _attention_bias_proximal(self, length): def _attention_bias_proximal(self, length):
"""Bias for self-attention to encourage attention to close positions. """Bias for self-attention to encourage attention to close positions.
Args: Args:
length: an integer scalar. length: an integer scalar.
Returns: Returns:
a Tensor with shape [1, 1, length, length] a Tensor with shape [1, 1, length, length]
""" """

View File

@ -1,5 +1,5 @@
from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2 from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2
from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
import torch import torch
import torchaudio import torchaudio
from torch import nn from torch import nn

View File

@ -14,14 +14,14 @@ from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger # WandbLogger from pytorch_lightning.loggers import TensorBoardLogger # WandbLogger
from pytorch_lightning.strategies import DDPStrategy from pytorch_lightning.strategies import DDPStrategy
from AR.data.data_module import Text2SemanticDataModule from GPT_SoVITS.AR.data.data_module import Text2SemanticDataModule
from AR.models.t2s_lightning_module import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
from AR.utils.io import load_yaml_config from GPT_SoVITS.AR.utils.io import load_yaml_config
logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING)
torch.set_float32_matmul_precision("high") torch.set_float32_matmul_precision("high")
from AR.utils import get_newest_ckpt from GPT_SoVITS.AR.utils import get_newest_ckpt
from collections import OrderedDict from collections import OrderedDict
from time import time as ttime from time import time as ttime

View File

@ -106,7 +106,7 @@
"开启一键三连": "Ativar um clique", "开启一键三连": "Ativar um clique",
"开启打标WebUI": "Abrir Labeling WebUI", "开启打标WebUI": "Abrir Labeling WebUI",
"开启文本获取": "Ativar obtenção de texto", "开启文本获取": "Ativar obtenção de texto",
"开启无参考文本模式。不填参考文本亦相当于开启。": "Ativar o modo sem texto de referência. Não preencher o texto de referência também equivale a ativar.", "开启无参考文本模式。不填参考文本亦相当于开启。": "Ativar o modo sem texto de referência. Não preencher o texto de referência também equivale a ativar.",
"开启离线批量ASR": "Ativar ASR offline em lote", "开启离线批量ASR": "Ativar ASR offline em lote",
"开启语义token提取": "Ativar extração de token semântico", "开启语义token提取": "Ativar extração de token semântico",
"开启语音切割": "Ativar corte de voz", "开启语音切割": "Ativar corte de voz",

View File

@ -62,7 +62,7 @@ class BsRoformer_Loader:
length_init = mix.shape[-1] length_init = mix.shape[-1]
progress_bar = tqdm(total=length_init // step + 1) progress_bar = tqdm(total=length_init // step + 1)
progress_bar.set_description("Processing") progress_bar.set_description("Processing")
# Do pad from the beginning and end to account floating window results better # Do pad from the beginning and end to account floating window results better
if length_init > 2 * border and (border > 0): if length_init > 2 * border and (border > 0):
@ -102,7 +102,7 @@ class BsRoformer_Loader:
batch_data.append(part) batch_data.append(part)
batch_locations.append((i, length)) batch_locations.append((i, length))
i += step i += step
progress_bar.update(1) progress_bar.update(1)
if len(batch_data) >= batch_size or (i >= mix.shape[1]): if len(batch_data) >= batch_size or (i >= mix.shape[1]):
arr = torch.stack(batch_data, dim=0) arr = torch.stack(batch_data, dim=0)
@ -131,7 +131,7 @@ class BsRoformer_Loader:
# Remove pad # Remove pad
estimated_sources = estimated_sources[..., border:-border] estimated_sources = estimated_sources[..., border:-border]
progress_bar.close() progress_bar.close()
return {k: v for k, v in zip(['vocals', 'other'], estimated_sources)} return {k: v for k, v in zip(['vocals', 'other'], estimated_sources)}

View File

@ -143,7 +143,7 @@ class Predictor:
def demix_base(self, mixes, margin_size): def demix_base(self, mixes, margin_size):
chunked_sources = [] chunked_sources = []
progress_bar = tqdm(total=len(mixes)) progress_bar = tqdm(total=len(mixes))
progress_bar.set_description("Processing") progress_bar.set_description("Processing")
for mix in mixes: for mix in mixes:
cmix = mixes[mix] cmix = mixes[mix]
sources = [] sources = []
@ -188,12 +188,12 @@ class Predictor:
end = None end = None
sources.append(tar_signal[:, start:end]) sources.append(tar_signal[:, start:end])
progress_bar.update(1) progress_bar.update(1)
chunked_sources.append(sources) chunked_sources.append(sources)
_sources = np.concatenate(chunked_sources, axis=-1) _sources = np.concatenate(chunked_sources, axis=-1)
# del self.model # del self.model
progress_bar.close() progress_bar.close()
return _sources return _sources
def prediction(self, m, vocal_root, others_root, format): def prediction(self, m, vocal_root, others_root, format):

2
api.py
View File

@ -163,7 +163,7 @@ import numpy as np
from feature_extractor import cnhubert from feature_extractor import cnhubert
from io import BytesIO from io import BytesIO
from module.models import SynthesizerTrn from module.models import SynthesizerTrn
from AR.models.t2s_lightning_module import Text2SemanticLightningModule from GPT_SoVITS.AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence from text import cleaned_text_to_sequence
from text.cleaner import clean_text from text.cleaner import clean_text
from module.mel_processing import spectrogram_torch from module.mel_processing import spectrogram_torch