ruff check --fix

This commit is contained in:
XXXXRT666 2025-04-01 06:33:22 +01:00
parent 28bdff356f
commit a893a4e283
66 changed files with 149 additions and 213 deletions

View File

@ -1,19 +1,17 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/dataset.py
# reference: https://github.com/lifeiteng/vall-e
import pdb
import sys
# sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert")
import traceback, os
import traceback
import os
from typing import Dict
from typing import List
import numpy as np
import pandas as pd
import torch, json
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from transformers import AutoTokenizer
version = os.environ.get('version',None)

View File

@ -1,6 +1,7 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
import os, sys
import os
import sys
now_dir = os.getcwd()
sys.path.append(now_dir)

View File

@ -1,6 +1,7 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
# reference: https://github.com/lifeiteng/vall-e
import os, sys
import os
import sys
now_dir = os.getcwd()
sys.path.append(now_dir)

View File

@ -9,8 +9,6 @@ from AR.models.utils import make_pad_mask, make_pad_mask_left
from AR.models.utils import (
topk_sampling,
sample,
logits_to_probs,
multinomial_sample_one_no_sync,
dpo_loss,
make_reject_y,
get_batch_logps
@ -718,7 +716,7 @@ class Text2SemanticDecoder(nn.Module):
idx_list[batch_index] = idx
y_list[batch_index] = y[i, :-1]
if not (None in idx_list):
if None not in idx_list:
stop = True
if stop:

View File

@ -1,7 +1,6 @@
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
# reference: https://github.com/lifeiteng/vall-e
import torch
from tqdm import tqdm
from AR.modules.embedding_onnx import SinePositionalEmbedding
from AR.modules.embedding_onnx import TokenEmbedding

View File

@ -130,7 +130,7 @@ def topk_sampling(logits, top_k=10, top_p=1.0, temperature=1.0):
return token
from typing import Optional, Tuple
from typing import Optional
def multinomial_sample_one_no_sync(

View File

@ -11,7 +11,6 @@ from torch.nn.init import xavier_uniform_
from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
from torch.nn.parameter import Parameter
from torch.nn import functional as F
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched

View File

@ -5,7 +5,6 @@ from torch.nn.functional import (
_none_or_dtype,
_in_projection_packed,
)
from torch.nn import functional as F
import torch
# Tensor = torch.Tensor
# from typing import Callable, List, Optional, Tuple, Union

View File

@ -1,9 +1,6 @@
from torch.nn.functional import *
from torch.nn.functional import (
_mha_shape_check,
_canonical_mask,
_none_or_dtype,
_in_projection_packed,
)
def multi_head_attention_forward_patched(

View File

@ -13,12 +13,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import math
import random
from typing import Optional
from typing import Tuple
from typing import Union
import torch
import torch.nn as nn

View File

@ -451,13 +451,13 @@ class BigVGAN(
# instantiate BigVGAN using h
if use_cuda_kernel:
print(
f"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
)
print(
f"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
)
print(
f"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
)
model = cls(h, use_cuda_kernel=use_cuda_kernel)
@ -485,7 +485,7 @@ class BigVGAN(
model.load_state_dict(checkpoint_dict["generator"])
except RuntimeError:
print(
f"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
)
model.remove_weight_norm()
model.load_state_dict(checkpoint_dict["generator"])

View File

@ -15,7 +15,7 @@ from torchaudio.transforms import Spectrogram, Resample
from env import AttrDict
from utils import get_padding
import typing
from typing import Optional, List, Union, Dict, Tuple
from typing import List, Tuple
class DiscriminatorP(torch.nn.Module):
@ -508,7 +508,7 @@ class DiscriminatorCQT(nn.Module):
self.cqtd_normalize_volume = self.cfg.get("cqtd_normalize_volume", False)
if self.cqtd_normalize_volume:
print(
f"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
)
def get_2d_padding(

View File

@ -6,13 +6,12 @@
import torch
import torch.nn.functional as F
import torch.nn as nn
from librosa.filters import mel as librosa_mel_fn
from scipy import signal
import typing
from typing import Optional, List, Union, Dict, Tuple
from typing import List, Tuple
from collections import namedtuple
import math
import functools

View File

@ -328,7 +328,7 @@ def train(rank, a, h):
# PESQ calculation. only evaluate PESQ if it's speech signal (nonspeech PESQ will error out)
if (
not "nonspeech" in mode
"nonspeech" not in mode
): # Skips if the name of dataset (in mode string) contains "nonspeech"
# Resample to 16000 for pesq

View File

@ -1,6 +1,8 @@
from copy import deepcopy
import math
import os, sys, gc
import os
import sys
import gc
import random
import traceback
import time
@ -10,7 +12,7 @@ now_dir = os.getcwd()
sys.path.append(now_dir)
import ffmpeg
import os
from typing import Generator, List, Tuple, Union
from typing import List, Tuple, Union
import numpy as np
import torch
import torch.nn.functional as F
@ -22,14 +24,13 @@ from feature_extractor.cnhubert import CNHubert
from module.models import SynthesizerTrn, SynthesizerTrnV3
from peft import LoraConfig, get_peft_model
import librosa
from time import time as ttime
from tools.i18n.i18n import I18nAuto, scan_language_list
from tools.my_utils import load_audio
from module.mel_processing import spectrogram_torch
from TTS_infer_pack.text_segmentation_method import splits
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
from BigVGAN.bigvgan import BigVGAN
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
from module.mel_processing import mel_spectrogram_torch
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
language=os.environ.get("language","Auto")
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
@ -250,7 +251,7 @@ class TTS_Config:
self.device = self.configs.get("device", torch.device("cpu"))
if "cuda" in str(self.device) and not torch.cuda.is_available():
print(f"Warning: CUDA is not available, set device to CPU.")
print("Warning: CUDA is not available, set device to CPU.")
self.device = torch.device("cpu")
self.is_half = self.configs.get("is_half", False)

View File

@ -1,5 +1,6 @@
import os, sys
import os
import sys
import threading
from tqdm import tqdm

View File

@ -1,4 +1,5 @@
import os, sys
import os
import sys
now_dir = os.getcwd()
sys.path.insert(0, now_dir)
from text.g2pw import G2PWPinyin

View File

@ -3,7 +3,6 @@
import argparse
from typing import Optional
from my_utils import load_audio
from text import cleaned_text_to_sequence
import torch
import torchaudio
@ -813,11 +812,11 @@ import json
def export_symbel(version='v2'):
if version=='v1':
symbols = text._symbol_to_id_v1
with open(f"onnx/symbols_v1.json", "w") as file:
with open("onnx/symbols_v1.json", "w") as file:
json.dump(symbols, file, indent=4)
else:
symbols = text._symbol_to_id_v2
with open(f"onnx/symbols_v2.json", "w") as file:
with open("onnx/symbols_v2.json", "w") as file:
json.dump(symbols, file, indent=4)
def main():

View File

@ -6,16 +6,16 @@ from export_torch_script import (
spectrogram_torch,
)
from f5_tts.model.backbones.dit import DiT
from feature_extractor import cnhubert
from inference_webui import get_phones_and_bert
import librosa
from module import commons
from module.mel_processing import mel_spectrogram_torch, spectral_normalize_torch
from module.mel_processing import mel_spectrogram_torch
from module.models_onnx import CFM, SynthesizerTrnV3
import numpy as np
import torch._dynamo.config
import torchaudio
import logging, uvicorn
import logging
import uvicorn
import torch
import soundfile
from librosa.filters import mel as librosa_mel_fn
@ -942,7 +942,7 @@ def test_():
cfm.eval()
logger.info(f"cfm ok")
logger.info("cfm ok")
dict_s1 = torch.load("GPT_SoVITS/pretrained_models/s1v3.ckpt")
# v2 的 gpt 也可以用
@ -957,7 +957,7 @@ def test_():
t2s_m = torch.jit.script(t2s_m)
t2s_m.eval()
# t2s_m.top_k = 15
logger.info(f"t2s_m ok")
logger.info("t2s_m ok")
vq_model: torch.jit.ScriptModule = torch.jit.load(
@ -967,7 +967,7 @@ def test_():
# vq_model = vq_model.half().to(device)
vq_model.eval()
# vq_model = sovits.vq_model
logger.info(f"vq_model ok")
logger.info("vq_model ok")
# gpt_sovits_v3_half = torch.jit.load("onnx/ad/gpt_sovits_v3_half.pt")
# gpt_sovits_v3_half = torch.jit.optimize_for_inference(gpt_sovits_v3_half)
@ -975,7 +975,7 @@ def test_():
# gpt_sovits_v3_half = gpt_sovits_v3_half.cuda()
# gpt_sovits_v3_half.eval()
gpt_sovits_v3_half = ExportGPTSovitsHalf(sovits.hps, t2s_m, vq_model)
logger.info(f"gpt_sovits_v3_half ok")
logger.info("gpt_sovits_v3_half ok")
# init_bigvgan()
# global bigvgan_model
@ -985,7 +985,7 @@ def test_():
bigvgan_model = bigvgan_model.cuda()
bigvgan_model.eval()
logger.info(f"bigvgan ok")
logger.info("bigvgan ok")
gpt_sovits_v3 = GPTSoVITSV3(gpt_sovits_v3_half, cfm, bigvgan_model)
gpt_sovits_v3 = torch.jit.script(gpt_sovits_v3)

View File

@ -11,7 +11,6 @@ from __future__ import annotations
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint
from x_transformers.x_transformers import RotaryEmbedding

View File

@ -1,9 +1,5 @@
import time
import librosa
import torch
import torch.nn.functional as F
import soundfile as sf
import os
from transformers import logging as tf_logging
tf_logging.set_verbosity_error()

View File

@ -7,7 +7,9 @@
全部按日文识别
'''
import logging
import traceback,torchaudio,warnings
import traceback
import torchaudio
import warnings
logging.getLogger("markdown_it").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR)
@ -18,8 +20,10 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
warnings.simplefilter(action='ignore', category=FutureWarning)
import os, re, sys, json
import pdb
import os
import re
import sys
import json
import torch
from text.LangSegmenter import LangSegmenter
@ -42,12 +46,12 @@ for i in range(3):
pretrained_gpt_name,pretrained_sovits_name = _
if os.path.exists(f"./weight.json"):
if os.path.exists("./weight.json"):
pass
else:
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
with open(f"./weight.json", 'r', encoding="utf-8") as file:
with open("./weight.json", 'r', encoding="utf-8") as file:
weight_data = file.read()
weight_data=json.loads(weight_data)
gpt_path = os.environ.get(
@ -87,7 +91,6 @@ from feature_extractor import cnhubert
cnhubert.cnhubert_base_path = cnhubert_base_path
from GPT_SoVITS.module.models import SynthesizerTrn,SynthesizerTrnV3
import numpy as np
import random
def set_seed(seed):
if seed == -1:
@ -104,9 +107,8 @@ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence
from text.cleaner import clean_text
from time import time as ttime
from tools.my_utils import load_audio
from tools.i18n.i18n import I18nAuto, scan_language_list
from peft import LoraConfig, PeftModel, get_peft_model
from peft import LoraConfig, get_peft_model
language=os.environ.get("language","Auto")
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
@ -327,9 +329,8 @@ def change_gpt_weights(gpt_path):
change_gpt_weights(gpt_path)
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
import torch,soundfile
import torch
now_dir = os.getcwd()
import soundfile
def init_bigvgan():
global bigvgan_model

View File

@ -7,7 +7,10 @@
全部按日文识别
'''
import random
import os, re, logging, json
import os
import re
import logging
import json
import sys
now_dir = os.getcwd()
sys.path.append(now_dir)
@ -20,7 +23,6 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
import pdb
import torch
try:
@ -182,12 +184,12 @@ for i in range(3):
pretrained_gpt_name,pretrained_sovits_name = _
if os.path.exists(f"./weight.json"):
if os.path.exists("./weight.json"):
pass
else:
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
with open(f"./weight.json", 'r', encoding="utf-8") as file:
with open("./weight.json", 'r', encoding="utf-8") as file:
weight_data = file.read()
weight_data=json.loads(weight_data)
gpt_path = os.environ.get(

View File

@ -1,22 +1,13 @@
import time
import logging
import os
import random
import traceback
import numpy as np
import torch
import torch.utils.data
from tqdm import tqdm
from module import commons
from module.mel_processing import spectrogram_torch,spec_to_mel_torch
from text import cleaned_text_to_sequence
from utils import load_wav_to_torch, load_filepaths_and_text
import torch.nn.functional as F
from functools import lru_cache
import requests
from scipy.io import wavfile
from io import BytesIO
from tools.my_utils import load_audio
version = os.environ.get('version',None)
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)

View File

@ -1,7 +1,6 @@
import math
import torch
from torch.nn import functional as F
def feature_loss(fmap_r, fmap_g):

View File

@ -1,16 +1,5 @@
import math
import os
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data
import numpy as np
import librosa
import librosa.util as librosa_util
from librosa.util import normalize, pad_center, tiny
from scipy.signal import get_window
from scipy.io.wavfile import read
from librosa.filters import mel as librosa_mel_fn
MAX_WAV_VALUE = 32768.0

View File

@ -1,9 +1,6 @@
import warnings
warnings.filterwarnings("ignore")
import copy
import math
import os
import pdb
import torch
from torch import nn
@ -13,7 +10,7 @@ from module import commons
from module import modules
from module import attentions
from f5_tts.model import DiT
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from module.commons import init_weights, get_padding
from module.mrte_model import MRTE
@ -22,7 +19,8 @@ from module.quantize import ResidualVectorQuantizer
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2
from torch.cuda.amp import autocast
import contextlib,random
import contextlib
import random
class StochasticDurationPredictor(nn.Module):

View File

@ -1,4 +1,3 @@
import copy
import math
from typing import Optional
import torch
@ -11,14 +10,13 @@ from module import attentions_onnx as attentions
from f5_tts.model import DiT
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from module.commons import init_weights, get_padding
from module.quantize import ResidualVectorQuantizer
# from text import symbols
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2
from torch.cuda.amp import autocast
class StochasticDurationPredictor(nn.Module):

View File

@ -7,7 +7,6 @@
"""Residual vector quantizer implementation."""
from dataclasses import dataclass, field
import math
import typing as tp
import torch

View File

@ -10,7 +10,6 @@ cnhubert.cnhubert_base_path = cnhubert_base_path
ssl_model = cnhubert.get_model()
from text import cleaned_text_to_sequence
import soundfile
from tools.my_utils import load_audio
import os
import json

View File

@ -14,13 +14,10 @@ bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
version = os.environ.get('version', None)
import sys, numpy as np, traceback, pdb
import traceback
import os.path
from glob import glob
from tqdm import tqdm
from text.cleaner import clean_text
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
from tools.my_utils import clean_path
# inp_text=sys.argv[1]

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import sys,os
import sys
import os
inp_text= os.environ.get("inp_text")
inp_wav_dir= os.environ.get("inp_wav_dir")
exp_name= os.environ.get("exp_name")
@ -14,7 +15,8 @@ cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import pdb,traceback,numpy as np,logging
import traceback
import numpy as np
from scipy.io import wavfile
import librosa
now_dir = os.getcwd()

View File

@ -26,17 +26,13 @@ else:
version = "v3"
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import math, traceback
import multiprocessing
import sys, pdb
import traceback
import sys
now_dir = os.getcwd()
sys.path.append(now_dir)
from random import shuffle
import torch.multiprocessing as mp
from glob import glob
from tqdm import tqdm
import logging, librosa, utils
import logging
import utils
if version!="v3":
from module.models import SynthesizerTrn
else:

View File

@ -1,7 +1,8 @@
import traceback
from collections import OrderedDict
from time import time as ttime
import shutil,os
import shutil
import os
import torch
from tools.i18n.i18n import I18nAuto

View File

@ -1,6 +1,5 @@
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
import os
import pdb
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
@ -8,7 +7,8 @@ import argparse
import logging
from pathlib import Path
import torch, platform
import torch
import platform
from pytorch_lightning import seed_everything
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
@ -24,8 +24,6 @@ torch.set_float32_matmul_precision("high")
from AR.utils import get_newest_ckpt
from collections import OrderedDict
from time import time as ttime
import shutil
from process_ckpt import my_save

View File

@ -1,6 +1,7 @@
import warnings
warnings.filterwarnings("ignore")
import utils, os
import utils
import os
hps = utils.get_hparams(stage=2)
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
import torch
@ -8,11 +9,11 @@ from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
import torch.distributed as dist, traceback
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import logging, traceback
import logging
logging.getLogger("matplotlib").setLevel(logging.INFO)
logging.getLogger("h5py").setLevel(logging.INFO)

View File

@ -1,18 +1,18 @@
import warnings
warnings.filterwarnings("ignore")
import utils, os
import utils
import os
hps = utils.get_hparams(stage=2)
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
import torch.distributed as dist, traceback
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import logging, traceback
import logging
logging.getLogger("matplotlib").setLevel(logging.INFO)
logging.getLogger("h5py").setLevel(logging.INFO)
@ -27,10 +27,7 @@ from module.data_utils import (
)
from module.models import (
SynthesizerTrnV3 as SynthesizerTrn,
MultiPeriodDiscriminator,
)
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from process_ckpt import savee
torch.backends.cudnn.benchmark = False

View File

@ -1,25 +1,25 @@
import warnings
warnings.filterwarnings("ignore")
import utils, os
import utils
import os
hps = utils.get_hparams(stage=2)
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
import torch.distributed as dist, traceback
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import logging, traceback
import logging
logging.getLogger("matplotlib").setLevel(logging.INFO)
logging.getLogger("h5py").setLevel(logging.INFO)
logging.getLogger("numba").setLevel(logging.INFO)
from random import randint
from module import commons
from peft import LoraConfig, PeftModel, get_peft_model
from peft import LoraConfig, get_peft_model
from module.data_utils import (
TextAudioSpeakerLoaderV3 as TextAudioSpeakerLoader,
TextAudioSpeakerCollateV3 as TextAudioSpeakerCollate,
@ -27,10 +27,7 @@ from module.data_utils import (
)
from module.models import (
SynthesizerTrnV3 as SynthesizerTrn,
MultiPeriodDiscriminator,
)
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from process_ckpt import savee
from collections import OrderedDict as od
torch.backends.cudnn.benchmark = False

View File

@ -1,6 +1,5 @@
# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
import sys
import re
import cn2an
import ToJyutping

View File

@ -1,5 +1,4 @@
import os
import pdb
import re
import cn2an
@ -17,7 +16,8 @@ pinyin_to_symbol_map = {
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
}
import jieba_fast, logging
import jieba_fast
import logging
jieba_fast.setLogLevel(logging.CRITICAL)
import jieba_fast.posseg as psg

View File

@ -1,10 +1,9 @@
import os
import pdb
import re
import cn2an
from pypinyin import lazy_pinyin, Style
from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
from pypinyin.contrib.tone_convert import to_finals_tone3, to_initials
from text.symbols import punctuation
from text.tone_sandhi import ToneSandhi
@ -18,7 +17,8 @@ pinyin_to_symbol_map = {
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
}
import jieba_fast, logging
import jieba_fast
import logging
jieba_fast.setLogLevel(logging.CRITICAL)
import jieba_fast.posseg as psg

View File

@ -8,7 +8,6 @@ from text.symbols import punctuation
from text.symbols2 import symbols
import unicodedata
from builtins import str as unicode
from text.en_normalization.expend import normalize
from nltk.tokenize import TweetTokenizer

View File

@ -5,7 +5,8 @@ import warnings
warnings.filterwarnings("ignore")
import json
import os
import zipfile,requests
import zipfile
import requests
from typing import Any
from typing import Dict
from typing import List

View File

@ -58,7 +58,7 @@ try:
if os.path.exists(USERDIC_BIN_PATH):
pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
except Exception as e:
except Exception:
# print(e)
import pyopenjtalk
# failed to load user dictionary, ignore.

View File

@ -16,7 +16,7 @@ if os.name == 'nt':
spam_spec = importlib.util.find_spec("eunjeon")
non_found = spam_spec is None
if non_found:
print(f'you have to install eunjeon. install it...')
print('you have to install eunjeon. install it...')
else:
installpath = spam_spec.submodule_search_locations[0]
if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):

View File

@ -1,4 +1,3 @@
import os
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
punctuation = ["!", "?", "", ",", "."] # @是SP停顿

View File

@ -1,4 +1,3 @@
import os
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
punctuation = ["!", "?", "", ",", "."] # @是SP停顿

View File

@ -9,9 +9,7 @@ import traceback
import librosa
import numpy as np
from scipy.io.wavfile import read
import torch
import logging
logging.getLogger("numba").setLevel(logging.ERROR)
logging.getLogger("matplotlib").setLevel(logging.ERROR)
@ -132,7 +130,6 @@ def plot_spectrogram_to_numpy(spectrogram):
mpl_logger = logging.getLogger("matplotlib")
mpl_logger.setLevel(logging.WARNING)
import matplotlib.pylab as plt
import numpy as np
fig, ax = plt.subplots(figsize=(10, 2))
im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
@ -158,7 +155,6 @@ def plot_alignment_to_numpy(alignment, info=None):
mpl_logger = logging.getLogger("matplotlib")
mpl_logger.setLevel(logging.WARNING)
import matplotlib.pylab as plt
import numpy as np
fig, ax = plt.subplots(figsize=(6, 4))
im = ax.imshow(

19
api.py
View File

@ -142,7 +142,8 @@ RESP: 无
import argparse
import os,re
import os
import re
import sys
now_dir = os.getcwd()
@ -152,10 +153,11 @@ sys.path.append("%s/GPT_SoVITS" % (now_dir))
import signal
from text.LangSegmenter import LangSegmenter
from time import time as ttime
import torch, torchaudio
import torch
import torchaudio
import librosa
import soundfile as sf
from fastapi import FastAPI, Request, Query, HTTPException
from fastapi import FastAPI, Request, Query
from fastapi.responses import StreamingResponse, JSONResponse
import uvicorn
from transformers import AutoModelForMaskedLM, AutoTokenizer
@ -163,12 +165,11 @@ import numpy as np
from feature_extractor import cnhubert
from io import BytesIO
from module.models import SynthesizerTrn, SynthesizerTrnV3
from peft import LoraConfig, PeftModel, get_peft_model
from peft import LoraConfig, get_peft_model
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence
from text.cleaner import clean_text
from module.mel_processing import spectrogram_torch
from tools.my_utils import load_audio
import config as global_config
import logging
import subprocess
@ -221,7 +222,7 @@ def resample(audio_tensor, sr0):
return resample_transform_dict[sr0](audio_tensor)
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
from module.mel_processing import mel_spectrogram_torch
spec_min = -12
spec_max = 2
def norm_spec(x):
@ -860,7 +861,7 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cu
if not default_refer.is_ready():
return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)
if not sample_steps in [4,8,16,32]:
if sample_steps not in [4,8,16,32]:
sample_steps = 32
if cut_punc == None:
@ -990,10 +991,10 @@ logger.info(f"编码格式: {media_type}")
# 音频数据类型
if args.sub_type.lower() == 'int32':
is_int32 = True
logger.info(f"数据类型: int32")
logger.info("数据类型: int32")
else:
is_int32 = False
logger.info(f"数据类型: int16")
logger.info("数据类型: int16")
# 初始化模型
cnhubert.cnhubert_base_path = cnhubert_base_path

View File

@ -112,15 +112,13 @@ import wave
import signal
import numpy as np
import soundfile as sf
from fastapi import FastAPI, Request, HTTPException, Response
from fastapi import FastAPI, Response
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi import FastAPI, UploadFile, File
import uvicorn
from io import BytesIO
from tools.i18n.i18n import I18nAuto
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import get_method_names as get_cut_method_names
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
# print(sys.path)
i18n = I18nAuto()
@ -337,7 +335,7 @@ async def tts_handle(req:dict):
audio_data = pack_audio(BytesIO(), audio_data, sr, media_type).getvalue()
return Response(audio_data, media_type=f"audio/{media_type}")
except Exception as e:
return JSONResponse(status_code=400, content={"message": f"tts failed", "Exception": str(e)})
return JSONResponse(status_code=400, content={"message": "tts failed", "Exception": str(e)})
@ -415,7 +413,7 @@ async def set_refer_aduio(refer_audio_path: str = None):
try:
tts_pipeline.set_ref_audio(refer_audio_path)
except Exception as e:
return JSONResponse(status_code=400, content={"message": f"set refer audio failed", "Exception": str(e)})
return JSONResponse(status_code=400, content={"message": "set refer audio failed", "Exception": str(e)})
return JSONResponse(status_code=200, content={"message": "success"})
@ -444,7 +442,7 @@ async def set_gpt_weights(weights_path: str = None):
return JSONResponse(status_code=400, content={"message": "gpt weight path is required"})
tts_pipeline.init_t2s_weights(weights_path)
except Exception as e:
return JSONResponse(status_code=400, content={"message": f"change gpt weight failed", "Exception": str(e)})
return JSONResponse(status_code=400, content={"message": "change gpt weight failed", "Exception": str(e)})
return JSONResponse(status_code=200, content={"message": "success"})
@ -456,7 +454,7 @@ async def set_sovits_weights(weights_path: str = None):
return JSONResponse(status_code=400, content={"message": "sovits weight path is required"})
tts_pipeline.init_vits_weights(weights_path)
except Exception as e:
return JSONResponse(status_code=400, content={"message": f"change sovits weight failed", "Exception": str(e)})
return JSONResponse(status_code=400, content={"message": "change sovits weight failed", "Exception": str(e)})
return JSONResponse(status_code=200, content={"message": "success"})
@ -466,7 +464,7 @@ if __name__ == "__main__":
if host == 'None': # 在调用时使用 -a None 参数可以让api监听双栈
host = None
uvicorn.run(app=APP, host=host, port=port, workers=1)
except Exception as e:
except Exception:
traceback.print_exc()
os.kill(os.getpid(), signal.SIGTERM)
exit(0)

View File

@ -1,4 +1,5 @@
import sys,os
import sys
import os
import torch

View File

@ -143,11 +143,7 @@
"# 开启推理页面\n",
"%cd /kaggle/working/GPT-SoVITS/\n",
"!npm install -g localtunnel\n",
"import subprocess\n",
"import threading\n",
"import time\n",
"import socket\n",
"import urllib.request\n",
"def iframe_thread(port):\n",
" while True:\n",
" time.sleep(0.5)\n",

View File

@ -1,7 +1,7 @@
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from torch.nn.utils import weight_norm, spectral_norm
# from utils import init_weights, get_padding
def get_padding(kernel_size, dilation=1):
return int((kernel_size*dilation - dilation)/2)

View File

@ -1,24 +1,15 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import sys,os
import traceback
import sys
import os
AP_BWE_main_dir_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'AP_BWE_main')
sys.path.append(AP_BWE_main_dir_path)
import glob
import argparse
import json
from re import S
import torch
import numpy as np
import torchaudio
import time
import torchaudio.functional as aF
# from attrdict import AttrDict####will be bug in py3.10
from datasets1.dataset import amp_pha_stft, amp_pha_istft
from models.model import APNet_BWE_Model
import soundfile as sf
import matplotlib.pyplot as plt
from rich.progress import track
class AP_BWE():
def __init__(self,device,DictToAttrRecursive,checkpoint_file=None):

View File

@ -1,4 +1,5 @@
import os,argparse
import os
import argparse
import traceback
from modelscope.pipelines import pipeline

View File

@ -116,7 +116,7 @@ def update_i18n_json(json_file, standard_keys):
if num_miss_translation > 0:
print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
else:
print(f"\033[32m[Passed] All Keys Translated\033[0m")
print("\033[32m[Passed] All Keys Translated\033[0m")
# 将处理后的结果写入 JSON 文件
with open(json_file, "w", encoding="utf-8") as f:
json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=SORT_KEYS)

View File

@ -1,4 +1,5 @@
import platform,os,traceback
import os
import traceback
import ffmpeg
import numpy as np
import gradio as gr
@ -21,7 +22,7 @@ def load_audio(file, sr):
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
except Exception as e:
except Exception:
traceback.print_exc()
raise RuntimeError(i18n("音频加载失败"))

View File

@ -1,4 +1,6 @@
import os,sys,numpy as np
import os
import sys
import numpy as np
import traceback
from scipy.io import wavfile
# parent_directory = os.path.dirname(os.path.abspath(__file__))

View File

@ -249,7 +249,7 @@ def main():
soundfile.write(
os.path.join(
out,
f"%s_%d.wav"
"%s_%d.wav"
% (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
),
chunk,

View File

@ -1,7 +1,7 @@
import argparse,os
import argparse
import os
import copy
import json
import os
import uuid
try:
@ -70,7 +70,7 @@ def b_change_index(index, batch):
# )
{
"__type__": "update",
"label": f"Text",
"label": "Text",
"value": ""
}
)

View File

@ -54,7 +54,7 @@ class Attend(nn.Module):
# similarity
sim = einsum(f"b h i d, b h j d -> b h i j", q, k) * scale
sim = einsum("b h i d, b h j d -> b h i j", q, k) * scale
# attention
@ -63,6 +63,6 @@ class Attend(nn.Module):
# aggregate values
out = einsum(f"b h i j, b h j d -> b h i d", attn, v)
out = einsum("b h i j, b h j d -> b h i d", attn, v)
return out

View File

@ -1,14 +1,14 @@
from functools import partial
import torch
from torch import nn, einsum, Tensor
from torch import nn
from torch.nn import Module, ModuleList
import torch.nn.functional as F
from bs_roformer.attend import Attend
from torch.utils.checkpoint import checkpoint
from typing import Tuple, Optional, List, Callable
from typing import Tuple, Optional, Callable
# from beartype.typing import Tuple, Optional, List, Callable
# from beartype import beartype

View File

@ -1,14 +1,14 @@
from functools import partial
import torch
from torch import nn, einsum, Tensor
from torch import nn
from torch.nn import Module, ModuleList
import torch.nn.functional as F
from bs_roformer.attend import Attend
from torch.utils.checkpoint import checkpoint
from typing import Tuple, Optional, List, Callable
from typing import Tuple, Optional, Callable
# from beartype.typing import Tuple, Optional, List, Callable
# from beartype import beartype

View File

@ -1,6 +1,6 @@
import os,sys
import os
parent_directory = os.path.dirname(os.path.abspath(__file__))
import logging,pdb
import logging
logger = logging.getLogger(__name__)
import librosa

View File

@ -1,13 +1,13 @@
import os
import traceback,gradio as gr
import traceback
import gradio as gr
import logging
from tools.i18n.i18n import I18nAuto
from tools.my_utils import clean_path
i18n = I18nAuto()
logger = logging.getLogger(__name__)
import librosa,ffmpeg
import soundfile as sf
import ffmpeg
import torch
import sys
from mdxnet import MDXNetDereverb

View File

@ -1,4 +1,5 @@
import os,sys
import os
import sys
if len(sys.argv)==1:sys.argv.append('v2')
version="v1"if sys.argv[1]=="v1" else"v2"
os.environ["version"]=version
@ -6,7 +7,11 @@ now_dir = os.getcwd()
sys.path.insert(0, now_dir)
import warnings
warnings.filterwarnings("ignore")
import json,yaml,torch,pdb,re,shutil
import json
import yaml
import torch
import re
import shutil
import platform
import psutil
import signal
@ -45,21 +50,18 @@ for site_packages_root in site_packages_roots:
% (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir)
)
break
except PermissionError as e:
except PermissionError:
traceback.print_exc()
from tools import my_utils
import shutil
import pdb
import subprocess
from subprocess import Popen
import signal
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
from tools.i18n.i18n import I18nAuto, scan_language_list
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
os.environ["language"]=language
i18n = I18nAuto(language=language)
from scipy.io import wavfile
from tools.my_utils import load_audio, check_for_existance, check_details
from tools.my_utils import check_for_existance, check_details
from multiprocessing import cpu_count
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
try: