mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 04:22:46 +08:00
ruff check --fix
This commit is contained in:
parent
28bdff356f
commit
a893a4e283
@ -1,19 +1,17 @@
|
||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/dataset.py
|
||||
# reference: https://github.com/lifeiteng/vall-e
|
||||
import pdb
|
||||
import sys
|
||||
|
||||
# sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert")
|
||||
import traceback, os
|
||||
import traceback
|
||||
import os
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch, json
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.data import Dataset
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
version = os.environ.get('version',None)
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
||||
# reference: https://github.com/lifeiteng/vall-e
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
|
@ -1,6 +1,7 @@
|
||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
||||
# reference: https://github.com/lifeiteng/vall-e
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
|
@ -9,8 +9,6 @@ from AR.models.utils import make_pad_mask, make_pad_mask_left
|
||||
from AR.models.utils import (
|
||||
topk_sampling,
|
||||
sample,
|
||||
logits_to_probs,
|
||||
multinomial_sample_one_no_sync,
|
||||
dpo_loss,
|
||||
make_reject_y,
|
||||
get_batch_logps
|
||||
@ -718,7 +716,7 @@ class Text2SemanticDecoder(nn.Module):
|
||||
idx_list[batch_index] = idx
|
||||
y_list[batch_index] = y[i, :-1]
|
||||
|
||||
if not (None in idx_list):
|
||||
if None not in idx_list:
|
||||
stop = True
|
||||
|
||||
if stop:
|
||||
|
@ -1,7 +1,6 @@
|
||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
|
||||
# reference: https://github.com/lifeiteng/vall-e
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from AR.modules.embedding_onnx import SinePositionalEmbedding
|
||||
from AR.modules.embedding_onnx import TokenEmbedding
|
||||
|
@ -130,7 +130,7 @@ def topk_sampling(logits, top_k=10, top_p=1.0, temperature=1.0):
|
||||
return token
|
||||
|
||||
|
||||
from typing import Optional, Tuple
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def multinomial_sample_one_no_sync(
|
||||
|
@ -11,7 +11,6 @@ from torch.nn.init import xavier_uniform_
|
||||
from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from torch.nn import functional as F
|
||||
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
|
||||
|
||||
|
||||
|
@ -5,7 +5,6 @@ from torch.nn.functional import (
|
||||
_none_or_dtype,
|
||||
_in_projection_packed,
|
||||
)
|
||||
from torch.nn import functional as F
|
||||
import torch
|
||||
# Tensor = torch.Tensor
|
||||
# from typing import Callable, List, Optional, Tuple, Union
|
||||
|
@ -1,9 +1,6 @@
|
||||
from torch.nn.functional import *
|
||||
from torch.nn.functional import (
|
||||
_mha_shape_check,
|
||||
_canonical_mask,
|
||||
_none_or_dtype,
|
||||
_in_projection_packed,
|
||||
)
|
||||
|
||||
def multi_head_attention_forward_patched(
|
||||
|
@ -13,12 +13,9 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import logging
|
||||
import math
|
||||
import random
|
||||
from typing import Optional
|
||||
from typing import Tuple
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
@ -451,13 +451,13 @@ class BigVGAN(
|
||||
# instantiate BigVGAN using h
|
||||
if use_cuda_kernel:
|
||||
print(
|
||||
f"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
|
||||
"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
|
||||
)
|
||||
print(
|
||||
f"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
|
||||
"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
|
||||
)
|
||||
print(
|
||||
f"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
|
||||
"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
|
||||
)
|
||||
model = cls(h, use_cuda_kernel=use_cuda_kernel)
|
||||
|
||||
@ -485,7 +485,7 @@ class BigVGAN(
|
||||
model.load_state_dict(checkpoint_dict["generator"])
|
||||
except RuntimeError:
|
||||
print(
|
||||
f"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
|
||||
"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
|
||||
)
|
||||
model.remove_weight_norm()
|
||||
model.load_state_dict(checkpoint_dict["generator"])
|
||||
|
@ -15,7 +15,7 @@ from torchaudio.transforms import Spectrogram, Resample
|
||||
from env import AttrDict
|
||||
from utils import get_padding
|
||||
import typing
|
||||
from typing import Optional, List, Union, Dict, Tuple
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
class DiscriminatorP(torch.nn.Module):
|
||||
@ -508,7 +508,7 @@ class DiscriminatorCQT(nn.Module):
|
||||
self.cqtd_normalize_volume = self.cfg.get("cqtd_normalize_volume", False)
|
||||
if self.cqtd_normalize_volume:
|
||||
print(
|
||||
f"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
|
||||
"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
|
||||
)
|
||||
|
||||
def get_2d_padding(
|
||||
|
@ -6,13 +6,12 @@
|
||||
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
from scipy import signal
|
||||
|
||||
import typing
|
||||
from typing import Optional, List, Union, Dict, Tuple
|
||||
from typing import List, Tuple
|
||||
from collections import namedtuple
|
||||
import math
|
||||
import functools
|
||||
|
@ -328,7 +328,7 @@ def train(rank, a, h):
|
||||
|
||||
# PESQ calculation. only evaluate PESQ if it's speech signal (nonspeech PESQ will error out)
|
||||
if (
|
||||
not "nonspeech" in mode
|
||||
"nonspeech" not in mode
|
||||
): # Skips if the name of dataset (in mode string) contains "nonspeech"
|
||||
|
||||
# Resample to 16000 for pesq
|
||||
|
@ -1,6 +1,8 @@
|
||||
from copy import deepcopy
|
||||
import math
|
||||
import os, sys, gc
|
||||
import os
|
||||
import sys
|
||||
import gc
|
||||
import random
|
||||
import traceback
|
||||
import time
|
||||
@ -10,7 +12,7 @@ now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
import ffmpeg
|
||||
import os
|
||||
from typing import Generator, List, Tuple, Union
|
||||
from typing import List, Tuple, Union
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
@ -22,14 +24,13 @@ from feature_extractor.cnhubert import CNHubert
|
||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||
from peft import LoraConfig, get_peft_model
|
||||
import librosa
|
||||
from time import time as ttime
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from tools.my_utils import load_audio
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from TTS_infer_pack.text_segmentation_method import splits
|
||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||
from BigVGAN.bigvgan import BigVGAN
|
||||
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
|
||||
from module.mel_processing import mel_spectrogram_torch
|
||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||
language=os.environ.get("language","Auto")
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
@ -250,7 +251,7 @@ class TTS_Config:
|
||||
|
||||
self.device = self.configs.get("device", torch.device("cpu"))
|
||||
if "cuda" in str(self.device) and not torch.cuda.is_available():
|
||||
print(f"Warning: CUDA is not available, set device to CPU.")
|
||||
print("Warning: CUDA is not available, set device to CPU.")
|
||||
self.device = torch.device("cpu")
|
||||
|
||||
self.is_half = self.configs.get("is_half", False)
|
||||
|
@ -1,5 +1,6 @@
|
||||
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from tqdm import tqdm
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
now_dir = os.getcwd()
|
||||
sys.path.insert(0, now_dir)
|
||||
from text.g2pw import G2PWPinyin
|
||||
|
@ -3,7 +3,6 @@
|
||||
import argparse
|
||||
from typing import Optional
|
||||
from my_utils import load_audio
|
||||
from text import cleaned_text_to_sequence
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
@ -813,11 +812,11 @@ import json
|
||||
def export_symbel(version='v2'):
|
||||
if version=='v1':
|
||||
symbols = text._symbol_to_id_v1
|
||||
with open(f"onnx/symbols_v1.json", "w") as file:
|
||||
with open("onnx/symbols_v1.json", "w") as file:
|
||||
json.dump(symbols, file, indent=4)
|
||||
else:
|
||||
symbols = text._symbol_to_id_v2
|
||||
with open(f"onnx/symbols_v2.json", "w") as file:
|
||||
with open("onnx/symbols_v2.json", "w") as file:
|
||||
json.dump(symbols, file, indent=4)
|
||||
|
||||
def main():
|
||||
|
@ -6,16 +6,16 @@ from export_torch_script import (
|
||||
spectrogram_torch,
|
||||
)
|
||||
from f5_tts.model.backbones.dit import DiT
|
||||
from feature_extractor import cnhubert
|
||||
from inference_webui import get_phones_and_bert
|
||||
import librosa
|
||||
from module import commons
|
||||
from module.mel_processing import mel_spectrogram_torch, spectral_normalize_torch
|
||||
from module.mel_processing import mel_spectrogram_torch
|
||||
from module.models_onnx import CFM, SynthesizerTrnV3
|
||||
import numpy as np
|
||||
import torch._dynamo.config
|
||||
import torchaudio
|
||||
import logging, uvicorn
|
||||
import logging
|
||||
import uvicorn
|
||||
import torch
|
||||
import soundfile
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
@ -942,7 +942,7 @@ def test_():
|
||||
|
||||
cfm.eval()
|
||||
|
||||
logger.info(f"cfm ok")
|
||||
logger.info("cfm ok")
|
||||
|
||||
dict_s1 = torch.load("GPT_SoVITS/pretrained_models/s1v3.ckpt")
|
||||
# v2 的 gpt 也可以用
|
||||
@ -957,7 +957,7 @@ def test_():
|
||||
t2s_m = torch.jit.script(t2s_m)
|
||||
t2s_m.eval()
|
||||
# t2s_m.top_k = 15
|
||||
logger.info(f"t2s_m ok")
|
||||
logger.info("t2s_m ok")
|
||||
|
||||
|
||||
vq_model: torch.jit.ScriptModule = torch.jit.load(
|
||||
@ -967,7 +967,7 @@ def test_():
|
||||
# vq_model = vq_model.half().to(device)
|
||||
vq_model.eval()
|
||||
# vq_model = sovits.vq_model
|
||||
logger.info(f"vq_model ok")
|
||||
logger.info("vq_model ok")
|
||||
|
||||
# gpt_sovits_v3_half = torch.jit.load("onnx/ad/gpt_sovits_v3_half.pt")
|
||||
# gpt_sovits_v3_half = torch.jit.optimize_for_inference(gpt_sovits_v3_half)
|
||||
@ -975,7 +975,7 @@ def test_():
|
||||
# gpt_sovits_v3_half = gpt_sovits_v3_half.cuda()
|
||||
# gpt_sovits_v3_half.eval()
|
||||
gpt_sovits_v3_half = ExportGPTSovitsHalf(sovits.hps, t2s_m, vq_model)
|
||||
logger.info(f"gpt_sovits_v3_half ok")
|
||||
logger.info("gpt_sovits_v3_half ok")
|
||||
|
||||
# init_bigvgan()
|
||||
# global bigvgan_model
|
||||
@ -985,7 +985,7 @@ def test_():
|
||||
bigvgan_model = bigvgan_model.cuda()
|
||||
bigvgan_model.eval()
|
||||
|
||||
logger.info(f"bigvgan ok")
|
||||
logger.info("bigvgan ok")
|
||||
|
||||
gpt_sovits_v3 = GPTSoVITSV3(gpt_sovits_v3_half, cfm, bigvgan_model)
|
||||
gpt_sovits_v3 = torch.jit.script(gpt_sovits_v3)
|
||||
|
@ -11,7 +11,6 @@ from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
import torch.nn.functional as F
|
||||
from torch.utils.checkpoint import checkpoint
|
||||
|
||||
from x_transformers.x_transformers import RotaryEmbedding
|
||||
|
@ -1,9 +1,5 @@
|
||||
import time
|
||||
|
||||
import librosa
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import soundfile as sf
|
||||
import os
|
||||
from transformers import logging as tf_logging
|
||||
tf_logging.set_verbosity_error()
|
||||
|
@ -7,7 +7,9 @@
|
||||
全部按日文识别
|
||||
'''
|
||||
import logging
|
||||
import traceback,torchaudio,warnings
|
||||
import traceback
|
||||
import torchaudio
|
||||
import warnings
|
||||
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||
@ -18,8 +20,10 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
||||
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||
|
||||
import os, re, sys, json
|
||||
import pdb
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import torch
|
||||
from text.LangSegmenter import LangSegmenter
|
||||
|
||||
@ -42,12 +46,12 @@ for i in range(3):
|
||||
pretrained_gpt_name,pretrained_sovits_name = _
|
||||
|
||||
|
||||
if os.path.exists(f"./weight.json"):
|
||||
if os.path.exists("./weight.json"):
|
||||
pass
|
||||
else:
|
||||
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||
|
||||
with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
||||
with open("./weight.json", 'r', encoding="utf-8") as file:
|
||||
weight_data = file.read()
|
||||
weight_data=json.loads(weight_data)
|
||||
gpt_path = os.environ.get(
|
||||
@ -87,7 +91,6 @@ from feature_extractor import cnhubert
|
||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||
|
||||
from GPT_SoVITS.module.models import SynthesizerTrn,SynthesizerTrnV3
|
||||
import numpy as np
|
||||
import random
|
||||
def set_seed(seed):
|
||||
if seed == -1:
|
||||
@ -104,9 +107,8 @@ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from time import time as ttime
|
||||
from tools.my_utils import load_audio
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from peft import LoraConfig, PeftModel, get_peft_model
|
||||
from peft import LoraConfig, get_peft_model
|
||||
|
||||
language=os.environ.get("language","Auto")
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
@ -327,9 +329,8 @@ def change_gpt_weights(gpt_path):
|
||||
|
||||
change_gpt_weights(gpt_path)
|
||||
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
|
||||
import torch,soundfile
|
||||
import torch
|
||||
now_dir = os.getcwd()
|
||||
import soundfile
|
||||
|
||||
def init_bigvgan():
|
||||
global bigvgan_model
|
||||
|
@ -7,7 +7,10 @@
|
||||
全部按日文识别
|
||||
'''
|
||||
import random
|
||||
import os, re, logging, json
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import json
|
||||
import sys
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
@ -20,7 +23,6 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
|
||||
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
||||
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
||||
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||
import pdb
|
||||
import torch
|
||||
|
||||
try:
|
||||
@ -182,12 +184,12 @@ for i in range(3):
|
||||
pretrained_gpt_name,pretrained_sovits_name = _
|
||||
|
||||
|
||||
if os.path.exists(f"./weight.json"):
|
||||
if os.path.exists("./weight.json"):
|
||||
pass
|
||||
else:
|
||||
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||
|
||||
with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
||||
with open("./weight.json", 'r', encoding="utf-8") as file:
|
||||
weight_data = file.read()
|
||||
weight_data=json.loads(weight_data)
|
||||
gpt_path = os.environ.get(
|
||||
|
@ -1,22 +1,13 @@
|
||||
import time
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import traceback
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.utils.data
|
||||
from tqdm import tqdm
|
||||
|
||||
from module import commons
|
||||
from module.mel_processing import spectrogram_torch,spec_to_mel_torch
|
||||
from text import cleaned_text_to_sequence
|
||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
||||
import torch.nn.functional as F
|
||||
from functools import lru_cache
|
||||
import requests
|
||||
from scipy.io import wavfile
|
||||
from io import BytesIO
|
||||
from tools.my_utils import load_audio
|
||||
version = os.environ.get('version',None)
|
||||
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
|
||||
|
@ -1,7 +1,6 @@
|
||||
import math
|
||||
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
def feature_loss(fmap_r, fmap_g):
|
||||
|
@ -1,16 +1,5 @@
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import torch
|
||||
from torch import nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.data
|
||||
import numpy as np
|
||||
import librosa
|
||||
import librosa.util as librosa_util
|
||||
from librosa.util import normalize, pad_center, tiny
|
||||
from scipy.signal import get_window
|
||||
from scipy.io.wavfile import read
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
|
||||
MAX_WAV_VALUE = 32768.0
|
||||
|
@ -1,9 +1,6 @@
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import copy
|
||||
import math
|
||||
import os
|
||||
import pdb
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
@ -13,7 +10,7 @@ from module import commons
|
||||
from module import modules
|
||||
from module import attentions
|
||||
from f5_tts.model import DiT
|
||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
||||
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from module.commons import init_weights, get_padding
|
||||
from module.mrte_model import MRTE
|
||||
@ -22,7 +19,8 @@ from module.quantize import ResidualVectorQuantizer
|
||||
from text import symbols as symbols_v1
|
||||
from text import symbols2 as symbols_v2
|
||||
from torch.cuda.amp import autocast
|
||||
import contextlib,random
|
||||
import contextlib
|
||||
import random
|
||||
|
||||
|
||||
class StochasticDurationPredictor(nn.Module):
|
||||
|
@ -1,4 +1,3 @@
|
||||
import copy
|
||||
import math
|
||||
from typing import Optional
|
||||
import torch
|
||||
@ -11,14 +10,13 @@ from module import attentions_onnx as attentions
|
||||
|
||||
from f5_tts.model import DiT
|
||||
|
||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
||||
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from module.commons import init_weights, get_padding
|
||||
from module.quantize import ResidualVectorQuantizer
|
||||
# from text import symbols
|
||||
from text import symbols as symbols_v1
|
||||
from text import symbols2 as symbols_v2
|
||||
from torch.cuda.amp import autocast
|
||||
|
||||
|
||||
class StochasticDurationPredictor(nn.Module):
|
||||
|
@ -7,7 +7,6 @@
|
||||
"""Residual vector quantizer implementation."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
import math
|
||||
import typing as tp
|
||||
|
||||
import torch
|
||||
|
@ -10,7 +10,6 @@ cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||
ssl_model = cnhubert.get_model()
|
||||
from text import cleaned_text_to_sequence
|
||||
import soundfile
|
||||
from tools.my_utils import load_audio
|
||||
import os
|
||||
import json
|
||||
|
||||
|
@ -14,13 +14,10 @@ bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
|
||||
import torch
|
||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||
version = os.environ.get('version', None)
|
||||
import sys, numpy as np, traceback, pdb
|
||||
import traceback
|
||||
import os.path
|
||||
from glob import glob
|
||||
from tqdm import tqdm
|
||||
from text.cleaner import clean_text
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
import numpy as np
|
||||
from tools.my_utils import clean_path
|
||||
|
||||
# inp_text=sys.argv[1]
|
||||
|
@ -1,6 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys,os
|
||||
import sys
|
||||
import os
|
||||
inp_text= os.environ.get("inp_text")
|
||||
inp_wav_dir= os.environ.get("inp_wav_dir")
|
||||
exp_name= os.environ.get("exp_name")
|
||||
@ -14,7 +15,8 @@ cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")
|
||||
import torch
|
||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||
|
||||
import pdb,traceback,numpy as np,logging
|
||||
import traceback
|
||||
import numpy as np
|
||||
from scipy.io import wavfile
|
||||
import librosa
|
||||
now_dir = os.getcwd()
|
||||
|
@ -26,17 +26,13 @@ else:
|
||||
version = "v3"
|
||||
import torch
|
||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||
import math, traceback
|
||||
import multiprocessing
|
||||
import sys, pdb
|
||||
import traceback
|
||||
import sys
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
from random import shuffle
|
||||
import torch.multiprocessing as mp
|
||||
from glob import glob
|
||||
from tqdm import tqdm
|
||||
import logging, librosa, utils
|
||||
import logging
|
||||
import utils
|
||||
if version!="v3":
|
||||
from module.models import SynthesizerTrn
|
||||
else:
|
||||
|
@ -1,7 +1,8 @@
|
||||
import traceback
|
||||
from collections import OrderedDict
|
||||
from time import time as ttime
|
||||
import shutil,os
|
||||
import shutil
|
||||
import os
|
||||
import torch
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
|
||||
import os
|
||||
import pdb
|
||||
|
||||
if "_CUDA_VISIBLE_DEVICES" in os.environ:
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
|
||||
@ -8,7 +7,8 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import torch, platform
|
||||
import torch
|
||||
import platform
|
||||
from pytorch_lightning import seed_everything
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
@ -24,8 +24,6 @@ torch.set_float32_matmul_precision("high")
|
||||
from AR.utils import get_newest_ckpt
|
||||
|
||||
from collections import OrderedDict
|
||||
from time import time as ttime
|
||||
import shutil
|
||||
from process_ckpt import my_save
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import utils, os
|
||||
import utils
|
||||
import os
|
||||
hps = utils.get_hparams(stage=2)
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||
import torch
|
||||
@ -8,11 +9,11 @@ from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import torch.multiprocessing as mp
|
||||
import torch.distributed as dist, traceback
|
||||
import torch.distributed as dist
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.cuda.amp import autocast, GradScaler
|
||||
from tqdm import tqdm
|
||||
import logging, traceback
|
||||
import logging
|
||||
|
||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||
|
@ -1,18 +1,18 @@
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import utils, os
|
||||
import utils
|
||||
import os
|
||||
hps = utils.get_hparams(stage=2)
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import torch.multiprocessing as mp
|
||||
import torch.distributed as dist, traceback
|
||||
import torch.distributed as dist
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.cuda.amp import autocast, GradScaler
|
||||
from tqdm import tqdm
|
||||
import logging, traceback
|
||||
import logging
|
||||
|
||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||
@ -27,10 +27,7 @@ from module.data_utils import (
|
||||
)
|
||||
from module.models import (
|
||||
SynthesizerTrnV3 as SynthesizerTrn,
|
||||
MultiPeriodDiscriminator,
|
||||
)
|
||||
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
|
||||
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
||||
from process_ckpt import savee
|
||||
|
||||
torch.backends.cudnn.benchmark = False
|
||||
|
@ -1,25 +1,25 @@
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import utils, os
|
||||
import utils
|
||||
import os
|
||||
hps = utils.get_hparams(stage=2)
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import torch.multiprocessing as mp
|
||||
import torch.distributed as dist, traceback
|
||||
import torch.distributed as dist
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.cuda.amp import autocast, GradScaler
|
||||
from tqdm import tqdm
|
||||
import logging, traceback
|
||||
import logging
|
||||
|
||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||
logging.getLogger("numba").setLevel(logging.INFO)
|
||||
from random import randint
|
||||
from module import commons
|
||||
from peft import LoraConfig, PeftModel, get_peft_model
|
||||
from peft import LoraConfig, get_peft_model
|
||||
from module.data_utils import (
|
||||
TextAudioSpeakerLoaderV3 as TextAudioSpeakerLoader,
|
||||
TextAudioSpeakerCollateV3 as TextAudioSpeakerCollate,
|
||||
@ -27,10 +27,7 @@ from module.data_utils import (
|
||||
)
|
||||
from module.models import (
|
||||
SynthesizerTrnV3 as SynthesizerTrn,
|
||||
MultiPeriodDiscriminator,
|
||||
)
|
||||
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
|
||||
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
||||
from process_ckpt import savee
|
||||
from collections import OrderedDict as od
|
||||
torch.backends.cudnn.benchmark = False
|
||||
|
@ -1,6 +1,5 @@
|
||||
# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
|
||||
|
||||
import sys
|
||||
import re
|
||||
import cn2an
|
||||
import ToJyutping
|
||||
|
@ -1,5 +1,4 @@
|
||||
import os
|
||||
import pdb
|
||||
import re
|
||||
|
||||
import cn2an
|
||||
@ -17,7 +16,8 @@ pinyin_to_symbol_map = {
|
||||
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
||||
}
|
||||
|
||||
import jieba_fast, logging
|
||||
import jieba_fast
|
||||
import logging
|
||||
jieba_fast.setLogLevel(logging.CRITICAL)
|
||||
import jieba_fast.posseg as psg
|
||||
|
||||
|
@ -1,10 +1,9 @@
|
||||
import os
|
||||
import pdb
|
||||
import re
|
||||
|
||||
import cn2an
|
||||
from pypinyin import lazy_pinyin, Style
|
||||
from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
|
||||
from pypinyin.contrib.tone_convert import to_finals_tone3, to_initials
|
||||
|
||||
from text.symbols import punctuation
|
||||
from text.tone_sandhi import ToneSandhi
|
||||
@ -18,7 +17,8 @@ pinyin_to_symbol_map = {
|
||||
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
||||
}
|
||||
|
||||
import jieba_fast, logging
|
||||
import jieba_fast
|
||||
import logging
|
||||
jieba_fast.setLogLevel(logging.CRITICAL)
|
||||
import jieba_fast.posseg as psg
|
||||
|
||||
|
@ -8,7 +8,6 @@ from text.symbols import punctuation
|
||||
|
||||
from text.symbols2 import symbols
|
||||
|
||||
import unicodedata
|
||||
from builtins import str as unicode
|
||||
from text.en_normalization.expend import normalize
|
||||
from nltk.tokenize import TweetTokenizer
|
||||
|
@ -5,7 +5,8 @@ import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import json
|
||||
import os
|
||||
import zipfile,requests
|
||||
import zipfile
|
||||
import requests
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
|
@ -58,7 +58,7 @@ try:
|
||||
|
||||
if os.path.exists(USERDIC_BIN_PATH):
|
||||
pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
# print(e)
|
||||
import pyopenjtalk
|
||||
# failed to load user dictionary, ignore.
|
||||
|
@ -16,7 +16,7 @@ if os.name == 'nt':
|
||||
spam_spec = importlib.util.find_spec("eunjeon")
|
||||
non_found = spam_spec is None
|
||||
if non_found:
|
||||
print(f'you have to install eunjeon. install it...')
|
||||
print('you have to install eunjeon. install it...')
|
||||
else:
|
||||
installpath = spam_spec.submodule_search_locations[0]
|
||||
if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):
|
||||
|
@ -1,4 +1,3 @@
|
||||
import os
|
||||
|
||||
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
||||
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
||||
|
@ -1,4 +1,3 @@
|
||||
import os
|
||||
|
||||
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
||||
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
||||
|
@ -9,9 +9,7 @@ import traceback
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
from scipy.io.wavfile import read
|
||||
import torch
|
||||
import logging
|
||||
|
||||
logging.getLogger("numba").setLevel(logging.ERROR)
|
||||
logging.getLogger("matplotlib").setLevel(logging.ERROR)
|
||||
@ -132,7 +130,6 @@ def plot_spectrogram_to_numpy(spectrogram):
|
||||
mpl_logger = logging.getLogger("matplotlib")
|
||||
mpl_logger.setLevel(logging.WARNING)
|
||||
import matplotlib.pylab as plt
|
||||
import numpy as np
|
||||
|
||||
fig, ax = plt.subplots(figsize=(10, 2))
|
||||
im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
|
||||
@ -158,7 +155,6 @@ def plot_alignment_to_numpy(alignment, info=None):
|
||||
mpl_logger = logging.getLogger("matplotlib")
|
||||
mpl_logger.setLevel(logging.WARNING)
|
||||
import matplotlib.pylab as plt
|
||||
import numpy as np
|
||||
|
||||
fig, ax = plt.subplots(figsize=(6, 4))
|
||||
im = ax.imshow(
|
||||
|
19
api.py
19
api.py
@ -142,7 +142,8 @@ RESP: 无
|
||||
|
||||
|
||||
import argparse
|
||||
import os,re
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
now_dir = os.getcwd()
|
||||
@ -152,10 +153,11 @@ sys.path.append("%s/GPT_SoVITS" % (now_dir))
|
||||
import signal
|
||||
from text.LangSegmenter import LangSegmenter
|
||||
from time import time as ttime
|
||||
import torch, torchaudio
|
||||
import torch
|
||||
import torchaudio
|
||||
import librosa
|
||||
import soundfile as sf
|
||||
from fastapi import FastAPI, Request, Query, HTTPException
|
||||
from fastapi import FastAPI, Request, Query
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
import uvicorn
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
@ -163,12 +165,11 @@ import numpy as np
|
||||
from feature_extractor import cnhubert
|
||||
from io import BytesIO
|
||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||
from peft import LoraConfig, PeftModel, get_peft_model
|
||||
from peft import LoraConfig, get_peft_model
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from tools.my_utils import load_audio
|
||||
import config as global_config
|
||||
import logging
|
||||
import subprocess
|
||||
@ -221,7 +222,7 @@ def resample(audio_tensor, sr0):
|
||||
return resample_transform_dict[sr0](audio_tensor)
|
||||
|
||||
|
||||
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
|
||||
from module.mel_processing import mel_spectrogram_torch
|
||||
spec_min = -12
|
||||
spec_max = 2
|
||||
def norm_spec(x):
|
||||
@ -860,7 +861,7 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cu
|
||||
if not default_refer.is_ready():
|
||||
return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)
|
||||
|
||||
if not sample_steps in [4,8,16,32]:
|
||||
if sample_steps not in [4,8,16,32]:
|
||||
sample_steps = 32
|
||||
|
||||
if cut_punc == None:
|
||||
@ -990,10 +991,10 @@ logger.info(f"编码格式: {media_type}")
|
||||
# 音频数据类型
|
||||
if args.sub_type.lower() == 'int32':
|
||||
is_int32 = True
|
||||
logger.info(f"数据类型: int32")
|
||||
logger.info("数据类型: int32")
|
||||
else:
|
||||
is_int32 = False
|
||||
logger.info(f"数据类型: int16")
|
||||
logger.info("数据类型: int16")
|
||||
|
||||
# 初始化模型
|
||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||
|
14
api_v2.py
14
api_v2.py
@ -112,15 +112,13 @@ import wave
|
||||
import signal
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from fastapi import FastAPI, Request, HTTPException, Response
|
||||
from fastapi import FastAPI, Response
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from fastapi import FastAPI, UploadFile, File
|
||||
import uvicorn
|
||||
from io import BytesIO
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
||||
from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import get_method_names as get_cut_method_names
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
# print(sys.path)
|
||||
i18n = I18nAuto()
|
||||
@ -337,7 +335,7 @@ async def tts_handle(req:dict):
|
||||
audio_data = pack_audio(BytesIO(), audio_data, sr, media_type).getvalue()
|
||||
return Response(audio_data, media_type=f"audio/{media_type}")
|
||||
except Exception as e:
|
||||
return JSONResponse(status_code=400, content={"message": f"tts failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=400, content={"message": "tts failed", "Exception": str(e)})
|
||||
|
||||
|
||||
|
||||
@ -415,7 +413,7 @@ async def set_refer_aduio(refer_audio_path: str = None):
|
||||
try:
|
||||
tts_pipeline.set_ref_audio(refer_audio_path)
|
||||
except Exception as e:
|
||||
return JSONResponse(status_code=400, content={"message": f"set refer audio failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=400, content={"message": "set refer audio failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=200, content={"message": "success"})
|
||||
|
||||
|
||||
@ -444,7 +442,7 @@ async def set_gpt_weights(weights_path: str = None):
|
||||
return JSONResponse(status_code=400, content={"message": "gpt weight path is required"})
|
||||
tts_pipeline.init_t2s_weights(weights_path)
|
||||
except Exception as e:
|
||||
return JSONResponse(status_code=400, content={"message": f"change gpt weight failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=400, content={"message": "change gpt weight failed", "Exception": str(e)})
|
||||
|
||||
return JSONResponse(status_code=200, content={"message": "success"})
|
||||
|
||||
@ -456,7 +454,7 @@ async def set_sovits_weights(weights_path: str = None):
|
||||
return JSONResponse(status_code=400, content={"message": "sovits weight path is required"})
|
||||
tts_pipeline.init_vits_weights(weights_path)
|
||||
except Exception as e:
|
||||
return JSONResponse(status_code=400, content={"message": f"change sovits weight failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=400, content={"message": "change sovits weight failed", "Exception": str(e)})
|
||||
return JSONResponse(status_code=200, content={"message": "success"})
|
||||
|
||||
|
||||
@ -466,7 +464,7 @@ if __name__ == "__main__":
|
||||
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
||||
host = None
|
||||
uvicorn.run(app=APP, host=host, port=port, workers=1)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
exit(0)
|
||||
|
@ -143,11 +143,7 @@
|
||||
"# 开启推理页面\n",
|
||||
"%cd /kaggle/working/GPT-SoVITS/\n",
|
||||
"!npm install -g localtunnel\n",
|
||||
"import subprocess\n",
|
||||
"import threading\n",
|
||||
"import time\n",
|
||||
"import socket\n",
|
||||
"import urllib.request\n",
|
||||
"def iframe_thread(port):\n",
|
||||
" while True:\n",
|
||||
" time.sleep(0.5)\n",
|
||||
|
@ -1,7 +1,7 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from torch.nn.utils import weight_norm, spectral_norm
|
||||
# from utils import init_weights, get_padding
|
||||
def get_padding(kernel_size, dilation=1):
|
||||
return int((kernel_size*dilation - dilation)/2)
|
||||
|
@ -1,24 +1,15 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
import sys,os
|
||||
import traceback
|
||||
import sys
|
||||
import os
|
||||
AP_BWE_main_dir_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'AP_BWE_main')
|
||||
sys.path.append(AP_BWE_main_dir_path)
|
||||
import glob
|
||||
import argparse
|
||||
import json
|
||||
from re import S
|
||||
import torch
|
||||
import numpy as np
|
||||
import torchaudio
|
||||
import time
|
||||
import torchaudio.functional as aF
|
||||
# from attrdict import AttrDict####will be bug in py3.10
|
||||
|
||||
from datasets1.dataset import amp_pha_stft, amp_pha_istft
|
||||
from models.model import APNet_BWE_Model
|
||||
import soundfile as sf
|
||||
import matplotlib.pyplot as plt
|
||||
from rich.progress import track
|
||||
|
||||
class AP_BWE():
|
||||
def __init__(self,device,DictToAttrRecursive,checkpoint_file=None):
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os,argparse
|
||||
import os
|
||||
import argparse
|
||||
import traceback
|
||||
|
||||
from modelscope.pipelines import pipeline
|
||||
|
@ -116,7 +116,7 @@ def update_i18n_json(json_file, standard_keys):
|
||||
if num_miss_translation > 0:
|
||||
print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
|
||||
else:
|
||||
print(f"\033[32m[Passed] All Keys Translated\033[0m")
|
||||
print("\033[32m[Passed] All Keys Translated\033[0m")
|
||||
# 将处理后的结果写入 JSON 文件
|
||||
with open(json_file, "w", encoding="utf-8") as f:
|
||||
json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=SORT_KEYS)
|
||||
|
@ -1,4 +1,5 @@
|
||||
import platform,os,traceback
|
||||
import os
|
||||
import traceback
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
import gradio as gr
|
||||
@ -21,7 +22,7 @@ def load_audio(file, sr):
|
||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
raise RuntimeError(i18n("音频加载失败"))
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
import os,sys,numpy as np
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import traceback
|
||||
from scipy.io import wavfile
|
||||
# parent_directory = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@ -249,7 +249,7 @@ def main():
|
||||
soundfile.write(
|
||||
os.path.join(
|
||||
out,
|
||||
f"%s_%d.wav"
|
||||
"%s_%d.wav"
|
||||
% (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
|
||||
),
|
||||
chunk,
|
||||
|
@ -1,7 +1,7 @@
|
||||
import argparse,os
|
||||
import argparse
|
||||
import os
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
|
||||
try:
|
||||
@ -70,7 +70,7 @@ def b_change_index(index, batch):
|
||||
# )
|
||||
{
|
||||
"__type__": "update",
|
||||
"label": f"Text",
|
||||
"label": "Text",
|
||||
"value": ""
|
||||
}
|
||||
)
|
||||
|
@ -54,7 +54,7 @@ class Attend(nn.Module):
|
||||
|
||||
# similarity
|
||||
|
||||
sim = einsum(f"b h i d, b h j d -> b h i j", q, k) * scale
|
||||
sim = einsum("b h i d, b h j d -> b h i j", q, k) * scale
|
||||
|
||||
# attention
|
||||
|
||||
@ -63,6 +63,6 @@ class Attend(nn.Module):
|
||||
|
||||
# aggregate values
|
||||
|
||||
out = einsum(f"b h i j, b h j d -> b h i d", attn, v)
|
||||
out = einsum("b h i j, b h j d -> b h i d", attn, v)
|
||||
|
||||
return out
|
||||
|
@ -1,14 +1,14 @@
|
||||
from functools import partial
|
||||
|
||||
import torch
|
||||
from torch import nn, einsum, Tensor
|
||||
from torch import nn
|
||||
from torch.nn import Module, ModuleList
|
||||
import torch.nn.functional as F
|
||||
|
||||
from bs_roformer.attend import Attend
|
||||
from torch.utils.checkpoint import checkpoint
|
||||
|
||||
from typing import Tuple, Optional, List, Callable
|
||||
from typing import Tuple, Optional, Callable
|
||||
# from beartype.typing import Tuple, Optional, List, Callable
|
||||
# from beartype import beartype
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
from functools import partial
|
||||
|
||||
import torch
|
||||
from torch import nn, einsum, Tensor
|
||||
from torch import nn
|
||||
from torch.nn import Module, ModuleList
|
||||
import torch.nn.functional as F
|
||||
|
||||
from bs_roformer.attend import Attend
|
||||
from torch.utils.checkpoint import checkpoint
|
||||
|
||||
from typing import Tuple, Optional, List, Callable
|
||||
from typing import Tuple, Optional, Callable
|
||||
# from beartype.typing import Tuple, Optional, List, Callable
|
||||
# from beartype import beartype
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
import os,sys
|
||||
import os
|
||||
parent_directory = os.path.dirname(os.path.abspath(__file__))
|
||||
import logging,pdb
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import librosa
|
||||
|
@ -1,13 +1,13 @@
|
||||
import os
|
||||
import traceback,gradio as gr
|
||||
import traceback
|
||||
import gradio as gr
|
||||
import logging
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from tools.my_utils import clean_path
|
||||
i18n = I18nAuto()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import librosa,ffmpeg
|
||||
import soundfile as sf
|
||||
import ffmpeg
|
||||
import torch
|
||||
import sys
|
||||
from mdxnet import MDXNetDereverb
|
||||
|
16
webui.py
16
webui.py
@ -1,4 +1,5 @@
|
||||
import os,sys
|
||||
import os
|
||||
import sys
|
||||
if len(sys.argv)==1:sys.argv.append('v2')
|
||||
version="v1"if sys.argv[1]=="v1" else"v2"
|
||||
os.environ["version"]=version
|
||||
@ -6,7 +7,11 @@ now_dir = os.getcwd()
|
||||
sys.path.insert(0, now_dir)
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
import json,yaml,torch,pdb,re,shutil
|
||||
import json
|
||||
import yaml
|
||||
import torch
|
||||
import re
|
||||
import shutil
|
||||
import platform
|
||||
import psutil
|
||||
import signal
|
||||
@ -45,21 +50,18 @@ for site_packages_root in site_packages_roots:
|
||||
% (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir)
|
||||
)
|
||||
break
|
||||
except PermissionError as e:
|
||||
except PermissionError:
|
||||
traceback.print_exc()
|
||||
from tools import my_utils
|
||||
import shutil
|
||||
import pdb
|
||||
import subprocess
|
||||
from subprocess import Popen
|
||||
import signal
|
||||
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
|
||||
os.environ["language"]=language
|
||||
i18n = I18nAuto(language=language)
|
||||
from scipy.io import wavfile
|
||||
from tools.my_utils import load_audio, check_for_existance, check_details
|
||||
from tools.my_utils import check_for_existance, check_details
|
||||
from multiprocessing import cpu_count
|
||||
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user