mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-06 03:57:44 +08:00
ruff check --fix
This commit is contained in:
parent
28bdff356f
commit
a893a4e283
@ -1,19 +1,17 @@
|
|||||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/dataset.py
|
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/data/dataset.py
|
||||||
# reference: https://github.com/lifeiteng/vall-e
|
# reference: https://github.com/lifeiteng/vall-e
|
||||||
import pdb
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert")
|
# sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert")
|
||||||
import traceback, os
|
import traceback
|
||||||
|
import os
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import torch, json
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
from transformers import AutoTokenizer
|
|
||||||
|
|
||||||
version = os.environ.get('version',None)
|
version = os.environ.get('version',None)
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
||||||
# reference: https://github.com/lifeiteng/vall-e
|
# reference: https://github.com/lifeiteng/vall-e
|
||||||
import os, sys
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_lightning_module.py
|
||||||
# reference: https://github.com/lifeiteng/vall-e
|
# reference: https://github.com/lifeiteng/vall-e
|
||||||
import os, sys
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
|
@ -9,8 +9,6 @@ from AR.models.utils import make_pad_mask, make_pad_mask_left
|
|||||||
from AR.models.utils import (
|
from AR.models.utils import (
|
||||||
topk_sampling,
|
topk_sampling,
|
||||||
sample,
|
sample,
|
||||||
logits_to_probs,
|
|
||||||
multinomial_sample_one_no_sync,
|
|
||||||
dpo_loss,
|
dpo_loss,
|
||||||
make_reject_y,
|
make_reject_y,
|
||||||
get_batch_logps
|
get_batch_logps
|
||||||
@ -718,7 +716,7 @@ class Text2SemanticDecoder(nn.Module):
|
|||||||
idx_list[batch_index] = idx
|
idx_list[batch_index] = idx
|
||||||
y_list[batch_index] = y[i, :-1]
|
y_list[batch_index] = y[i, :-1]
|
||||||
|
|
||||||
if not (None in idx_list):
|
if None not in idx_list:
|
||||||
stop = True
|
stop = True
|
||||||
|
|
||||||
if stop:
|
if stop:
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
|
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
|
||||||
# reference: https://github.com/lifeiteng/vall-e
|
# reference: https://github.com/lifeiteng/vall-e
|
||||||
import torch
|
import torch
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from AR.modules.embedding_onnx import SinePositionalEmbedding
|
from AR.modules.embedding_onnx import SinePositionalEmbedding
|
||||||
from AR.modules.embedding_onnx import TokenEmbedding
|
from AR.modules.embedding_onnx import TokenEmbedding
|
||||||
|
@ -130,7 +130,7 @@ def topk_sampling(logits, top_k=10, top_p=1.0, temperature=1.0):
|
|||||||
return token
|
return token
|
||||||
|
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
def multinomial_sample_one_no_sync(
|
def multinomial_sample_one_no_sync(
|
||||||
|
@ -11,7 +11,6 @@ from torch.nn.init import xavier_uniform_
|
|||||||
from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
|
from torch.nn.modules.linear import NonDynamicallyQuantizableLinear
|
||||||
from torch.nn.parameter import Parameter
|
from torch.nn.parameter import Parameter
|
||||||
|
|
||||||
from torch.nn import functional as F
|
|
||||||
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
|
from AR.modules.patched_mha_with_cache_onnx import multi_head_attention_forward_patched
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ from torch.nn.functional import (
|
|||||||
_none_or_dtype,
|
_none_or_dtype,
|
||||||
_in_projection_packed,
|
_in_projection_packed,
|
||||||
)
|
)
|
||||||
from torch.nn import functional as F
|
|
||||||
import torch
|
import torch
|
||||||
# Tensor = torch.Tensor
|
# Tensor = torch.Tensor
|
||||||
# from typing import Callable, List, Optional, Tuple, Union
|
# from typing import Callable, List, Optional, Tuple, Union
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
from torch.nn.functional import *
|
from torch.nn.functional import *
|
||||||
from torch.nn.functional import (
|
from torch.nn.functional import (
|
||||||
_mha_shape_check,
|
|
||||||
_canonical_mask,
|
_canonical_mask,
|
||||||
_none_or_dtype,
|
|
||||||
_in_projection_packed,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def multi_head_attention_forward_patched(
|
def multi_head_attention_forward_patched(
|
||||||
|
@ -13,12 +13,9 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import logging
|
|
||||||
import math
|
|
||||||
import random
|
import random
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
@ -451,13 +451,13 @@ class BigVGAN(
|
|||||||
# instantiate BigVGAN using h
|
# instantiate BigVGAN using h
|
||||||
if use_cuda_kernel:
|
if use_cuda_kernel:
|
||||||
print(
|
print(
|
||||||
f"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
|
"[WARNING] You have specified use_cuda_kernel=True during BigVGAN.from_pretrained(). Only inference is supported (training is not implemented)!"
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
f"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
|
"[WARNING] You need nvcc and ninja installed in your system that matches your PyTorch build is using to build the kernel. If not, the model will fail to initialize or generate incorrect waveform!"
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
f"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
|
"[WARNING] For detail, see the official GitHub repository: https://github.com/NVIDIA/BigVGAN?tab=readme-ov-file#using-custom-cuda-kernel-for-synthesis"
|
||||||
)
|
)
|
||||||
model = cls(h, use_cuda_kernel=use_cuda_kernel)
|
model = cls(h, use_cuda_kernel=use_cuda_kernel)
|
||||||
|
|
||||||
@ -485,7 +485,7 @@ class BigVGAN(
|
|||||||
model.load_state_dict(checkpoint_dict["generator"])
|
model.load_state_dict(checkpoint_dict["generator"])
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
print(
|
print(
|
||||||
f"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
|
"[INFO] the pretrained checkpoint does not contain weight norm. Loading the checkpoint after removing weight norm!"
|
||||||
)
|
)
|
||||||
model.remove_weight_norm()
|
model.remove_weight_norm()
|
||||||
model.load_state_dict(checkpoint_dict["generator"])
|
model.load_state_dict(checkpoint_dict["generator"])
|
||||||
|
@ -15,7 +15,7 @@ from torchaudio.transforms import Spectrogram, Resample
|
|||||||
from env import AttrDict
|
from env import AttrDict
|
||||||
from utils import get_padding
|
from utils import get_padding
|
||||||
import typing
|
import typing
|
||||||
from typing import Optional, List, Union, Dict, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
|
||||||
class DiscriminatorP(torch.nn.Module):
|
class DiscriminatorP(torch.nn.Module):
|
||||||
@ -508,7 +508,7 @@ class DiscriminatorCQT(nn.Module):
|
|||||||
self.cqtd_normalize_volume = self.cfg.get("cqtd_normalize_volume", False)
|
self.cqtd_normalize_volume = self.cfg.get("cqtd_normalize_volume", False)
|
||||||
if self.cqtd_normalize_volume:
|
if self.cqtd_normalize_volume:
|
||||||
print(
|
print(
|
||||||
f"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
|
"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_2d_padding(
|
def get_2d_padding(
|
||||||
|
@ -6,13 +6,12 @@
|
|||||||
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
from scipy import signal
|
from scipy import signal
|
||||||
|
|
||||||
import typing
|
import typing
|
||||||
from typing import Optional, List, Union, Dict, Tuple
|
from typing import List, Tuple
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import math
|
import math
|
||||||
import functools
|
import functools
|
||||||
|
@ -328,7 +328,7 @@ def train(rank, a, h):
|
|||||||
|
|
||||||
# PESQ calculation. only evaluate PESQ if it's speech signal (nonspeech PESQ will error out)
|
# PESQ calculation. only evaluate PESQ if it's speech signal (nonspeech PESQ will error out)
|
||||||
if (
|
if (
|
||||||
not "nonspeech" in mode
|
"nonspeech" not in mode
|
||||||
): # Skips if the name of dataset (in mode string) contains "nonspeech"
|
): # Skips if the name of dataset (in mode string) contains "nonspeech"
|
||||||
|
|
||||||
# Resample to 16000 for pesq
|
# Resample to 16000 for pesq
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import math
|
import math
|
||||||
import os, sys, gc
|
import os
|
||||||
|
import sys
|
||||||
|
import gc
|
||||||
import random
|
import random
|
||||||
import traceback
|
import traceback
|
||||||
import time
|
import time
|
||||||
@ -10,7 +12,7 @@ now_dir = os.getcwd()
|
|||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
import os
|
import os
|
||||||
from typing import Generator, List, Tuple, Union
|
from typing import List, Tuple, Union
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
@ -22,14 +24,13 @@ from feature_extractor.cnhubert import CNHubert
|
|||||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||||
from peft import LoraConfig, get_peft_model
|
from peft import LoraConfig, get_peft_model
|
||||||
import librosa
|
import librosa
|
||||||
from time import time as ttime
|
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
from tools.my_utils import load_audio
|
from tools.my_utils import load_audio
|
||||||
from module.mel_processing import spectrogram_torch
|
from module.mel_processing import spectrogram_torch
|
||||||
from TTS_infer_pack.text_segmentation_method import splits
|
from TTS_infer_pack.text_segmentation_method import splits
|
||||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||||
from BigVGAN.bigvgan import BigVGAN
|
from BigVGAN.bigvgan import BigVGAN
|
||||||
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
|
from module.mel_processing import mel_spectrogram_torch
|
||||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||||
language=os.environ.get("language","Auto")
|
language=os.environ.get("language","Auto")
|
||||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
@ -250,7 +251,7 @@ class TTS_Config:
|
|||||||
|
|
||||||
self.device = self.configs.get("device", torch.device("cpu"))
|
self.device = self.configs.get("device", torch.device("cpu"))
|
||||||
if "cuda" in str(self.device) and not torch.cuda.is_available():
|
if "cuda" in str(self.device) and not torch.cuda.is_available():
|
||||||
print(f"Warning: CUDA is not available, set device to CPU.")
|
print("Warning: CUDA is not available, set device to CPU.")
|
||||||
self.device = torch.device("cpu")
|
self.device = torch.device("cpu")
|
||||||
|
|
||||||
self.is_half = self.configs.get("is_half", False)
|
self.is_half = self.configs.get("is_half", False)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
import os, sys
|
import os
|
||||||
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import os, sys
|
import os
|
||||||
|
import sys
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.insert(0, now_dir)
|
sys.path.insert(0, now_dir)
|
||||||
from text.g2pw import G2PWPinyin
|
from text.g2pw import G2PWPinyin
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
import argparse
|
import argparse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from my_utils import load_audio
|
from my_utils import load_audio
|
||||||
from text import cleaned_text_to_sequence
|
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
|
|
||||||
@ -813,11 +812,11 @@ import json
|
|||||||
def export_symbel(version='v2'):
|
def export_symbel(version='v2'):
|
||||||
if version=='v1':
|
if version=='v1':
|
||||||
symbols = text._symbol_to_id_v1
|
symbols = text._symbol_to_id_v1
|
||||||
with open(f"onnx/symbols_v1.json", "w") as file:
|
with open("onnx/symbols_v1.json", "w") as file:
|
||||||
json.dump(symbols, file, indent=4)
|
json.dump(symbols, file, indent=4)
|
||||||
else:
|
else:
|
||||||
symbols = text._symbol_to_id_v2
|
symbols = text._symbol_to_id_v2
|
||||||
with open(f"onnx/symbols_v2.json", "w") as file:
|
with open("onnx/symbols_v2.json", "w") as file:
|
||||||
json.dump(symbols, file, indent=4)
|
json.dump(symbols, file, indent=4)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -6,16 +6,16 @@ from export_torch_script import (
|
|||||||
spectrogram_torch,
|
spectrogram_torch,
|
||||||
)
|
)
|
||||||
from f5_tts.model.backbones.dit import DiT
|
from f5_tts.model.backbones.dit import DiT
|
||||||
from feature_extractor import cnhubert
|
|
||||||
from inference_webui import get_phones_and_bert
|
from inference_webui import get_phones_and_bert
|
||||||
import librosa
|
import librosa
|
||||||
from module import commons
|
from module import commons
|
||||||
from module.mel_processing import mel_spectrogram_torch, spectral_normalize_torch
|
from module.mel_processing import mel_spectrogram_torch
|
||||||
from module.models_onnx import CFM, SynthesizerTrnV3
|
from module.models_onnx import CFM, SynthesizerTrnV3
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch._dynamo.config
|
import torch._dynamo.config
|
||||||
import torchaudio
|
import torchaudio
|
||||||
import logging, uvicorn
|
import logging
|
||||||
|
import uvicorn
|
||||||
import torch
|
import torch
|
||||||
import soundfile
|
import soundfile
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
@ -942,7 +942,7 @@ def test_():
|
|||||||
|
|
||||||
cfm.eval()
|
cfm.eval()
|
||||||
|
|
||||||
logger.info(f"cfm ok")
|
logger.info("cfm ok")
|
||||||
|
|
||||||
dict_s1 = torch.load("GPT_SoVITS/pretrained_models/s1v3.ckpt")
|
dict_s1 = torch.load("GPT_SoVITS/pretrained_models/s1v3.ckpt")
|
||||||
# v2 的 gpt 也可以用
|
# v2 的 gpt 也可以用
|
||||||
@ -957,7 +957,7 @@ def test_():
|
|||||||
t2s_m = torch.jit.script(t2s_m)
|
t2s_m = torch.jit.script(t2s_m)
|
||||||
t2s_m.eval()
|
t2s_m.eval()
|
||||||
# t2s_m.top_k = 15
|
# t2s_m.top_k = 15
|
||||||
logger.info(f"t2s_m ok")
|
logger.info("t2s_m ok")
|
||||||
|
|
||||||
|
|
||||||
vq_model: torch.jit.ScriptModule = torch.jit.load(
|
vq_model: torch.jit.ScriptModule = torch.jit.load(
|
||||||
@ -967,7 +967,7 @@ def test_():
|
|||||||
# vq_model = vq_model.half().to(device)
|
# vq_model = vq_model.half().to(device)
|
||||||
vq_model.eval()
|
vq_model.eval()
|
||||||
# vq_model = sovits.vq_model
|
# vq_model = sovits.vq_model
|
||||||
logger.info(f"vq_model ok")
|
logger.info("vq_model ok")
|
||||||
|
|
||||||
# gpt_sovits_v3_half = torch.jit.load("onnx/ad/gpt_sovits_v3_half.pt")
|
# gpt_sovits_v3_half = torch.jit.load("onnx/ad/gpt_sovits_v3_half.pt")
|
||||||
# gpt_sovits_v3_half = torch.jit.optimize_for_inference(gpt_sovits_v3_half)
|
# gpt_sovits_v3_half = torch.jit.optimize_for_inference(gpt_sovits_v3_half)
|
||||||
@ -975,7 +975,7 @@ def test_():
|
|||||||
# gpt_sovits_v3_half = gpt_sovits_v3_half.cuda()
|
# gpt_sovits_v3_half = gpt_sovits_v3_half.cuda()
|
||||||
# gpt_sovits_v3_half.eval()
|
# gpt_sovits_v3_half.eval()
|
||||||
gpt_sovits_v3_half = ExportGPTSovitsHalf(sovits.hps, t2s_m, vq_model)
|
gpt_sovits_v3_half = ExportGPTSovitsHalf(sovits.hps, t2s_m, vq_model)
|
||||||
logger.info(f"gpt_sovits_v3_half ok")
|
logger.info("gpt_sovits_v3_half ok")
|
||||||
|
|
||||||
# init_bigvgan()
|
# init_bigvgan()
|
||||||
# global bigvgan_model
|
# global bigvgan_model
|
||||||
@ -985,7 +985,7 @@ def test_():
|
|||||||
bigvgan_model = bigvgan_model.cuda()
|
bigvgan_model = bigvgan_model.cuda()
|
||||||
bigvgan_model.eval()
|
bigvgan_model.eval()
|
||||||
|
|
||||||
logger.info(f"bigvgan ok")
|
logger.info("bigvgan ok")
|
||||||
|
|
||||||
gpt_sovits_v3 = GPTSoVITSV3(gpt_sovits_v3_half, cfm, bigvgan_model)
|
gpt_sovits_v3 = GPTSoVITSV3(gpt_sovits_v3_half, cfm, bigvgan_model)
|
||||||
gpt_sovits_v3 = torch.jit.script(gpt_sovits_v3)
|
gpt_sovits_v3 = torch.jit.script(gpt_sovits_v3)
|
||||||
|
@ -11,7 +11,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
import torch.nn.functional as F
|
|
||||||
from torch.utils.checkpoint import checkpoint
|
from torch.utils.checkpoint import checkpoint
|
||||||
|
|
||||||
from x_transformers.x_transformers import RotaryEmbedding
|
from x_transformers.x_transformers import RotaryEmbedding
|
||||||
|
@ -1,9 +1,5 @@
|
|||||||
import time
|
|
||||||
|
|
||||||
import librosa
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
import soundfile as sf
|
|
||||||
import os
|
import os
|
||||||
from transformers import logging as tf_logging
|
from transformers import logging as tf_logging
|
||||||
tf_logging.set_verbosity_error()
|
tf_logging.set_verbosity_error()
|
||||||
|
@ -7,7 +7,9 @@
|
|||||||
全部按日文识别
|
全部按日文识别
|
||||||
'''
|
'''
|
||||||
import logging
|
import logging
|
||||||
import traceback,torchaudio,warnings
|
import traceback
|
||||||
|
import torchaudio
|
||||||
|
import warnings
|
||||||
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
||||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||||
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||||
@ -18,8 +20,10 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
|||||||
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
||||||
warnings.simplefilter(action='ignore', category=FutureWarning)
|
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||||
|
|
||||||
import os, re, sys, json
|
import os
|
||||||
import pdb
|
import re
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
import torch
|
import torch
|
||||||
from text.LangSegmenter import LangSegmenter
|
from text.LangSegmenter import LangSegmenter
|
||||||
|
|
||||||
@ -42,12 +46,12 @@ for i in range(3):
|
|||||||
pretrained_gpt_name,pretrained_sovits_name = _
|
pretrained_gpt_name,pretrained_sovits_name = _
|
||||||
|
|
||||||
|
|
||||||
if os.path.exists(f"./weight.json"):
|
if os.path.exists("./weight.json"):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||||
|
|
||||||
with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
with open("./weight.json", 'r', encoding="utf-8") as file:
|
||||||
weight_data = file.read()
|
weight_data = file.read()
|
||||||
weight_data=json.loads(weight_data)
|
weight_data=json.loads(weight_data)
|
||||||
gpt_path = os.environ.get(
|
gpt_path = os.environ.get(
|
||||||
@ -87,7 +91,6 @@ from feature_extractor import cnhubert
|
|||||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||||
|
|
||||||
from GPT_SoVITS.module.models import SynthesizerTrn,SynthesizerTrnV3
|
from GPT_SoVITS.module.models import SynthesizerTrn,SynthesizerTrnV3
|
||||||
import numpy as np
|
|
||||||
import random
|
import random
|
||||||
def set_seed(seed):
|
def set_seed(seed):
|
||||||
if seed == -1:
|
if seed == -1:
|
||||||
@ -104,9 +107,8 @@ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
|||||||
from text import cleaned_text_to_sequence
|
from text import cleaned_text_to_sequence
|
||||||
from text.cleaner import clean_text
|
from text.cleaner import clean_text
|
||||||
from time import time as ttime
|
from time import time as ttime
|
||||||
from tools.my_utils import load_audio
|
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
from peft import LoraConfig, PeftModel, get_peft_model
|
from peft import LoraConfig, get_peft_model
|
||||||
|
|
||||||
language=os.environ.get("language","Auto")
|
language=os.environ.get("language","Auto")
|
||||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
@ -327,9 +329,8 @@ def change_gpt_weights(gpt_path):
|
|||||||
|
|
||||||
change_gpt_weights(gpt_path)
|
change_gpt_weights(gpt_path)
|
||||||
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
|
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
|
||||||
import torch,soundfile
|
import torch
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
import soundfile
|
|
||||||
|
|
||||||
def init_bigvgan():
|
def init_bigvgan():
|
||||||
global bigvgan_model
|
global bigvgan_model
|
||||||
|
@ -7,7 +7,10 @@
|
|||||||
全部按日文识别
|
全部按日文识别
|
||||||
'''
|
'''
|
||||||
import random
|
import random
|
||||||
import os, re, logging, json
|
import os
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
import sys
|
import sys
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
@ -20,7 +23,6 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
|
|||||||
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
||||||
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
||||||
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||||
import pdb
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -182,12 +184,12 @@ for i in range(3):
|
|||||||
pretrained_gpt_name,pretrained_sovits_name = _
|
pretrained_gpt_name,pretrained_sovits_name = _
|
||||||
|
|
||||||
|
|
||||||
if os.path.exists(f"./weight.json"):
|
if os.path.exists("./weight.json"):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
with open("./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
||||||
|
|
||||||
with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
with open("./weight.json", 'r', encoding="utf-8") as file:
|
||||||
weight_data = file.read()
|
weight_data = file.read()
|
||||||
weight_data=json.loads(weight_data)
|
weight_data=json.loads(weight_data)
|
||||||
gpt_path = os.environ.get(
|
gpt_path = os.environ.get(
|
||||||
|
@ -1,22 +1,13 @@
|
|||||||
import time
|
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import traceback
|
import traceback
|
||||||
import numpy as np
|
|
||||||
import torch
|
import torch
|
||||||
import torch.utils.data
|
import torch.utils.data
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module import commons
|
|
||||||
from module.mel_processing import spectrogram_torch,spec_to_mel_torch
|
from module.mel_processing import spectrogram_torch,spec_to_mel_torch
|
||||||
from text import cleaned_text_to_sequence
|
from text import cleaned_text_to_sequence
|
||||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from functools import lru_cache
|
|
||||||
import requests
|
|
||||||
from scipy.io import wavfile
|
|
||||||
from io import BytesIO
|
|
||||||
from tools.my_utils import load_audio
|
from tools.my_utils import load_audio
|
||||||
version = os.environ.get('version',None)
|
version = os.environ.get('version',None)
|
||||||
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
|
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.nn import functional as F
|
|
||||||
|
|
||||||
|
|
||||||
def feature_loss(fmap_r, fmap_g):
|
def feature_loss(fmap_r, fmap_g):
|
||||||
|
@ -1,16 +1,5 @@
|
|||||||
import math
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.utils.data
|
import torch.utils.data
|
||||||
import numpy as np
|
|
||||||
import librosa
|
|
||||||
import librosa.util as librosa_util
|
|
||||||
from librosa.util import normalize, pad_center, tiny
|
|
||||||
from scipy.signal import get_window
|
|
||||||
from scipy.io.wavfile import read
|
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
|
|
||||||
MAX_WAV_VALUE = 32768.0
|
MAX_WAV_VALUE = 32768.0
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import copy
|
|
||||||
import math
|
import math
|
||||||
import os
|
|
||||||
import pdb
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
@ -13,7 +10,7 @@ from module import commons
|
|||||||
from module import modules
|
from module import modules
|
||||||
from module import attentions
|
from module import attentions
|
||||||
from f5_tts.model import DiT
|
from f5_tts.model import DiT
|
||||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||||
from module.commons import init_weights, get_padding
|
from module.commons import init_weights, get_padding
|
||||||
from module.mrte_model import MRTE
|
from module.mrte_model import MRTE
|
||||||
@ -22,7 +19,8 @@ from module.quantize import ResidualVectorQuantizer
|
|||||||
from text import symbols as symbols_v1
|
from text import symbols as symbols_v1
|
||||||
from text import symbols2 as symbols_v2
|
from text import symbols2 as symbols_v2
|
||||||
from torch.cuda.amp import autocast
|
from torch.cuda.amp import autocast
|
||||||
import contextlib,random
|
import contextlib
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
class StochasticDurationPredictor(nn.Module):
|
class StochasticDurationPredictor(nn.Module):
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import copy
|
|
||||||
import math
|
import math
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import torch
|
import torch
|
||||||
@ -11,14 +10,13 @@ from module import attentions_onnx as attentions
|
|||||||
|
|
||||||
from f5_tts.model import DiT
|
from f5_tts.model import DiT
|
||||||
|
|
||||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||||
from module.commons import init_weights, get_padding
|
from module.commons import init_weights, get_padding
|
||||||
from module.quantize import ResidualVectorQuantizer
|
from module.quantize import ResidualVectorQuantizer
|
||||||
# from text import symbols
|
# from text import symbols
|
||||||
from text import symbols as symbols_v1
|
from text import symbols as symbols_v1
|
||||||
from text import symbols2 as symbols_v2
|
from text import symbols2 as symbols_v2
|
||||||
from torch.cuda.amp import autocast
|
|
||||||
|
|
||||||
|
|
||||||
class StochasticDurationPredictor(nn.Module):
|
class StochasticDurationPredictor(nn.Module):
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
"""Residual vector quantizer implementation."""
|
"""Residual vector quantizer implementation."""
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
import math
|
|
||||||
import typing as tp
|
import typing as tp
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
@ -10,7 +10,6 @@ cnhubert.cnhubert_base_path = cnhubert_base_path
|
|||||||
ssl_model = cnhubert.get_model()
|
ssl_model = cnhubert.get_model()
|
||||||
from text import cleaned_text_to_sequence
|
from text import cleaned_text_to_sequence
|
||||||
import soundfile
|
import soundfile
|
||||||
from tools.my_utils import load_audio
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
@ -14,13 +14,10 @@ bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
|
|||||||
import torch
|
import torch
|
||||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||||
version = os.environ.get('version', None)
|
version = os.environ.get('version', None)
|
||||||
import sys, numpy as np, traceback, pdb
|
import traceback
|
||||||
import os.path
|
import os.path
|
||||||
from glob import glob
|
|
||||||
from tqdm import tqdm
|
|
||||||
from text.cleaner import clean_text
|
from text.cleaner import clean_text
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
import numpy as np
|
|
||||||
from tools.my_utils import clean_path
|
from tools.my_utils import clean_path
|
||||||
|
|
||||||
# inp_text=sys.argv[1]
|
# inp_text=sys.argv[1]
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import sys,os
|
import sys
|
||||||
|
import os
|
||||||
inp_text= os.environ.get("inp_text")
|
inp_text= os.environ.get("inp_text")
|
||||||
inp_wav_dir= os.environ.get("inp_wav_dir")
|
inp_wav_dir= os.environ.get("inp_wav_dir")
|
||||||
exp_name= os.environ.get("exp_name")
|
exp_name= os.environ.get("exp_name")
|
||||||
@ -14,7 +15,8 @@ cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")
|
|||||||
import torch
|
import torch
|
||||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||||
|
|
||||||
import pdb,traceback,numpy as np,logging
|
import traceback
|
||||||
|
import numpy as np
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
import librosa
|
import librosa
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
|
@ -26,17 +26,13 @@ else:
|
|||||||
version = "v3"
|
version = "v3"
|
||||||
import torch
|
import torch
|
||||||
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||||
import math, traceback
|
import traceback
|
||||||
import multiprocessing
|
import sys
|
||||||
import sys, pdb
|
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
from random import shuffle
|
import logging
|
||||||
import torch.multiprocessing as mp
|
import utils
|
||||||
from glob import glob
|
|
||||||
from tqdm import tqdm
|
|
||||||
import logging, librosa, utils
|
|
||||||
if version!="v3":
|
if version!="v3":
|
||||||
from module.models import SynthesizerTrn
|
from module.models import SynthesizerTrn
|
||||||
else:
|
else:
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
import traceback
|
import traceback
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from time import time as ttime
|
from time import time as ttime
|
||||||
import shutil,os
|
import shutil
|
||||||
|
import os
|
||||||
import torch
|
import torch
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
|
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
|
||||||
import os
|
import os
|
||||||
import pdb
|
|
||||||
|
|
||||||
if "_CUDA_VISIBLE_DEVICES" in os.environ:
|
if "_CUDA_VISIBLE_DEVICES" in os.environ:
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
|
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
|
||||||
@ -8,7 +7,8 @@ import argparse
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import torch, platform
|
import torch
|
||||||
|
import platform
|
||||||
from pytorch_lightning import seed_everything
|
from pytorch_lightning import seed_everything
|
||||||
from pytorch_lightning import Trainer
|
from pytorch_lightning import Trainer
|
||||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||||
@ -24,8 +24,6 @@ torch.set_float32_matmul_precision("high")
|
|||||||
from AR.utils import get_newest_ckpt
|
from AR.utils import get_newest_ckpt
|
||||||
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from time import time as ttime
|
|
||||||
import shutil
|
|
||||||
from process_ckpt import my_save
|
from process_ckpt import my_save
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import utils, os
|
import utils
|
||||||
|
import os
|
||||||
hps = utils.get_hparams(stage=2)
|
hps = utils.get_hparams(stage=2)
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||||
import torch
|
import torch
|
||||||
@ -8,11 +9,11 @@ from torch.nn import functional as F
|
|||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
import torch.multiprocessing as mp
|
import torch.multiprocessing as mp
|
||||||
import torch.distributed as dist, traceback
|
import torch.distributed as dist
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||||
from torch.cuda.amp import autocast, GradScaler
|
from torch.cuda.amp import autocast, GradScaler
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import logging, traceback
|
import logging
|
||||||
|
|
||||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import utils, os
|
import utils
|
||||||
|
import os
|
||||||
hps = utils.get_hparams(stage=2)
|
hps = utils.get_hparams(stage=2)
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||||
import torch
|
import torch
|
||||||
from torch.nn import functional as F
|
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
import torch.multiprocessing as mp
|
import torch.multiprocessing as mp
|
||||||
import torch.distributed as dist, traceback
|
import torch.distributed as dist
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||||
from torch.cuda.amp import autocast, GradScaler
|
from torch.cuda.amp import autocast, GradScaler
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import logging, traceback
|
import logging
|
||||||
|
|
||||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||||
@ -27,10 +27,7 @@ from module.data_utils import (
|
|||||||
)
|
)
|
||||||
from module.models import (
|
from module.models import (
|
||||||
SynthesizerTrnV3 as SynthesizerTrn,
|
SynthesizerTrnV3 as SynthesizerTrn,
|
||||||
MultiPeriodDiscriminator,
|
|
||||||
)
|
)
|
||||||
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
|
|
||||||
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
|
||||||
from process_ckpt import savee
|
from process_ckpt import savee
|
||||||
|
|
||||||
torch.backends.cudnn.benchmark = False
|
torch.backends.cudnn.benchmark = False
|
||||||
|
@ -1,25 +1,25 @@
|
|||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import utils, os
|
import utils
|
||||||
|
import os
|
||||||
hps = utils.get_hparams(stage=2)
|
hps = utils.get_hparams(stage=2)
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
||||||
import torch
|
import torch
|
||||||
from torch.nn import functional as F
|
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
import torch.multiprocessing as mp
|
import torch.multiprocessing as mp
|
||||||
import torch.distributed as dist, traceback
|
import torch.distributed as dist
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||||
from torch.cuda.amp import autocast, GradScaler
|
from torch.cuda.amp import autocast, GradScaler
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import logging, traceback
|
import logging
|
||||||
|
|
||||||
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
||||||
logging.getLogger("h5py").setLevel(logging.INFO)
|
logging.getLogger("h5py").setLevel(logging.INFO)
|
||||||
logging.getLogger("numba").setLevel(logging.INFO)
|
logging.getLogger("numba").setLevel(logging.INFO)
|
||||||
from random import randint
|
from random import randint
|
||||||
from module import commons
|
from module import commons
|
||||||
from peft import LoraConfig, PeftModel, get_peft_model
|
from peft import LoraConfig, get_peft_model
|
||||||
from module.data_utils import (
|
from module.data_utils import (
|
||||||
TextAudioSpeakerLoaderV3 as TextAudioSpeakerLoader,
|
TextAudioSpeakerLoaderV3 as TextAudioSpeakerLoader,
|
||||||
TextAudioSpeakerCollateV3 as TextAudioSpeakerCollate,
|
TextAudioSpeakerCollateV3 as TextAudioSpeakerCollate,
|
||||||
@ -27,10 +27,7 @@ from module.data_utils import (
|
|||||||
)
|
)
|
||||||
from module.models import (
|
from module.models import (
|
||||||
SynthesizerTrnV3 as SynthesizerTrn,
|
SynthesizerTrnV3 as SynthesizerTrn,
|
||||||
MultiPeriodDiscriminator,
|
|
||||||
)
|
)
|
||||||
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
|
|
||||||
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
|
||||||
from process_ckpt import savee
|
from process_ckpt import savee
|
||||||
from collections import OrderedDict as od
|
from collections import OrderedDict as od
|
||||||
torch.backends.cudnn.benchmark = False
|
torch.backends.cudnn.benchmark = False
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
|
# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
|
||||||
|
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
import cn2an
|
import cn2an
|
||||||
import ToJyutping
|
import ToJyutping
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
import pdb
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import cn2an
|
import cn2an
|
||||||
@ -17,7 +16,8 @@ pinyin_to_symbol_map = {
|
|||||||
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
||||||
}
|
}
|
||||||
|
|
||||||
import jieba_fast, logging
|
import jieba_fast
|
||||||
|
import logging
|
||||||
jieba_fast.setLogLevel(logging.CRITICAL)
|
jieba_fast.setLogLevel(logging.CRITICAL)
|
||||||
import jieba_fast.posseg as psg
|
import jieba_fast.posseg as psg
|
||||||
|
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
import pdb
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import cn2an
|
import cn2an
|
||||||
from pypinyin import lazy_pinyin, Style
|
from pypinyin import lazy_pinyin, Style
|
||||||
from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
|
from pypinyin.contrib.tone_convert import to_finals_tone3, to_initials
|
||||||
|
|
||||||
from text.symbols import punctuation
|
from text.symbols import punctuation
|
||||||
from text.tone_sandhi import ToneSandhi
|
from text.tone_sandhi import ToneSandhi
|
||||||
@ -18,7 +17,8 @@ pinyin_to_symbol_map = {
|
|||||||
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
||||||
}
|
}
|
||||||
|
|
||||||
import jieba_fast, logging
|
import jieba_fast
|
||||||
|
import logging
|
||||||
jieba_fast.setLogLevel(logging.CRITICAL)
|
jieba_fast.setLogLevel(logging.CRITICAL)
|
||||||
import jieba_fast.posseg as psg
|
import jieba_fast.posseg as psg
|
||||||
|
|
||||||
|
@ -8,7 +8,6 @@ from text.symbols import punctuation
|
|||||||
|
|
||||||
from text.symbols2 import symbols
|
from text.symbols2 import symbols
|
||||||
|
|
||||||
import unicodedata
|
|
||||||
from builtins import str as unicode
|
from builtins import str as unicode
|
||||||
from text.en_normalization.expend import normalize
|
from text.en_normalization.expend import normalize
|
||||||
from nltk.tokenize import TweetTokenizer
|
from nltk.tokenize import TweetTokenizer
|
||||||
|
@ -5,7 +5,8 @@ import warnings
|
|||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import zipfile,requests
|
import zipfile
|
||||||
|
import requests
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from typing import List
|
from typing import List
|
||||||
|
@ -58,7 +58,7 @@ try:
|
|||||||
|
|
||||||
if os.path.exists(USERDIC_BIN_PATH):
|
if os.path.exists(USERDIC_BIN_PATH):
|
||||||
pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
|
pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
# print(e)
|
# print(e)
|
||||||
import pyopenjtalk
|
import pyopenjtalk
|
||||||
# failed to load user dictionary, ignore.
|
# failed to load user dictionary, ignore.
|
||||||
|
@ -16,7 +16,7 @@ if os.name == 'nt':
|
|||||||
spam_spec = importlib.util.find_spec("eunjeon")
|
spam_spec = importlib.util.find_spec("eunjeon")
|
||||||
non_found = spam_spec is None
|
non_found = spam_spec is None
|
||||||
if non_found:
|
if non_found:
|
||||||
print(f'you have to install eunjeon. install it...')
|
print('you have to install eunjeon. install it...')
|
||||||
else:
|
else:
|
||||||
installpath = spam_spec.submodule_search_locations[0]
|
installpath = spam_spec.submodule_search_locations[0]
|
||||||
if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):
|
if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
||||||
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
||||||
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
punctuation = ["!", "?", "…", ",", "."] # @是SP停顿
|
||||||
|
@ -9,9 +9,7 @@ import traceback
|
|||||||
|
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.io.wavfile import read
|
|
||||||
import torch
|
import torch
|
||||||
import logging
|
|
||||||
|
|
||||||
logging.getLogger("numba").setLevel(logging.ERROR)
|
logging.getLogger("numba").setLevel(logging.ERROR)
|
||||||
logging.getLogger("matplotlib").setLevel(logging.ERROR)
|
logging.getLogger("matplotlib").setLevel(logging.ERROR)
|
||||||
@ -132,7 +130,6 @@ def plot_spectrogram_to_numpy(spectrogram):
|
|||||||
mpl_logger = logging.getLogger("matplotlib")
|
mpl_logger = logging.getLogger("matplotlib")
|
||||||
mpl_logger.setLevel(logging.WARNING)
|
mpl_logger.setLevel(logging.WARNING)
|
||||||
import matplotlib.pylab as plt
|
import matplotlib.pylab as plt
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(10, 2))
|
fig, ax = plt.subplots(figsize=(10, 2))
|
||||||
im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
|
im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
|
||||||
@ -158,7 +155,6 @@ def plot_alignment_to_numpy(alignment, info=None):
|
|||||||
mpl_logger = logging.getLogger("matplotlib")
|
mpl_logger = logging.getLogger("matplotlib")
|
||||||
mpl_logger.setLevel(logging.WARNING)
|
mpl_logger.setLevel(logging.WARNING)
|
||||||
import matplotlib.pylab as plt
|
import matplotlib.pylab as plt
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(6, 4))
|
fig, ax = plt.subplots(figsize=(6, 4))
|
||||||
im = ax.imshow(
|
im = ax.imshow(
|
||||||
|
19
api.py
19
api.py
@ -142,7 +142,8 @@ RESP: 无
|
|||||||
|
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os,re
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
@ -152,10 +153,11 @@ sys.path.append("%s/GPT_SoVITS" % (now_dir))
|
|||||||
import signal
|
import signal
|
||||||
from text.LangSegmenter import LangSegmenter
|
from text.LangSegmenter import LangSegmenter
|
||||||
from time import time as ttime
|
from time import time as ttime
|
||||||
import torch, torchaudio
|
import torch
|
||||||
|
import torchaudio
|
||||||
import librosa
|
import librosa
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
from fastapi import FastAPI, Request, Query, HTTPException
|
from fastapi import FastAPI, Request, Query
|
||||||
from fastapi.responses import StreamingResponse, JSONResponse
|
from fastapi.responses import StreamingResponse, JSONResponse
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
@ -163,12 +165,11 @@ import numpy as np
|
|||||||
from feature_extractor import cnhubert
|
from feature_extractor import cnhubert
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||||
from peft import LoraConfig, PeftModel, get_peft_model
|
from peft import LoraConfig, get_peft_model
|
||||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||||
from text import cleaned_text_to_sequence
|
from text import cleaned_text_to_sequence
|
||||||
from text.cleaner import clean_text
|
from text.cleaner import clean_text
|
||||||
from module.mel_processing import spectrogram_torch
|
from module.mel_processing import spectrogram_torch
|
||||||
from tools.my_utils import load_audio
|
|
||||||
import config as global_config
|
import config as global_config
|
||||||
import logging
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -221,7 +222,7 @@ def resample(audio_tensor, sr0):
|
|||||||
return resample_transform_dict[sr0](audio_tensor)
|
return resample_transform_dict[sr0](audio_tensor)
|
||||||
|
|
||||||
|
|
||||||
from module.mel_processing import spectrogram_torch,mel_spectrogram_torch
|
from module.mel_processing import mel_spectrogram_torch
|
||||||
spec_min = -12
|
spec_min = -12
|
||||||
spec_max = 2
|
spec_max = 2
|
||||||
def norm_spec(x):
|
def norm_spec(x):
|
||||||
@ -860,7 +861,7 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cu
|
|||||||
if not default_refer.is_ready():
|
if not default_refer.is_ready():
|
||||||
return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)
|
return JSONResponse({"code": 400, "message": "未指定参考音频且接口无预设"}, status_code=400)
|
||||||
|
|
||||||
if not sample_steps in [4,8,16,32]:
|
if sample_steps not in [4,8,16,32]:
|
||||||
sample_steps = 32
|
sample_steps = 32
|
||||||
|
|
||||||
if cut_punc == None:
|
if cut_punc == None:
|
||||||
@ -990,10 +991,10 @@ logger.info(f"编码格式: {media_type}")
|
|||||||
# 音频数据类型
|
# 音频数据类型
|
||||||
if args.sub_type.lower() == 'int32':
|
if args.sub_type.lower() == 'int32':
|
||||||
is_int32 = True
|
is_int32 = True
|
||||||
logger.info(f"数据类型: int32")
|
logger.info("数据类型: int32")
|
||||||
else:
|
else:
|
||||||
is_int32 = False
|
is_int32 = False
|
||||||
logger.info(f"数据类型: int16")
|
logger.info("数据类型: int16")
|
||||||
|
|
||||||
# 初始化模型
|
# 初始化模型
|
||||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||||
|
14
api_v2.py
14
api_v2.py
@ -112,15 +112,13 @@ import wave
|
|||||||
import signal
|
import signal
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
from fastapi import FastAPI, Request, HTTPException, Response
|
from fastapi import FastAPI, Response
|
||||||
from fastapi.responses import StreamingResponse, JSONResponse
|
from fastapi.responses import StreamingResponse, JSONResponse
|
||||||
from fastapi import FastAPI, UploadFile, File
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
||||||
from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import get_method_names as get_cut_method_names
|
from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import get_method_names as get_cut_method_names
|
||||||
from fastapi.responses import StreamingResponse
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
# print(sys.path)
|
# print(sys.path)
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
@ -337,7 +335,7 @@ async def tts_handle(req:dict):
|
|||||||
audio_data = pack_audio(BytesIO(), audio_data, sr, media_type).getvalue()
|
audio_data = pack_audio(BytesIO(), audio_data, sr, media_type).getvalue()
|
||||||
return Response(audio_data, media_type=f"audio/{media_type}")
|
return Response(audio_data, media_type=f"audio/{media_type}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JSONResponse(status_code=400, content={"message": f"tts failed", "Exception": str(e)})
|
return JSONResponse(status_code=400, content={"message": "tts failed", "Exception": str(e)})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -415,7 +413,7 @@ async def set_refer_aduio(refer_audio_path: str = None):
|
|||||||
try:
|
try:
|
||||||
tts_pipeline.set_ref_audio(refer_audio_path)
|
tts_pipeline.set_ref_audio(refer_audio_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JSONResponse(status_code=400, content={"message": f"set refer audio failed", "Exception": str(e)})
|
return JSONResponse(status_code=400, content={"message": "set refer audio failed", "Exception": str(e)})
|
||||||
return JSONResponse(status_code=200, content={"message": "success"})
|
return JSONResponse(status_code=200, content={"message": "success"})
|
||||||
|
|
||||||
|
|
||||||
@ -444,7 +442,7 @@ async def set_gpt_weights(weights_path: str = None):
|
|||||||
return JSONResponse(status_code=400, content={"message": "gpt weight path is required"})
|
return JSONResponse(status_code=400, content={"message": "gpt weight path is required"})
|
||||||
tts_pipeline.init_t2s_weights(weights_path)
|
tts_pipeline.init_t2s_weights(weights_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JSONResponse(status_code=400, content={"message": f"change gpt weight failed", "Exception": str(e)})
|
return JSONResponse(status_code=400, content={"message": "change gpt weight failed", "Exception": str(e)})
|
||||||
|
|
||||||
return JSONResponse(status_code=200, content={"message": "success"})
|
return JSONResponse(status_code=200, content={"message": "success"})
|
||||||
|
|
||||||
@ -456,7 +454,7 @@ async def set_sovits_weights(weights_path: str = None):
|
|||||||
return JSONResponse(status_code=400, content={"message": "sovits weight path is required"})
|
return JSONResponse(status_code=400, content={"message": "sovits weight path is required"})
|
||||||
tts_pipeline.init_vits_weights(weights_path)
|
tts_pipeline.init_vits_weights(weights_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JSONResponse(status_code=400, content={"message": f"change sovits weight failed", "Exception": str(e)})
|
return JSONResponse(status_code=400, content={"message": "change sovits weight failed", "Exception": str(e)})
|
||||||
return JSONResponse(status_code=200, content={"message": "success"})
|
return JSONResponse(status_code=200, content={"message": "success"})
|
||||||
|
|
||||||
|
|
||||||
@ -466,7 +464,7 @@ if __name__ == "__main__":
|
|||||||
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
||||||
host = None
|
host = None
|
||||||
uvicorn.run(app=APP, host=host, port=port, workers=1)
|
uvicorn.run(app=APP, host=host, port=port, workers=1)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
os.kill(os.getpid(), signal.SIGTERM)
|
os.kill(os.getpid(), signal.SIGTERM)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -143,11 +143,7 @@
|
|||||||
"# 开启推理页面\n",
|
"# 开启推理页面\n",
|
||||||
"%cd /kaggle/working/GPT-SoVITS/\n",
|
"%cd /kaggle/working/GPT-SoVITS/\n",
|
||||||
"!npm install -g localtunnel\n",
|
"!npm install -g localtunnel\n",
|
||||||
"import subprocess\n",
|
|
||||||
"import threading\n",
|
"import threading\n",
|
||||||
"import time\n",
|
|
||||||
"import socket\n",
|
|
||||||
"import urllib.request\n",
|
|
||||||
"def iframe_thread(port):\n",
|
"def iframe_thread(port):\n",
|
||||||
" while True:\n",
|
" while True:\n",
|
||||||
" time.sleep(0.5)\n",
|
" time.sleep(0.5)\n",
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn.utils import weight_norm, spectral_norm
|
||||||
# from utils import init_weights, get_padding
|
# from utils import init_weights, get_padding
|
||||||
def get_padding(kernel_size, dilation=1):
|
def get_padding(kernel_size, dilation=1):
|
||||||
return int((kernel_size*dilation - dilation)/2)
|
return int((kernel_size*dilation - dilation)/2)
|
||||||
|
@ -1,24 +1,15 @@
|
|||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
import sys,os
|
import sys
|
||||||
import traceback
|
import os
|
||||||
AP_BWE_main_dir_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'AP_BWE_main')
|
AP_BWE_main_dir_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'AP_BWE_main')
|
||||||
sys.path.append(AP_BWE_main_dir_path)
|
sys.path.append(AP_BWE_main_dir_path)
|
||||||
import glob
|
|
||||||
import argparse
|
|
||||||
import json
|
import json
|
||||||
from re import S
|
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
|
||||||
import torchaudio
|
|
||||||
import time
|
|
||||||
import torchaudio.functional as aF
|
import torchaudio.functional as aF
|
||||||
# from attrdict import AttrDict####will be bug in py3.10
|
# from attrdict import AttrDict####will be bug in py3.10
|
||||||
|
|
||||||
from datasets1.dataset import amp_pha_stft, amp_pha_istft
|
from datasets1.dataset import amp_pha_stft, amp_pha_istft
|
||||||
from models.model import APNet_BWE_Model
|
from models.model import APNet_BWE_Model
|
||||||
import soundfile as sf
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from rich.progress import track
|
|
||||||
|
|
||||||
class AP_BWE():
|
class AP_BWE():
|
||||||
def __init__(self,device,DictToAttrRecursive,checkpoint_file=None):
|
def __init__(self,device,DictToAttrRecursive,checkpoint_file=None):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import os,argparse
|
import os
|
||||||
|
import argparse
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from modelscope.pipelines import pipeline
|
from modelscope.pipelines import pipeline
|
||||||
|
@ -116,7 +116,7 @@ def update_i18n_json(json_file, standard_keys):
|
|||||||
if num_miss_translation > 0:
|
if num_miss_translation > 0:
|
||||||
print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
|
print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
|
||||||
else:
|
else:
|
||||||
print(f"\033[32m[Passed] All Keys Translated\033[0m")
|
print("\033[32m[Passed] All Keys Translated\033[0m")
|
||||||
# 将处理后的结果写入 JSON 文件
|
# 将处理后的结果写入 JSON 文件
|
||||||
with open(json_file, "w", encoding="utf-8") as f:
|
with open(json_file, "w", encoding="utf-8") as f:
|
||||||
json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=SORT_KEYS)
|
json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=SORT_KEYS)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import platform,os,traceback
|
import os
|
||||||
|
import traceback
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
@ -21,7 +22,7 @@ def load_audio(file, sr):
|
|||||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
raise RuntimeError(i18n("音频加载失败"))
|
raise RuntimeError(i18n("音频加载失败"))
|
||||||
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
import os,sys,numpy as np
|
import os
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
import traceback
|
import traceback
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
# parent_directory = os.path.dirname(os.path.abspath(__file__))
|
# parent_directory = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
@ -249,7 +249,7 @@ def main():
|
|||||||
soundfile.write(
|
soundfile.write(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
out,
|
out,
|
||||||
f"%s_%d.wav"
|
"%s_%d.wav"
|
||||||
% (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
|
% (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
|
||||||
),
|
),
|
||||||
chunk,
|
chunk,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import argparse,os
|
import argparse
|
||||||
|
import os
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import os
|
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -70,7 +70,7 @@ def b_change_index(index, batch):
|
|||||||
# )
|
# )
|
||||||
{
|
{
|
||||||
"__type__": "update",
|
"__type__": "update",
|
||||||
"label": f"Text",
|
"label": "Text",
|
||||||
"value": ""
|
"value": ""
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -54,7 +54,7 @@ class Attend(nn.Module):
|
|||||||
|
|
||||||
# similarity
|
# similarity
|
||||||
|
|
||||||
sim = einsum(f"b h i d, b h j d -> b h i j", q, k) * scale
|
sim = einsum("b h i d, b h j d -> b h i j", q, k) * scale
|
||||||
|
|
||||||
# attention
|
# attention
|
||||||
|
|
||||||
@ -63,6 +63,6 @@ class Attend(nn.Module):
|
|||||||
|
|
||||||
# aggregate values
|
# aggregate values
|
||||||
|
|
||||||
out = einsum(f"b h i j, b h j d -> b h i d", attn, v)
|
out = einsum("b h i j, b h j d -> b h i d", attn, v)
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn, einsum, Tensor
|
from torch import nn
|
||||||
from torch.nn import Module, ModuleList
|
from torch.nn import Module, ModuleList
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from bs_roformer.attend import Attend
|
from bs_roformer.attend import Attend
|
||||||
from torch.utils.checkpoint import checkpoint
|
from torch.utils.checkpoint import checkpoint
|
||||||
|
|
||||||
from typing import Tuple, Optional, List, Callable
|
from typing import Tuple, Optional, Callable
|
||||||
# from beartype.typing import Tuple, Optional, List, Callable
|
# from beartype.typing import Tuple, Optional, List, Callable
|
||||||
# from beartype import beartype
|
# from beartype import beartype
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn, einsum, Tensor
|
from torch import nn
|
||||||
from torch.nn import Module, ModuleList
|
from torch.nn import Module, ModuleList
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from bs_roformer.attend import Attend
|
from bs_roformer.attend import Attend
|
||||||
from torch.utils.checkpoint import checkpoint
|
from torch.utils.checkpoint import checkpoint
|
||||||
|
|
||||||
from typing import Tuple, Optional, List, Callable
|
from typing import Tuple, Optional, Callable
|
||||||
# from beartype.typing import Tuple, Optional, List, Callable
|
# from beartype.typing import Tuple, Optional, List, Callable
|
||||||
# from beartype import beartype
|
# from beartype import beartype
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os,sys
|
import os
|
||||||
parent_directory = os.path.dirname(os.path.abspath(__file__))
|
parent_directory = os.path.dirname(os.path.abspath(__file__))
|
||||||
import logging,pdb
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
import librosa
|
import librosa
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
import traceback,gradio as gr
|
import traceback
|
||||||
|
import gradio as gr
|
||||||
import logging
|
import logging
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
from tools.my_utils import clean_path
|
from tools.my_utils import clean_path
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
import librosa,ffmpeg
|
import ffmpeg
|
||||||
import soundfile as sf
|
|
||||||
import torch
|
import torch
|
||||||
import sys
|
import sys
|
||||||
from mdxnet import MDXNetDereverb
|
from mdxnet import MDXNetDereverb
|
||||||
|
16
webui.py
16
webui.py
@ -1,4 +1,5 @@
|
|||||||
import os,sys
|
import os
|
||||||
|
import sys
|
||||||
if len(sys.argv)==1:sys.argv.append('v2')
|
if len(sys.argv)==1:sys.argv.append('v2')
|
||||||
version="v1"if sys.argv[1]=="v1" else"v2"
|
version="v1"if sys.argv[1]=="v1" else"v2"
|
||||||
os.environ["version"]=version
|
os.environ["version"]=version
|
||||||
@ -6,7 +7,11 @@ now_dir = os.getcwd()
|
|||||||
sys.path.insert(0, now_dir)
|
sys.path.insert(0, now_dir)
|
||||||
import warnings
|
import warnings
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
import json,yaml,torch,pdb,re,shutil
|
import json
|
||||||
|
import yaml
|
||||||
|
import torch
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
import platform
|
import platform
|
||||||
import psutil
|
import psutil
|
||||||
import signal
|
import signal
|
||||||
@ -45,21 +50,18 @@ for site_packages_root in site_packages_roots:
|
|||||||
% (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir)
|
% (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir)
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
except PermissionError as e:
|
except PermissionError:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
from tools import my_utils
|
from tools import my_utils
|
||||||
import shutil
|
import shutil
|
||||||
import pdb
|
|
||||||
import subprocess
|
import subprocess
|
||||||
from subprocess import Popen
|
from subprocess import Popen
|
||||||
import signal
|
|
||||||
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
|
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
|
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
|
||||||
os.environ["language"]=language
|
os.environ["language"]=language
|
||||||
i18n = I18nAuto(language=language)
|
i18n = I18nAuto(language=language)
|
||||||
from scipy.io import wavfile
|
from tools.my_utils import check_for_existance, check_details
|
||||||
from tools.my_utils import load_audio, check_for_existance, check_details
|
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
|
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user