mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Merge remote-tracking branch 'upstream/main' into Ruff-Format
This commit is contained in:
commit
27ee75e47b
@ -1,38 +1,40 @@
|
||||
from copy import deepcopy
|
||||
import gc
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
import gc
|
||||
import random
|
||||
import traceback
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from copy import deepcopy
|
||||
|
||||
import torchaudio
|
||||
from tqdm import tqdm
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
import ffmpeg
|
||||
import os
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import ffmpeg
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import yaml
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
from tools.audio_sr import AP_BWE
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from BigVGAN.bigvgan import BigVGAN
|
||||
from feature_extractor.cnhubert import CNHubert
|
||||
from module.mel_processing import mel_spectrogram_torch, spectrogram_torch
|
||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||
from peft import LoraConfig, get_peft_model
|
||||
import librosa
|
||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
|
||||
from tools.audio_sr import AP_BWE
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from tools.my_utils import load_audio
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from TTS_infer_pack.text_segmentation_method import splits
|
||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||
from BigVGAN.bigvgan import BigVGAN
|
||||
from module.mel_processing import mel_spectrogram_torch
|
||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||
|
||||
language = os.environ.get("language", "Auto")
|
||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
@ -461,8 +463,6 @@ class TTS:
|
||||
n_speakers=self.configs.n_speakers,
|
||||
**kwargs,
|
||||
)
|
||||
if hasattr(vits_model, "enc_q"):
|
||||
del vits_model.enc_q
|
||||
self.configs.is_v3_synthesizer = False
|
||||
else:
|
||||
vits_model = SynthesizerTrnV3(
|
||||
@ -473,6 +473,8 @@ class TTS:
|
||||
)
|
||||
self.configs.is_v3_synthesizer = True
|
||||
self.init_bigvgan()
|
||||
if "pretrained" not in weights_path and hasattr(vits_model, "enc_q"):
|
||||
del vits_model.enc_q
|
||||
|
||||
if if_lora_v3 == False:
|
||||
print(
|
||||
|
@ -9,9 +9,10 @@
|
||||
|
||||
import logging
|
||||
import traceback
|
||||
import torchaudio
|
||||
import warnings
|
||||
|
||||
import torchaudio
|
||||
|
||||
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||
@ -22,10 +23,11 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
||||
warnings.simplefilter(action="ignore", category=FutureWarning)
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
|
||||
import torch
|
||||
from text.LangSegmenter import LangSegmenter
|
||||
|
||||
@ -91,16 +93,17 @@ is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
||||
# is_half=False
|
||||
punctuation = set(["!", "?", "…", ",", ".", "-", " "])
|
||||
import gradio as gr
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
import numpy as np
|
||||
import librosa
|
||||
import numpy as np
|
||||
from feature_extractor import cnhubert
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
|
||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||
|
||||
from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||
import random
|
||||
|
||||
from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||
|
||||
|
||||
def set_seed(seed):
|
||||
if seed == -1:
|
||||
@ -115,12 +118,14 @@ def set_seed(seed):
|
||||
|
||||
# set_seed(42)
|
||||
|
||||
from time import time as ttime
|
||||
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from peft import LoraConfig, get_peft_model
|
||||
from text import cleaned_text_to_sequence
|
||||
from text.cleaner import clean_text
|
||||
from time import time as ttime
|
||||
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from peft import LoraConfig, get_peft_model
|
||||
|
||||
language = os.environ.get("language", "Auto")
|
||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
@ -265,10 +270,11 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
||||
prompt_language_update,
|
||||
text_update,
|
||||
text_language_update,
|
||||
{"__type__": "update", "visible": visible_sample_steps},
|
||||
{"__type__": "update", "visible": visible_sample_steps, "value": 32},
|
||||
{"__type__": "update", "visible": visible_inp_refs},
|
||||
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
||||
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
||||
{"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
|
||||
)
|
||||
|
||||
dict_s2 = load_sovits_new(sovits_path)
|
||||
@ -329,6 +335,19 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
||||
# torch.save(vq_model.state_dict(),"merge_win.pth")
|
||||
vq_model.eval()
|
||||
|
||||
yield (
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
prompt_text_update,
|
||||
prompt_language_update,
|
||||
text_update,
|
||||
text_language_update,
|
||||
{"__type__": "update", "visible": visible_sample_steps, "value": 32},
|
||||
{"__type__": "update", "visible": visible_inp_refs},
|
||||
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
||||
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
||||
{"__type__": "update", "value": i18n("合成语音"), "interactive": True},
|
||||
)
|
||||
with open("./weight.json") as f:
|
||||
data = f.read()
|
||||
data = json.loads(data)
|
||||
@ -530,7 +549,7 @@ def get_phones_and_bert(text, language, version, final=False):
|
||||
return phones, bert.to(dtype), norm_text
|
||||
|
||||
|
||||
from module.mel_processing import spectrogram_torch, mel_spectrogram_torch
|
||||
from module.mel_processing import mel_spectrogram_torch, spectrogram_torch
|
||||
|
||||
spec_min = -12
|
||||
spec_max = 2
|
||||
@ -1020,7 +1039,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")
|
||||
+ i18n("v3暂不支持该模式,使用了会报错。"),
|
||||
value=False,
|
||||
interactive=True,
|
||||
interactive=True if model_version != "v3" else False,
|
||||
show_label=True,
|
||||
scale=1,
|
||||
)
|
||||
@ -1137,7 +1156,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
# phoneme=gr.Textbox(label=i18n("音素框"), value="")
|
||||
# get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
|
||||
with gr.Row():
|
||||
inference_button = gr.Button(i18n("合成语音"), variant="primary", size="lg", scale=25)
|
||||
inference_button = gr.Button(value=i18n("合成语音"), variant="primary", size="lg", scale=25)
|
||||
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
||||
|
||||
inference_button.click(
|
||||
@ -1176,6 +1195,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
inp_refs,
|
||||
ref_text_free,
|
||||
if_sr_Checkbox,
|
||||
inference_button,
|
||||
],
|
||||
)
|
||||
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
||||
|
@ -7,11 +7,11 @@
|
||||
全部按日文识别
|
||||
"""
|
||||
|
||||
import random
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
|
||||
now_dir = os.getcwd()
|
||||
@ -47,11 +47,13 @@ gpt_path = os.environ.get("gpt_path", None)
|
||||
sovits_path = os.environ.get("sovits_path", None)
|
||||
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
|
||||
bert_path = os.environ.get("bert_path", None)
|
||||
version = os.environ.get("version", "v2")
|
||||
version = model_version = os.environ.get("version", "v2")
|
||||
|
||||
import gradio as gr
|
||||
from TTS_infer_pack.TTS import TTS, TTS_Config, NO_PROMPT_ERROR
|
||||
from inference_webui import DictToAttrRecursive
|
||||
from TTS_infer_pack.text_segmentation_method import get_method
|
||||
from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config
|
||||
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
|
||||
language = os.environ.get("language", "Auto")
|
||||
@ -254,21 +256,18 @@ def get_weights_names(GPT_weight_root, SoVITS_weight_root):
|
||||
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
||||
|
||||
|
||||
from process_ckpt import get_sovits_version_from_path_fast
|
||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||
|
||||
|
||||
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
|
||||
global version, dict_language
|
||||
global version, model_version, dict_language, if_lora_v3
|
||||
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
|
||||
|
||||
if if_lora_v3 and not os.path.exists(path_sovits_v3):
|
||||
info = path_sovits_v3 + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
|
||||
# print(sovits_path,version, model_version, if_lora_v3)
|
||||
if if_lora_v3 == True and is_exist_s2gv3 == False: #
|
||||
info = "GPT_SoVITS/pretrained_models/s2Gv3.pth" + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
|
||||
gr.Warning(info)
|
||||
raise FileExistsError(info)
|
||||
|
||||
tts_pipeline.init_vits_weights(sovits_path)
|
||||
|
||||
dict_language = dict_language_v1 if tts_pipeline.configs.version == "v1" else dict_language_v2
|
||||
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
||||
if prompt_language is not None and text_language is not None:
|
||||
if prompt_language in list(dict_language.keys()):
|
||||
prompt_text_update, prompt_language_update = (
|
||||
@ -289,6 +288,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
||||
else:
|
||||
visible_sample_steps = False
|
||||
visible_inp_refs = True
|
||||
# prompt_language,text_language,prompt_text,prompt_language,text,text_language,inp_refs,ref_text_free,
|
||||
yield (
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
@ -296,12 +296,25 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
||||
prompt_language_update,
|
||||
text_update,
|
||||
text_language_update,
|
||||
{"__type__": "update", "visible": visible_sample_steps},
|
||||
{"__type__": "update", "interactive": visible_sample_steps, "value": 32},
|
||||
{"__type__": "update", "visible": visible_inp_refs},
|
||||
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
||||
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
||||
{"__type__": "update", "interactive": True if model_version != "v3" else False},
|
||||
{"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
|
||||
)
|
||||
|
||||
tts_pipeline.init_vits_weights(sovits_path)
|
||||
yield (
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||
prompt_text_update,
|
||||
prompt_language_update,
|
||||
text_update,
|
||||
text_language_update,
|
||||
{"__type__": "update", "interactive": visible_sample_steps, "value": 32},
|
||||
{"__type__": "update", "visible": visible_inp_refs},
|
||||
{"__type__": "update", "interactive": True if model_version != "v3" else False},
|
||||
{"__type__": "update", "value": i18n("合成语音"), "interactive": True},
|
||||
)
|
||||
with open("./weight.json") as f:
|
||||
data = f.read()
|
||||
data = json.loads(data)
|
||||
@ -341,7 +354,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
||||
with gr.Row():
|
||||
inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
|
||||
inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"), file_count="multiple")
|
||||
inp_refs = gr.File(
|
||||
label=i18n("辅参考音频(可选多个,或不选)"),
|
||||
file_count="multiple",
|
||||
visible=True if model_version != "v3" else False,
|
||||
)
|
||||
prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
|
||||
with gr.Row():
|
||||
prompt_language = gr.Dropdown(
|
||||
@ -351,7 +368,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
ref_text_free = gr.Checkbox(
|
||||
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"),
|
||||
value=False,
|
||||
interactive=True,
|
||||
interactive=True if model_version != "v3" else False,
|
||||
show_label=True,
|
||||
)
|
||||
gr.Markdown(
|
||||
@ -465,8 +482,19 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
SoVITS_dropdown.change(
|
||||
change_sovits_weights,
|
||||
[SoVITS_dropdown, prompt_language, text_language],
|
||||
[prompt_language, text_language, prompt_text, prompt_language, text, text_language],
|
||||
)
|
||||
[
|
||||
prompt_language,
|
||||
text_language,
|
||||
prompt_text,
|
||||
prompt_language,
|
||||
text,
|
||||
text_language,
|
||||
sample_steps,
|
||||
inp_refs,
|
||||
ref_text_free,
|
||||
inference_button,
|
||||
],
|
||||
) #
|
||||
GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
|
||||
|
||||
with gr.Group():
|
||||
|
@ -63,7 +63,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
|
||||
extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
|
||||
extract_dir_new = os.path.join(parent_directory, "G2PWModel")
|
||||
print("Downloading g2pw model...")
|
||||
modelscope_url = "https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
|
||||
modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"#"https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
|
||||
with requests.get(modelscope_url, stream=True) as r:
|
||||
r.raise_for_status()
|
||||
with open(zip_dir, "wb") as f:
|
||||
|
@ -286,3 +286,17 @@ https://github.com/RVC-Boss/GPT-SoVITS/pull/2112 https://github.com/RVC-Boss/GPT
|
||||
修复短文本语种选择出错 https://github.com/RVC-Boss/GPT-SoVITS/pull/2122
|
||||
|
||||
修复v3sovits未传参以支持调节语速
|
||||
|
||||
### 202503
|
||||
|
||||
修复一批由依赖的库版本不对导致的问题 https://github.com/RVC-Boss/GPT-SoVITS/commit/6c468583c5566e5fbb4fb805e4cc89c403e997b8
|
||||
|
||||
修复模型加载异步逻辑 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa
|
||||
|
||||
修复其他若干bug
|
||||
|
||||
重点更新:
|
||||
|
||||
1-v3支持并行推理 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa
|
||||
|
||||
2-整合包修复onnxruntime GPU推理的支持,影响:(1)g2pw有个onnx模型原先是CPU推理现在用GPU,显著降低推理的CPU瓶颈 (2)foxjoy去混响模型现在可使用GPU推理
|
||||
|
10
webui.py
10
webui.py
@ -435,9 +435,9 @@ def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, so
|
||||
cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language)
|
||||
else:
|
||||
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
|
||||
#####v3暂不支持加速推理
|
||||
if version == "v3":
|
||||
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
|
||||
# #####v3暂不支持加速推理
|
||||
# if version=="v3":
|
||||
# cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language)
|
||||
if p_tts_inference is None:
|
||||
os.environ["gpt_path"] = gpt_path if "/" in gpt_path else "%s/%s" % (GPT_weight_root, gpt_path)
|
||||
os.environ["sovits_path"] = sovits_path if "/" in sovits_path else "%s/%s" % (SoVITS_weight_root, sovits_path)
|
||||
@ -1312,9 +1312,9 @@ def switch_version(version_):
|
||||
"value": False if not if_force_ckpt else True,
|
||||
"interactive": True if not if_force_ckpt else False,
|
||||
},
|
||||
{"__type__": "update", "interactive": False if version == "v3" else True, "value": False},
|
||||
{"__type__": "update", "interactive": True, "value": False},
|
||||
{"__type__": "update", "visible": True if version == "v3" else False},
|
||||
)
|
||||
) # {'__type__': 'update', "interactive": False if version == "v3" else True, "value": False}, \ ####batch infer
|
||||
|
||||
|
||||
if os.path.exists("GPT_SoVITS/text/G2PWModel"):
|
||||
|
Loading…
x
Reference in New Issue
Block a user