mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-06 03:57:44 +08:00
Merge remote-tracking branch 'upstream/main' into Ruff-Format
This commit is contained in:
commit
27ee75e47b
@ -1,38 +1,40 @@
|
|||||||
from copy import deepcopy
|
import gc
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import gc
|
|
||||||
import random
|
import random
|
||||||
import traceback
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
import torchaudio
|
import torchaudio
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
import ffmpeg
|
|
||||||
import os
|
import os
|
||||||
from typing import List, Tuple, Union
|
from typing import List, Tuple, Union
|
||||||
|
|
||||||
|
import ffmpeg
|
||||||
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import yaml
|
import yaml
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
|
||||||
from tools.audio_sr import AP_BWE
|
|
||||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||||
|
from BigVGAN.bigvgan import BigVGAN
|
||||||
from feature_extractor.cnhubert import CNHubert
|
from feature_extractor.cnhubert import CNHubert
|
||||||
|
from module.mel_processing import mel_spectrogram_torch, spectrogram_torch
|
||||||
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
from module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||||
from peft import LoraConfig, get_peft_model
|
from peft import LoraConfig, get_peft_model
|
||||||
import librosa
|
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||||
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
|
|
||||||
|
from tools.audio_sr import AP_BWE
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
from tools.my_utils import load_audio
|
from tools.my_utils import load_audio
|
||||||
from module.mel_processing import spectrogram_torch
|
|
||||||
from TTS_infer_pack.text_segmentation_method import splits
|
from TTS_infer_pack.text_segmentation_method import splits
|
||||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||||
from BigVGAN.bigvgan import BigVGAN
|
|
||||||
from module.mel_processing import mel_spectrogram_torch
|
|
||||||
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
|
||||||
|
|
||||||
language = os.environ.get("language", "Auto")
|
language = os.environ.get("language", "Auto")
|
||||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
@ -461,8 +463,6 @@ class TTS:
|
|||||||
n_speakers=self.configs.n_speakers,
|
n_speakers=self.configs.n_speakers,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
if hasattr(vits_model, "enc_q"):
|
|
||||||
del vits_model.enc_q
|
|
||||||
self.configs.is_v3_synthesizer = False
|
self.configs.is_v3_synthesizer = False
|
||||||
else:
|
else:
|
||||||
vits_model = SynthesizerTrnV3(
|
vits_model = SynthesizerTrnV3(
|
||||||
@ -473,6 +473,8 @@ class TTS:
|
|||||||
)
|
)
|
||||||
self.configs.is_v3_synthesizer = True
|
self.configs.is_v3_synthesizer = True
|
||||||
self.init_bigvgan()
|
self.init_bigvgan()
|
||||||
|
if "pretrained" not in weights_path and hasattr(vits_model, "enc_q"):
|
||||||
|
del vits_model.enc_q
|
||||||
|
|
||||||
if if_lora_v3 == False:
|
if if_lora_v3 == False:
|
||||||
print(
|
print(
|
||||||
|
@ -9,9 +9,10 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import traceback
|
import traceback
|
||||||
import torchaudio
|
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
import torchaudio
|
||||||
|
|
||||||
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
||||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||||
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||||
@ -22,10 +23,11 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
|||||||
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
||||||
warnings.simplefilter(action="ignore", category=FutureWarning)
|
warnings.simplefilter(action="ignore", category=FutureWarning)
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import json
|
|
||||||
import torch
|
import torch
|
||||||
from text.LangSegmenter import LangSegmenter
|
from text.LangSegmenter import LangSegmenter
|
||||||
|
|
||||||
@ -91,16 +93,17 @@ is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
|
|||||||
# is_half=False
|
# is_half=False
|
||||||
punctuation = set(["!", "?", "…", ",", ".", "-", " "])
|
punctuation = set(["!", "?", "…", ",", ".", "-", " "])
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
|
||||||
import numpy as np
|
|
||||||
import librosa
|
import librosa
|
||||||
|
import numpy as np
|
||||||
from feature_extractor import cnhubert
|
from feature_extractor import cnhubert
|
||||||
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
|
|
||||||
cnhubert.cnhubert_base_path = cnhubert_base_path
|
cnhubert.cnhubert_base_path = cnhubert_base_path
|
||||||
|
|
||||||
from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
|
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
|
||||||
|
|
||||||
|
|
||||||
def set_seed(seed):
|
def set_seed(seed):
|
||||||
if seed == -1:
|
if seed == -1:
|
||||||
@ -115,12 +118,14 @@ def set_seed(seed):
|
|||||||
|
|
||||||
# set_seed(42)
|
# set_seed(42)
|
||||||
|
|
||||||
|
from time import time as ttime
|
||||||
|
|
||||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||||
|
from peft import LoraConfig, get_peft_model
|
||||||
from text import cleaned_text_to_sequence
|
from text import cleaned_text_to_sequence
|
||||||
from text.cleaner import clean_text
|
from text.cleaner import clean_text
|
||||||
from time import time as ttime
|
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
from peft import LoraConfig, get_peft_model
|
|
||||||
|
|
||||||
language = os.environ.get("language", "Auto")
|
language = os.environ.get("language", "Auto")
|
||||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
@ -265,10 +270,11 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|||||||
prompt_language_update,
|
prompt_language_update,
|
||||||
text_update,
|
text_update,
|
||||||
text_language_update,
|
text_language_update,
|
||||||
{"__type__": "update", "visible": visible_sample_steps},
|
{"__type__": "update", "visible": visible_sample_steps, "value": 32},
|
||||||
{"__type__": "update", "visible": visible_inp_refs},
|
{"__type__": "update", "visible": visible_inp_refs},
|
||||||
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
||||||
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
||||||
|
{"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
|
||||||
)
|
)
|
||||||
|
|
||||||
dict_s2 = load_sovits_new(sovits_path)
|
dict_s2 = load_sovits_new(sovits_path)
|
||||||
@ -329,6 +335,19 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|||||||
# torch.save(vq_model.state_dict(),"merge_win.pth")
|
# torch.save(vq_model.state_dict(),"merge_win.pth")
|
||||||
vq_model.eval()
|
vq_model.eval()
|
||||||
|
|
||||||
|
yield (
|
||||||
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
|
prompt_text_update,
|
||||||
|
prompt_language_update,
|
||||||
|
text_update,
|
||||||
|
text_language_update,
|
||||||
|
{"__type__": "update", "visible": visible_sample_steps, "value": 32},
|
||||||
|
{"__type__": "update", "visible": visible_inp_refs},
|
||||||
|
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
||||||
|
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
||||||
|
{"__type__": "update", "value": i18n("合成语音"), "interactive": True},
|
||||||
|
)
|
||||||
with open("./weight.json") as f:
|
with open("./weight.json") as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
data = json.loads(data)
|
data = json.loads(data)
|
||||||
@ -530,7 +549,7 @@ def get_phones_and_bert(text, language, version, final=False):
|
|||||||
return phones, bert.to(dtype), norm_text
|
return phones, bert.to(dtype), norm_text
|
||||||
|
|
||||||
|
|
||||||
from module.mel_processing import spectrogram_torch, mel_spectrogram_torch
|
from module.mel_processing import mel_spectrogram_torch, spectrogram_torch
|
||||||
|
|
||||||
spec_min = -12
|
spec_min = -12
|
||||||
spec_max = 2
|
spec_max = 2
|
||||||
@ -1020,7 +1039,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")
|
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")
|
||||||
+ i18n("v3暂不支持该模式,使用了会报错。"),
|
+ i18n("v3暂不支持该模式,使用了会报错。"),
|
||||||
value=False,
|
value=False,
|
||||||
interactive=True,
|
interactive=True if model_version != "v3" else False,
|
||||||
show_label=True,
|
show_label=True,
|
||||||
scale=1,
|
scale=1,
|
||||||
)
|
)
|
||||||
@ -1137,7 +1156,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
# phoneme=gr.Textbox(label=i18n("音素框"), value="")
|
# phoneme=gr.Textbox(label=i18n("音素框"), value="")
|
||||||
# get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
|
# get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
inference_button = gr.Button(i18n("合成语音"), variant="primary", size="lg", scale=25)
|
inference_button = gr.Button(value=i18n("合成语音"), variant="primary", size="lg", scale=25)
|
||||||
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
||||||
|
|
||||||
inference_button.click(
|
inference_button.click(
|
||||||
@ -1176,6 +1195,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
inp_refs,
|
inp_refs,
|
||||||
ref_text_free,
|
ref_text_free,
|
||||||
if_sr_Checkbox,
|
if_sr_Checkbox,
|
||||||
|
inference_button,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
||||||
|
@ -7,11 +7,11 @@
|
|||||||
全部按日文识别
|
全部按日文识别
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import random
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
@ -47,11 +47,13 @@ gpt_path = os.environ.get("gpt_path", None)
|
|||||||
sovits_path = os.environ.get("sovits_path", None)
|
sovits_path = os.environ.get("sovits_path", None)
|
||||||
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
|
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
|
||||||
bert_path = os.environ.get("bert_path", None)
|
bert_path = os.environ.get("bert_path", None)
|
||||||
version = os.environ.get("version", "v2")
|
version = model_version = os.environ.get("version", "v2")
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
from TTS_infer_pack.TTS import TTS, TTS_Config, NO_PROMPT_ERROR
|
from inference_webui import DictToAttrRecursive
|
||||||
from TTS_infer_pack.text_segmentation_method import get_method
|
from TTS_infer_pack.text_segmentation_method import get_method
|
||||||
|
from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config
|
||||||
|
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
|
|
||||||
language = os.environ.get("language", "Auto")
|
language = os.environ.get("language", "Auto")
|
||||||
@ -254,21 +256,18 @@ def get_weights_names(GPT_weight_root, SoVITS_weight_root):
|
|||||||
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
||||||
|
|
||||||
|
|
||||||
from process_ckpt import get_sovits_version_from_path_fast
|
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
|
||||||
|
|
||||||
|
|
||||||
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
|
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
|
||||||
global version, dict_language
|
global version, model_version, dict_language, if_lora_v3
|
||||||
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
|
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
|
||||||
|
# print(sovits_path,version, model_version, if_lora_v3)
|
||||||
if if_lora_v3 and not os.path.exists(path_sovits_v3):
|
if if_lora_v3 == True and is_exist_s2gv3 == False: #
|
||||||
info = path_sovits_v3 + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
|
info = "GPT_SoVITS/pretrained_models/s2Gv3.pth" + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
|
||||||
gr.Warning(info)
|
gr.Warning(info)
|
||||||
raise FileExistsError(info)
|
raise FileExistsError(info)
|
||||||
|
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
|
||||||
tts_pipeline.init_vits_weights(sovits_path)
|
|
||||||
|
|
||||||
dict_language = dict_language_v1 if tts_pipeline.configs.version == "v1" else dict_language_v2
|
|
||||||
if prompt_language is not None and text_language is not None:
|
if prompt_language is not None and text_language is not None:
|
||||||
if prompt_language in list(dict_language.keys()):
|
if prompt_language in list(dict_language.keys()):
|
||||||
prompt_text_update, prompt_language_update = (
|
prompt_text_update, prompt_language_update = (
|
||||||
@ -289,6 +288,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|||||||
else:
|
else:
|
||||||
visible_sample_steps = False
|
visible_sample_steps = False
|
||||||
visible_inp_refs = True
|
visible_inp_refs = True
|
||||||
|
# prompt_language,text_language,prompt_text,prompt_language,text,text_language,inp_refs,ref_text_free,
|
||||||
yield (
|
yield (
|
||||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
{"__type__": "update", "choices": list(dict_language.keys())},
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
@ -296,12 +296,25 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|||||||
prompt_language_update,
|
prompt_language_update,
|
||||||
text_update,
|
text_update,
|
||||||
text_language_update,
|
text_language_update,
|
||||||
{"__type__": "update", "visible": visible_sample_steps},
|
{"__type__": "update", "interactive": visible_sample_steps, "value": 32},
|
||||||
{"__type__": "update", "visible": visible_inp_refs},
|
{"__type__": "update", "visible": visible_inp_refs},
|
||||||
{"__type__": "update", "value": False, "interactive": True if model_version != "v3" else False},
|
{"__type__": "update", "interactive": True if model_version != "v3" else False},
|
||||||
{"__type__": "update", "visible": True if model_version == "v3" else False},
|
{"__type__": "update", "value": i18n("模型加载中,请等待"), "interactive": False},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tts_pipeline.init_vits_weights(sovits_path)
|
||||||
|
yield (
|
||||||
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
|
{"__type__": "update", "choices": list(dict_language.keys())},
|
||||||
|
prompt_text_update,
|
||||||
|
prompt_language_update,
|
||||||
|
text_update,
|
||||||
|
text_language_update,
|
||||||
|
{"__type__": "update", "interactive": visible_sample_steps, "value": 32},
|
||||||
|
{"__type__": "update", "visible": visible_inp_refs},
|
||||||
|
{"__type__": "update", "interactive": True if model_version != "v3" else False},
|
||||||
|
{"__type__": "update", "value": i18n("合成语音"), "interactive": True},
|
||||||
|
)
|
||||||
with open("./weight.json") as f:
|
with open("./weight.json") as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
data = json.loads(data)
|
data = json.loads(data)
|
||||||
@ -341,7 +354,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
|
inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
|
||||||
inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"), file_count="multiple")
|
inp_refs = gr.File(
|
||||||
|
label=i18n("辅参考音频(可选多个,或不选)"),
|
||||||
|
file_count="multiple",
|
||||||
|
visible=True if model_version != "v3" else False,
|
||||||
|
)
|
||||||
prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
|
prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
prompt_language = gr.Dropdown(
|
prompt_language = gr.Dropdown(
|
||||||
@ -351,7 +368,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
ref_text_free = gr.Checkbox(
|
ref_text_free = gr.Checkbox(
|
||||||
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"),
|
label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"),
|
||||||
value=False,
|
value=False,
|
||||||
interactive=True,
|
interactive=True if model_version != "v3" else False,
|
||||||
show_label=True,
|
show_label=True,
|
||||||
)
|
)
|
||||||
gr.Markdown(
|
gr.Markdown(
|
||||||
@ -465,8 +482,19 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
SoVITS_dropdown.change(
|
SoVITS_dropdown.change(
|
||||||
change_sovits_weights,
|
change_sovits_weights,
|
||||||
[SoVITS_dropdown, prompt_language, text_language],
|
[SoVITS_dropdown, prompt_language, text_language],
|
||||||
[prompt_language, text_language, prompt_text, prompt_language, text, text_language],
|
[
|
||||||
)
|
prompt_language,
|
||||||
|
text_language,
|
||||||
|
prompt_text,
|
||||||
|
prompt_language,
|
||||||
|
text,
|
||||||
|
text_language,
|
||||||
|
sample_steps,
|
||||||
|
inp_refs,
|
||||||
|
ref_text_free,
|
||||||
|
inference_button,
|
||||||
|
],
|
||||||
|
) #
|
||||||
GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
|
GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
|
||||||
|
|
||||||
with gr.Group():
|
with gr.Group():
|
||||||
|
@ -63,7 +63,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
|
|||||||
extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
|
extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
|
||||||
extract_dir_new = os.path.join(parent_directory, "G2PWModel")
|
extract_dir_new = os.path.join(parent_directory, "G2PWModel")
|
||||||
print("Downloading g2pw model...")
|
print("Downloading g2pw model...")
|
||||||
modelscope_url = "https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
|
modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"#"https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
|
||||||
with requests.get(modelscope_url, stream=True) as r:
|
with requests.get(modelscope_url, stream=True) as r:
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
with open(zip_dir, "wb") as f:
|
with open(zip_dir, "wb") as f:
|
||||||
|
@ -286,3 +286,17 @@ https://github.com/RVC-Boss/GPT-SoVITS/pull/2112 https://github.com/RVC-Boss/GPT
|
|||||||
修复短文本语种选择出错 https://github.com/RVC-Boss/GPT-SoVITS/pull/2122
|
修复短文本语种选择出错 https://github.com/RVC-Boss/GPT-SoVITS/pull/2122
|
||||||
|
|
||||||
修复v3sovits未传参以支持调节语速
|
修复v3sovits未传参以支持调节语速
|
||||||
|
|
||||||
|
### 202503
|
||||||
|
|
||||||
|
修复一批由依赖的库版本不对导致的问题 https://github.com/RVC-Boss/GPT-SoVITS/commit/6c468583c5566e5fbb4fb805e4cc89c403e997b8
|
||||||
|
|
||||||
|
修复模型加载异步逻辑 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa
|
||||||
|
|
||||||
|
修复其他若干bug
|
||||||
|
|
||||||
|
重点更新:
|
||||||
|
|
||||||
|
1-v3支持并行推理 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa
|
||||||
|
|
||||||
|
2-整合包修复onnxruntime GPU推理的支持,影响:(1)g2pw有个onnx模型原先是CPU推理现在用GPU,显著降低推理的CPU瓶颈 (2)foxjoy去混响模型现在可使用GPU推理
|
||||||
|
10
webui.py
10
webui.py
@ -435,9 +435,9 @@ def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, so
|
|||||||
cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language)
|
cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language)
|
||||||
else:
|
else:
|
||||||
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
|
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
|
||||||
#####v3暂不支持加速推理
|
# #####v3暂不支持加速推理
|
||||||
if version == "v3":
|
# if version=="v3":
|
||||||
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
|
# cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language)
|
||||||
if p_tts_inference is None:
|
if p_tts_inference is None:
|
||||||
os.environ["gpt_path"] = gpt_path if "/" in gpt_path else "%s/%s" % (GPT_weight_root, gpt_path)
|
os.environ["gpt_path"] = gpt_path if "/" in gpt_path else "%s/%s" % (GPT_weight_root, gpt_path)
|
||||||
os.environ["sovits_path"] = sovits_path if "/" in sovits_path else "%s/%s" % (SoVITS_weight_root, sovits_path)
|
os.environ["sovits_path"] = sovits_path if "/" in sovits_path else "%s/%s" % (SoVITS_weight_root, sovits_path)
|
||||||
@ -1312,9 +1312,9 @@ def switch_version(version_):
|
|||||||
"value": False if not if_force_ckpt else True,
|
"value": False if not if_force_ckpt else True,
|
||||||
"interactive": True if not if_force_ckpt else False,
|
"interactive": True if not if_force_ckpt else False,
|
||||||
},
|
},
|
||||||
{"__type__": "update", "interactive": False if version == "v3" else True, "value": False},
|
{"__type__": "update", "interactive": True, "value": False},
|
||||||
{"__type__": "update", "visible": True if version == "v3" else False},
|
{"__type__": "update", "visible": True if version == "v3" else False},
|
||||||
)
|
) # {'__type__': 'update', "interactive": False if version == "v3" else True, "value": False}, \ ####batch infer
|
||||||
|
|
||||||
|
|
||||||
if os.path.exists("GPT_SoVITS/text/G2PWModel"):
|
if os.path.exists("GPT_SoVITS/text/G2PWModel"):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user