diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py index 5cd618e..1571ef7 100644 --- a/GPT_SoVITS/TTS_infer_pack/TTS.py +++ b/GPT_SoVITS/TTS_infer_pack/TTS.py @@ -462,8 +462,6 @@ class TTS: n_speakers=self.configs.n_speakers, **kwargs ) - if hasattr(vits_model, "enc_q"): - del vits_model.enc_q self.configs.is_v3_synthesizer = False else: vits_model = SynthesizerTrnV3( @@ -474,7 +472,8 @@ class TTS: ) self.configs.is_v3_synthesizer = True self.init_bigvgan() - + if "pretrained" not in weights_path and hasattr(vits_model, "enc_q"): + del vits_model.enc_q if if_lora_v3==False: print(f"Loading VITS weights from {weights_path}. {vits_model.load_state_dict(dict_s2['weight'], strict=False)}") diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index afae2cf..dd9086f 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -238,7 +238,7 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): else: visible_sample_steps=False visible_inp_refs=True - yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "visible": visible_sample_steps},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "value": False,"interactive":True if model_version!="v3"else False},{"__type__": "update", "visible":True if model_version=="v3"else False} + yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "visible": visible_sample_steps,"value":32},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "value": False,"interactive":True if model_version!="v3"else False},{"__type__": "update", "visible":True if model_version=="v3"else False},{"__type__": "update", "value":i18n("模型加载中,请等待"),"interactive":False} dict_s2 = load_sovits_new(sovits_path) hps = dict_s2["config"] @@ -294,6 +294,7 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): # torch.save(vq_model.state_dict(),"merge_win.pth") vq_model.eval() + yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "visible": visible_sample_steps,"value":32},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "value": False,"interactive":True if model_version!="v3"else False},{"__type__": "update", "visible":True if model_version=="v3"else False},{"__type__": "update", "value":i18n("合成语音"),"interactive":True} with open("./weight.json")as f: data=f.read() data=json.loads(data) @@ -877,7 +878,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: with gr.Row(): inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13) with gr.Column(scale=13): - ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")+i18n("v3暂不支持该模式,使用了会报错。"), value=False, interactive=True, show_label=True,scale=1) + ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")+i18n("v3暂不支持该模式,使用了会报错。"), value=False, interactive=True if model_version!="v3"else False, show_label=True,scale=1) gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT")+"
"+i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。"))) prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1) with gr.Column(scale=14): @@ -915,7 +916,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: # phoneme=gr.Textbox(label=i18n("音素框"), value="") # get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary") with gr.Row(): - inference_button = gr.Button(i18n("合成语音"), variant="primary", size='lg', scale=25) + inference_button = gr.Button(value=i18n("合成语音"), variant="primary", size='lg', scale=25) output = gr.Audio(label=i18n("输出的语音"), scale=14) inference_button.click( @@ -923,7 +924,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze,inp_refs,sample_steps,if_sr_Checkbox,pause_second_slider], [output], ) - SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language,sample_steps,inp_refs,ref_text_free,if_sr_Checkbox]) + SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language,sample_steps,inp_refs,ref_text_free,if_sr_Checkbox,inference_button]) GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], []) # gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。")) diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py index 9017aa4..40cdae9 100644 --- a/GPT_SoVITS/inference_webui_fast.py +++ b/GPT_SoVITS/inference_webui_fast.py @@ -41,12 +41,13 @@ gpt_path = os.environ.get("gpt_path", None) sovits_path = os.environ.get("sovits_path", None) cnhubert_base_path = os.environ.get("cnhubert_base_path", None) bert_path = os.environ.get("bert_path", None) -version=os.environ.get("version","v2") +version=model_version=os.environ.get("version","v2") import gradio as gr from TTS_infer_pack.TTS import TTS, TTS_Config, NO_PROMPT_ERROR from TTS_infer_pack.text_segmentation_method import get_method from tools.i18n.i18n import I18nAuto, scan_language_list +from inference_webui import DictToAttrRecursive language=os.environ.get("language","Auto") language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language @@ -221,19 +222,16 @@ def get_weights_names(GPT_weight_root, SoVITS_weight_root): SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root) -from process_ckpt import get_sovits_version_from_path_fast +from process_ckpt import get_sovits_version_from_path_fast,load_sovits_new def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): - global version, dict_language + global version, model_version, dict_language,if_lora_v3 version, model_version, if_lora_v3=get_sovits_version_from_path_fast(sovits_path) - + # print(sovits_path,version, model_version, if_lora_v3) if if_lora_v3 and not os.path.exists(path_sovits_v3): info= path_sovits_v3 + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重") gr.Warning(info) raise FileExistsError(info) - - tts_pipeline.init_vits_weights(sovits_path) - - dict_language = dict_language_v1 if tts_pipeline.configs.version =='v1' else dict_language_v2 + dict_language = dict_language_v1 if version =='v1' else dict_language_v2 if prompt_language is not None and text_language is not None: if prompt_language in list(dict_language.keys()): prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language} @@ -251,8 +249,11 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None): else: visible_sample_steps=False visible_inp_refs=True - yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "visible": visible_sample_steps},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "value": False,"interactive":True if model_version!="v3"else False},{"__type__": "update", "visible":True if model_version=="v3"else False} + #prompt_language,text_language,prompt_text,prompt_language,text,text_language,inp_refs,ref_text_free, + yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "interactive": visible_sample_steps,"value":32},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "interactive": True if model_version!="v3"else False},{"__type__": "update", "value":i18n("模型加载中,请等待"),"interactive":False} + tts_pipeline.init_vits_weights(sovits_path) + yield {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update,{"__type__": "update", "interactive": visible_sample_steps,"value":32},{"__type__": "update", "visible": visible_inp_refs},{"__type__": "update", "interactive": True if model_version!="v3"else False},{"__type__": "update", "value":i18n("合成语音"),"interactive":True} with open("./weight.json")as f: data=f.read() data=json.loads(data) @@ -279,14 +280,14 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: gr.Markdown(value=i18n("*请上传并填写参考信息")) with gr.Row(): inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath") - inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"),file_count="multiple") + inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"),file_count="multiple", visible=True if model_version!="v3"else False) prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2) with gr.Row(): prompt_language = gr.Dropdown( label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文") ) with gr.Column(): - ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True) + ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True if model_version!="v3"else False, show_label=True) gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT")+"
"+i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。")) with gr.Column(): @@ -355,7 +356,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: [output, seed], ) stop_infer.click(tts_pipeline.stop, [], []) - SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language]) + SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language,sample_steps,inp_refs,ref_text_free,inference_button])# GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], []) with gr.Group(): diff --git a/GPT_SoVITS/text/g2pw/onnx_api.py b/GPT_SoVITS/text/g2pw/onnx_api.py index dcb4604..78a4c93 100644 --- a/GPT_SoVITS/text/g2pw/onnx_api.py +++ b/GPT_SoVITS/text/g2pw/onnx_api.py @@ -58,7 +58,7 @@ def download_and_decompress(model_dir: str='G2PWModel/'): extract_dir = os.path.join(parent_directory,"G2PWModel_1.1") extract_dir_new = os.path.join(parent_directory,"G2PWModel") print("Downloading g2pw model...") - modelscope_url = "https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip" + modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"#"https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip" with requests.get(modelscope_url, stream=True) as r: r.raise_for_status() with open(zip_dir, 'wb') as f: diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 4666f6e..58a67d3 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -286,3 +286,17 @@ https://github.com/RVC-Boss/GPT-SoVITS/pull/2112 https://github.com/RVC-Boss/GPT 修复短文本语种选择出错 https://github.com/RVC-Boss/GPT-SoVITS/pull/2122 修复v3sovits未传参以支持调节语速 + +### 202503 + +修复一批由依赖的库版本不对导致的问题https://github.com/RVC-Boss/GPT-SoVITS/commit/6c468583c5566e5fbb4fb805e4cc89c403e997b8 + +修复模型加载异步逻辑https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa + +修复其他若干bug + +重点更新: + +1-v3支持并行推理 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa + +2-整合包修复onnxruntime GPU推理的支持,影响:(1)g2pw有个onnx模型原先是CPU推理现在用GPU,显著降低推理的CPU瓶颈 (2)foxjoy去混响模型现在可使用GPU推理 diff --git a/webui.py b/webui.py index b73ed89..41955a5 100644 --- a/webui.py +++ b/webui.py @@ -298,9 +298,9 @@ def change_tts_inference(bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"'%(python_exec, language) else: cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) - #####v3暂不支持加速推理 - if version=="v3": - cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) + # #####v3暂不支持加速推理 + # if version=="v3": + # cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language) if p_tts_inference is None: os.environ["gpt_path"]=gpt_path if "/" in gpt_path else "%s/%s"%(GPT_weight_root,gpt_path) os.environ["sovits_path"]=sovits_path if "/"in sovits_path else "%s/%s"%(SoVITS_weight_root,sovits_path) @@ -849,8 +849,8 @@ def switch_version(version_): {'__type__': 'update', "value": default_sovits_save_every_epoch,"maximum": max_sovits_save_every_epoch}, \ {'__type__': 'update', "visible": True if version!="v3"else False}, \ {'__type__': 'update', "value": False if not if_force_ckpt else True, "interactive": True if not if_force_ckpt else False}, \ - {'__type__': 'update', "interactive": False if version == "v3" else True, "value": False}, \ - {'__type__': 'update', "visible": True if version== "v3" else False} + {'__type__': 'update', "interactive": True, "value": False}, \ + {'__type__': 'update', "visible": True if version== "v3" else False} # {'__type__': 'update', "interactive": False if version == "v3" else True, "value": False}, \ ####batch infer if os.path.exists('GPT_SoVITS/text/G2PWModel'):... else: