From e7232a38daaf3fa0c218582e2868e3ca824e14df Mon Sep 17 00:00:00 2001 From: Lei Hao Date: Fri, 26 Jan 2024 10:34:22 +0800 Subject: [PATCH 1/4] Resolved the issue of duplicate inference in the Bert1 model --- GPT_SoVITS/inference_webui.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 246748a..11042c0 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -185,19 +185,22 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) phones1 = cleaned_text_to_sequence(phones1) texts = text.split("\n") audio_opt = [] + + if prompt_language == "zh": + bert1 = get_bert_feature(norm_text1, word2ph1).to(device) + else: + bert1 = torch.zeros( + (1024, len(phones1)), + dtype=torch.float16 if is_half == True else torch.float32, + ).to(device) + for text in texts: # 解决输入目标文本的空行导致报错的问题 if (len(text.strip()) == 0): continue phones2, word2ph2, norm_text2 = clean_text(text, text_language) phones2 = cleaned_text_to_sequence(phones2) - if prompt_language == "zh": - bert1 = get_bert_feature(norm_text1, word2ph1).to(device) - else: - bert1 = torch.zeros( - (1024, len(phones1)), - dtype=torch.float16 if is_half == True else torch.float32, - ).to(device) + if text_language == "zh": bert2 = get_bert_feature(norm_text2, word2ph2).to(device) else: From 163823a920763f9ea6bee0eb70d7217769149453 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:49:33 +0800 Subject: [PATCH 2/4] Add files via upload --- tools/uvr5/webui.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py index 71e7ebc..f1357e5 100644 --- a/tools/uvr5/webui.py +++ b/tools/uvr5/webui.py @@ -1,12 +1,11 @@ import os import traceback,gradio as gr import logging -from tools.i18n.i18n import I18nAuto +from i18n.i18n import I18nAuto i18n = I18nAuto() logger = logging.getLogger(__name__) -import librosa -import soundfile as sf +import ffmpeg import torch import sys from mdxnet import MDXNetDereverb @@ -20,8 +19,7 @@ for name in os.listdir(weight_uvr5_root): device=sys.argv[1] is_half=sys.argv[2] -webui_port_uvr5=int(sys.argv[3]) -is_share=eval(sys.argv[4]) + def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): infos = [] @@ -55,17 +53,16 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format need_reformat = 1 done = 0 try: - y, sr = librosa.load(inp_path, sr=None) - info = sf.info(inp_path) - channels = info.channels - if channels == 2 and sr == 44100: + info = ffmpeg.probe(inp_path, cmd="ffprobe") + if ( + info["streams"][0]["channels"] == 2 + and info["streams"][0]["sample_rate"] == "44100" + ): need_reformat = 0 pre_fun._path_audio_( inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3 ) done = 1 - else: - need_reformat = 1 except: need_reformat = 1 traceback.print_exc() @@ -74,8 +71,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format os.path.join(os.environ["TEMP"]), os.path.basename(inp_path), ) - y_resampled = librosa.resample(y, sr, 44100) - sf.write(tmp_path, y_resampled, 44100, "PCM_16") + os.system( + "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" + % (inp_path, tmp_path) + ) inp_path = tmp_path try: if done == 0: @@ -116,10 +115,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format yield "\n".join(infos) -with gr.Blocks(title="RVC WebUI") as app: +with gr.Blocks(title="UVR5 WebUI") as app: gr.Markdown( value= - i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + "MIT license. https://github.com/Anjok07/ultimatevocalremovergui" ) with gr.Tabs(): with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): @@ -144,7 +143,7 @@ with gr.Blocks(title="RVC WebUI") as app: minimum=0, maximum=20, step=1, - label=i18n("人声提取激进程度"), + label="人声提取激进程度", value=10, interactive=True, visible=False, # 先不开放调整 @@ -180,7 +179,6 @@ with gr.Blocks(title="RVC WebUI") as app: app.queue(concurrency_count=511, max_size=1022).launch( server_name="0.0.0.0", inbrowser=True, - share=is_share, - server_port=webui_port_uvr5, + server_port=9873, quiet=True, -) +) \ No newline at end of file From 66be44bee4088902d86e2313f2ce4f351e3de228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=82=A6?= Date: Fri, 26 Jan 2024 14:09:50 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E8=AE=B0=E5=BF=86=E5=8A=9F=E8=83=BD=EF=BC=8C=E4=B8=8D=E7=94=A8?= =?UTF-8?q?=E4=BA=8C=E6=AC=A1=E9=80=89=E6=8B=A9=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加模型记忆功能,不用二次选择模型 --- GPT_SoVITS/inference_webui.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index bb57183..fdee8d9 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -6,10 +6,25 @@ logging.getLogger("httpx").setLevel(logging.ERROR) logging.getLogger("asyncio").setLevel(logging.ERROR) import pdb -gpt_path = os.environ.get( - "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -) -sovits_path = os.environ.get("sovits_path", "pretrained_models/s2G488k.pth") +if os.path.exists("./gweight.txt"): + with open("./gweight.txt", 'r',encoding="utf-8") as file: + gweight_data = file.read() + gpt_path = os.environ.get( + "gpt_path", gweight_data) +else: + gpt_path = os.environ.get( + "gpt_path", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt") + +if os.path.exists("./sweight.txt"): + with open("./sweight.txt", 'r',encoding="utf-8") as file: + sweight_data = file.read() + sovits_path = os.environ.get("sovits_path", sweight_data) +else: + sovits_path = os.environ.get("sovits_path", "GPT_SoVITS/pretrained_models/s2G488k.pth") +# gpt_path = os.environ.get( +# "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +# ) +# sovits_path = os.environ.get("sovits_path", "pretrained_models/s2G488k.pth") cnhubert_base_path = os.environ.get( "cnhubert_base_path", "pretrained_models/chinese-hubert-base" ) @@ -124,6 +139,7 @@ def change_sovits_weights(sovits_path): vq_model = vq_model.to(device) vq_model.eval() print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) + with open("./sweight.txt","w",encoding="utf-8")as f:f.write(sovits_path) change_sovits_weights(sovits_path) def change_gpt_weights(gpt_path): @@ -140,6 +156,7 @@ def change_gpt_weights(gpt_path): t2s_model.eval() total = sum([param.nelement() for param in t2s_model.parameters()]) print("Number of parameter: %.2fM" % (total / 1e6)) + with open("./gweight.txt","w",encoding="utf-8")as f:f.write(gpt_path) change_gpt_weights(gpt_path) def get_spepc(hps, filename): From 996796d92c659418bbea0880d995eff94a51c0d4 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:34:11 +0800 Subject: [PATCH 4/4] Add files via upload --- tools/uvr5/webui.py | 94 ++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py index f1357e5..97170bf 100644 --- a/tools/uvr5/webui.py +++ b/tools/uvr5/webui.py @@ -1,11 +1,12 @@ import os import traceback,gradio as gr import logging -from i18n.i18n import I18nAuto +from tools.i18n.i18n import I18nAuto i18n = I18nAuto() logger = logging.getLogger(__name__) -import ffmpeg +import librosa +import soundfile as sf import torch import sys from mdxnet import MDXNetDereverb @@ -19,7 +20,8 @@ for name in os.listdir(weight_uvr5_root): device=sys.argv[1] is_half=sys.argv[2] - +webui_port_uvr5=int(sys.argv[3]) +is_share=eval(sys.argv[4]) def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): infos = [] @@ -50,40 +52,32 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format paths = [path.name for path in paths] for path in paths: inp_path = os.path.join(inp_root, path) - need_reformat = 1 - done = 0 + if(os.path.isfile(inp_path)==False):continue try: - info = ffmpeg.probe(inp_path, cmd="ffprobe") - if ( - info["streams"][0]["channels"] == 2 - and info["streams"][0]["sample_rate"] == "44100" - ): - need_reformat = 0 - pre_fun._path_audio_( - inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3 + done = 0 + try: + y, sr = librosa.load(inp_path, sr=None) + info = sf.info(inp_path) + channels = info.channels + if channels == 2 and sr == 44100: + need_reformat = 0 + pre_fun._path_audio_( + inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3 + ) + done = 1 + else: + need_reformat = 1 + except: + need_reformat = 1 + traceback.print_exc() + if need_reformat == 1: + tmp_path = "%s/%s.reformatted.wav" % ( + os.path.join(os.environ["TEMP"]), + os.path.basename(inp_path), ) - done = 1 - except: - need_reformat = 1 - traceback.print_exc() - if need_reformat == 1: - tmp_path = "%s/%s.reformatted.wav" % ( - os.path.join(os.environ["TEMP"]), - os.path.basename(inp_path), - ) - os.system( - "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" - % (inp_path, tmp_path) - ) - inp_path = tmp_path - try: - if done == 0: - pre_fun._path_audio_( - inp_path, save_root_ins, save_root_vocal, format0 - ) - infos.append("%s->Success" % (os.path.basename(inp_path))) - yield "\n".join(infos) - except: + y_resampled = librosa.resample(y, sr, 44100) + sf.write(tmp_path, y_resampled, 44100, "PCM_16") + inp_path = tmp_path try: if done == 0: pre_fun._path_audio_( @@ -92,10 +86,21 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format infos.append("%s->Success" % (os.path.basename(inp_path))) yield "\n".join(infos) except: - infos.append( - "%s->%s" % (os.path.basename(inp_path), traceback.format_exc()) - ) - yield "\n".join(infos) + try: + if done == 0: + pre_fun._path_audio_( + inp_path, save_root_ins, save_root_vocal, format0 + ) + infos.append("%s->Success" % (os.path.basename(inp_path))) + yield "\n".join(infos) + except: + infos.append( + "%s->%s" % (os.path.basename(inp_path), traceback.format_exc()) + ) + yield "\n".join(infos) + except: + infos.append("Oh my god. %s->%s"%(os.path.basename(inp_path), traceback.format_exc())) + yield "\n".join(infos) except: infos.append(traceback.format_exc()) yield "\n".join(infos) @@ -115,10 +120,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format yield "\n".join(infos) -with gr.Blocks(title="UVR5 WebUI") as app: +with gr.Blocks(title="RVC WebUI") as app: gr.Markdown( value= - "MIT license. https://github.com/Anjok07/ultimatevocalremovergui" + i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") ) with gr.Tabs(): with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): @@ -143,7 +148,7 @@ with gr.Blocks(title="UVR5 WebUI") as app: minimum=0, maximum=20, step=1, - label="人声提取激进程度", + label=i18n("人声提取激进程度"), value=10, interactive=True, visible=False, # 先不开放调整 @@ -179,6 +184,7 @@ with gr.Blocks(title="UVR5 WebUI") as app: app.queue(concurrency_count=511, max_size=1022).launch( server_name="0.0.0.0", inbrowser=True, - server_port=9873, + share=is_share, + server_port=webui_port_uvr5, quiet=True, -) \ No newline at end of file +)