GPT-SoVITS/tools/uvr5/webui.py
ChasonJiang 29f22115fb
[fast_inference] 回退策略,减少padding影响,开放选项,同步代码 (#986)
* Update README

* Optimize-English-G2P

* docs: change akward expression

* docs: update Changelog_KO.md

* Fix CN punc in EN,add 's match

* Adjust normalize and g2p logic

* Update zh_CN.json

* Update README (#827)

Update README.md
Update some outdated file paths and commands

* 修复英文多音字,调整字典热加载,新增姓名匹配 (#869)

* Fix homograph dict

* Add JSON in dict

* Adjust hot dict to hot reload

* Add English name dict

* Adjust get name dict logic

* Make API Great Again (#894)

* Add zh/jp/en mix

* Optimize code readability and formatted output.

* Try OGG streaming

* Add stream mode arg

* Add media type arg

* Add cut punc arg

* Eliminate punc risk

* Update README (#895)

* Update README

* Update README

* update README

* update README

* fix typo s/Licence /License (#904)

* fix reformat cmd (#917)

Co-authored-by: starylan <starylan@outlook.com>

* Update README.md

* Normalize chinese arithmetic operations (#947)

* 改变训练和推理时的mask策略,以修复当batch_size>1时,产生的复读现象

* 同步main分支代码,增加“保持随机”选项

* 在colab中运行colab_webui.ipynb发生的uvr5模型缺失问题 (#968)

在colab中使用git下载uvr5模型时报错:
fatal: destination path 'uvr5_weights' already exists and is not an empty directory.
通过在下载前将原本从本仓库下载的uvr5_weights文件夹删除可以解决问题。

* [ASR] 修复FasterWhisper遍历输入路径失败 (#956)

* remove glob

* rename

* reset mirror pos

* 回退mask策略;
回退pad策略;
在T2SBlock中添加padding_mask,以减少pad的影响;
开放repetition_penalty参数,让用户自行调整重复惩罚的强度;
增加parallel_infer参数,用于开启或关闭并行推理,关闭时与0307版本保持一致;
在webui中增加“保持随机”选项;
同步main分支代码。

* 删除无用注释

---------

Co-authored-by: Lion <drain.daters.0p@icloud.com>
Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Co-authored-by: KamioRinn <snowsdream@live.com>
Co-authored-by: Pengoose <pengoose_dev@naver.com>
Co-authored-by: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com>
Co-authored-by: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Co-authored-by: KamioRinn <63162909+KamioRinn@users.noreply.github.com>
Co-authored-by: Lion-Wu <130235128+Lion-Wu@users.noreply.github.com>
Co-authored-by: digger yu <digger-yu@outlook.com>
Co-authored-by: SapphireLab <36986837+SapphireLab@users.noreply.github.com>
Co-authored-by: starylan <starylan@outlook.com>
Co-authored-by: shadow01a <141255649+shadow01a@users.noreply.github.com>
2024-04-19 14:35:28 +08:00

177 lines
7.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import traceback,gradio as gr
import logging
from tools.i18n.i18n import I18nAuto
i18n = I18nAuto()
logger = logging.getLogger(__name__)
import librosa,ffmpeg
import soundfile as sf
import torch
import sys
from mdxnet import MDXNetDereverb
from vr import AudioPre, AudioPreDeEcho
weight_uvr5_root = "tools/uvr5/uvr5_weights"
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name:
uvr5_names.append(name.replace(".pth", ""))
device=sys.argv[1]
is_half=eval(sys.argv[2])
webui_port_uvr5=int(sys.argv[3])
is_share=eval(sys.argv[4])
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
infos = []
try:
inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
save_root_vocal = (
save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
)
save_root_ins = (
save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
)
is_hp3 = "HP3" in model_name
if model_name == "onnx_dereverb_By_FoxJoy":
pre_fun = MDXNetDereverb(15)
else:
func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
pre_fun = func(
agg=int(agg),
model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
device=device,
is_half=is_half,
)
if inp_root != "":
paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
else:
paths = [path.name for path in paths]
for path in paths:
inp_path = os.path.join(inp_root, path)
if(os.path.isfile(inp_path)==False):continue
need_reformat = 1
done = 0
try:
info = ffmpeg.probe(inp_path, cmd="ffprobe")
if (
info["streams"][0]["channels"] == 2
and info["streams"][0]["sample_rate"] == "44100"
):
need_reformat = 0
pre_fun._path_audio_(
inp_path, save_root_ins, save_root_vocal, format0,is_hp3
)
done = 1
except:
need_reformat = 1
traceback.print_exc()
if need_reformat == 1:
tmp_path = "%s/%s.reformatted.wav" % (
os.path.join(os.environ["TEMP"]),
os.path.basename(inp_path),
)
os.system(
f'ffmpeg -i "{inp_path}" -vn -acodec pcm_s16le -ac 2 -ar 44100 "{tmp_path}" -y'
)
inp_path = tmp_path
try:
if done == 0:
pre_fun._path_audio_(
inp_path, save_root_ins, save_root_vocal, format0,is_hp3
)
infos.append("%s->Success" % (os.path.basename(inp_path)))
yield "\n".join(infos)
except:
infos.append(
"%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
)
yield "\n".join(infos)
except:
infos.append(traceback.format_exc())
yield "\n".join(infos)
finally:
try:
if model_name == "onnx_dereverb_By_FoxJoy":
del pre_fun.pred.model
del pre_fun.pred.model_
else:
del pre_fun.model
del pre_fun
except:
traceback.print_exc()
print("clean_empty_cache")
if torch.cuda.is_available():
torch.cuda.empty_cache()
yield "\n".join(infos)
with gr.Blocks(title="UVR5 WebUI") as app:
gr.Markdown(
value=
i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
)
with gr.Tabs():
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
with gr.Group():
gr.Markdown(
value=i18n(
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"
)
)
with gr.Row():
with gr.Column():
dir_wav_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径"),
placeholder="C:\\Users\\Desktop\\todo-songs",
)
wav_inputs = gr.File(
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
)
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
agg = gr.Slider(
minimum=0,
maximum=20,
step=1,
label=i18n("人声提取激进程度"),
value=10,
interactive=True,
visible=False, # 先不开放调整
)
opt_vocal_root = gr.Textbox(
label=i18n("指定输出主人声文件夹"), value="output/uvr5_opt"
)
opt_ins_root = gr.Textbox(
label=i18n("指定输出非主人声文件夹"), value="output/uvr5_opt"
)
format0 = gr.Radio(
label=i18n("导出文件格式"),
choices=["wav", "flac", "mp3", "m4a"],
value="flac",
interactive=True,
)
but2 = gr.Button(i18n("转换"), variant="primary")
vc_output4 = gr.Textbox(label=i18n("输出信息"))
but2.click(
uvr,
[
model_choose,
dir_wav_input,
opt_vocal_root,
wav_inputs,
opt_ins_root,
agg,
format0,
],
[vc_output4],
api_name="uvr_convert",
)
app.queue(concurrency_count=511, max_size=1022).launch(
server_name="0.0.0.0",
inbrowser=True,
share=is_share,
server_port=webui_port_uvr5,
quiet=True,
)