Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements

This commit is contained in:
Karasukaigan 2025-05-31 20:15:19 +08:00 committed by GitHub
commit ab5e8dc0ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 37 additions and 9 deletions

View File

@ -300,3 +300,24 @@ https://github.com/RVC-Boss/GPT-SoVITS/pull/2112 https://github.com/RVC-Boss/GPT
1-v3支持并行推理 https://github.com/RVC-Boss/GPT-SoVITS/commit/03b662a769946b7a6a8569a354860e8eeeb743aa
2-整合包修复onnxruntime GPU推理的支持, 影响: (1) g2pw有个onnx模型原先是CPU推理现在用GPU, 显著降低推理的CPU瓶颈 (2) foxjoy去混响模型现在可使用GPU推理
### 202504/202505更新
1-修复uvr5和onnx去混响模型使用ffmpeg编码mp3和m4a时,原路径带空格会有bug的问题
https://github.com/RVC-Boss/GPT-SoVITS/commit/1934fc1e1b22c4c162bba1bbe7d7ebb132944cdc
2-标注界面增加友情提示:标注完每一面都要点submit text,否则白忙活
https://github.com/RVC-Boss/GPT-SoVITS/commit/fafe4e7f120fba56c5f053c6db30aa675d5951ba
https://github.com/RVC-Boss/GPT-SoVITS/commit/8c705784c50bf438c7b6d0be33a9e5e3cb90e6b2
3-通过缓存策略使sovits推理提速10%
https://github.com/RVC-Boss/GPT-SoVITS/pull/2377
4-混合语种切分识别逻辑优化
https://github.com/RVC-Boss/GPT-SoVITS/pull/2408
5-完善colab/kaggle notebook脚本,完善linux环境配置脚本、docker环境、windows自动构建脚本
https://github.com/RVC-Boss/GPT-SoVITS/commit/ad7df5298bea51273c86c05b5b13f28ed7d9fe16
https://github.com/RVC-Boss/GPT-SoVITS/commit/d5e479dad6342222eb4887df627e69c048d2338c
预告:端午后基于V2版本进行重大优化更新

View File

@ -1,4 +1,3 @@
# -*- coding: gbk -*-
import sys
from tools.i18n.i18n import I18nAuto, scan_language_list
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"

View File

@ -190,14 +190,14 @@ class Predictor:
opt_path_vocal = path_vocal[:-4] + ".%s" % format
opt_path_other = path_other[:-4] + ".%s" % format
if os.path.exists(path_vocal):
os.system("ffmpeg -i '%s' -vn '%s' -q:a 2 -y" % (path_vocal, opt_path_vocal))
os.system("ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path_vocal, opt_path_vocal))
if os.path.exists(opt_path_vocal):
try:
os.remove(path_vocal)
except:
pass
if os.path.exists(path_other):
os.system("ffmpeg -i '%s' -vn '%s' -q:a 2 -y" % (path_other, opt_path_other))
os.system("ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path_other, opt_path_other))
if os.path.exists(opt_path_other):
try:
os.remove(path_other)

View File

@ -140,7 +140,9 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@ -175,7 +177,9 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@ -303,7 +307,9 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@ -334,7 +340,9 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
try:
os.remove(path)

View File

@ -199,7 +199,7 @@ def set_default():
else:
default_sovits_epoch = 2
default_sovits_save_every_epoch = 1
max_sovits_epoch = 20 # 40 # 3
max_sovits_epoch = 16 # 40 # 3 #训太多=作死
max_sovits_save_every_epoch = 10 # 10 # 3
default_batch_size = max(1, default_batch_size)
@ -1419,7 +1419,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False) as app:
value=process_info(process_name_slice, "close"), variant="primary", visible=False
)
gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(非必需)"))
gr.Markdown(value="0bb-" + i18n("语音降噪工具")+i18n("(不稳定,先别用,可能劣化模型效果!)"))
with gr.Row():
with gr.Column(scale=3):
with gr.Row():