Merge d32d70c2b103e1d3e5b17a98d8311daa1db0ed88 into 165882d64f474b3563fa91adc1a679436ae9c3b8

This commit is contained in:
laubonghaudoi 2025-03-19 11:41:53 -07:00 committed by GitHub
commit 5cdac67fc4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 660 additions and 521 deletions

View File

@ -1,6 +1,17 @@
# -*- coding: utf-8 -*-
import os
import os.path
import shutil
import traceback
from time import time as ttime
import torch
from text.cleaner import clean_text
from tqdm import tqdm
from transformers import AutoModelForMaskedLM, AutoTokenizer
from tools.my_utils import clean_path
inp_text = os.environ.get("inp_text")
inp_wav_dir = os.environ.get("inp_wav_dir")
@ -11,17 +22,8 @@ if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
version = os.environ.get('version', None)
import sys, numpy as np, traceback, pdb
import os.path
from glob import glob
from tqdm import tqdm
from text.cleaner import clean_text
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
from tools.my_utils import clean_path
# inp_text=sys.argv[1]
# inp_wav_dir=sys.argv[2]
@ -32,11 +34,8 @@ from tools.my_utils import clean_path
# opt_dir="/data/docker/liujing04/gpt-vits/fine_tune_dataset/%s"%exp_name
# bert_pretrained_dir="/data/docker/liujing04/bert-vits2/Bert-VITS2-master20231106/bert/chinese-roberta-wwm-ext-large"
from time import time as ttime
import shutil
def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
def my_save(fea, path): # fix issue: torch.save doesn't support chinese path
dir = os.path.dirname(path)
name = os.path.basename(path)
# tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
@ -56,11 +55,13 @@ if os.path.exists(txt_path) == False:
# device = "mps"
else:
device = "cpu"
if os.path.exists(bert_pretrained_dir):...
else:raise FileNotFoundError(bert_pretrained_dir)
if os.path.exists(bert_pretrained_dir):
...
else:
raise FileNotFoundError(bert_pretrained_dir)
tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
if is_half == True:
if is_half:
bert_model = bert_model.half().to(device)
else:
bert_model = bert_model.to(device)
@ -126,7 +127,7 @@ if os.path.exists(txt_path) == False:
"YUE": "yue",
"Yue": "yue",
}
for line in lines[int(i_part) :: int(all_parts)]:
for line in tqdm(lines[int(i_part):: int(all_parts)]):
try:
wav_name, spk_name, language, text = line.split("|")
# todo.append([name,text,"zh"])

View File

@ -1,6 +1,23 @@
# -*- coding: utf-8 -*-
"""
Step 2 of data preparation: extract HuBERT features from the audio files,
then resample the audio to 32 kHz and save it.
"""
import os
import shutil
import sys
import traceback
from time import time as ttime
import librosa
import numpy as np
import torch
from feature_extractor import cnhubert
from scipy.io import wavfile
from tqdm import tqdm
from tools.my_utils import clean_path, load_audio
import sys,os
inp_text = os.environ.get("inp_text")
inp_wav_dir = os.environ.get("inp_wav_dir")
exp_name = os.environ.get("exp_name")
@ -8,18 +25,16 @@ i_part= os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
from feature_extractor import cnhubert
opt_dir = os.environ.get("opt_dir")
cnhubert.cnhubert_base_path = os.environ.get("cnhubert_base_dir")
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import pdb,traceback,numpy as np,logging
from scipy.io import wavfile
import librosa
now_dir = os.getcwd()
sys.path.append(now_dir)
from tools.my_utils import load_audio,clean_path
# from config import cnhubert_base_path
# cnhubert.cnhubert_base_path=cnhubert_base_path
@ -32,15 +47,14 @@ from tools.my_utils import load_audio,clean_path
# cnhubert.cnhubert_base_path=sys.argv[7]
# opt_dir="/data/docker/liujing04/gpt-vits/fine_tune_dataset/%s"%exp_name
from time import time as ttime
import shutil
def my_save(fea, path):  # fix issue: torch.save doesn't support chinese path
    """Save ``fea`` with ``torch.save`` and then move the file to ``path``.

    The object is first written to a temporary file in the current working
    directory whose name is built from the current timestamp and the
    module-level ``i_part``; only afterwards is it moved to the requested
    destination. This indirection is the workaround for ``torch.save`` not
    handling Chinese characters in the target path.

    Args:
        fea: Object to serialize with ``torch.save``.
        path: Final destination of the saved file.
    """
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = f"{ttime()}{i_part}.pth"
    torch.save(fea, tmp_path)
    # os.path.join keeps a bare file name relative instead of turning it
    # into "/name", which plain "%s/%s" formatting would do for an empty dir.
    shutil.move(tmp_path, os.path.join(target_dir, name))
hubert_dir = "%s/4-cnhubert" % (opt_dir)
wav32dir = "%s/5-wav32k" % (opt_dir)
@ -56,6 +70,8 @@ if torch.cuda.is_available():
# device = "mps"
else:
device = "cpu"
model = cnhubert.get_model()
# is_half=False
if (is_half == True):
@ -64,47 +80,66 @@ else:
model = model.to(device)
nan_fails = []
def name2go(wav_name, wav_path):
    """Extract HuBERT features for one audio file and save a 32 kHz copy of it.

    Writes the feature tensor to ``<hubert_dir>/<wav_name>.pt`` and the
    rescaled waveform to ``<wav32dir>/<wav_name>``. Files whose features
    already exist, whose peak amplitude exceeds 2.2, or whose features
    contain NaN are skipped; the NaN case is additionally recorded in the
    module-level ``nan_fails`` list.

    Args:
        wav_name: Name used for the output files.
        wav_path: Path of the audio file to read.
    """
    # Skip if the features for this file were already written.
    hubert_path = "%s/%s.pt" % (hubert_dir, wav_name)
    if os.path.exists(hubert_path):
        return

    # Ask the loader for a 32 kHz signal.
    tmp_audio = load_audio(wav_path, 32000)

    # Files whose peak absolute amplitude is above 2.2 are reported and skipped.
    tmp_max = np.abs(tmp_audio).max()
    if tmp_max > 2.2:
        print(f"{wav_name}-filtered,{tmp_max}")
        return

    # Blend the peak-normalized signal with the raw one, weighted by the
    # module-level `alpha` and `maxx` (defined outside this block).
    # tmp_audio32 is scaled for the int16 file written below; tmp_audio32b
    # uses the 1145.14 scale and feeds the feature extractor.
    tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha * 32768)) + ((1 - alpha) * 32768) * tmp_audio
    tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha * 1145.14)) + ((1 - alpha) * 1145.14) * tmp_audio
    tmp_audio = librosa.resample(
        tmp_audio32b, orig_sr=32000, target_sr=16000
    )  # original author's note: resampling is not the problem

    tensor_wav16 = torch.from_numpy(tmp_audio)
    # If half precision is enabled, convert the tensor before moving it to the device.
    if is_half:
        tensor_wav16 = tensor_wav16.half().to(device)
    else:
        tensor_wav16 = tensor_wav16.to(device)

    # Run the model and move the transposed last hidden state back to the CPU.
    ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1, 2).cpu()  # e.g. torch.Size([1, 768, 215])
    if np.isnan(ssl.detach().numpy()).sum() != 0:
        # Remember the failure so the caller can retry it later.
        nan_fails.append((wav_name, wav_path))
        print(f"nan filtered:{wav_name}")
        return

    wavfile.write(
        f"{wav32dir}/{wav_name}",
        32000,
        tmp_audio32.astype("int16"),
    )
    my_save(ssl, hubert_path)
with open(inp_text, "r", encoding="utf8")as f:
lines = f.read().strip("\n").split("\n")
for line in lines[int(i_part)::int(all_parts)]:
for line in tqdm(lines[int(i_part)::int(all_parts)]):
try:
# wav_name,text=line.split("\t")
wav_name, spk_name, language, text = line.split("|")
wav_name = clean_path(wav_name)
if (inp_wav_dir != "" and inp_wav_dir != None):
if (inp_wav_dir != "" and inp_wav_dir is not None):
wav_name = os.path.basename(wav_name)
wav_path = "%s/%s"%(inp_wav_dir, wav_name)
wav_path = f"{inp_wav_dir}/{wav_name}"
else:
wav_path = wav_name
@ -113,7 +148,7 @@ for line in lines[int(i_part)::int(all_parts)]:
except:
print(line, traceback.format_exc())
if(len(nan_fails)>0 and is_half==True):
if (len(nan_fails) > 0 and is_half):
is_half = False
model = model.float()
for wav in nan_fails:

View File

@ -1,4 +1,12 @@
import logging
import os
import sys
import traceback
import torch
import utils
from tools.my_utils import clean_path
inp_text = os.environ.get("inp_text")
exp_name = os.environ.get("exp_name")
@ -10,8 +18,10 @@ opt_dir = os.environ.get("opt_dir")
pretrained_s2G = os.environ.get("pretrained_s2G")
s2config_path = os.environ.get("s2config_path")
if os.path.exists(pretrained_s2G):...
else:raise FileNotFoundError(pretrained_s2G)
if os.path.exists(pretrained_s2G):
...
else:
raise FileNotFoundError(pretrained_s2G)
# version=os.environ.get("version","v2")
size = os.path.getsize(pretrained_s2G)
if size < 82978 * 1024:
@ -24,24 +34,14 @@ elif size < 700 * 1024 * 1024:
version = "v2"
else:
version = "v3"
import torch
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import math, traceback
import multiprocessing
import sys, pdb
now_dir = os.getcwd()
sys.path.append(now_dir)
from random import shuffle
import torch.multiprocessing as mp
from glob import glob
from tqdm import tqdm
import logging, librosa, utils
if version != "v3":
from module.models import SynthesizerTrn
else:
from module.models import SynthesizerTrnV3 as SynthesizerTrn
from tools.my_utils import clean_path
logging.getLogger("numba").setLevel(logging.WARNING)
# from config import pretrained_s2G

209
webui.py
View File

@ -1,15 +1,45 @@
import os,sys
if len(sys.argv)==1:sys.argv.append('v2')
import json
import os
import platform
import re
import shutil
import signal
import site
import subprocess
import sys
import traceback
import warnings
from multiprocessing import cpu_count
from subprocess import Popen
import gradio as gr
import psutil
import torch
import yaml
from config import (
exp_root,
infer_device,
is_half,
is_share,
python_exec,
webui_port_infer_tts,
webui_port_main,
webui_port_subfix,
webui_port_uvr5,
)
from tools import my_utils
from tools.asr.config import asr_dict
from tools.i18n.i18n import I18nAuto, scan_language_list
from tools.my_utils import check_details, check_for_existance
if len(sys.argv) == 1:
sys.argv.append('v2')
version = "v1"if sys.argv[1] == "v1" else "v2"
os.environ["version"] = version
now_dir = os.getcwd()
sys.path.insert(0, now_dir)
import warnings
warnings.filterwarnings("ignore")
import json,yaml,torch,pdb,re,shutil
import platform
import psutil
import signal
os.environ['TORCH_DISTRIBUTED_DEBUG'] = 'INFO'
torch.manual_seed(233333)
tmp = os.path.join(now_dir, "TEMP")
@ -17,7 +47,8 @@ os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp
if (os.path.exists(tmp)):
for name in os.listdir(tmp):
if(name=="jieba.cache"):continue
if (name == "jieba.cache"):
continue
path = "%s/%s" % (tmp, name)
delete = os.remove if os.path.isfile(path) else shutil.rmtree
try:
@ -25,13 +56,12 @@ if(os.path.exists(tmp)):
except Exception as e:
print(str(e))
pass
import site
import traceback
site_packages_roots = []
for path in site.getsitepackages():
if "packages" in path:
site_packages_roots.append(path)
if(site_packages_roots==[]):site_packages_roots=["%s/runtime/Lib/site-packages" % now_dir]
if (site_packages_roots == []):
site_packages_roots = ["%s/runtime/Lib/site-packages" % now_dir]
# os.environ["OPENBLAS_NUM_THREADS"] = "4"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
os.environ["all_proxy"] = ""
@ -45,28 +75,17 @@ for site_packages_root in site_packages_roots:
% (now_dir, now_dir, now_dir, now_dir, now_dir, now_dir)
)
break
except PermissionError as e:
except PermissionError:
traceback.print_exc()
from tools import my_utils
import shutil
import pdb
import subprocess
from subprocess import Popen
import signal
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
from tools.i18n.i18n import I18nAuto, scan_language_list
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
os.environ["language"] = language
i18n = I18nAuto(language=language)
from scipy.io import wavfile
from tools.my_utils import load_audio, check_for_existance, check_details
from multiprocessing import cpu_count
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
try:
import gradio.analytics as analytics
analytics.version_check = lambda: None
except:...
import gradio as gr
except:
...
n_cpu = cpu_count()
ngpu = torch.cuda.device_count()
@ -92,6 +111,7 @@ if torch.cuda.is_available() or ngpu != 0:
# gpu_infos.append("%s\t%s" % ("0", "Apple GPU"))
# mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存
def set_default():
global default_batch_size, default_max_batch_size, gpu_info, default_sovits_epoch, default_sovits_save_every_epoch, max_sovits_epoch, max_sovits_save_every_epoch, default_batch_size_s1, if_force_ckpt
if_force_ckpt = False
@ -139,23 +159,32 @@ def set_default():
default_batch_size_s1 = max(1, default_batch_size_s1)
default_max_batch_size = default_batch_size * 3
set_default()
gpus = "-".join([i[0] for i in gpu_infos])
default_gpu_numbers = str(sorted(list(set_gpu_numbers))[0])
def fix_gpu_number(input):  # force an out-of-range number back into range
    """Return a usable GPU number for ``input``.

    Args:
        input: GPU number as entered by the user.

    Returns:
        ``default_gpu_numbers`` when ``int(input)`` is not a member of
        ``set_gpu_numbers``; otherwise ``input`` unchanged. ``input`` is also
        returned unchanged when it cannot be converted to an integer.
    """
    try:
        if int(input) not in set_gpu_numbers:
            return default_gpu_numbers
    except (TypeError, ValueError, OverflowError):
        # Not convertible to int: hand the value back untouched, but do not
        # swallow KeyboardInterrupt/SystemExit as a bare except would.
        return input
    return input
def fix_gpu_numbers(inputs):
    """Apply ``fix_gpu_number`` to every entry of a comma-separated string.

    Args:
        inputs: Comma-separated GPU numbers.

    Returns:
        The corrected entries joined with ``","``. If ``inputs`` cannot be
        split on ``","``, it is returned unchanged.
    """
    try:
        return ",".join(str(fix_gpu_number(item)) for item in inputs.split(","))
    except (AttributeError, TypeError):
        # `inputs` has no usable str.split (e.g. None): return it as given.
        return inputs
pretrained_sovits_name = ["GPT_SoVITS/pretrained_models/s2G488k.pth", "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2Gv3.pth"]
pretrained_gpt_name = ["GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt", "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1v3.ckpt"]
@ -170,31 +199,41 @@ if _:
_ = [[], []]
for i in range(3):
if os.path.exists(pretrained_gpt_name[i]):_[0].append(pretrained_gpt_name[i])
else:_[0].append("")##没有下pretrained模型的说不定他们是想自己从零训底模呢
if os.path.exists(pretrained_sovits_name[i]):_[-1].append(pretrained_sovits_name[i])
else:_[-1].append("")
if os.path.exists(pretrained_gpt_name[i]):
_[0].append(pretrained_gpt_name[i])
else:
_[0].append("") # 没有下pretrained模型的说不定他们是想自己从零训底模呢
if os.path.exists(pretrained_sovits_name[i]):
_[-1].append(pretrained_sovits_name[i])
else:
_[-1].append("")
pretrained_gpt_name, pretrained_sovits_name = _
SoVITS_weight_root = ["SoVITS_weights", "SoVITS_weights_v2", "SoVITS_weights_v3"]
GPT_weight_root = ["GPT_weights", "GPT_weights_v2", "GPT_weights_v3"]
for root in SoVITS_weight_root + GPT_weight_root:
os.makedirs(root, exist_ok=True)
def get_weights_names():
    """Collect the available SoVITS and GPT weight file names.

    Each list starts with the non-empty entries of the corresponding
    pretrained-name list, followed by ``"<dir>/<file>"`` for every file in the
    weight root directories that ends with ``.pth`` (SoVITS) or ``.ckpt``
    (GPT). Every matching file is listed exactly once.

    Returns:
        A ``(SoVITS_names, GPT_names)`` tuple of lists of strings.
    """
    SoVITS_names = [name for name in pretrained_sovits_name if name != ""]
    for path in SoVITS_weight_root:
        SoVITS_names.extend(
            f"{path}/{name}" for name in os.listdir(path) if name.endswith(".pth")
        )

    GPT_names = [name for name in pretrained_gpt_name if name != ""]
    for path in GPT_weight_root:
        GPT_names.extend(
            f"{path}/{name}" for name in os.listdir(path) if name.endswith(".ckpt")
        )

    return SoVITS_names, GPT_names
SoVITS_names, GPT_names = get_weights_names()
for path in SoVITS_weight_root + GPT_weight_root:
os.makedirs(path, exist_ok=True)
def custom_sort_key(s):
# 使用正则表达式提取字符串中的数字部分和非数字部分
parts = re.split('(\d+)', s)
@ -202,16 +241,19 @@ def custom_sort_key(s):
parts = [int(part) if part.isdigit() else part for part in parts]
return parts
def change_choices():
    """Re-read the weight files and build update dicts for both dropdowns.

    Returns:
        Two ``{"choices": ..., "__type__": "update"}`` dicts, the first for
        the SoVITS names and the second for the GPT names, each sorted with
        ``custom_sort_key``.
    """
    SoVITS_names, GPT_names = get_weights_names()
    sovits_update = {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}
    gpt_update = {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
    return sovits_update, gpt_update
p_label = None
p_uvr5 = None
p_asr = None
p_denoise = None
p_tts_inference = None
def kill_proc_tree(pid, including_parent=True):
try:
parent = psutil.Process(pid)
@ -231,7 +273,10 @@ def kill_proc_tree(pid, including_parent=True):
except OSError:
pass
system = platform.system()
def kill_process(pid, process_name=""):
if (system == "Windows"):
cmd = "taskkill /t /f /pid %s" % pid
@ -241,6 +286,7 @@ def kill_process(pid, process_name=""):
kill_proc_tree(pid)
print(process_name + i18n("进程已终止"))
def process_info(process_name="", indicator=""):
if indicator == "opened":
return process_name + i18n("已开启")
@ -263,7 +309,10 @@ def process_info(process_name="", indicator=""):
else:
return process_name
process_name_subfix = i18n("音频标注WebUI")
def change_label(path_list):
global p_label
if p_label is None:
@ -278,7 +327,10 @@ def change_label(path_list):
p_label = None
yield process_info(process_name_subfix, "closed"), {'__type__': 'update', 'visible': True}, {'__type__': 'update', 'visible': False}
process_name_uvr5 = i18n("人声分离WebUI")
def change_uvr5():
global p_uvr5
if p_uvr5 is None:
@ -291,14 +343,17 @@ def change_uvr5():
p_uvr5 = None
yield process_info(process_name_uvr5, "closed"), {'__type__': 'update', 'visible': True}, {'__type__': 'update', 'visible': False}
process_name_tts = i18n("TTS推理WebUI")
def change_tts_inference(bert_path, cnhubert_base_path, gpu_number, gpt_path, sovits_path, batched_infer_enabled):
global p_tts_inference
if batched_infer_enabled:
cmd = '"%s" GPT_SoVITS/inference_webui_fast.py "%s"' % (python_exec, language)
else:
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
#####v3暂不支持加速推理
# v3暂不支持加速推理
if version == "v3":
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"' % (python_exec, language)
if p_tts_inference is None:
@ -318,9 +373,10 @@ def change_tts_inference(bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits
p_tts_inference = None
yield process_info(process_name_tts, "closed"), {'__type__': 'update', 'visible': True}, {'__type__': 'update', 'visible': False}
from tools.asr.config import asr_dict
process_name_asr = i18n("语音识别")
def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision):
global p_asr
if p_asr is None:
@ -345,6 +401,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_
else:
yield process_info(process_name_asr, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
def close_asr():
global p_asr
if p_asr is not None:
@ -352,7 +409,10 @@ def close_asr():
p_asr = None
return process_info(process_name_asr, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
process_name_denoise = i18n("语音降噪")
def open_denoise(denoise_inp_dir, denoise_opt_dir):
global p_denoise
if (p_denoise == None):
@ -370,6 +430,7 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir):
else:
yield process_info(process_name_denoise, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, {"__type__": "update"}, {"__type__": "update"}
def close_denoise():
global p_denoise
if p_denoise is not None:
@ -377,8 +438,11 @@ def close_denoise():
p_denoise = None
return process_info(process_name_denoise, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
p_train_SoVITS = None
process_name_sovits = i18n("SoVITS训练")
def open1Ba(batch_size, total_epoch, exp_name, text_low_lr_rate, if_save_latest, if_save_every_weights, save_every_epoch, gpu_numbers1Ba, pretrained_s2G, pretrained_s2D, if_grad_ckpt, lora_rank):
global p_train_SoVITS
if (p_train_SoVITS == None):
@ -409,7 +473,8 @@ def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_s
data["name"] = exp_name
data["version"] = version
tmp_config_path = "%s/tmp_s2.json" % tmp
with open(tmp_config_path,"w")as f:f.write(json.dumps(data))
with open(tmp_config_path, "w")as f:
f.write(json.dumps(data))
if version in ["v1", "v2"]:
cmd = '"%s" GPT_SoVITS/s2_train.py --config "%s"' % (python_exec, tmp_config_path)
else:
@ -423,6 +488,7 @@ def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_s
else:
yield process_info(process_name_sovits, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1Ba():
global p_train_SoVITS
if p_train_SoVITS is not None:
@ -430,8 +496,11 @@ def close1Ba():
p_train_SoVITS = None
return process_info(process_name_sovits, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
p_train_GPT = None
process_name_gpt = i18n("GPT训练")
def open1Bb(batch_size, total_epoch, exp_name, if_dpo, if_save_latest, if_save_every_weights, save_every_epoch, gpu_numbers, pretrained_s1):
global p_train_GPT
if (p_train_GPT == None):
@ -462,7 +531,8 @@ def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_
os.environ["_CUDA_VISIBLE_DEVICES"] = fix_gpu_numbers(gpu_numbers.replace("-", ","))
os.environ["hz"] = "25hz"
tmp_config_path = "%s/tmp_s1.yaml" % tmp
with open(tmp_config_path, "w") as f:f.write(yaml.dump(data, default_flow_style=False))
with open(tmp_config_path, "w") as f:
f.write(yaml.dump(data, default_flow_style=False))
# cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" --train_semantic_path "%s/6-name2semantic.tsv" --train_phoneme_path "%s/2-name2text.txt" --output_dir "%s/logs_s1"'%(python_exec,tmp_config_path,s1_dir,s1_dir,s1_dir)
cmd = '"%s" GPT_SoVITS/s1_train.py --config_file "%s" ' % (python_exec, tmp_config_path)
yield process_info(process_name_gpt, "opened"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
@ -474,6 +544,7 @@ def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_
else:
yield process_info(process_name_gpt, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1Bb():
global p_train_GPT
if p_train_GPT is not None:
@ -481,8 +552,11 @@ def close1Bb():
p_train_GPT = None
return process_info(process_name_gpt, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
ps_slice = []
process_name_slice = i18n("语音切分")
def open_slice(inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, n_parts):
global ps_slice
inp = my_utils.clean_path(inp)
@ -491,8 +565,10 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
if (os.path.exists(inp) == False):
yield i18n("输入路径不存在"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
return
if os.path.isfile(inp):n_parts=1
elif os.path.isdir(inp):pass
if os.path.isfile(inp):
n_parts = 1
elif os.path.isdir(inp):
pass
else:
yield i18n("输入路径存在但不可用"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
return
@ -510,6 +586,7 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
else:
yield process_info(process_name_slice, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
def close_slice():
global ps_slice
if (ps_slice != []):
@ -521,8 +598,11 @@ def close_slice():
ps_slice = []
return process_info(process_name_slice, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
ps1a = []
process_name_1a = i18n("文本分词与特征提取")
def open1a(inp_text, inp_wav_dir, exp_name, gpu_numbers, bert_pretrained_dir):
global ps1a
inp_text = my_utils.clean_path(inp_text)
@ -574,6 +654,7 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
else:
yield process_info(process_name_1a, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1a():
global ps1a
if ps1a != []:
@ -585,8 +666,11 @@ def close1a():
ps1a = []
return process_info(process_name_1a, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
ps1b = []
process_name_1b = i18n("语音自监督特征提取")
def open1b(inp_text, inp_wav_dir, exp_name, gpu_numbers, ssl_pretrained_dir):
global ps1b
inp_text = my_utils.clean_path(inp_text)
@ -625,6 +709,7 @@ def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir):
else:
yield process_info(process_name_1b, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1b():
global ps1b
if (ps1b != []):
@ -636,8 +721,11 @@ def close1b():
ps1b = []
return process_info(process_name_1b, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
ps1c = []
process_name_1c = i18n("语义Token提取")
def open1c(inp_text, exp_name, gpu_numbers, pretrained_s2G_path):
global ps1c
inp_text = my_utils.clean_path(inp_text)
@ -685,6 +773,7 @@ def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path):
else:
yield process_info(process_name_1c, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1c():
global ps1c
if (ps1c != []):
@ -696,8 +785,11 @@ def close1c():
ps1c = []
return process_info(process_name_1c, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
ps1abc = []
process_name_1abc = i18n("训练集格式化一键三连")
def open1abc(inp_text, inp_wav_dir, exp_name, gpu_numbers1a, gpu_numbers1Ba, gpu_numbers1c, bert_pretrained_dir, ssl_pretrained_dir, pretrained_s2G_path):
global ps1abc
inp_text = my_utils.clean_path(inp_text)
@ -707,7 +799,7 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
if (ps1abc == []):
opt_dir = "%s/%s" % (exp_root, exp_name)
try:
#############################1a
# 1a
path_text = "%s/2-name2text.txt" % opt_dir
if (os.path.exists(path_text) == False or (os.path.exists(path_text) == True and len(open(path_text, "r", encoding="utf8").read().strip("\n").split("\n")) < 2)):
config = {
@ -734,7 +826,8 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
p = Popen(cmd, shell=True)
ps1abc.append(p)
yield i18n("进度") + ": 1A-Doing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
for p in ps1abc:p.wait()
for p in ps1abc:
p.wait()
opt = []
for i_part in range(all_parts): # txt_path="%s/2-name2text-%s.txt"%(opt_dir,i_part)
@ -747,7 +840,7 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
assert len("".join(opt)) > 0, process_info(process_name_1a, "failed")
yield i18n("进度") + ": 1A-Done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
ps1abc = []
#############################1b
# 1b
config = {
"inp_text": inp_text,
"inp_wav_dir": inp_wav_dir,
@ -771,10 +864,11 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
p = Popen(cmd, shell=True)
ps1abc.append(p)
yield i18n("进度") + ": 1A-Done, 1B-Doing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
for p in ps1abc:p.wait()
for p in ps1abc:
p.wait()
yield i18n("进度") + ": 1A-Done, 1B-Done", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
ps1abc = []
#############################1c
# 1c
path_semantic = "%s/6-name2semantic.tsv" % opt_dir
if (os.path.exists(path_semantic) == False or (os.path.exists(path_semantic) == True and os.path.getsize(path_semantic) < 31)):
config = {
@ -800,7 +894,8 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
p = Popen(cmd, shell=True)
ps1abc.append(p)
yield i18n("进度") + ": 1A-Done, 1B-Done, 1C-Doing", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
for p in ps1abc:p.wait()
for p in ps1abc:
p.wait()
opt = ["item_name\tsemantic_audio"]
for i_part in range(all_parts):
@ -820,6 +915,7 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
else:
yield process_info(process_name_1abc, "occupy"), {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
def close1abc():
global ps1abc
if (ps1abc != []):
@ -831,11 +927,13 @@ def close1abc():
ps1abc = []
return process_info(process_name_1abc, "closed"), {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False}
def switch_version(version_):
os.environ["version"] = version_
global version
version = version_
if pretrained_sovits_name[int(version[-1])-1] !='' and pretrained_gpt_name[int(version[-1])-1] !='':...
if pretrained_sovits_name[int(version[-1]) - 1] != '' and pretrained_gpt_name[int(version[-1]) - 1] != '':
...
else:
gr.Warning(i18n('未下载模型') + ": " + version.upper())
set_default()
@ -852,23 +950,25 @@ def switch_version(version_):
{'__type__': 'update', "interactive": False if version == "v3" else True, "value": False}, \
{'__type__': 'update', "visible": True if version == "v3" else False}
if os.path.exists('GPT_SoVITS/text/G2PWModel'):...
if os.path.exists('GPT_SoVITS/text/G2PWModel'):
...
else:
cmd = '"%s" GPT_SoVITS/download.py' % python_exec
p = Popen(cmd, shell=True)
p.wait()
def sync(text):
    """Wrap ``text`` in an update dict that sets a component's value.

    Args:
        text: Value to place in the update.

    Returns:
        ``{'__type__': 'update', 'value': text}``.
    """
    return {'__type__': 'update', 'value': text}
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(
value=
i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + "<br>" + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + "<br>" + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
)
gr.Markdown(
value=
i18n("中文教程文档") + ": " + "https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e"
value=i18n("中文教程文档") + ": " + "https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e"
)
with gr.Tabs():
@ -931,8 +1031,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
def change_lang_choices(key):  # update the selectable languages for the chosen model
    """Build an update dict listing the languages of ASR model ``key``.

    Args:
        key: Key into ``asr_dict`` identifying the selected model.

    Returns:
        An update dict whose choices are ``asr_dict[key]['lang']`` and whose
        value is the first of those entries.
    """
    languages = asr_dict[key]['lang']
    return {"__type__": "update", "choices": languages, "value": languages[0]}
def change_size_choices(key):  # update the selectable model sizes for the chosen model
    """Build an update dict listing the sizes of ASR model ``key``.

    Args:
        key: Key into ``asr_dict`` identifying the selected model.

    Returns:
        An update dict whose choices are ``asr_dict[key]['size']`` and whose
        value is the last of those entries.
    """
    sizes = asr_dict[key]['size']
    return {"__type__": "update", "choices": sizes, "value": sizes[-1]}
def change_precision_choices(key): # 根据选择的模型修改可选的语言
if key == "Faster Whisper (多语种)":
if default_batch_size <= 4:
@ -1121,7 +1223,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
version_checkbox.change(switch_version, [version_checkbox], [pretrained_s2G, pretrained_s2D, pretrained_s1, GPT_dropdown, SoVITS_dropdown, batch_size, total_epoch, save_every_epoch, text_low_lr_rate, if_grad_ckpt, batched_infer_enabled, lora_rank])
with gr.TabItem(i18n("2-GPT-SoVITS-变声")):gr.Markdown(value=i18n("施工中,请静候佳音"))
with gr.TabItem(i18n("2-GPT-SoVITS-变声")):
gr.Markdown(value=i18n("施工中,请静候佳音"))
app.queue().launch( # concurrency_count=511, max_size=1022
server_name="0.0.0.0",