mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Merge branch 'RVC-Boss:main' into main
This commit is contained in:
commit
032425857c
@ -12,6 +12,8 @@ bert_path = os.environ.get(
|
||||
)
|
||||
infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
|
||||
infer_ttswebui = int(infer_ttswebui)
|
||||
is_share = os.environ.get("is_share", "False")
|
||||
is_share=eval(is_share)
|
||||
if "_CUDA_VISIBLE_DEVICES" in os.environ:
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
|
||||
is_half = eval(os.environ.get("is_half", "True"))
|
||||
@ -115,7 +117,6 @@ vq_model.eval()
|
||||
print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
|
||||
hz = 50
|
||||
max_sec = config["data"]["max_sec"]
|
||||
# t2s_model = Text2SemanticLightningModule.load_from_checkpoint(checkpoint_path=gpt_path, config=config, map_location="cpu")#########todo
|
||||
t2s_model = Text2SemanticLightningModule(config, "ojbk", is_train=False)
|
||||
t2s_model.load_state_dict(dict_s1["weight"])
|
||||
if is_half == True:
|
||||
@ -149,13 +150,21 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
|
||||
t0 = ttime()
|
||||
prompt_text = prompt_text.strip("\n")
|
||||
prompt_language, text = prompt_language, text.strip("\n")
|
||||
zero_wav = np.zeros(
|
||||
int(hps.data.sampling_rate * 0.3),
|
||||
dtype=np.float16 if is_half == True else np.float32,
|
||||
)
|
||||
with torch.no_grad():
|
||||
wav16k, sr = librosa.load(ref_wav_path, sr=16000) # 派蒙
|
||||
wav16k, sr = librosa.load(ref_wav_path, sr=16000)
|
||||
wav16k = torch.from_numpy(wav16k)
|
||||
zero_wav_torch = torch.from_numpy(zero_wav)
|
||||
if is_half == True:
|
||||
wav16k = wav16k.half().to(device)
|
||||
zero_wav_torch = zero_wav_torch.half().to(device)
|
||||
else:
|
||||
wav16k = wav16k.to(device)
|
||||
zero_wav_torch = zero_wav_torch.to(device)
|
||||
wav16k=torch.cat([wav16k,zero_wav_torch])
|
||||
ssl_content = ssl_model.model(wav16k.unsqueeze(0))[
|
||||
"last_hidden_state"
|
||||
].transpose(
|
||||
@ -170,10 +179,6 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
|
||||
phones1 = cleaned_text_to_sequence(phones1)
|
||||
texts = text.split("\n")
|
||||
audio_opt = []
|
||||
zero_wav = np.zeros(
|
||||
int(hps.data.sampling_rate * 0.3),
|
||||
dtype=np.float16 if is_half == True else np.float32,
|
||||
)
|
||||
for text in texts:
|
||||
# 解决输入目标文本的空行导致报错的问题
|
||||
if (len(text.strip()) == 0):
|
||||
|
@ -1,6 +1,8 @@
|
||||
import time, logging
|
||||
import time
|
||||
import logging
|
||||
import os
|
||||
import random, traceback
|
||||
import random
|
||||
import traceback
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.utils.data
|
||||
@ -12,15 +14,12 @@ from text import cleaned_text_to_sequence
|
||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
||||
import torch.nn.functional as F
|
||||
from functools import lru_cache
|
||||
import torch
|
||||
import requests
|
||||
from scipy.io import wavfile
|
||||
from io import BytesIO
|
||||
|
||||
# from config import exp_dir
|
||||
from my_utils import load_audio
|
||||
|
||||
|
||||
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
|
||||
class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
"""
|
||||
1) loads audio, speaker_id, text pairs
|
||||
@ -44,7 +43,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
|
||||
for line in lines:
|
||||
tmp = line.split("\t")
|
||||
if len(tmp) != 4:
|
||||
if (len(tmp) != 4):
|
||||
continue
|
||||
self.phoneme_data[tmp[0]] = [tmp[1]]
|
||||
|
||||
@ -52,7 +51,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
tmp = self.audiopaths_sid_text
|
||||
leng = len(tmp)
|
||||
min_num = 100
|
||||
if leng < min_num:
|
||||
if (leng < min_num):
|
||||
self.audiopaths_sid_text = []
|
||||
for _ in range(max(2, int(min_num / leng))):
|
||||
self.audiopaths_sid_text += tmp
|
||||
@ -77,20 +76,28 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
for audiopath in tqdm(self.audiopaths_sid_text):
|
||||
try:
|
||||
phoneme = self.phoneme_data[audiopath][0]
|
||||
phoneme = phoneme.split(" ")
|
||||
phoneme = phoneme.split(' ')
|
||||
phoneme_ids = cleaned_text_to_sequence(phoneme)
|
||||
except Exception:
|
||||
print(f"{audiopath} not in self.phoneme_data !")
|
||||
skipped_phone += 1
|
||||
continue
|
||||
|
||||
size = os.path.getsize("%s/%s" % (self.path5, audiopath))
|
||||
duration = size / self.sampling_rate / 2
|
||||
|
||||
if duration == 0:
|
||||
print(f"Zero duration for {audiopath}, skipping...")
|
||||
skipped_dur += 1
|
||||
continue
|
||||
|
||||
if 54 > duration > 0.6 or self.val:
|
||||
audiopaths_sid_text_new.append([audiopath, phoneme_ids])
|
||||
lengths.append(size // (2 * self.hop_length))
|
||||
else:
|
||||
skipped_dur += 1
|
||||
continue
|
||||
|
||||
print("skipped_phone: ", skipped_phone, ", skipped_dur: ", skipped_dur)
|
||||
print("total left: ", len(audiopaths_sid_text_new))
|
||||
assert len(audiopaths_sid_text_new) > 1 # 至少能凑够batch size,这里todo
|
||||
@ -103,10 +110,8 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
try:
|
||||
spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath))
|
||||
with torch.no_grad():
|
||||
ssl = torch.load(
|
||||
"%s/%s.pt" % (self.path4, audiopath), map_location="cpu"
|
||||
)
|
||||
if ssl.shape[-1] != spec.shape[-1]:
|
||||
ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu")
|
||||
if (ssl.shape[-1] != spec.shape[-1]):
|
||||
typee = ssl.dtype
|
||||
ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee)
|
||||
ssl.requires_grad = False
|
||||
@ -117,25 +122,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
ssl = torch.zeros(1, 768, 100)
|
||||
text = text[-1:]
|
||||
print("load audio or ssl error!!!!!!", audiopath)
|
||||
# print(ssl.requires_grad,spec.requires_grad,wav.requires_grad,text.requires_grad)
|
||||
return (ssl, spec, wav, text)
|
||||
|
||||
def get_audio(self, filename):
|
||||
audio_array = load_audio(
|
||||
filename, self.sampling_rate
|
||||
) # load_audio的方法是已经归一化到-1~1之间的,不用再/32768
|
||||
# print(filename,audio_array.max(),audio_array.min(),audio_array.mean())
|
||||
audio_array = load_audio(filename, self.sampling_rate) # load_audio的方法是已经归一化到-1~1之间的,不用再/32768
|
||||
audio = torch.FloatTensor(audio_array) # /32768
|
||||
audio_norm = audio
|
||||
audio_norm = audio_norm.unsqueeze(0)
|
||||
spec = spectrogram_torch(
|
||||
audio_norm,
|
||||
self.filter_length,
|
||||
self.sampling_rate,
|
||||
self.hop_length,
|
||||
self.win_length,
|
||||
center=False,
|
||||
)
|
||||
spec = spectrogram_torch(audio_norm, self.filter_length, self.sampling_rate, self.hop_length, self.win_length,
|
||||
center=False)
|
||||
spec = torch.squeeze(spec, 0)
|
||||
return spec, audio_norm
|
||||
|
||||
@ -152,14 +147,11 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
|
||||
def random_slice(self, ssl, wav, mel):
|
||||
assert abs(ssl.shape[-1] - wav.shape[-1] // self.hop_length) < 3, (
|
||||
"first",
|
||||
ssl.shape,
|
||||
wav.shape,
|
||||
)
|
||||
"first", ssl.shape, wav.shape)
|
||||
|
||||
len_mel = mel.shape[1]
|
||||
if self.val:
|
||||
reference_mel = mel[:, : len_mel // 3]
|
||||
reference_mel = mel[:, :len_mel // 3]
|
||||
return reference_mel, ssl, wav, mel
|
||||
dir = random.randint(0, 1)
|
||||
sep_point = random.randint(int(len_mel // 3), int(len_mel // 3 * 2))
|
||||
@ -167,29 +159,22 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||
if dir == 0:
|
||||
reference_mel = mel[:, :sep_point]
|
||||
ssl = ssl[:, :, sep_point:]
|
||||
wav2 = wav[:, sep_point * self.hop_length :]
|
||||
wav2 = wav[:, sep_point * self.hop_length:]
|
||||
mel = mel[:, sep_point:]
|
||||
else:
|
||||
reference_mel = mel[:, sep_point:]
|
||||
ssl = ssl[:, :, :sep_point]
|
||||
wav2 = wav[:, : sep_point * self.hop_length]
|
||||
wav2 = wav[:, :sep_point * self.hop_length]
|
||||
mel = mel[:, :sep_point]
|
||||
|
||||
assert abs(ssl.shape[-1] - wav2.shape[-1] // self.hop_length) < 3, (
|
||||
ssl.shape,
|
||||
wav.shape,
|
||||
wav2.shape,
|
||||
mel.shape,
|
||||
sep_point,
|
||||
self.hop_length,
|
||||
sep_point * self.hop_length,
|
||||
dir,
|
||||
)
|
||||
ssl.shape, wav.shape, wav2.shape, mel.shape, sep_point, self.hop_length, sep_point * self.hop_length, dir)
|
||||
return reference_mel, ssl, wav2, mel
|
||||
|
||||
|
||||
class TextAudioSpeakerCollate:
|
||||
"""Zero-pads model inputs and targets"""
|
||||
class TextAudioSpeakerCollate():
|
||||
""" Zero-pads model inputs and targets
|
||||
"""
|
||||
|
||||
def __init__(self, return_ids=False):
|
||||
self.return_ids = return_ids
|
||||
@ -202,8 +187,8 @@ class TextAudioSpeakerCollate:
|
||||
"""
|
||||
# Right zero-pad all one-hot text sequences to max input length
|
||||
_, ids_sorted_decreasing = torch.sort(
|
||||
torch.LongTensor([x[1].size(1) for x in batch]), dim=0, descending=True
|
||||
)
|
||||
torch.LongTensor([x[1].size(1) for x in batch]),
|
||||
dim=0, descending=True)
|
||||
|
||||
max_ssl_len = max([x[0].size(2) for x in batch])
|
||||
max_ssl_len = int(2 * ((max_ssl_len // 2) + 1))
|
||||
@ -231,31 +216,22 @@ class TextAudioSpeakerCollate:
|
||||
row = batch[ids_sorted_decreasing[i]]
|
||||
|
||||
ssl = row[0]
|
||||
ssl_padded[i, :, : ssl.size(2)] = ssl[0, :, :]
|
||||
ssl_padded[i, :, :ssl.size(2)] = ssl[0, :, :]
|
||||
ssl_lengths[i] = ssl.size(2)
|
||||
|
||||
spec = row[1]
|
||||
spec_padded[i, :, : spec.size(1)] = spec
|
||||
spec_padded[i, :, :spec.size(1)] = spec
|
||||
spec_lengths[i] = spec.size(1)
|
||||
|
||||
wav = row[2]
|
||||
wav_padded[i, :, : wav.size(1)] = wav
|
||||
wav_padded[i, :, :wav.size(1)] = wav
|
||||
wav_lengths[i] = wav.size(1)
|
||||
|
||||
text = row[3]
|
||||
text_padded[i, : text.size(0)] = text
|
||||
text_padded[i, :text.size(0)] = text
|
||||
text_lengths[i] = text.size(0)
|
||||
|
||||
return (
|
||||
ssl_padded,
|
||||
ssl_lengths,
|
||||
spec_padded,
|
||||
spec_lengths,
|
||||
wav_padded,
|
||||
wav_lengths,
|
||||
text_padded,
|
||||
text_lengths,
|
||||
)
|
||||
return ssl_padded, ssl_lengths, spec_padded, spec_lengths, wav_padded, wav_lengths, text_padded, text_lengths
|
||||
|
||||
|
||||
class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
|
||||
@ -268,18 +244,9 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
|
||||
Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
dataset,
|
||||
batch_size,
|
||||
boundaries,
|
||||
num_replicas=None,
|
||||
rank=None,
|
||||
shuffle=True,
|
||||
):
|
||||
def __init__(self, dataset, batch_size, boundaries, num_replicas=None, rank=None, shuffle=True):
|
||||
super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
|
||||
self.lengths = dataset.lengths
|
||||
# print(233333333333333,self.lengths,dir(dataset))
|
||||
self.batch_size = batch_size
|
||||
self.boundaries = boundaries
|
||||
|
||||
@ -295,24 +262,22 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
|
||||
if idx_bucket != -1:
|
||||
buckets[idx_bucket].append(i)
|
||||
|
||||
for i in range(len(buckets) - 1, 0, -1):
|
||||
# for i in range(len(buckets) - 1, -1, -1):
|
||||
i = len(buckets) - 1
|
||||
while i >= 0:
|
||||
if len(buckets[i]) == 0:
|
||||
buckets.pop(i)
|
||||
self.boundaries.pop(i + 1)
|
||||
i -= 1
|
||||
|
||||
num_samples_per_bucket = []
|
||||
for i in range(len(buckets)):
|
||||
len_bucket = len(buckets[i])
|
||||
total_batch_size = self.num_replicas * self.batch_size
|
||||
rem = (
|
||||
total_batch_size - (len_bucket % total_batch_size)
|
||||
) % total_batch_size
|
||||
rem = (total_batch_size - (len_bucket % total_batch_size)) % total_batch_size
|
||||
num_samples_per_bucket.append(len_bucket + rem)
|
||||
return buckets, num_samples_per_bucket
|
||||
|
||||
def __iter__(self):
|
||||
# deterministically shuffle based on epoch
|
||||
g = torch.Generator()
|
||||
g.manual_seed(self.epoch)
|
||||
|
||||
@ -331,25 +296,13 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
|
||||
ids_bucket = indices[i]
|
||||
num_samples_bucket = self.num_samples_per_bucket[i]
|
||||
|
||||
# add extra samples to make it evenly divisible
|
||||
rem = num_samples_bucket - len_bucket
|
||||
ids_bucket = (
|
||||
ids_bucket
|
||||
+ ids_bucket * (rem // len_bucket)
|
||||
+ ids_bucket[: (rem % len_bucket)]
|
||||
)
|
||||
ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
|
||||
|
||||
# subsample
|
||||
ids_bucket = ids_bucket[self.rank :: self.num_replicas]
|
||||
ids_bucket = ids_bucket[self.rank::self.num_replicas]
|
||||
|
||||
# batching
|
||||
for j in range(len(ids_bucket) // self.batch_size):
|
||||
batch = [
|
||||
bucket[idx]
|
||||
for idx in ids_bucket[
|
||||
j * self.batch_size : (j + 1) * self.batch_size
|
||||
]
|
||||
]
|
||||
batch = [bucket[idx] for idx in ids_bucket[j * self.batch_size:(j + 1) * self.batch_size]]
|
||||
batches.append(batch)
|
||||
|
||||
if self.shuffle:
|
||||
@ -376,4 +329,4 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
|
||||
return -1
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples // self.batch_size
|
||||
return self.num_samples // self.batch_size
|
@ -5,6 +5,7 @@ import sys
|
||||
sovits_path = ""
|
||||
gpt_path = ""
|
||||
is_half = True
|
||||
is_share=False
|
||||
|
||||
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
||||
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
|
||||
|
25
docs/cn/Changelog_CN.md
Normal file
25
docs/cn/Changelog_CN.md
Normal file
@ -0,0 +1,25 @@
|
||||
### 20240121更新
|
||||
|
||||
1-config添加is_share,诸如colab等场景可以将此改为True,来使得webui映射到公网
|
||||
|
||||
2-WebUI添加英文系统英文翻译适配
|
||||
|
||||
3-cmd-asr自动判断是否已自带damo模型,如不在默认目录下,将从modelscope自动下载
|
||||
|
||||
4-[SoVITS训练报错ZeroDivisionError](https://github.com/RVC-Boss/GPT-SoVITS/issues/79) 尝试修复(过滤长度0的样本等)
|
||||
|
||||
5-清理TEMP文件夹缓存音频等文件
|
||||
|
||||
6-在参考音频结尾留空0.3s,削弱合成音频包含参考音频结尾的问题
|
||||
|
||||
待修复:
|
||||
|
||||
1-过短输出文件返回重复参考音频的问题
|
||||
|
||||
2-batch size超过训练集样本条数导致微调有问题
|
||||
|
||||
3-hubert提取在half下出现nan概率更高的问题
|
||||
|
||||
高优:
|
||||
|
||||
支持英文日文训练
|
@ -1,7 +1,7 @@
|
||||
<div align="center">
|
||||
|
||||
<h1>GPT-SoVITS-WebUI</h1>
|
||||
少样本强大的声音转换与文本到语音网络界面。<br><br>
|
||||
强大的少样本语音转换与语音合成Web用户界面。<br><br>
|
||||
|
||||
[](https://github.com/RVC-Boss/GPT-SoVITS)
|
||||
|
@ -20,3 +20,4 @@ transformers
|
||||
chardet
|
||||
PyYAML
|
||||
psutil
|
||||
jieba
|
||||
|
@ -6,11 +6,18 @@ import sys,os,traceback
|
||||
dir=sys.argv[1]
|
||||
# opt_name=dir.split("\\")[-1].split("/")[-1]
|
||||
opt_name=os.path.basename(dir)
|
||||
|
||||
path_asr='tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
|
||||
path_vad='tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
|
||||
path_punc='tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch'
|
||||
path_asr=path_asr if os.path.exists(path_asr)else "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
|
||||
path_vad=path_vad if os.path.exists(path_vad)else "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
||||
path_punc=path_punc if os.path.exists(path_punc)else "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
||||
inference_pipeline = pipeline(
|
||||
task=Tasks.auto_speech_recognition,
|
||||
model='tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
|
||||
vad_model='tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch',
|
||||
punc_model='tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
|
||||
model=path_asr,
|
||||
vad_model=path_vad,
|
||||
punc_model=path_punc,
|
||||
)
|
||||
|
||||
opt=[]
|
||||
|
@ -79,6 +79,7 @@ def b_change_index(index, batch):
|
||||
|
||||
|
||||
def b_next_index(index, batch):
|
||||
b_save_file()
|
||||
if (index + batch) <= g_max_json_index:
|
||||
return index + batch , *b_change_index(index + batch, batch)
|
||||
else:
|
||||
@ -86,6 +87,7 @@ def b_next_index(index, batch):
|
||||
|
||||
|
||||
def b_previous_index(index, batch):
|
||||
b_save_file()
|
||||
if (index - batch) >= 0:
|
||||
return index - batch , *b_change_index(index - batch, batch)
|
||||
else:
|
||||
@ -294,6 +296,7 @@ def set_global(load_json, load_list, json_key_text, json_key_path, batch):
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Process some integers.')
|
||||
parser.add_argument('--load_json', default="None", help='source file, like demo.json')
|
||||
parser.add_argument('--is_share', default="False", help='whether webui is_share=True')
|
||||
parser.add_argument('--load_list', default="None", help='source file, like demo.list')
|
||||
parser.add_argument('--webui_port_subfix', default=9871, help='source file, like demo.list')
|
||||
parser.add_argument('--json_key_text', default="text", help='the text key name in json, Default: text')
|
||||
@ -488,5 +491,6 @@ if __name__ == "__main__":
|
||||
server_name="0.0.0.0",
|
||||
inbrowser=True,
|
||||
quiet=True,
|
||||
share=eval(args.is_share),
|
||||
server_port=int(args.webui_port_subfix)
|
||||
)
|
@ -19,7 +19,8 @@ for name in os.listdir(weight_uvr5_root):
|
||||
|
||||
device=sys.argv[1]
|
||||
is_half=sys.argv[2]
|
||||
|
||||
webui_port_uvr5=int(sys.argv[3])
|
||||
is_share=eval(sys.argv[4])
|
||||
|
||||
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
|
||||
infos = []
|
||||
@ -179,6 +180,7 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(
|
||||
server_name="0.0.0.0",
|
||||
inbrowser=True,
|
||||
server_port=9873,
|
||||
share=is_share,
|
||||
server_port=webui_port_uvr5,
|
||||
quiet=True,
|
||||
)
|
19
webui.py
19
webui.py
@ -1,7 +1,7 @@
|
||||
import json,yaml,warnings,torch
|
||||
import platform
|
||||
import psutil
|
||||
import os
|
||||
import os,shutil
|
||||
import signal
|
||||
from tools import my_utils
|
||||
|
||||
@ -12,6 +12,12 @@ now_dir = os.getcwd()
|
||||
tmp = os.path.join(now_dir, "TEMP")
|
||||
os.makedirs(tmp, exist_ok=True)
|
||||
os.environ["TEMP"] = tmp
|
||||
if(os.path.exists(tmp)):
|
||||
for name in os.listdir(tmp):
|
||||
if(name=="jieba.cache"):continue
|
||||
path="%s/%s"%(tmp,name)
|
||||
delete=os.remove if os.path.isfile(path) else shutil.rmtree
|
||||
delete(path)
|
||||
import site
|
||||
site_packages_roots = []
|
||||
for path in site.getsitepackages():
|
||||
@ -34,7 +40,7 @@ import pdb
|
||||
import gradio as gr
|
||||
from subprocess import Popen
|
||||
import signal
|
||||
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix
|
||||
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
i18n = I18nAuto()
|
||||
from scipy.io import wavfile
|
||||
@ -120,7 +126,7 @@ def kill_process(pid):
|
||||
def change_label(if_label,path_list):
|
||||
global p_label
|
||||
if(if_label==True and p_label==None):
|
||||
cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s'%(python_exec,path_list,webui_port_subfix)
|
||||
cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share)
|
||||
yield i18n("打标工具WebUI已开启")
|
||||
print(cmd)
|
||||
p_label = Popen(cmd, shell=True)
|
||||
@ -132,7 +138,7 @@ def change_label(if_label,path_list):
|
||||
def change_uvr5(if_uvr5):
|
||||
global p_uvr5
|
||||
if(if_uvr5==True and p_uvr5==None):
|
||||
cmd = '"%s" tools/uvr5/webui.py "%s" %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5)
|
||||
cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share)
|
||||
yield i18n("UVR5已开启")
|
||||
print(cmd)
|
||||
p_uvr5 = Popen(cmd, shell=True)
|
||||
@ -151,6 +157,7 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path
|
||||
os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_number
|
||||
os.environ["is_half"]=str(is_half)
|
||||
os.environ["infer_ttswebui"]=str(webui_port_infer_tts)
|
||||
os.environ["is_share"]=str(is_share)
|
||||
cmd = '"%s" GPT_SoVITS/inference_webui.py'%(python_exec)
|
||||
yield i18n("TTS推理进程已开启")
|
||||
print(cmd)
|
||||
@ -659,7 +666,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
label=i18n("*训练集音频文件目录"),
|
||||
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
|
||||
interactive=True,
|
||||
placeholder=i18n("训练集音频文件目录 拼接 list文件里波形对应的文件名。")
|
||||
placeholder=i18n("训练集音频文件目录-拼接-list文件里波形对应的文件名(不是全路径)。")
|
||||
)
|
||||
gr.Markdown(value=i18n("1Aa-文本内容"))
|
||||
with gr.Row():
|
||||
@ -740,7 +747,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(
|
||||
server_name="0.0.0.0",
|
||||
inbrowser=True,
|
||||
share=True,
|
||||
share=is_share,
|
||||
server_port=webui_port_main,
|
||||
quiet=True,
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user