Merge branch 'RVC-Boss:main' into main

This commit is contained in:
梨梨梨 2024-01-22 02:03:46 +08:00 committed by GitHub
commit 032425857c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 115 additions and 110 deletions

View File

@ -12,6 +12,8 @@ bert_path = os.environ.get(
) )
infer_ttswebui = os.environ.get("infer_ttswebui", 9872) infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
infer_ttswebui = int(infer_ttswebui) infer_ttswebui = int(infer_ttswebui)
# Share flag for the WebUI, read from the "is_share" environment variable
# (defaults to "False" when unset).
# The value is external input, so it is parsed with ast.literal_eval rather
# than eval(): literal_eval accepts plain literals such as "True"/"False"
# but cannot execute arbitrary expressions.
import ast

is_share = os.environ.get("is_share", "False")
is_share = ast.literal_eval(is_share)
if "_CUDA_VISIBLE_DEVICES" in os.environ: if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"] os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
is_half = eval(os.environ.get("is_half", "True")) is_half = eval(os.environ.get("is_half", "True"))
@ -115,7 +117,6 @@ vq_model.eval()
print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
hz = 50 hz = 50
max_sec = config["data"]["max_sec"] max_sec = config["data"]["max_sec"]
# t2s_model = Text2SemanticLightningModule.load_from_checkpoint(checkpoint_path=gpt_path, config=config, map_location="cpu")#########todo
t2s_model = Text2SemanticLightningModule(config, "ojbk", is_train=False) t2s_model = Text2SemanticLightningModule(config, "ojbk", is_train=False)
t2s_model.load_state_dict(dict_s1["weight"]) t2s_model.load_state_dict(dict_s1["weight"])
if is_half == True: if is_half == True:
@ -149,13 +150,21 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
t0 = ttime() t0 = ttime()
prompt_text = prompt_text.strip("\n") prompt_text = prompt_text.strip("\n")
prompt_language, text = prompt_language, text.strip("\n") prompt_language, text = prompt_language, text.strip("\n")
zero_wav = np.zeros(
int(hps.data.sampling_rate * 0.3),
dtype=np.float16 if is_half == True else np.float32,
)
with torch.no_grad(): with torch.no_grad():
wav16k, sr = librosa.load(ref_wav_path, sr=16000) # 派蒙 wav16k, sr = librosa.load(ref_wav_path, sr=16000)
wav16k = torch.from_numpy(wav16k) wav16k = torch.from_numpy(wav16k)
zero_wav_torch = torch.from_numpy(zero_wav)
if is_half == True: if is_half == True:
wav16k = wav16k.half().to(device) wav16k = wav16k.half().to(device)
zero_wav_torch = zero_wav_torch.half().to(device)
else: else:
wav16k = wav16k.to(device) wav16k = wav16k.to(device)
zero_wav_torch = zero_wav_torch.to(device)
wav16k=torch.cat([wav16k,zero_wav_torch])
ssl_content = ssl_model.model(wav16k.unsqueeze(0))[ ssl_content = ssl_model.model(wav16k.unsqueeze(0))[
"last_hidden_state" "last_hidden_state"
].transpose( ].transpose(
@ -170,10 +179,6 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
phones1 = cleaned_text_to_sequence(phones1) phones1 = cleaned_text_to_sequence(phones1)
texts = text.split("\n") texts = text.split("\n")
audio_opt = [] audio_opt = []
zero_wav = np.zeros(
int(hps.data.sampling_rate * 0.3),
dtype=np.float16 if is_half == True else np.float32,
)
for text in texts: for text in texts:
# 解决输入目标文本的空行导致报错的问题 # 解决输入目标文本的空行导致报错的问题
if (len(text.strip()) == 0): if (len(text.strip()) == 0):

View File

@ -1,6 +1,8 @@
import time, logging import time
import logging
import os import os
import random, traceback import random
import traceback
import numpy as np import numpy as np
import torch import torch
import torch.utils.data import torch.utils.data
@ -12,15 +14,12 @@ from text import cleaned_text_to_sequence
from utils import load_wav_to_torch, load_filepaths_and_text from utils import load_wav_to_torch, load_filepaths_and_text
import torch.nn.functional as F import torch.nn.functional as F
from functools import lru_cache from functools import lru_cache
import torch
import requests import requests
from scipy.io import wavfile from scipy.io import wavfile
from io import BytesIO from io import BytesIO
# from config import exp_dir
from my_utils import load_audio from my_utils import load_audio
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
class TextAudioSpeakerLoader(torch.utils.data.Dataset): class TextAudioSpeakerLoader(torch.utils.data.Dataset):
""" """
1) loads audio, speaker_id, text pairs 1) loads audio, speaker_id, text pairs
@ -44,7 +43,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
for line in lines: for line in lines:
tmp = line.split("\t") tmp = line.split("\t")
if len(tmp) != 4: if (len(tmp) != 4):
continue continue
self.phoneme_data[tmp[0]] = [tmp[1]] self.phoneme_data[tmp[0]] = [tmp[1]]
@ -52,7 +51,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
tmp = self.audiopaths_sid_text tmp = self.audiopaths_sid_text
leng = len(tmp) leng = len(tmp)
min_num = 100 min_num = 100
if leng < min_num: if (leng < min_num):
self.audiopaths_sid_text = [] self.audiopaths_sid_text = []
for _ in range(max(2, int(min_num / leng))): for _ in range(max(2, int(min_num / leng))):
self.audiopaths_sid_text += tmp self.audiopaths_sid_text += tmp
@ -77,20 +76,28 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
for audiopath in tqdm(self.audiopaths_sid_text): for audiopath in tqdm(self.audiopaths_sid_text):
try: try:
phoneme = self.phoneme_data[audiopath][0] phoneme = self.phoneme_data[audiopath][0]
phoneme = phoneme.split(" ") phoneme = phoneme.split(' ')
phoneme_ids = cleaned_text_to_sequence(phoneme) phoneme_ids = cleaned_text_to_sequence(phoneme)
except Exception: except Exception:
print(f"{audiopath} not in self.phoneme_data !") print(f"{audiopath} not in self.phoneme_data !")
skipped_phone += 1 skipped_phone += 1
continue continue
size = os.path.getsize("%s/%s" % (self.path5, audiopath)) size = os.path.getsize("%s/%s" % (self.path5, audiopath))
duration = size / self.sampling_rate / 2 duration = size / self.sampling_rate / 2
if duration == 0:
print(f"Zero duration for {audiopath}, skipping...")
skipped_dur += 1
continue
if 54 > duration > 0.6 or self.val: if 54 > duration > 0.6 or self.val:
audiopaths_sid_text_new.append([audiopath, phoneme_ids]) audiopaths_sid_text_new.append([audiopath, phoneme_ids])
lengths.append(size // (2 * self.hop_length)) lengths.append(size // (2 * self.hop_length))
else: else:
skipped_dur += 1 skipped_dur += 1
continue continue
print("skipped_phone: ", skipped_phone, ", skipped_dur: ", skipped_dur) print("skipped_phone: ", skipped_phone, ", skipped_dur: ", skipped_dur)
print("total left: ", len(audiopaths_sid_text_new)) print("total left: ", len(audiopaths_sid_text_new))
assert len(audiopaths_sid_text_new) > 1 # 至少能凑够batch size这里todo assert len(audiopaths_sid_text_new) > 1 # 至少能凑够batch size这里todo
@ -103,10 +110,8 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
try: try:
spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath)) spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath))
with torch.no_grad(): with torch.no_grad():
ssl = torch.load( ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu")
"%s/%s.pt" % (self.path4, audiopath), map_location="cpu" if (ssl.shape[-1] != spec.shape[-1]):
)
if ssl.shape[-1] != spec.shape[-1]:
typee = ssl.dtype typee = ssl.dtype
ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee) ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee)
ssl.requires_grad = False ssl.requires_grad = False
@ -117,25 +122,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
ssl = torch.zeros(1, 768, 100) ssl = torch.zeros(1, 768, 100)
text = text[-1:] text = text[-1:]
print("load audio or ssl error!!!!!!", audiopath) print("load audio or ssl error!!!!!!", audiopath)
# print(ssl.requires_grad,spec.requires_grad,wav.requires_grad,text.requires_grad)
return (ssl, spec, wav, text) return (ssl, spec, wav, text)
def get_audio(self, filename):
    """Load one audio file and compute its spectrogram.

    Args:
        filename: path handed to load_audio together with self.sampling_rate.

    Returns:
        (spec, audio_norm): spec is the spectrogram_torch output with
        dimension 0 squeezed; audio_norm is the waveform as a FloatTensor
        with a leading dimension of size 1 added.
    """
    # load_audio already returns samples normalized to [-1, 1], so no
    # division by 32768 is needed here.
    samples = load_audio(filename, self.sampling_rate)
    audio_norm = torch.FloatTensor(samples).unsqueeze(0)
    spec = spectrogram_torch(
        audio_norm,
        self.filter_length,
        self.sampling_rate,
        self.hop_length,
        self.win_length,
        center=False,
    )
    return torch.squeeze(spec, 0), audio_norm
@ -152,14 +147,11 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
def random_slice(self, ssl, wav, mel): def random_slice(self, ssl, wav, mel):
assert abs(ssl.shape[-1] - wav.shape[-1] // self.hop_length) < 3, ( assert abs(ssl.shape[-1] - wav.shape[-1] // self.hop_length) < 3, (
"first", "first", ssl.shape, wav.shape)
ssl.shape,
wav.shape,
)
len_mel = mel.shape[1] len_mel = mel.shape[1]
if self.val: if self.val:
reference_mel = mel[:, : len_mel // 3] reference_mel = mel[:, :len_mel // 3]
return reference_mel, ssl, wav, mel return reference_mel, ssl, wav, mel
dir = random.randint(0, 1) dir = random.randint(0, 1)
sep_point = random.randint(int(len_mel // 3), int(len_mel // 3 * 2)) sep_point = random.randint(int(len_mel // 3), int(len_mel // 3 * 2))
@ -167,29 +159,22 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
if dir == 0: if dir == 0:
reference_mel = mel[:, :sep_point] reference_mel = mel[:, :sep_point]
ssl = ssl[:, :, sep_point:] ssl = ssl[:, :, sep_point:]
wav2 = wav[:, sep_point * self.hop_length :] wav2 = wav[:, sep_point * self.hop_length:]
mel = mel[:, sep_point:] mel = mel[:, sep_point:]
else: else:
reference_mel = mel[:, sep_point:] reference_mel = mel[:, sep_point:]
ssl = ssl[:, :, :sep_point] ssl = ssl[:, :, :sep_point]
wav2 = wav[:, : sep_point * self.hop_length] wav2 = wav[:, :sep_point * self.hop_length]
mel = mel[:, :sep_point] mel = mel[:, :sep_point]
assert abs(ssl.shape[-1] - wav2.shape[-1] // self.hop_length) < 3, ( assert abs(ssl.shape[-1] - wav2.shape[-1] // self.hop_length) < 3, (
ssl.shape, ssl.shape, wav.shape, wav2.shape, mel.shape, sep_point, self.hop_length, sep_point * self.hop_length, dir)
wav.shape,
wav2.shape,
mel.shape,
sep_point,
self.hop_length,
sep_point * self.hop_length,
dir,
)
return reference_mel, ssl, wav2, mel return reference_mel, ssl, wav2, mel
class TextAudioSpeakerCollate: class TextAudioSpeakerCollate():
"""Zero-pads model inputs and targets""" """ Zero-pads model inputs and targets
"""
def __init__(self, return_ids=False): def __init__(self, return_ids=False):
self.return_ids = return_ids self.return_ids = return_ids
@ -202,8 +187,8 @@ class TextAudioSpeakerCollate:
""" """
# Right zero-pad all one-hot text sequences to max input length # Right zero-pad all one-hot text sequences to max input length
_, ids_sorted_decreasing = torch.sort( _, ids_sorted_decreasing = torch.sort(
torch.LongTensor([x[1].size(1) for x in batch]), dim=0, descending=True torch.LongTensor([x[1].size(1) for x in batch]),
) dim=0, descending=True)
max_ssl_len = max([x[0].size(2) for x in batch]) max_ssl_len = max([x[0].size(2) for x in batch])
max_ssl_len = int(2 * ((max_ssl_len // 2) + 1)) max_ssl_len = int(2 * ((max_ssl_len // 2) + 1))
@ -231,31 +216,22 @@ class TextAudioSpeakerCollate:
row = batch[ids_sorted_decreasing[i]] row = batch[ids_sorted_decreasing[i]]
ssl = row[0] ssl = row[0]
ssl_padded[i, :, : ssl.size(2)] = ssl[0, :, :] ssl_padded[i, :, :ssl.size(2)] = ssl[0, :, :]
ssl_lengths[i] = ssl.size(2) ssl_lengths[i] = ssl.size(2)
spec = row[1] spec = row[1]
spec_padded[i, :, : spec.size(1)] = spec spec_padded[i, :, :spec.size(1)] = spec
spec_lengths[i] = spec.size(1) spec_lengths[i] = spec.size(1)
wav = row[2] wav = row[2]
wav_padded[i, :, : wav.size(1)] = wav wav_padded[i, :, :wav.size(1)] = wav
wav_lengths[i] = wav.size(1) wav_lengths[i] = wav.size(1)
text = row[3] text = row[3]
text_padded[i, : text.size(0)] = text text_padded[i, :text.size(0)] = text
text_lengths[i] = text.size(0) text_lengths[i] = text.size(0)
return ( return ssl_padded, ssl_lengths, spec_padded, spec_lengths, wav_padded, wav_lengths, text_padded, text_lengths
ssl_padded,
ssl_lengths,
spec_padded,
spec_lengths,
wav_padded,
wav_lengths,
text_padded,
text_lengths,
)
class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler): class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
@ -268,18 +244,9 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded. Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded.
""" """
def __init__( def __init__(self, dataset, batch_size, boundaries, num_replicas=None, rank=None, shuffle=True):
self,
dataset,
batch_size,
boundaries,
num_replicas=None,
rank=None,
shuffle=True,
):
super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
self.lengths = dataset.lengths self.lengths = dataset.lengths
# print(233333333333333,self.lengths,dir(dataset))
self.batch_size = batch_size self.batch_size = batch_size
self.boundaries = boundaries self.boundaries = boundaries
@ -295,24 +262,22 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
if idx_bucket != -1: if idx_bucket != -1:
buckets[idx_bucket].append(i) buckets[idx_bucket].append(i)
for i in range(len(buckets) - 1, 0, -1): i = len(buckets) - 1
# for i in range(len(buckets) - 1, -1, -1): while i >= 0:
if len(buckets[i]) == 0: if len(buckets[i]) == 0:
buckets.pop(i) buckets.pop(i)
self.boundaries.pop(i + 1) self.boundaries.pop(i + 1)
i -= 1
num_samples_per_bucket = [] num_samples_per_bucket = []
for i in range(len(buckets)): for i in range(len(buckets)):
len_bucket = len(buckets[i]) len_bucket = len(buckets[i])
total_batch_size = self.num_replicas * self.batch_size total_batch_size = self.num_replicas * self.batch_size
rem = ( rem = (total_batch_size - (len_bucket % total_batch_size)) % total_batch_size
total_batch_size - (len_bucket % total_batch_size)
) % total_batch_size
num_samples_per_bucket.append(len_bucket + rem) num_samples_per_bucket.append(len_bucket + rem)
return buckets, num_samples_per_bucket return buckets, num_samples_per_bucket
def __iter__(self): def __iter__(self):
# deterministically shuffle based on epoch
g = torch.Generator() g = torch.Generator()
g.manual_seed(self.epoch) g.manual_seed(self.epoch)
@ -331,25 +296,13 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
ids_bucket = indices[i] ids_bucket = indices[i]
num_samples_bucket = self.num_samples_per_bucket[i] num_samples_bucket = self.num_samples_per_bucket[i]
# add extra samples to make it evenly divisible
rem = num_samples_bucket - len_bucket rem = num_samples_bucket - len_bucket
ids_bucket = ( ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
ids_bucket
+ ids_bucket * (rem // len_bucket)
+ ids_bucket[: (rem % len_bucket)]
)
# subsample ids_bucket = ids_bucket[self.rank::self.num_replicas]
ids_bucket = ids_bucket[self.rank :: self.num_replicas]
# batching
for j in range(len(ids_bucket) // self.batch_size): for j in range(len(ids_bucket) // self.batch_size):
batch = [ batch = [bucket[idx] for idx in ids_bucket[j * self.batch_size:(j + 1) * self.batch_size]]
bucket[idx]
for idx in ids_bucket[
j * self.batch_size : (j + 1) * self.batch_size
]
]
batches.append(batch) batches.append(batch)
if self.shuffle: if self.shuffle:
@ -376,4 +329,4 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
return -1 return -1
def __len__(self): def __len__(self):
return self.num_samples // self.batch_size return self.num_samples // self.batch_size

View File

@ -5,6 +5,7 @@ import sys
sovits_path = "" sovits_path = ""
gpt_path = "" gpt_path = ""
is_half = True is_half = True
is_share=False
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base" cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"

25
docs/cn/Changelog_CN.md Normal file
View File

@ -0,0 +1,25 @@
### 20240121更新
1-config添加is_share,诸如colab等场景可以将此改为True,来使得webui映射到公网
2-WebUI添加英文系统英文翻译适配
3-cmd-asr自动判断是否已自带damo模型,如不在默认目录上将从modelscope自动下载
4-[SoVITS训练报错ZeroDivisionError](https://github.com/RVC-Boss/GPT-SoVITS/issues/79) 尝试修复(过滤长度0的样本等)
5-清理TEMP文件夹缓存音频等文件
6-在参考音频结尾留空0.3s,削弱合成音频包含参考音频结尾的问题
待修复:
1-过短输出文件返回重复参考音频的问题
2-batch size超过条数导致微调有问题
3-hubert提取在half下出现nan概率更高的问题
高优:
支持英文日文训练

View File

@ -1,7 +1,7 @@
<div align="center"> <div align="center">
<h1>GPT-SoVITS-WebUI</h1> <h1>GPT-SoVITS-WebUI</h1>
少样本强大的声音转换与文本到语音网络界面。<br><br> 强大的少样本语音转换与语音合成Web用户界面。<br><br>
[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange
)](https://github.com/RVC-Boss/GPT-SoVITS) )](https://github.com/RVC-Boss/GPT-SoVITS)

View File

@ -20,3 +20,4 @@ transformers
chardet chardet
PyYAML PyYAML
psutil psutil
jieba

View File

@ -6,11 +6,18 @@ import sys,os,traceback
dir=sys.argv[1] dir=sys.argv[1]
# opt_name=dir.split("\\")[-1].split("/")[-1] # opt_name=dir.split("\\")[-1].split("/")[-1]
opt_name=os.path.basename(dir) opt_name=os.path.basename(dir)
def _local_or_hub(local_dir, hub_id):
    """Return local_dir when it exists on disk, otherwise hub_id."""
    if os.path.exists(local_dir):
        return local_dir
    return hub_id


# Prefer the model directories bundled under tools/damo_asr/models; when one
# is missing, fall back to the corresponding "damo/..." model id
# (presumably resolved and downloaded by modelscope -- confirm).
path_asr = _local_or_hub(
    'tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
)
path_vad = _local_or_hub(
    'tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch',
    "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
)
path_punc = _local_or_hub(
    'tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
    "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
)
inference_pipeline = pipeline( inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition, task=Tasks.auto_speech_recognition,
model='tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch', model=path_asr,
vad_model='tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch', vad_model=path_vad,
punc_model='tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch', punc_model=path_punc,
) )
opt=[] opt=[]

View File

@ -79,6 +79,7 @@ def b_change_index(index, batch):
def b_next_index(index, batch): def b_next_index(index, batch):
b_save_file()
if (index + batch) <= g_max_json_index: if (index + batch) <= g_max_json_index:
return index + batch , *b_change_index(index + batch, batch) return index + batch , *b_change_index(index + batch, batch)
else: else:
@ -86,6 +87,7 @@ def b_next_index(index, batch):
def b_previous_index(index, batch): def b_previous_index(index, batch):
b_save_file()
if (index - batch) >= 0: if (index - batch) >= 0:
return index - batch , *b_change_index(index - batch, batch) return index - batch , *b_change_index(index - batch, batch)
else: else:
@ -294,6 +296,7 @@ def set_global(load_json, load_list, json_key_text, json_key_path, batch):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process some integers.') parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--load_json', default="None", help='source file, like demo.json') parser.add_argument('--load_json', default="None", help='source file, like demo.json')
parser.add_argument('--is_share', default="False", help='whether webui is_share=True')
parser.add_argument('--load_list', default="None", help='source file, like demo.list') parser.add_argument('--load_list', default="None", help='source file, like demo.list')
parser.add_argument('--webui_port_subfix', default=9871, help='source file, like demo.list') parser.add_argument('--webui_port_subfix', default=9871, help='source file, like demo.list')
parser.add_argument('--json_key_text', default="text", help='the text key name in json, Default: text') parser.add_argument('--json_key_text', default="text", help='the text key name in json, Default: text')
@ -488,5 +491,6 @@ if __name__ == "__main__":
server_name="0.0.0.0", server_name="0.0.0.0",
inbrowser=True, inbrowser=True,
quiet=True, quiet=True,
share=eval(args.is_share),
server_port=int(args.webui_port_subfix) server_port=int(args.webui_port_subfix)
) )

View File

@ -19,7 +19,8 @@ for name in os.listdir(weight_uvr5_root):
device=sys.argv[1] device=sys.argv[1]
is_half=sys.argv[2] is_half=sys.argv[2]
# Positional CLI arguments: argv[3] is the port the UVR5 WebUI listens on,
# argv[4] is the share flag.
# The flag is parsed with ast.literal_eval rather than eval() so that a
# command-line string can never be executed as code; "True"/"False" parse
# exactly as before.
import ast

webui_port_uvr5 = int(sys.argv[3])
is_share = ast.literal_eval(sys.argv[4])
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
infos = [] infos = []
@ -179,6 +180,7 @@ with gr.Blocks(title="RVC WebUI") as app:
app.queue(concurrency_count=511, max_size=1022).launch( app.queue(concurrency_count=511, max_size=1022).launch(
server_name="0.0.0.0", server_name="0.0.0.0",
inbrowser=True, inbrowser=True,
server_port=9873, share=is_share,
server_port=webui_port_uvr5,
quiet=True, quiet=True,
) )

View File

@ -1,7 +1,7 @@
import json,yaml,warnings,torch import json,yaml,warnings,torch
import platform import platform
import psutil import psutil
import os import os,shutil
import signal import signal
from tools import my_utils from tools import my_utils
@ -12,6 +12,12 @@ now_dir = os.getcwd()
tmp = os.path.join(now_dir, "TEMP") tmp = os.path.join(now_dir, "TEMP")
os.makedirs(tmp, exist_ok=True) os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp os.environ["TEMP"] = tmp
# Remove everything left in the TEMP directory except the "jieba.cache"
# entry.
# Cleanup is best-effort: a file that cannot be deleted (locked, permission
# denied, removed concurrently) is reported and skipped instead of aborting
# start-up.
if os.path.exists(tmp):
    for name in os.listdir(tmp):
        if name == "jieba.cache":
            continue
        path = "%s/%s" % (tmp, name)
        try:
            # shutil.rmtree refuses symlinks, so only real directories go to
            # it; files, symlinks and other entries are unlinked.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
        except OSError as e:
            print("failed to clean %s: %s" % (path, e))
import site import site
site_packages_roots = [] site_packages_roots = []
for path in site.getsitepackages(): for path in site.getsitepackages():
@ -34,7 +40,7 @@ import pdb
import gradio as gr import gradio as gr
from subprocess import Popen from subprocess import Popen
import signal import signal
from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share
from tools.i18n.i18n import I18nAuto from tools.i18n.i18n import I18nAuto
i18n = I18nAuto() i18n = I18nAuto()
from scipy.io import wavfile from scipy.io import wavfile
@ -120,7 +126,7 @@ def kill_process(pid):
def change_label(if_label,path_list): def change_label(if_label,path_list):
global p_label global p_label
if(if_label==True and p_label==None): if(if_label==True and p_label==None):
cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s'%(python_exec,path_list,webui_port_subfix) cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share)
yield i18n("打标工具WebUI已开启") yield i18n("打标工具WebUI已开启")
print(cmd) print(cmd)
p_label = Popen(cmd, shell=True) p_label = Popen(cmd, shell=True)
@ -132,7 +138,7 @@ def change_label(if_label,path_list):
def change_uvr5(if_uvr5): def change_uvr5(if_uvr5):
global p_uvr5 global p_uvr5
if(if_uvr5==True and p_uvr5==None): if(if_uvr5==True and p_uvr5==None):
cmd = '"%s" tools/uvr5/webui.py "%s" %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5) cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share)
yield i18n("UVR5已开启") yield i18n("UVR5已开启")
print(cmd) print(cmd)
p_uvr5 = Popen(cmd, shell=True) p_uvr5 = Popen(cmd, shell=True)
@ -151,6 +157,7 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path
os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_number os.environ["_CUDA_VISIBLE_DEVICES"]=gpu_number
os.environ["is_half"]=str(is_half) os.environ["is_half"]=str(is_half)
os.environ["infer_ttswebui"]=str(webui_port_infer_tts) os.environ["infer_ttswebui"]=str(webui_port_infer_tts)
os.environ["is_share"]=str(is_share)
cmd = '"%s" GPT_SoVITS/inference_webui.py'%(python_exec) cmd = '"%s" GPT_SoVITS/inference_webui.py'%(python_exec)
yield i18n("TTS推理进程已开启") yield i18n("TTS推理进程已开启")
print(cmd) print(cmd)
@ -659,7 +666,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
label=i18n("*训练集音频文件目录"), label=i18n("*训练集音频文件目录"),
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
interactive=True, interactive=True,
placeholder=i18n("训练集音频文件目录 拼接 list文件里波形对应的文件名") placeholder=i18n("训练集音频文件目录-拼接-list文件里波形对应的文件名不是全路径")
) )
gr.Markdown(value=i18n("1Aa-文本内容")) gr.Markdown(value=i18n("1Aa-文本内容"))
with gr.Row(): with gr.Row():
@ -740,7 +747,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
app.queue(concurrency_count=511, max_size=1022).launch( app.queue(concurrency_count=511, max_size=1022).launch(
server_name="0.0.0.0", server_name="0.0.0.0",
inbrowser=True, inbrowser=True,
share=True, share=is_share,
server_port=webui_port_main, server_port=webui_port_main,
quiet=True, quiet=True,
) )