Make Gradio Great Again
Includes a VS2017 download link
Added checks for the various datasets; a warning pops up if anything is missing
Fixed the bug where warnings popped up spuriously
Fixed the bug where only one reference audio could be uploaded for timbre blending
Fixed the import error when downloading the G2PW Model
Fixed the preprocessing-stage error when training on Windows without an NVIDIA card (see the sketch below)
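The Windows no-NVIDIA fix is the same one-line guard applied to each preprocessing script in the hunks below: only forward `_CUDA_VISIBLE_DEVICES` when the launcher actually set it. A minimal before/after sketch (variable names are the repo's own; the failure mode is inferred from the commit message):

```python
import os

# Old behaviour: if the launcher never exported _CUDA_VISIBLE_DEVICES
# (e.g. a Windows machine without an NVIDIA card), os.environ.get(...) returns
# None, and assigning None into os.environ raises a TypeError during preprocessing.
# os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")

# New behaviour: propagate the variable only when it exists.
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
```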
This commit is contained in:
XXXXRT666 2024-08-13 16:31:12 +08:00 committed by GitHub
parent 7cfe578968
commit ea56b814bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
27 changed files with 447 additions and 181 deletions

View File

@ -1,5 +1,5 @@
import os, sys
now_dir = os.getcwd()
sys.path.insert(0, now_dir)
from .text.g2pw import G2PWPinyin
from text.g2pw import G2PWPinyin
g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",v_to_u=False, neutral_tone_with_five=True)

View File

@ -695,13 +695,14 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.Row():
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频超过会报错"), type="filepath", scale=13)
with gr.Column(scale=13):
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True,scale=1)
gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=3, max_lines=3)
prompt_language = gr.Dropdown(
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
)
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="file_count",scale=13)
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1)
with gr.Column(scale=14):
prompt_language = gr.Dropdown(
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"),
)
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="multiple")
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
with gr.Row():
with gr.Column(scale=13):
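This hunk is the fix for the "only one reference audio" bug from the commit message: `gr.File`'s `file_count` parameter accepts `"single"`, `"multiple"`, or `"directory"`, so the old literal `"file_count"` was not a valid mode. A trimmed, hypothetical sketch of the corrected component (the real label text and scale are kept in the diff above):

```python
import gradio as gr

# Hypothetical shortened label; the actual i18n label text is shown in the hunk above.
inp_refs = gr.File(
    label="Optional: upload multiple reference audios (timbres are averaged)",
    file_count="multiple",  # valid values: "single" | "multiple" | "directory"
)
```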

View File

@ -7,7 +7,8 @@ inp_wav_dir = os.environ.get("inp_wav_dir")
exp_name = os.environ.get("exp_name")
i_part = os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
import torch

View File

@ -6,7 +6,8 @@ inp_wav_dir= os.environ.get("inp_wav_dir")
exp_name= os.environ.get("exp_name")
i_part= os.environ.get("i_part")
all_parts= os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"]= os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
from feature_extractor import cnhubert
opt_dir= os.environ.get("opt_dir")
cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")

View File

@ -4,7 +4,8 @@ inp_text = os.environ.get("inp_text")
exp_name = os.environ.get("exp_name")
i_part = os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
pretrained_s2G = os.environ.get("pretrained_s2G")
s2config_path = os.environ.get("s2config_path")

View File

@ -18,7 +18,7 @@ logging.getLogger("matplotlib").setLevel(logging.ERROR)
MATPLOTLIB_FLAG = False
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
logger = logging
@ -319,13 +319,13 @@ def check_git_hash(model_dir):
def get_logger(model_dir, filename="train.log"):
global logger
logger = logging.getLogger(os.path.basename(model_dir))
logger.setLevel(logging.DEBUG)
logger.setLevel(logging.ERROR)
formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
if not os.path.exists(model_dir):
os.makedirs(model_dir)
h = logging.FileHandler(os.path.join(model_dir, filename))
h.setLevel(logging.DEBUG)
h.setLevel(logging.ERROR)
h.setFormatter(formatter)
logger.addHandler(h)
return logger

View File

@ -24,7 +24,7 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
2. **Few-shot TTS:** Fine-tune the model with just 1 minute of training data for improved voice similarity and realism.
3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, and Chinese.
3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, Korean, Cantonese and Chinese.
4. **WebUI Tools:** Integrated tools include voice accompaniment separation, automatic training set segmentation, Chinese ASR, and text labeling, assisting beginners in creating training datasets and GPT/SoVITS models.
@ -99,7 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
Install [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Korean TTS Only)
Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
##### MacOS Users
```bash

View File

@ -24,7 +24,7 @@
2. **少样本 TTS** 仅需 1 分钟的训练数据即可微调模型,提升声音相似度和真实感。
3. **跨语言支持:** 支持与训练数据集不同语言的推理,目前支持英语、日语和中文。
3. **跨语言支持:** 支持与训练数据集不同语言的推理,目前支持英语、日语、韩语、粤语和中文。
4. **WebUI 工具:** 集成工具包括声音伴奏分离、自动训练集分割、中文自动语音识别(ASR)和文本标注,协助初学者创建训练数据集和 GPT/SoVITS 模型。
@ -99,7 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
安装 [Visual Studio 2022](https://visualstudio.microsoft.com/zh-hans/downloads/) 环境(仅限韩语TTS)
安装 [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 环境(仅限韩语TTS)
##### MacOS 用户
```bash

View File

@ -24,7 +24,7 @@
2. **数ショット TTS:** わずか 1 分間のトレーニングデータでモデルを微調整し、音声の類似性とリアリズムを向上。
3. **多言語サポート:** 現在、英語、日本語、中国語をサポートしています。
3. **多言語サポート:** 現在、英語、日本語、韓語、粵語、中国語をサポートしています。
4. **WebUI ツール:** 統合されたツールには、音声伴奏の分離、トレーニングセットの自動セグメンテーション、中国語 ASR、テキストラベリングが含まれ、初心者がトレーニングデータセットと GPT/SoVITS モデルを作成するのを支援します。

View File

@ -24,7 +24,7 @@
2. **소량의 데이터 TTS:** 1분의 훈련 데이터만으로 모델을 미세 조정하여 음성 유사도와 실제감을 향상시킬 수 있습니다.
3. **다국어 지원:** 훈련 데이터셋과 다른 언어의 추론을 지원하며, 현재 영어, 일본어, 중국어를 지원합니다.
3. **다국어 지원:** 훈련 데이터셋과 다른 언어의 추론을 지원하며, 현재 영어, 일본어, 중국어, 광둥어, 한국어를 지원합니다.
4. **WebUI 도구:** 음성 반주 분리, 자동 훈련 데이터셋 분할, 중국어 자동 음성 인식(ASR) 및 텍스트 주석 등의 도구를 통합하여 초보자가 훈련 데이터셋과 GPT/SoVITS 모델을 생성하는 데 도움을 줍니다.

View File

@ -24,7 +24,7 @@ Güçlü Birkaç Örnekli Ses Dönüştürme ve Metinden Konuşmaya Web Arayüz
2. **Birkaç Örnekli Metinden Konuşmaya:** Daha iyi ses benzerliği ve gerçekçiliği için modeli yalnızca 1 dakikalık eğitim verisiyle ince ayarlayın.
3. **Çapraz Dil Desteği:** Eğitim veri setinden farklı dillerde çıkarım, şu anda İngilizce, Japonca ve Çinceyi destekliyor.
3. **Çapraz Dil Desteği:** Eğitim veri setinden farklı dillerde çıkarım, şu anda İngilizce, Japonca, Çince, Kantonca ve Koreceyi destekliyor.
4. **Web Arayüzü Araçları:** Entegre araçlar arasında vokal eşliğinde ayırma, otomatik eğitim seti segmentasyonu, Çince ASR ve metin etiketleme bulunur ve yeni başlayanların eğitim veri setleri ve GPT/SoVITS modelleri oluşturmalarına yardımcı olur.

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Pretrained SSL model path",
"预训练的SoVITS-D模型路径": "Pretrained SoVITS-D model path",
"预训练的SoVITS-G模型路径": "Pretrained SoVITS-G model path",
"预训练的中文BERT模型路径": " Pretrained BERT model path"
"预训练的中文BERT模型路径": " Pretrained BERT model path",
"音频加载失败": "Failed to Load Audio",
"以下文件或文件夹不存在": "No Such File or Folder",
"路径不能为空": "Expected No Empty Path",
"请填入正确的List路径": "Please Fill in the Correct List Path",
"请填入正确的音频文件夹路径": "Please Fill in the Correct Audio Folder Path",
"路径错误": "Path Error",
"缺少音素数据集": "Missing Phoneme Dataset",
"缺少Hubert数据集": "Missing Hubert Dataset",
"缺少音频数据集": "Missing Audio Dataset",
"缺少语义数据集": "Missing Semantics Dataset"
}
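These new keys back the dataset-check warnings added in this commit. A minimal sketch of how they surface in the WebUI, assuming this file is the `en_US` locale loaded by `I18nAuto` (the `gr.Warning(i18n(...))` pattern is taken from the `tools/my_utils.py` hunk further down):

```python
import gradio as gr
from tools.i18n.i18n import I18nAuto

i18n = I18nAuto(language="en_US")  # assumed: "en_US" maps to this locale file

def warn_missing_phonemes():
    # Called from a Gradio event handler, this shows a toast with the localized text.
    gr.Warning(i18n("缺少音素数据集"))  # "Missing Phoneme Dataset"
```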

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Ruta del modelo SSL preentrenado",
"预训练的SoVITS-D模型路径": "Ruta del modelo SoVITS-D preentrenado",
"预训练的SoVITS-G模型路径": "Ruta del modelo SoVITS-G preentrenado",
"预训练的中文BERT模型路径": "Ruta del modelo BERT en chino preentrenado"
}
"预训练的中文BERT模型路径": "Ruta del modelo BERT en chino preentrenado",
"音频加载失败": "Error al Cargar el Audio",
"以下文件或文件夹不存在": "No Existe Tal Archivo o Carpeta",
"路径不能为空": "Se Espera que la Ruta No Esté Vacía",
"请填入正确的List路径": "Por Favor, Introduzca la Ruta Correcta de la Lista",
"请填入正确的音频文件夹路径": "Por Favor, Introduzca la Ruta Correcta de la Carpeta de Audio",
"路径错误": "Error de Ruta",
"缺少音素数据集": "Falta el Conjunto de Datos de Fonemas",
"缺少Hubert数据集": "Falta el Conjunto de Datos de Hubert",
"缺少音频数据集": "Falta el Conjunto de Datos de Audio",
"缺少语义数据集": "Falta el Conjunto de Datos Semánticos"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Chemin du modèle SSL pré-entraîné",
"预训练的SoVITS-D模型路径": "Chemin du modèle SoVITS-D pré-entraîné",
"预训练的SoVITS-G模型路径": "Chemin du modèle SoVITS-G pré-entraîné",
"预训练的中文BERT模型路径": "Chemin du modèle BERT chinois pré-entraîné"
}
"预训练的中文BERT模型路径": "Chemin du modèle BERT chinois pré-entraîné",
"音频加载失败": "Échec du Chargement de l'Audio",
"以下文件或文件夹不存在": "Aucun Fichier ou Dossier de ce Type",
"路径不能为空": "Chemin Non Vide Attendu",
"请填入正确的List路径": "Veuillez Remplir le Chemin Correct de la Liste",
"请填入正确的音频文件夹路径": "Veuillez Remplir le Chemin Correct du Dossier Audio",
"路径错误": "Erreur de Chemin",
"缺少音素数据集": "Jeu de Données de Phonèmes Manquant",
"缺少Hubert数据集": "Jeu de Données Hubert Manquant",
"缺少音频数据集": "Jeu de Données Audio Manquant",
"缺少语义数据集": "Jeu de Données Sémantiques Manquant"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Percorso del modello SSL preaddestrato",
"预训练的SoVITS-D模型路径": "Percorso del modello preaddestrato SoVITS-D",
"预训练的SoVITS-G模型路径": "Percorso del modello preaddestrato SoVITS-G",
"预训练的中文BERT模型路径": "Percorso del modello BERT cinese preaddestrato"
}
"预训练的中文BERT模型路径": "Percorso del modello BERT cinese preaddestrato",
"音频加载失败": "Caricamento Audio Fallito",
"以下文件或文件夹不存在": "Nessun File o Cartella di Questo Tipo",
"路径不能为空": "Percorso Vuoto Non Consentito",
"请填入正确的List路径": "Si Prega di Inserire il Percorso Corretto della Lista",
"请填入正确的音频文件夹路径": "Si Prega di Inserire il Percorso Corretto della Cartella Audio",
"路径错误": "Errore di Percorso",
"缺少音素数据集": "Dataset di Fonemi Mancante",
"缺少Hubert数据集": "Dataset di Hubert Mancante",
"缺少音频数据集": "Dataset Audio Mancante",
"缺少语义数据集": "Dataset Semantico Mancante"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "事前にトレーニングされたSSLモデルのパス",
"预训练的SoVITS-D模型路径": "事前にトレーニングされたSoVITS-Dモデルのパス",
"预训练的SoVITS-G模型路径": "事前にトレーニングされたSoVITS-Gモデルのパス",
"预训练的中文BERT模型路径": "事前にトレーニングされた中文BERTモデルのパス"
}
"预训练的中文BERT模型路径": "事前にトレーニングされた中文BERTモデルのパス",
"音频加载失败": "音声の読み込みに失敗しました",
"以下文件或文件夹不存在": "そのようなファイルまたはフォルダは存在しません",
"路径不能为空": "空のパスは予期されていません",
"请填入正确的List路径": "正しいリストパスを入力してください",
"请填入正确的音频文件夹路径": "正しいオーディオフォルダパスを入力してください",
"路径错误": "パスエラー",
"缺少音素数据集": "音素データセットが欠落しています",
"缺少Hubert数据集": "Hubertデータセットが欠落しています",
"缺少音频数据集": "オーディオデータセットが欠落しています",
"缺少语义数据集": "セマンティクスデータセットが欠落しています"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "사전 훈련된 SSL 모델 경로",
"预训练的SoVITS-D模型路径": "사전 훈련된 SoVITS-D 모델 경로",
"预训练的SoVITS-G模型路径": "사전 훈련된 SoVITS-G 모델 경로",
"预训练的中文BERT模型路径": "사전 훈련된 중국어 BERT 모델 경로"
}
"预训练的中文BERT模型路径": "사전 훈련된 중국어 BERT 모델 경로",
"音频加载失败": "오디오 로드 실패",
"以下文件或文件夹不存在": "해당 파일이나 폴더가 없습니다",
"路径不能为空": "경로가 비어 있을 수 없습니다",
"请填入正确的List路径": "올바른 리스트 경로를 입력하세요",
"请填入正确的音频文件夹路径": "올바른 오디오 폴더 경로를 입력하세요",
"路径错误": "경로 오류",
"缺少音素数据集": "음소 데이터셋이 없습니다",
"缺少Hubert数据集": "Hubert 데이터셋이 없습니다",
"缺少音频数据集": "오디오 데이터셋이 없습니다",
"缺少语义数据集": "의미론 데이터셋이 없습니다"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Caminho do modelo SSL pre-train",
"预训练的SoVITS-D模型路径": "Caminho do modelo SoVITS-D pre-train",
"预训练的SoVITS-G模型路径": "Caminho do modelo SoVITS-G pre-train",
"预训练的中文BERT模型路径": "Caminho do modelo BERT chinês pre-train"
}
"预训练的中文BERT模型路径": "Caminho do modelo BERT chinês pre-train",
"音频加载失败": "Falha ao Carregar o Áudio",
"以下文件或文件夹不存在": "Nenhum Arquivo ou Pasta Encontrado",
"路径不能为空": "Esperado Caminho Não Vazio",
"请填入正确的List路径": "Por Favor, Insira o Caminho Correto da Lista",
"请填入正确的音频文件夹路径": "Por Favor, Insira o Caminho Correto da Pasta de Áudio",
"路径错误": "Erro de Caminho",
"缺少音素数据集": "Conjunto de Dados de Fonemas Ausente",
"缺少Hubert数据集": "Conjunto de Dados Hubert Ausente",
"缺少音频数据集": "Conjunto de Dados de Áudio Ausente",
"缺少语义数据集": "Conjunto de Dados Semânticos Ausente"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Путь к предварительно обученной модели SSL",
"预训练的SoVITS-D模型路径": "Путь к предварительно обученной модели SoVITS-D",
"预训练的SoVITS-G模型路径": "Путь к предварительно обученной модели SoVITS-G",
"预训练的中文BERT模型路径": "Путь к предварительно обученной китайской модели BERT"
}
"预训练的中文BERT模型路径": "Путь к предварительно обученной китайской модели BERT",
"音频加载失败": "Не удалось загрузить аудио",
"以下文件或文件夹不存在": "Такого файла или папки не существует",
"路径不能为空": "Ожидается, что путь не будет пустым",
"请填入正确的List路径": "Пожалуйста, укажите правильный путь к списку",
"请填入正确的音频文件夹路径": "Пожалуйста, укажите правильный путь к папке с аудио",
"路径错误": "Ошибка пути",
"缺少音素数据集": "Отсутствует набор данных фонем",
"缺少Hubert数据集": "Отсутствует набор данных Hubert",
"缺少音频数据集": "Отсутствует набор данных аудио",
"缺少语义数据集": "Отсутствует семантический набор данных"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Ön eğitilmiş SSL model yolu",
"预训练的SoVITS-D模型路径": "Ön eğitilmiş SoVITS-D model yolu",
"预训练的SoVITS-G模型路径": "Ön eğitilmiş SoVITS-G model yolu",
"预训练的中文BERT模型路径": "Ön eğitilmiş Çince BERT model yolu"
}
"预训练的中文BERT模型路径": "Ön eğitilmiş Çince BERT model yolu",
"音频加载失败": "Ses Yüklenemedi",
"以下文件或文件夹不存在": "Böyle Bir Dosya veya Klasör Yok",
"路径不能为空": "Boş Yol Beklenmiyor",
"请填入正确的List路径": "Lütfen Doğru Liste Yolunu Girin",
"请填入正确的音频文件夹路径": "Lütfen Doğru Ses Klasörü Yolunu Girin",
"路径错误": "Yol Hatası",
"缺少音素数据集": "Fonem Veri Seti Eksik",
"缺少Hubert数据集": "Hubert Veri Seti Eksik",
"缺少音频数据集": "Ses Veri Seti Eksik",
"缺少语义数据集": "Anlamsal Veri Seti Eksik"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "预训练的SSL模型路径",
"预训练的SoVITS-D模型路径": "预训练的SoVITS-D模型路径",
"预训练的SoVITS-G模型路径": "预训练的SoVITS-G模型路径",
"预训练的中文BERT模型路径": "预训练的中文BERT模型路径"
"预训练的中文BERT模型路径": "预训练的中文BERT模型路径",
"音频加载失败": "音频加载失败",
"以下文件或文件夹不存在": "以下文件或文件夹不存在",
"路径不能为空": "路径不能为空",
"请填入正确的List路径": "请填入正确的List路径",
"请填入正确的音频文件夹路径": "请填入正确的音频文件夹路径",
"路径错误": "路径错误",
"缺少音素数据集": "缺少音素数据集",
"缺少Hubert数据集": "缺少Hubert数据集",
"缺少音频数据集": "缺少音频数据集",
"缺少语义数据集": "缺少语义数据集"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

View File

@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

View File

@ -1,7 +1,10 @@
import platform,os,traceback
import ffmpeg
import numpy as np
import gradio as gr
from tools.i18n.i18n import I18nAuto
import pandas as pd
i18n = I18nAuto(language=os.environ.get('language','Auto'))
def load_audio(file, sr):
try:
@ -20,7 +23,7 @@ def load_audio(file, sr):
)
except Exception as e:
traceback.print_exc()
raise RuntimeError(f"Failed to load audio: {e}")
raise RuntimeError(i18n("音频加载失败"))
return np.frombuffer(out, np.float32).flatten()
@ -30,3 +33,83 @@ def clean_path(path_str:str):
return clean_path(path_str[0:-1])
path_str = path_str.replace('/', os.sep).replace('\\', os.sep)
return path_str.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")
def check_for_existance(file_list:list=None,is_train=False,is_dataset_processing=False):
files_status=[]
if is_train == True and file_list:
file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
file_list.append(os.path.join(file_list[0],'3-bert'))
file_list.append(os.path.join(file_list[0],'4-cnhubert'))
file_list.append(os.path.join(file_list[0],'5-wav32k'))
file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
for file in file_list:
if os.path.exists(file):files_status.append(True)
else:files_status.append(False)
if sum(files_status)!=len(files_status):
if is_train:
for file,status in zip(file_list,files_status):
if status:pass
else:gr.Warning(file)
gr.Warning(i18n('以下文件或文件夹不存在'))
return False
elif is_dataset_processing:
if files_status[0]:
return True
elif not files_status[0]:
gr.Warning(file_list[0])
elif not files_status[1] and file_list[1]:
gr.Warning(file_list[1])
gr.Warning(i18n('以下文件或文件夹不存在'))
return False
else:
if file_list[0]:
gr.Warning(file_list[0])
gr.Warning(i18n('以下文件或文件夹不存在'))
else:
gr.Warning(i18n('路径不能为空'))
return False
return True
def check_details(path_list=None,is_train=False,is_dataset_processing=False):
if is_dataset_processing:
list_path, audio_path = path_list
if (not list_path.endswith('.list')):
gr.Warning(i18n('请填入正确的List路径'))
return
if audio_path:
if not os.path.isdir(audio_path):
gr.Warning(i18n('请填入正确的音频文件夹路径'))
return
with open(list_path,"r",encoding="utf8")as f:
line=f.readline().strip("\n").split("\n")
wav_name, _, __, ___ = line[0].split("|")
wav_name=clean_path(wav_name)
if (audio_path != "" and audio_path != None):
wav_name = os.path.basename(wav_name)
wav_path = "%s/%s"%(audio_path, wav_name)
else:
wav_path=wav_name
if os.path.exists(wav_path):
...
else:
gr.Warning(i18n('路径错误'))
return
if is_train:
path_list.append(os.path.join(path_list[0],'2-name2text.txt'))
path_list.append(os.path.join(path_list[0],'4-cnhubert'))
path_list.append(os.path.join(path_list[0],'5-wav32k'))
path_list.append(os.path.join(path_list[0],'6-name2semantic.tsv'))
phone_path, hubert_path, wav_path, semantic_path = path_list[1:]
with open(phone_path,'r',encoding='utf-8') as f:
if f.read(1):...
else:gr.Warning(i18n('缺少音素数据集'))
if os.listdir(hubert_path):...
else:gr.Warning(i18n('缺少Hubert数据集'))
if os.listdir(wav_path):...
else:gr.Warning(i18n('缺少音频数据集'))
df = pd.read_csv(
semantic_path, delimiter="\t", encoding="utf-8"
)
if len(df) >= 1:...
else:gr.Warning(i18n('缺少语义数据集'))
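The two new helpers are meant to be chained, as the webui.py hunks below do: `check_for_existance` pops one warning per missing path and returns a boolean, and only if it returns True does `check_details` inspect the individual dataset files (presumably the warning-spam fix listed in the commit message). A usage sketch with placeholder paths:

```python
from tools.my_utils import check_for_existance, check_details

# Placeholder paths; webui.py passes the values typed into the UI.
inp_text = "logs/xxx/xxx.list"
inp_wav_dir = "output/slicer_opt"

if check_for_existance([inp_text, inp_wav_dir], is_dataset_processing=True):
    check_details([inp_text, inp_wav_dir], is_dataset_processing=True)
```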

View File

@ -25,6 +25,16 @@ is_half=eval(sys.argv[2])
webui_port_uvr5=int(sys.argv[3])
is_share=eval(sys.argv[4])
def html_left(text, label='p'):
return f"""<div style="text-align: left; margin: 0; padding: 0;">
<{label} style="margin: 0; padding: 0;">{text}</{label}>
</div>"""
def html_center(text, label='p'):
return f"""<div style="text-align: center; margin: 100; padding: 50;">
<{label} style="margin: 0; padding: 0;">{text}</{label}>
</div>"""
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
infos = []
try:
@ -116,11 +126,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
value=
i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
)
with gr.Tabs():
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
with gr.Group():
with gr.Group():
gr.Markdown(html_center(i18n("伴奏人声分离&去混响&去回声"),'h2'))
with gr.Group():
gr.Markdown(
value=i18n("人声伴奏分离批量处理, 使用UVR5模型。") + "<br>" + \
value=html_left(i18n("人声伴奏分离批量处理, 使用UVR5模型。") + "<br>" + \
i18n("合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。")+ "<br>" + \
i18n("模型分为三类:") + "<br>" + \
i18n("1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点") + "<br>" + \
@ -131,10 +141,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
i18n("去混响/去延迟,附:") + "<br>" + \
i18n("1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍") + "<br>" + \
i18n("2、MDX-Net-Dereverb模型挺慢的") + "<br>" + \
i18n("3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。")
i18n("3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"),'h4')
)
with gr.Row():
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
dir_wav_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径"),
placeholder="C:\\Users\\Desktop\\todo-songs",
@ -143,7 +154,6 @@ with gr.Blocks(title="UVR5 WebUI") as app:
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
)
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
agg = gr.Slider(
minimum=0,
maximum=20,
@ -165,8 +175,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
value="flac",
interactive=True,
)
but2 = gr.Button(i18n("转换"), variant="primary")
vc_output4 = gr.Textbox(label=i18n("输出信息"))
with gr.Column():
with gr.Row():
but2 = gr.Button(i18n("转换"), variant="primary")
with gr.Row():
vc_output4 = gr.Textbox(label=i18n("输出信息"),lines=3)
but2.click(
uvr,
[

webui.py
View File

@ -25,6 +25,7 @@ if(os.path.exists(tmp)):
print(str(e))
pass
import site
import traceback
site_packages_roots = []
for path in site.getsitepackages():
if "packages" in path:
@ -38,14 +39,13 @@ for site_packages_root in site_packages_roots:
try:
with open("%s/users.pth" % (site_packages_root), "w") as f:
f.write(
"%s\n%s/tools\n%s/tools/damo_asr\n%s/GPT_SoVITS\n%s/tools/uvr5"
"%s\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5"
% (now_dir, now_dir, now_dir, now_dir, now_dir)
)
break
except PermissionError:
pass
except PermissionError as e:
traceback.print_exc()
from tools import my_utils
import traceback
import shutil
import pdb
from subprocess import Popen
@ -56,11 +56,13 @@ language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
os.environ["language"]=language
i18n = I18nAuto(language=language)
from scipy.io import wavfile
from tools.my_utils import load_audio
from tools.my_utils import load_audio, check_for_existance, check_details
from multiprocessing import cpu_count
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
import gradio.analytics as analytics
analytics.version_check = lambda:None
try:
import gradio.analytics as analytics
analytics.version_check = lambda:None
except:...
import gradio as gr
n_cpu=cpu_count()
@ -195,34 +197,35 @@ def kill_process(pid):
kill_proc_tree(pid)
def change_label(if_label,path_list):
def change_label(path_list):
global p_label
if(if_label==True and p_label==None):
if(p_label==None):
check_for_existance([path_list])
path_list=my_utils.clean_path(path_list)
cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share)
yield i18n("打标工具WebUI已开启")
yield i18n("打标工具WebUI已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_label = Popen(cmd, shell=True)
elif(if_label==False and p_label!=None):
elif(p_label!=None):
kill_process(p_label.pid)
p_label=None
yield i18n("打标工具WebUI已关闭")
yield i18n("打标工具WebUI已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
def change_uvr5(if_uvr5):
def change_uvr5():
global p_uvr5
if(if_uvr5==True and p_uvr5==None):
if(p_uvr5==None):
cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share)
yield i18n("UVR5已开启")
yield i18n("UVR5已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_uvr5 = Popen(cmd, shell=True)
elif(if_uvr5==False and p_uvr5!=None):
elif(p_uvr5!=None):
kill_process(p_uvr5.pid)
p_uvr5=None
yield i18n("UVR5已关闭")
yield i18n("UVR5已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path):
def change_tts_inference(bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path):
global p_tts_inference
if(if_tts==True and p_tts_inference==None):
if(p_tts_inference==None):
os.environ["gpt_path"]=gpt_path if "/" in gpt_path else "%s/%s"%(GPT_weight_root,gpt_path)
os.environ["sovits_path"]=sovits_path if "/"in sovits_path else "%s/%s"%(SoVITS_weight_root,sovits_path)
os.environ["cnhubert_base_path"]=cnhubert_base_path
@ -232,13 +235,13 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path
os.environ["infer_ttswebui"]=str(webui_port_infer_tts)
os.environ["is_share"]=str(is_share)
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language)
yield i18n("TTS推理进程已开启")
yield i18n("TTS推理进程已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_tts_inference = Popen(cmd, shell=True)
elif(if_tts==False and p_tts_inference!=None):
elif(p_tts_inference!=None):
kill_process(p_tts_inference.pid)
p_tts_inference=None
yield i18n("TTS推理进程已关闭")
yield i18n("TTS推理进程已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
from tools.asr.config import asr_dict
def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision):
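Throughout webui.py the old `gr.Checkbox(...).change(...)` toggles are replaced with paired open/close buttons whose visibility is swapped via Gradio update dicts (`{'__type__':'update','visible':...}`). A stripped-down, hypothetical sketch of the pattern using the equivalent `gr.update` helper (component and function names are illustrative, not the repo's):

```python
import gradio as gr

def open_tool():
    # ... start the subprocess here ...
    return "tool started", gr.update(visible=False), gr.update(visible=True)

def close_tool():
    # ... kill the subprocess here ...
    return "tool stopped", gr.update(visible=True), gr.update(visible=False)

with gr.Blocks() as demo:
    info = gr.Textbox(label="process output")
    open_btn = gr.Button("open", variant="primary", visible=True)
    close_btn = gr.Button("close", variant="primary", visible=False)
    # Each click updates the textbox and flips which button is visible.
    open_btn.click(open_tool, [], [info, open_btn, close_btn])
    close_btn.click(close_tool, [], [info, open_btn, close_btn])
```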
@ -246,7 +249,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_
if(p_asr==None):
asr_inp_dir=my_utils.clean_path(asr_inp_dir)
asr_opt_dir=my_utils.clean_path(asr_opt_dir)
check_for_exists([asr_inp_dir])
check_for_existance([asr_inp_dir])
cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}'
cmd += f' -i "{asr_inp_dir}"'
cmd += f' -o "{asr_opt_dir}"'
@ -277,7 +280,7 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir):
if(p_denoise==None):
denoise_inp_dir=my_utils.clean_path(denoise_inp_dir)
denoise_opt_dir=my_utils.clean_path(denoise_opt_dir)
check_for_exists([denoise_inp_dir])
check_for_existance([denoise_inp_dir])
cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s'%(python_exec,denoise_inp_dir,denoise_opt_dir,"float16"if is_half==True else "float32")
yield "语音降噪任务开启:%s"%cmd, {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}, {"__type__":"update"}
@ -306,7 +309,8 @@ def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_s
data=json.loads(data)
s2_dir="%s/%s"%(exp_root,exp_name)
os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True)
check_for_exists([s2_dir],is_train=True)
if check_for_existance([s2_dir],is_train=True):
check_details([s2_dir],is_train=True)
if(is_half==False):
data["train"]["fp16_run"]=False
batch_size=max(1,batch_size//2)
@ -353,7 +357,8 @@ def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_
data=yaml.load(data, Loader=yaml.FullLoader)
s1_dir="%s/%s"%(exp_root,exp_name)
os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True)
check_for_exists([s1_dir],is_train=True)
if check_for_existance([s1_dir],is_train=True):
check_details([s1_dir],is_train=True)
if(is_half==False):
data["train"]["precision"]="32"
batch_size = max(1, batch_size // 2)
@ -398,7 +403,7 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
global ps_slice
inp = my_utils.clean_path(inp)
opt_root = my_utils.clean_path(opt_root)
check_for_exists([inp])
check_for_existance([inp])
if(os.path.exists(inp)==False):
yield "输入路径不存在", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
return
@ -437,7 +442,8 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
global ps1a
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1a == []):
opt_dir="%s/%s"%(exp_root,exp_name)
config={
@ -499,7 +505,8 @@ def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir):
global ps1b
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1b == []):
config={
"inp_text":inp_text,
@ -547,7 +554,8 @@ ps1c=[]
def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path):
global ps1c
inp_text = my_utils.clean_path(inp_text)
check_for_exists([inp_text,''], is_dataset_processing=True)
if check_for_existance([inp_text,''], is_dataset_processing=True):
check_details([inp_text,''], is_dataset_processing=True)
if (ps1c == []):
opt_dir="%s/%s"%(exp_root,exp_name)
config={
@ -606,7 +614,8 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
global ps1abc
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir])
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1abc == []):
opt_dir="%s/%s"%(exp_root,exp_name)
try:
@ -743,38 +752,14 @@ def switch_version(version_):
gr.Warning(i18n(f'未下载{version.upper()}模型'))
return {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D")}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}
def check_for_exists(file_list=None,is_train=False,is_dataset_processing=False):
missing_files=[]
if is_train == True and file_list:
file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
file_list.append(os.path.join(file_list[0],'3-bert'))
file_list.append(os.path.join(file_list[0],'4-cnhubert'))
file_list.append(os.path.join(file_list[0],'5-wav32k'))
file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
for file in file_list:
if os.path.exists(file):pass
else:missing_files.append(file)
if missing_files:
if is_train:
for missing_file in missing_files:
if missing_file != '':
gr.Warning(missing_file)
gr.Warning(i18n('以下文件或文件夹不存在:'))
else:
for missing_file in missing_files:
if missing_file != '':
gr.Warning(missing_file)
if file_list[-1]==[''] and is_dataset_processing:
pass
else:
gr.Warning(i18n('以下文件或文件夹不存在:'))
if os.path.exists('GPT_SoVITS/text/G2PWModel'):...
else:
cmd = '"%s" GPT_SoVITS/download.py'%python_exec
p = Popen(cmd, shell=True)
p.wait()
def sync(text):
return {'__type__':'update','value':text}
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(
value=
@ -789,8 +774,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("0-前置数据集获取工具")):#提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标
gr.Markdown(value=i18n("0a-UVR5人声伴奏分离&去混响去延迟工具"))
with gr.Row():
if_uvr5 = gr.Checkbox(label=i18n("是否开启UVR5-WebUI"),show_label=True)
uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
with gr.Column(scale=3):
with gr.Row():
uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
open_uvr5 = gr.Button(value=i18n("开启UVR5-WebUI"),variant="primary",visible=True)
close_uvr5 = gr.Button(value=i18n("关闭UVR5-WebUI"),variant="primary",visible=False)
gr.Markdown(value=i18n("0b-语音切分工具"))
with gr.Row():
with gr.Column(scale=3):
@ -805,9 +793,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500")
with gr.Row():
_max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True)
alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
with gr.Row():
n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息"))
open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True)
close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False)
@ -890,60 +878,86 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(value=i18n("0d-语音文本校对标注工具"))
with gr.Row():
if_label = gr.Checkbox(label=i18n("是否开启打标WebUI"),show_label=True)
path_list = gr.Textbox(
label=i18n(".list标注文件的路径"),
value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list",
interactive=True,
)
label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
if_label.change(change_label, [if_label,path_list], [label_info])
if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info])
with gr.Column(scale=3):
with gr.Row():
path_list = gr.Textbox(
label=i18n(".list标注文件的路径"),
value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list",
interactive=True,
)
label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
open_label = gr.Button(value=i18n("开启打标WebUI"),variant="primary",visible=True)
close_label = gr.Button(value=i18n("关闭打标WebUI"),variant="primary",visible=False)
open_label.click(change_label, [path_list], [label_info,open_label,close_label])
close_label.click(change_label, [path_list], [label_info,open_label,close_label])
open_uvr5.click(change_uvr5, [], [uvr5_info,open_uvr5,close_uvr5])
close_uvr5.click(change_uvr5, [], [uvr5_info,open_uvr5,close_uvr5])
with gr.TabItem(i18n("1-GPT-SoVITS-TTS")):
with gr.Row():
exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True)
gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False)
version_checkbox = gr.Radio(label=i18n("版本"),value=version,choices=['v1','v2'])
pretrained_s2G = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=True)
pretrained_s2D = gr.Textbox(label=i18n("预训练的SoVITS-D模型路径"), value=pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D"), interactive=True)
pretrained_s1 = gr.Textbox(label=i18n("预训练的GPT模型路径"), value=pretrained_gpt_name[-int(version[-1])+2], interactive=True)
with gr.Row():
exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True)
gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False)
version_checkbox = gr.Radio(label=i18n("版本"),value=version,choices=['v1','v2'])
with gr.Row():
pretrained_s2G = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=True, lines=2, max_lines=3,scale=9)
pretrained_s2D = gr.Textbox(label=i18n("预训练的SoVITS-D模型路径"), value=pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D"), interactive=True, lines=2, max_lines=3,scale=9)
pretrained_s1 = gr.Textbox(label=i18n("预训练的GPT模型路径"), value=pretrained_gpt_name[-int(version[-1])+2], interactive=True, lines=2, max_lines=3,scale=10)
with gr.TabItem(i18n("1A-训练集格式化工具")):
gr.Markdown(value=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹"))
with gr.Row():
inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True)
inp_wav_dir = gr.Textbox(
label=i18n("*训练集音频文件目录"),
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
interactive=True,
placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名不是全路径。如果留空则使用.list文件里的绝对全路径。")
)
with gr.Row():
inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True,scale=10)
with gr.Row():
inp_wav_dir = gr.Textbox(
label=i18n("*训练集音频文件目录"),
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
interactive=True,
placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名不是全路径。如果留空则使用.list文件里的绝对全路径。"), scale=10
)
gr.Markdown(value=i18n("1Aa-文本内容"))
with gr.Row():
gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False)
button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True)
button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False)
info1a=gr.Textbox(label=i18n("文本进程输出信息"))
with gr.Row():
gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False,lines=2)
with gr.Row():
button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True)
button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False)
with gr.Row():
info1a=gr.Textbox(label=i18n("文本进程输出信息"))
gr.Markdown(value=i18n("1Ab-SSL自监督特征提取"))
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False)
button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True)
button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False)
info1b=gr.Textbox(label=i18n("SSL进程输出信息"))
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False,lines=2)
with gr.Row():
button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True)
button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False)
with gr.Row():
info1b=gr.Textbox(label=i18n("SSL进程输出信息"))
gr.Markdown(value=i18n("1Ac-语义token提取"))
with gr.Row():
gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True)
button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False)
info1c=gr.Textbox(label=i18n("语义token提取进程输出信息"))
with gr.Row():
gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
pretrained_s2G_ = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=False,lines=2)
with gr.Row():
button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True)
button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False)
with gr.Row():
info1c=gr.Textbox(label=i18n("语义token提取进程输出信息"))
gr.Markdown(value=i18n("1Aabc-训练集格式化一键三连"))
with gr.Row():
button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True)
button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False)
info1abc=gr.Textbox(label=i18n("一键三连进程输出信息"))
with gr.Row():
button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True)
button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False)
with gr.Row():
info1abc=gr.Textbox(label=i18n("一键三连进程输出信息"))
pretrained_s2G.change(sync,[pretrained_s2G],[pretrained_s2G_])
open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], [asr_info,open_asr_button,close_asr_button,path_list,inp_text,inp_wav_dir])
close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button])
open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button,asr_inp_dir,denoise_input_dir,inp_wav_dir])
@ -962,30 +976,46 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("1B-微调训练")):
gr.Markdown(value=i18n("1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。"))
with gr.Row():
batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch不建议太高"),value=8,interactive=True)
text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True)
save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True)
if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Column():
with gr.Row():
batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch不建议太高"),value=8,interactive=True)
with gr.Row():
text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True)
save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True)
with gr.Column():
with gr.Column():
if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Row():
button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True)
button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False)
info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息"))
with gr.Row():
button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True)
button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False)
with gr.Row():
info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息"))
gr.Markdown(value=i18n("1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。"))
with gr.Row():
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True)
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Column():
with gr.Row():
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
with gr.Row():
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True)
with gr.Column():
with gr.Column():
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
with gr.Row():
gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Row():
button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True)
button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False)
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
with gr.Row():
button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True)
button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False)
with gr.Row():
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close])
button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close])
button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_dpo,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close])
@ -993,15 +1023,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("1C-推理")):
gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模体验5秒Zero Shot TTS用。"))
with gr.Row():
GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name[0],interactive=True)
SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name[0],interactive=True)
gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True)
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
with gr.Row():
GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name[0],interactive=True)
SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name[0],interactive=True)
with gr.Row():
gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True)
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown])
with gr.Row():
if_tts = gr.Checkbox(label=i18n("是否开启TTS推理WebUI"), show_label=True)
tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息"))
if_tts.change(change_tts_inference, [if_tts,bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info])
with gr.Row():
open_tts = gr.Button(value=i18n("开启TTS推理WebUI"),variant='primary',visible=True)
close_tts = gr.Button(value=i18n("关闭TTS推理WebUI"),variant='primary',visible=False)
with gr.Row():
tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息"))
open_tts.click(change_tts_inference, [bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info,open_tts,close_tts])
close_tts.click(change_tts_inference, [bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info,open_tts,close_tts])
version_checkbox.change(switch_version,[version_checkbox],[pretrained_s2G,pretrained_s2D,pretrained_s1,GPT_dropdown,SoVITS_dropdown])
with gr.TabItem(i18n("2-GPT-SoVITS-变声")):gr.Markdown(value=i18n("施工中,请静候佳音"))
app.queue(concurrency_count=511, max_size=1022).launch(