Merge branch 'RVC-Boss:main' into main

Ming 2024-08-01 22:02:37 +08:00 committed by GitHub
commit 7c4f7d3fcc
28 changed files with 150 additions and 88 deletions

View File

@@ -10,13 +10,13 @@ all_parts = os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
opt_dir = os.environ.get("opt_dir")
bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
-is_half = eval(os.environ.get("is_half", "True"))
+import torch
+is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import sys, numpy as np, traceback, pdb
import os.path
from glob import glob
from tqdm import tqdm
from text.cleaner import clean_text
-import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
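The same guard recurs in all three dataset-preparation scripts in this commit. A minimal standalone sketch of the pattern (illustrative, outside the scripts' surrounding code): the `is_half` request is only honored when a CUDA device is actually present, so CPU-only runs fall back to float32 instead of failing later on `.half()` calls.

```python
# Minimal sketch of the guard above (illustrative, not the full script).
import os
import torch

# Honor the is_half request only when CUDA is available; CPU-only machines
# silently fall back to float32.
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
dtype = torch.float16 if is_half else torch.float32
print(f"running with dtype={dtype}")
```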

View File

@@ -10,11 +10,12 @@ os.environ["CUDA_VISIBLE_DEVICES"]= os.environ.get("_CUDA_VISIBLE_DEVICES")
from feature_extractor import cnhubert
opt_dir= os.environ.get("opt_dir")
cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")
-is_half=eval(os.environ.get("is_half","True"))
+import torch
+is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import pdb,traceback,numpy as np,logging
from scipy.io import wavfile
-import librosa,torch
+import librosa
now_dir = os.getcwd()
sys.path.append(now_dir)
from tools.my_utils import load_audio

View File

@@ -8,7 +8,8 @@ os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
opt_dir = os.environ.get("opt_dir")
pretrained_s2G = os.environ.get("pretrained_s2G")
s2config_path = os.environ.get("s2config_path")
-is_half = eval(os.environ.get("is_half", "True"))
+import torch
+is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
import math, traceback
import multiprocessing
import sys, pdb
@@ -19,7 +20,7 @@ from random import shuffle
import torch.multiprocessing as mp
from glob import glob
from tqdm import tqdm
-import logging, librosa, utils, torch
+import logging, librosa, utils
from module.models import SynthesizerTrn
logging.getLogger("numba").setLevel(logging.WARNING)

View File

@@ -106,7 +106,7 @@ conda install -c conda-forge 'ffmpeg<7'
Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
-##### Mac Users
+##### MacOS Users
```bash
brew install ffmpeg
```
@@ -156,7 +156,7 @@ For English or Japanese ASR (additionally), download models from [Faster Whisper
Users in the China region can download this model by entering the links below
-- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3) (Click "Download a copy", log out if you encounter errors while downloading.)
+- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/00bUEp9_mcjMq_dhHu_vrAFDQ#faster-whisper-large-v3) (Click "Download a copy", log out if you encounter errors while downloading.)
- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3) (HuggingFace mirror site)
@@ -227,7 +227,7 @@ ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
(No progress bars, GPU performance may cause time delays)
```
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
+python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
```
A custom list save path is enabled
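A concrete invocation with the new flag might look like the following (the input and output paths here are illustrative, not prescribed by the commit):

```bash
# Transcribe English audio at fp16; float32 or int8 are the other choices
python ./tools/asr/fasterwhisper_asr.py -i output/slicer_opt -o output/asr_opt -l en -p float16
```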

View File

@@ -106,7 +106,7 @@ conda install -c conda-forge 'ffmpeg<7'
下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
-##### Mac 用户
+##### MacOS 用户
```bash
brew install ffmpeg
```
@@ -155,7 +155,7 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。
中国地区用户可以通过以下链接下载:
-- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/0c4pQxFs7oWyVU1iMTq2DbmLA#faster-whisper-large-v3)点击“下载副本”,如果下载时遇到错误,请退出登录)
+- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/00bUEp9_mcjMq_dhHu_vrAFDQ#faster-whisper-large-v3)(点击“下载副本”,如果下载时遇到错误,请退出登录)
- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3)(Hugging Face镜像站)
@@ -185,7 +185,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
- [ ] **高优先级:**
   - [x] 日语和英语的本地化。
-  - [ ] 用户指南。
+  - [x] 用户指南。
   - [x] 日语和英语数据集微调训练。
- [ ] **功能:**
@@ -226,9 +226,9 @@ python tools/asr/funasr_asr.py -i <input> -o <output>
通过Faster_Whisper进行ASR处理(除中文之外的ASR标记)
(没有进度条,GPU性能可能会导致时间延迟)
-````
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
-````
+```
+python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
+```
启用自定义列表保存路径
## 致谢

View File

@@ -102,7 +102,7 @@ conda install -c conda-forge 'ffmpeg<7'
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
-##### Mac ユーザー
+##### MacOS ユーザー
```bash
brew install ffmpeg
```
@@ -209,7 +209,7 @@ ASR処理はFaster_Whisperを通じて実行されます(中国語を除くASR
(進行状況バーは表示されません。GPU のパフォーマンスにより時間遅延が発生する可能性があります)
```
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
+python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
```
カスタムリストの保存パスが有効になっています

View File

@@ -102,7 +102,7 @@ conda install -c conda-forge 'ffmpeg<7'
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe)와 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe)를 GPT-SoVITS root 디렉토리에 넣습니다.
-##### Mac 사용자
+##### MacOS 사용자
```bash
brew install ffmpeg
```
@@ -213,7 +213,7 @@ ASR 처리는 Faster_Whisper(중국어를 제외한 ASR 마킹)를 통해 수행
(진행률 표시줄 없음, GPU 성능으로 인해 시간 지연이 발생할 수 있음)
```
-python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language>
+python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
```
사용자 정의 목록 저장 경로가 활성화되었습니다.

View File

@@ -102,7 +102,7 @@ conda install -c conda-forge 'ffmpeg<7'
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) ve [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) dosyalarını indirin ve GPT-SoVITS kök dizinine yerleştirin.
-##### Mac Kullanıcıları
+##### MacOS Kullanıcıları
```bash
brew install ffmpeg
```

View File

@@ -21,11 +21,13 @@ asr_dict = {
        'lang': ['zh'],
        'size': ['large'],
        'path': 'funasr_asr.py',
+       'precision': ['float32']
    },
    "Faster Whisper (多语种)": {
        'lang': ['auto', 'zh', 'en', 'ja'],
        'size': check_fw_local_models(),
-       'path': 'fasterwhisper_asr.py'
-   }
+       'path': 'fasterwhisper_asr.py',
+       'precision': ['float32', 'float16', 'int8']
+   },
}
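A sketch of how a caller might consume the new `precision` key, mirroring the existing `lang` and `size` lookups (the command string and directory names are illustrative; run from the repo root so the import resolves):

```python
# Illustrative consumption of the asr_dict entries above.
from tools.asr.config import asr_dict

model_name = "Faster Whisper (多语种)"
entry = asr_dict[model_name]
script = entry['path']           # 'fasterwhisper_asr.py'
precisions = entry['precision']  # ['float32', 'float16', 'int8']
cmd = f"python tools/asr/{script} -i in_dir -o out_dir -l auto -p {precisions[1]}"
print(cmd)
```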

View File

@@ -101,8 +101,8 @@ if __name__ == '__main__':
    parser.add_argument("-l", "--language", type=str, default='ja',
                        choices=language_code_list,
                        help="Language of the audio files.")
-   parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32'],
-                       help="fp16 or fp32")
+   parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32','int8'],
+                       help="fp16, int8 or fp32")
    cmd = parser.parse_args()
    output_file_path = execute_asr(
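The `--precision` choices line up with faster-whisper's `compute_type` argument; presumably the script forwards the flag when constructing the model. A hedged sketch (model size, device, and file name are illustrative):

```python
# Sketch only: assumes --precision is passed through as compute_type.
from faster_whisper import WhisperModel

model = WhisperModel("large-v3", device="cuda", compute_type="float16")  # or "int8" / "float32"
segments, info = model.transcribe("sample.wav", language="ja")
for seg in segments:
    print(seg.start, seg.end, seg.text)
```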

View File

@@ -4,7 +4,8 @@ import argparse
import os
import traceback
from tqdm import tqdm
+# from funasr.utils import version_checker
+# version_checker.check_for_update = lambda: None
from funasr import AutoModel
path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
@@ -14,6 +15,7 @@ path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-l
path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
model = AutoModel(
    model = path_asr,
    model_revision = "v2.0.4",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Reverberation and delay removal model(by FoxJoy):",
"ASR 模型": "ASR model",
"ASR 模型尺寸": "ASR model size",
+"数据类型精度": "Computing precision",
"ASR 语言设置": "ASR language",
"ASR进程输出信息": "ASR output log",
"GPT模型列表": "GPT weight list",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Modelos de eliminación de reverberación y retardo (por FoxJoy)",
"ASR 模型": "Modelo ASR",
"ASR 模型尺寸": "Tamaño del modelo ASR",
+"数据类型精度": "precisión del tipo de datos",
"ASR 语言设置": "Configuración del idioma ASR",
"ASR进程输出信息": "Información de salida del proceso ASR",
"GPT模型列表": "Lista de modelos GPT",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Modèle de suppression de réverbération et de retard (par FoxJoy) :",
"ASR 模型": "Modèle ASR",
"ASR 模型尺寸": "Taille du modèle ASR",
+"数据类型精度": "précision du type de données",
"ASR 语言设置": "Paramètres de langue ASR",
"ASR进程输出信息": "Informations de processus ASR",
"GPT模型列表": "Liste des modèles GPT",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Modello per rimuovere la riverberazione e il ritardo (by FoxJoy):",
"ASR 模型": "Modello ASR",
"ASR 模型尺寸": "Dimensioni del modello ASR",
+"数据类型精度": "precisione del tipo di dati",
"ASR 语言设置": "Impostazioni linguistiche ASR",
"ASR进程输出信息": "Informazioni sull'output del processo ASR",
"GPT模型列表": "Elenco dei modelli GPT",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3、リバーブ除去と遅延除去モデルby FoxJoy",
"ASR 模型": "ASR モデル",
"ASR 模型尺寸": "ASRモデルサイズ",
+"数据类型精度": "データ型の精度",
"ASR 语言设置": "ASR 言語設定",
"ASR进程输出信息": "ASRプロセスの出力情報",
"GPT模型列表": "GPTモデルリスト",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. 잔향 제거 및 지연 제거 모델 (by FoxJoy):",
"ASR 模型": "ASR 모델",
"ASR 模型尺寸": "ASR 모델 크기",
+"数据类型精度": "데이터 유형 정밀도",
"ASR 语言设置": "ASR 언어 설정",
"ASR进程输出信息": "ASR 프로세스 출력 정보",
"GPT模型列表": "GPT 모델 목록",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Modelo de remoção de reverberação e atraso (por FoxJoy):",
"ASR 模型": "Modelo ASR",
"ASR 模型尺寸": "Tamanho do modelo ASR",
+"数据类型精度": "precisão do tipo de dado",
"ASR 语言设置": "Configurações de idioma do ASR",
"ASR进程输出信息": "Informações de saída do processo ASR",
"GPT模型列表": "Lista de modelos GPT",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Модель удаления реверберации и задержек (от FoxJoy):",
"ASR 模型": "Модель ASR",
"ASR 模型尺寸": "Размер модели ASR",
+"数据类型精度": "точность типа данных",
"ASR 语言设置": "Настройки языка ASR",
"ASR进程输出信息": "Информация о процессе ASR",
"GPT模型列表": "Список моделей GPT",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3. Yankı ve gecikme giderme modeli (FoxJoy tarafından):",
"ASR 模型": "ASR modeli",
"ASR 模型尺寸": "ASR model boyutu",
+"数据类型精度": "veri türü doğruluğu",
"ASR 语言设置": "ASR dil ayarları",
"ASR进程输出信息": "ASR işlemi çıktı bilgisi",
"GPT模型列表": "GPT model listesi",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3、去混响、去延迟模型by FoxJoy",
"ASR 模型": "ASR 模型",
"ASR 模型尺寸": "ASR 模型尺寸",
+"数据类型精度": "数据类型精度",
"ASR 语言设置": "ASR 语言设置",
"ASR进程输出信息": "ASR进程输出信息",
"GPT模型列表": "GPT模型列表",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3、去混響、去延遲模型by FoxJoy",
"ASR 模型": "ASR 模型",
"ASR 模型尺寸": "ASR 模型尺寸",
+"数据类型精度": "數據類型精度",
"ASR 语言设置": "ASR 語言設置",
"ASR进程输出信息": "ASR進程輸出信息",
"GPT模型列表": "GPT模型列表",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3、去混響、去延遲模型by FoxJoy",
"ASR 模型": "ASR 模型",
"ASR 模型尺寸": "ASR 模型尺寸",
+"数据类型精度": "數據類型精度",
"ASR 语言设置": "ASR 語言設定",
"ASR进程输出信息": "ASR進程輸出資訊",
"GPT模型列表": "GPT模型列表",

View File

@@ -34,6 +34,7 @@
"3、去混响、去延迟模型by FoxJoy": "3、去混響、去延遲模型by FoxJoy",
"ASR 模型": "ASR 模型",
"ASR 模型尺寸": "ASR 模型尺寸",
+"数据类型精度": "數據類型精度",
"ASR 语言设置": "ASR 語言設置",
"ASR进程输出信息": "ASR進程輸出資訊",
"GPT模型列表": "GPT模型列表",

View File

@@ -479,27 +479,31 @@ class BSRoformer(Module):
                              'b s f t c -> b (f s) t c')  # merge stereo / mono into the frequency, with frequency leading dimension, for band splitting
        x = rearrange(stft_repr, 'b f t c -> b t (f c)')
+       # print("460:", x.dtype)#fp32
        x = self.band_split(x)
        # axial / hierarchical attention
+       # print("487:",x.dtype)#fp16
        for transformer_block in self.layers:
            if len(transformer_block) == 3:
                linear_transformer, time_transformer, freq_transformer = transformer_block
                x, ft_ps = pack([x], 'b * d')
+               # print("494:", x.dtype)#fp16
                x = linear_transformer(x)
+               # print("496:", x.dtype)#fp16
                x, = unpack(x, ft_ps, 'b * d')
            else:
                time_transformer, freq_transformer = transformer_block
+           # print("501:", x.dtype)#fp16
            x = rearrange(x, 'b t f d -> b f t d')
            x, ps = pack([x], '* t d')
            x = time_transformer(x)
+           # print("505:", x.dtype)#fp16
            x, = unpack(x, ps, '* t d')
            x = rearrange(x, 'b f t d -> b t f d')
            x, ps = pack([x], '* f d')
@@ -508,10 +512,11 @@ class BSRoformer(Module):
            x, = unpack(x, ps, '* f d')
+       # print("515:", x.dtype)######fp16
        x = self.final_norm(x)
        num_stems = len(self.mask_estimators)
+       # print("519:", x.dtype)#fp32
        mask = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        mask = rearrange(mask, 'b n t (f c) -> b n f t c', c=2)
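The commented `print` lines above trace tensor dtypes across the half-precision boundary (fp32 at the band split, fp16 inside the transformer blocks, fp32 again at the mask estimators). A less invasive way to get the same trace, sketched here with plain forward hooks on a generic module rather than the actual BSRoformer:

```python
# Minimal dtype-tracing sketch (generic nn.Module, not the actual BSRoformer).
import torch
import torch.nn as nn

def trace_dtypes(model: nn.Module):
    def hook(module, inputs, output):
        if torch.is_tensor(output):
            print(f"{module.__class__.__name__}: out dtype = {output.dtype}")
    # Attach to every submodule; keep the handles so tracing can be removed.
    return [m.register_forward_hook(hook) for m in model.modules()]

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU())
handles = trace_dtypes(model)
model(torch.randn(2, 8))   # prints float32; the same trace shows fp16 under model.half() on GPU
for h in handles:
    h.remove()
```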

View File

@@ -1,4 +1,5 @@
# This code is modified from https://github.com/ZFTurbo/
+import pdb
import librosa
from tqdm import tqdm
@@ -10,6 +11,7 @@ import torch.nn as nn
import warnings
warnings.filterwarnings("ignore")
+from bs_roformer.bs_roformer import BSRoformer
class BsRoformer_Loader:
    def get_model_from_config(self):
@@ -40,7 +42,7 @@ class BsRoformer_Loader:
        }
-       from bs_roformer.bs_roformer import BSRoformer
        model = BSRoformer(
            **dict(config)
        )
@@ -95,6 +97,8 @@ class BsRoformer_Loader:
                    part = nn.functional.pad(input=part, pad=(0, C - length), mode='reflect')
                else:
                    part = nn.functional.pad(input=part, pad=(0, C - length, 0, 0), mode='constant', value=0)
+               if(self.is_half==True):
+                   part=part.half()
                batch_data.append(part)
                batch_locations.append((i, length))
                i += step
@@ -102,6 +106,7 @@ class BsRoformer_Loader:
                if len(batch_data) >= batch_size or (i >= mix.shape[1]):
                    arr = torch.stack(batch_data, dim=0)
+                   # print(23333333,arr.dtype)
                    x = model(arr)
                    window = window_middle
@@ -192,14 +197,18 @@ class BsRoformer_Loader:
        # print("Elapsed time: {:.2f} sec".format(time.time() - start_time))
-   def __init__(self, model_path, device):
+   def __init__(self, model_path, device, is_half):
        self.device = device
        self.extract_instrumental=True
        model = self.get_model_from_config()
-       state_dict = torch.load(model_path)
+       state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)
-       self.model = model.to(device)
+       self.is_half=is_half
+       if(is_half==False):
+           self.model = model.to(device)
+       else:
+           self.model = model.half().to(device)
    def _path_audio_(self, input, others_root, vocal_root, format, is_hp3=False):
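Two details of this constructor are worth noting: `map_location="cpu"` keeps the checkpoint off the GPU during deserialization (avoiding a transient double allocation and device-mismatch errors), and `.half()` is applied only after the full-precision weights have been loaded. A minimal sketch of the same pattern (function and flag names are illustrative):

```python
# Load-to-CPU, then optionally halve and move: sketch of the pattern above.
import torch

def load_model(model, ckpt_path, device, use_half):
    state_dict = torch.load(ckpt_path, map_location="cpu")  # deserialize on CPU
    model.load_state_dict(state_dict)
    if use_half:
        model = model.half()  # convert after loading float32 weights
    return model.to(device)
```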

View File

@@ -17,8 +17,8 @@ from bsroformer import BsRoformer_Loader
weight_uvr5_root = "tools/uvr5/uvr5_weights"
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
-   if name.endswith(".pth") or "onnx" in name:
-       uvr5_names.append(name.replace(".pth", ""))
+   if name.endswith(".pth") or name.endswith(".ckpt") or "onnx" in name:
+       uvr5_names.append(name.replace(".pth", "").replace(".ckpt", ""))
device=sys.argv[1]
is_half=eval(sys.argv[2])
@@ -37,8 +37,9 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
    elif model_name == "Bs_Roformer" or "bs_roformer" in model_name.lower():
        func = BsRoformer_Loader
        pre_fun = func(
-           model_path = os.path.join(weight_uvr5_root, model_name + ".pth"),
+           model_path = os.path.join(weight_uvr5_root, model_name + ".ckpt"),
            device = device,
+           is_half=is_half
        )
    else:
        func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
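The weight scan now accepts both `.pth` and `.ckpt` files and strips either suffix for display. Written equivalently as a comprehension (illustrative sketch, same behavior):

```python
# Equivalent filtering/normalization of the weights directory (illustrative).
import os

weight_uvr5_root = "tools/uvr5/uvr5_weights"
uvr5_names = [
    name.replace(".pth", "").replace(".ckpt", "")
    for name in os.listdir(weight_uvr5_root)
    if name.endswith((".pth", ".ckpt")) or "onnx" in name
]
```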

View File

@@ -192,7 +192,7 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path
        yield i18n("TTS推理进程已关闭")
from tools.asr.config import asr_dict
-def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
+def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision):
    global p_asr
    if(p_asr==None):
        asr_inp_dir=my_utils.clean_path(asr_inp_dir)
@@ -202,16 +202,18 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang):
        cmd += f' -o "{asr_opt_dir}"'
        cmd += f' -s {asr_model_size}'
        cmd += f' -l {asr_lang}'
-       cmd += " -p %s"%("float16"if is_half==True else "float32")
-       yield "ASR任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+       cmd += f" -p {asr_precision}"
+       output_file_name = os.path.basename(asr_inp_dir)
+       output_folder = asr_opt_dir or "output/asr_opt"
+       output_file_path = os.path.abspath(f'{output_folder}/{output_file_name}.list')
+       yield "ASR任务开启:%s"%cmd, {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}
        print(cmd)
        p_asr = Popen(cmd, shell=True)
        p_asr.wait()
        p_asr=None
-       yield f"ASR任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+       yield f"ASR任务完成, 查看终端进行下一步", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__":"update","value":output_file_path}
    else:
-       yield "已有正在进行的ASR任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+       yield "已有正在进行的ASR任务,需先终止才能开启下一次任务", {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}
    # return None
def close_asr():
@@ -227,14 +229,14 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir):
        denoise_opt_dir=my_utils.clean_path(denoise_opt_dir)
        cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s'%(python_exec,denoise_inp_dir,denoise_opt_dir,"float16"if is_half==True else "float32")
-       yield "语音降噪任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+       yield "语音降噪任务开启:%s"%cmd, {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}
        print(cmd)
        p_denoise = Popen(cmd, shell=True)
        p_denoise.wait()
        p_denoise=None
-       yield f"语音降噪任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+       yield f"语音降噪任务完成, 查看终端进行下一步", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__":"update","value":denoise_opt_dir}
    else:
-       yield "已有正在进行的语音降噪任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True}
+       yield "已有正在进行的语音降噪任务,需先终止才能开启下一次任务", {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}
    # return None
def close_denoise():
@@ -341,12 +343,12 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
    inp = my_utils.clean_path(inp)
    opt_root = my_utils.clean_path(opt_root)
    if(os.path.exists(inp)==False):
-       yield "输入路径不存在",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+       yield "输入路径不存在", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update"}, {"__type__": "update"}
        return
    if os.path.isfile(inp):n_parts=1
    elif os.path.isdir(inp):pass
    else:
-       yield "输入路径存在但既不是文件也不是文件夹",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+       yield "输入路径存在但既不是文件也不是文件夹", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update"}, {"__type__": "update"}
        return
    if (ps_slice == []):
        for i_part in range(n_parts):
@@ -354,13 +356,13 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
            print(cmd)
            p = Popen(cmd, shell=True)
            ps_slice.append(p)
-       yield "切割执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
+       yield "切割执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, {"__type__": "update"}, {"__type__": "update"}
        for p in ps_slice:
            p.wait()
        ps_slice=[]
-       yield "切割结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False}
+       yield "切割结束", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update", "value":opt_root}, {"__type__": "update", "value":opt_root}
    else:
-       yield "已有正在进行的切割任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}
+       yield "已有正在进行的切割任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True}, {"__type__": "update"}, {"__type__": "update"}
def close_slice():
    global ps_slice
@@ -689,33 +691,37 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
            uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
        gr.Markdown(value=i18n("0b-语音切分工具"))
        with gr.Row():
+           with gr.Column(scale=3):
                with gr.Row():
                    slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="")
                    slice_opt_root=gr.Textbox(label=i18n("切分后的子音频的输出根目录"),value="output/slicer_opt")
+               with gr.Row():
                    threshold=gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"),value="-34")
                    min_length=gr.Textbox(label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4000")
                    min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="300")
                    hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10")
                    max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500")
                with gr.Row():
-                   open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True)
-                   close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False)
                    _max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True)
                    alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
                    n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
+           with gr.Row():
                slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息"))
+               open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True)
+               close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False)
        gr.Markdown(value=i18n("0bb-语音降噪工具"))
        with gr.Row():
-           open_denoise_button = gr.Button(i18n("开启语音降噪"), variant="primary",visible=True)
-           close_denoise_button = gr.Button(i18n("终止语音降噪进程"), variant="primary",visible=False)
+           with gr.Column(scale=3):
+               with gr.Row():
                    denoise_input_dir=gr.Textbox(label=i18n("降噪音频文件输入文件夹"),value="")
                    denoise_output_dir=gr.Textbox(label=i18n("降噪结果输出文件夹"),value="output/denoise_opt")
+           with gr.Row():
                denoise_info = gr.Textbox(label=i18n("语音降噪进程输出信息"))
+               open_denoise_button = gr.Button(i18n("开启语音降噪"), variant="primary",visible=True)
+               close_denoise_button = gr.Button(i18n("终止语音降噪进程"), variant="primary",visible=False)
        gr.Markdown(value=i18n("0c-中文批量离线ASR工具"))
        with gr.Row():
-           open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True)
-           close_asr_button = gr.Button(i18n("终止ASR进程"), variant="primary",visible=False)
-           with gr.Column():
+           with gr.Column(scale=3):
                with gr.Row():
                    asr_inp_dir = gr.Textbox(
                        label=i18n("输入文件夹路径"),
@@ -746,17 +752,39 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                        interactive = True,
                        value="zh"
                    )
+                   asr_precision = gr.Dropdown(
+                       label = i18n("数据类型精度"),
+                       choices = ["float32"],
+                       interactive = True,
+                       value="float32"
+                   )
                with gr.Row():
                    asr_info = gr.Textbox(label=i18n("ASR进程输出信息"))
+           open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True)
+           close_asr_button = gr.Button(i18n("终止ASR进程"), variant="primary",visible=False)
        def change_lang_choices(key): #根据选择的模型修改可选的语言
            # return gr.Dropdown(choices=asr_dict[key]['lang'])
            return {"__type__": "update", "choices": asr_dict[key]['lang'],"value":asr_dict[key]['lang'][0]}
        def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸
            # return gr.Dropdown(choices=asr_dict[key]['size'])
-           return {"__type__": "update", "choices": asr_dict[key]['size']}
+           return {"__type__": "update", "choices": asr_dict[key]['size'],"value":asr_dict[key]['size'][-1]}
+       def change_precision_choices(key): #根据选择的模型修改可选的精度
+           if key =="Faster Whisper (多语种)":
+               if default_batch_size <= 4:
+                   precision = 'int8'
+               elif is_half:
+                   precision = 'float16'
+               else:
+                   precision = 'float32'
+           else:
+               precision = 'float32'
+           # return gr.Dropdown(choices=asr_dict[key]['precision'])
+           return {"__type__": "update", "choices": asr_dict[key]['precision'],"value":precision}
        asr_model.change(change_lang_choices, [asr_model], [asr_lang])
        asr_model.change(change_size_choices, [asr_model], [asr_size])
+       asr_model.change(change_precision_choices, [asr_model], [asr_precision])
gr.Markdown(value=i18n("0d-语音文本校对标注工具")) gr.Markdown(value=i18n("0d-语音文本校对标注工具"))
with gr.Row(): with gr.Row():
@ -769,11 +797,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
label_info = gr.Textbox(label=i18n("打标工具进程输出信息")) label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
if_label.change(change_label, [if_label,path_list], [label_info]) if_label.change(change_label, [if_label,path_list], [label_info])
if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info]) if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info])
open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], [asr_info,open_asr_button,close_asr_button,path_list])
close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button])
open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button,asr_inp_dir,denoise_input_dir])
close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button])
open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button,asr_inp_dir])
close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button]) close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button])
with gr.TabItem(i18n("1-GPT-SoVITS-TTS")): with gr.TabItem(i18n("1-GPT-SoVITS-TTS")):
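These callbacks are generators: each `yield` pushes one value per output component registered in `.click()`, and `{"__type__": "update", ...}` is the raw-dict form of `gr.update(...)`. The extra outputs added in this commit let a finished task auto-fill the next tool's input box (slicer output into the ASR and denoise inputs, ASR output into the labeling path). A simplified sketch of the pattern (component wiring and names are illustrative, not the repo's exact code):

```python
# Sketch of the yield-based Gradio callback pattern used above.
def open_task(inp_dir):
    # first yield: log text, hide "open" button, show "close" button, leave path box alone
    yield ("task started",
           {"__type__": "update", "visible": False},
           {"__type__": "update", "visible": True},
           {"__type__": "update"})
    run_worker(inp_dir)  # hypothetical blocking subprocess
    # final yield: restore buttons and auto-fill the downstream path box
    yield ("task finished",
           {"__type__": "update", "visible": True},
           {"__type__": "update", "visible": False},
           {"__type__": "update", "value": inp_dir})

# open_button.click(open_task, [input_box], [log_box, open_button, close_button, next_input_box])
```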