diff --git a/config.py b/config.py index 1f741285..c3a7376c 100644 --- a/config.py +++ b/config.py @@ -15,6 +15,19 @@ bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" pretrained_sovits_path = "GPT_SoVITS/pretrained_models/s2G488k.pth" pretrained_gpt_path = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + exp_root = "logs" python_exec = sys.executable or "python" if torch.cuda.is_available(): diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index d749a798..9198b506 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -10,6 +10,7 @@ from faster_whisper import WhisperModel from tqdm import tqdm from tools.asr.config import check_fw_local_models +from config import AUDIO_EXTENSIONS language_code_list = [ "af", "am", "ar", "as", "az", @@ -34,19 +35,6 @@ language_code_list = [ "vi", "yi", "yo", "zh", "yue", "auto"] -AUDIO_EXTENSIONS = [ - "mp3", - "wav", - "flac", - "ogg", - "m4a", - "wma", - "aac", - "aiff", - "aif", - "aifc", -] - def execute_asr(input_folder, output_folder, model_size, language, precision): if '-local' in model_size: model_size = model_size[:-6] diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index 40bfe2e2..7c400e86 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -4,6 +4,7 @@ import argparse import os import traceback from tqdm import tqdm +from config import AUDIO_EXTENSIONS from funasr import AutoModel @@ -23,19 +24,6 @@ model = AutoModel( punc_model_revision = "v2.0.4", ) -AUDIO_EXTENSIONS = [ - "mp3", - "wav", - "flac", - "ogg", - "m4a", - "wma", - "aac", - "aiff", - "aif", - "aifc", -] - def only_asr(input_file): try: text = model.generate(input=input_file)[0]["text"] diff --git a/tools/cmd-denoise.py b/tools/cmd-denoise.py index 457cf6a9..bd4b559a 100644 --- a/tools/cmd-denoise.py +++ b/tools/cmd-denoise.py @@ -3,19 +3,7 @@ import os,argparse from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from tqdm import tqdm - -AUDIO_EXTENSIONS = [ - "mp3", - "wav", - "flac", - "ogg", - "m4a", - "wma", - "aac", - "aiff", - "aif", - "aifc", -] +from config import AUDIO_EXTENSIONS path_denoise = 'tools/denoise-model/speech_frcrn_ans_cirm_16k' path_denoise = path_denoise if os.path.exists(path_denoise) else "damo/speech_frcrn_ans_cirm_16k" diff --git a/tools/slice_audio.py b/tools/slice_audio.py index 4969d2cb..55caf9ff 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -5,19 +5,7 @@ from scipy.io import wavfile # sys.path.append(parent_directory) from my_utils import load_audio from slicer2 import Slicer - -AUDIO_EXTENSIONS = [ - "mp3", - "wav", - "flac", - "ogg", - "m4a", - "wma", - "aac", - "aiff", - "aif", - "aifc", -] +from config import AUDIO_EXTENSIONS def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part): os.makedirs(opt_root,exist_ok=True) diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py index c403c89f..bbe24bd0 100644 --- a/tools/uvr5/webui.py +++ b/tools/uvr5/webui.py @@ -2,6 +2,7 @@ import os import traceback,gradio as gr import logging from tools.i18n.i18n import I18nAuto +from config import AUDIO_EXTENSIONS i18n = I18nAuto() logger = logging.getLogger(__name__) @@ -23,19 +24,6 @@ is_half=eval(sys.argv[2]) webui_port_uvr5=int(sys.argv[3]) is_share=eval(sys.argv[4]) -AUDIO_EXTENSIONS = [ - "mp3", - "wav", - "flac", - "ogg", - "m4a", - "wma", - "aac", - "aiff", - "aif", - "aifc", -] - def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): infos = [] try: