diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index 669ac3aa..d749a798 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -34,6 +34,19 @@ language_code_list = [ "vi", "yi", "yo", "zh", "yue", "auto"] +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + def execute_asr(input_folder, output_folder, model_size, language, precision): if '-local' in model_size: model_size = model_size[:-6] @@ -57,6 +70,8 @@ def execute_asr(input_folder, output_folder, model_size, language, precision): for file_name in tqdm(input_file_names): try: + if file_name.split(".")[-1] not in AUDIO_EXTENSIONS: + continue file_path = os.path.join(input_folder, file_name) segments, info = model.transcribe( audio = file_path, diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index 831da6c1..40bfe2e2 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -23,6 +23,19 @@ model = AutoModel( punc_model_revision = "v2.0.4", ) +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + def only_asr(input_file): try: text = model.generate(input=input_file)[0]["text"] @@ -40,6 +53,8 @@ def execute_asr(input_folder, output_folder, model_size, language): for file_name in tqdm(input_file_names): try: + if file_name.split(".")[-1] not in AUDIO_EXTENSIONS: + continue file_path = os.path.join(input_folder, file_name) text = model.generate(input=file_path)[0]["text"] output.append(f"{file_path}|{output_file_name}|{language.upper()}|{text}") diff --git a/tools/cmd-denoise.py b/tools/cmd-denoise.py index 69b51e66..457cf6a9 100644 --- a/tools/cmd-denoise.py +++ b/tools/cmd-denoise.py @@ -4,6 +4,19 @@ from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from tqdm import tqdm +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + path_denoise = 'tools/denoise-model/speech_frcrn_ans_cirm_16k' path_denoise = path_denoise if os.path.exists(path_denoise) else "damo/speech_frcrn_ans_cirm_16k" ans = pipeline(Tasks.acoustic_noise_suppression,model=path_denoise) @@ -12,6 +25,8 @@ def execute_denoise(input_folder,output_folder): # print(input_folder) # print(list(os.listdir(input_folder).sort())) for name in tqdm(os.listdir(input_folder)): + if name.split(".")[-1] not in AUDIO_EXTENSIONS: + continue ans("%s/%s"%(input_folder,name),output_path='%s/%s'%(output_folder,name)) if __name__ == '__main__': diff --git a/tools/slice_audio.py b/tools/slice_audio.py index 46ee408a..4969d2cb 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -6,6 +6,19 @@ from scipy.io import wavfile from my_utils import load_audio from slicer2 import Slicer +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part): os.makedirs(opt_root,exist_ok=True) if os.path.isfile(inp): @@ -28,6 +41,8 @@ def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_ # print(inp_path) try: name = os.path.basename(inp_path) + if name.split(".")[-1] not in AUDIO_EXTENSIONS: + continue audio = load_audio(inp_path, 32000) # print(audio.shape) for chunk, start, end in slicer.slice(audio): # start和end是帧数 diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py index a690a686..c403c89f 100644 --- a/tools/uvr5/webui.py +++ b/tools/uvr5/webui.py @@ -23,15 +23,28 @@ is_half=eval(sys.argv[2]) webui_port_uvr5=int(sys.argv[3]) is_share=eval(sys.argv[4]) +AUDIO_EXTENSIONS = [ + "mp3", + "wav", + "flac", + "ogg", + "m4a", + "wma", + "aac", + "aiff", + "aif", + "aifc", +] + def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): infos = [] try: - inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ").strip("\u202a") save_root_vocal = ( - save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ").strip("\u202a") ) save_root_ins = ( - save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ").strip("\u202a") ) is_hp3 = "HP3" in model_name if model_name == "onnx_dereverb_By_FoxJoy": @@ -49,6 +62,8 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format else: paths = [path.name for path in paths] for path in paths: + if path.split(".")[-1] not in AUDIO_EXTENSIONS: + continue inp_path = os.path.join(inp_root, path) if(os.path.isfile(inp_path)==False):continue need_reformat = 1