From 26c4709d336919bbad2993e9d8d567ecb9a1af5e Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:09:57 +0800
Subject: [PATCH 1/6] Add files via upload

python WhisperASR.py -i -o -f -l
---
 tools/damo_asr/WhisperASR.py | 42 ++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 tools/damo_asr/WhisperASR.py

diff --git a/tools/damo_asr/WhisperASR.py b/tools/damo_asr/WhisperASR.py
new file mode 100644
index 0000000..3b0a946
--- /dev/null
+++ b/tools/damo_asr/WhisperASR.py
@@ -0,0 +1,42 @@
+import os
+import argparse
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
+from glob import glob
+from faster_whisper import WhisperModel
+
+def main(input_folder, output_folder, output_filename, language):
+    """Transcribe every WAV file under input_folder into a single list file.
+
+    One line "<wav path>|<LANGUAGE>|<text>" is written per transcribed file
+    to output_folder/output_filename; the folder is created when missing.
+    language is the language code passed to WhisperModel.transcribe().
+    """
+    model = WhisperModel("large-v3", device="cuda", compute_type="float16")
+    os.makedirs(output_folder, exist_ok=True)
+    output_file = os.path.join(output_folder, output_filename)
+
+    with open(output_file, 'w', encoding='utf-8') as f:
+        # Sorted so the list file comes out in a reproducible order.
+        for file in sorted(glob(os.path.join(input_folder, '**/*.wav'), recursive=True)):
+            segments, _ = model.transcribe(file, beam_size=10, vad_filter=True,
+                vad_parameters=dict(min_silence_duration_ms=700), language=language)
+            # A clip can yield several segments, or none when the VAD filter
+            # removes everything, so join them all instead of indexing [0].
+            text = "".join(segment.text for segment in segments).strip()
+            if not text:
+                print(f"No speech detected, skipping: {file}")
+                continue
+            f.write(f"{file}|{language.upper()}|{text}\n")
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input_folder", type=str, required=True,
+                        help="Path to the folder containing WAV files.")
+    parser.add_argument("-o", "--output_folder", type=str, required=True, help="Output folder to store transcriptions.")
+    parser.add_argument("-f", "--output_filename", type=str, default="transcriptions.txt", help="Name of the output text file.")
+    # Deliberately no `choices`: any language code Whisper supports is accepted.
+    parser.add_argument("-l", "--language", type=str, default='zh',
+                        help="Language code of the audio files, e.g. zh or en.")
+
+    cmd = parser.parse_args()
+    main(cmd.input_folder, cmd.output_folder, cmd.output_filename, cmd.language)
\ No newline at end of file

From 1b4951b5dc85ddb97ab2e231c709fdac011cafb7 Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:10:22 +0800
Subject: [PATCH 2/6] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index d4800c8..6f62e57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,4 @@ psutil
 jieba_fast
 jieba
 LangSegment
+Faster_Whisper

From c69d93bbd69f0df843e1b417a3eaafa8408f9c8d Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:20:09 +0800
Subject: [PATCH 3/6] Add files via upload

---
 README.md | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/README.md b/README.md
index 958e314..f7de2ea 100644
--- a/README.md
+++ b/README.md
@@ -197,8 +197,40 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
- [ ] better sovits base model (enhanced audio quality) - [ ] model mix +## (Optional) If you need, here will provide the command line operation mode, custom port and custom number of lines displayed per page +Use the command line to open the WebUI for UVR5 +``` +python tools/uvr5/webui.py "" +``` +If you can't open a browser, follow the format below for UVR processing,This is using mdxnet for audio processing +``` +python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision +``` +This is how the audio segmentation of the dataset is done using the command line +``` +python audio_slicer.py \ + --input_path "" \ + --output_root "" \ + --threshold \ + --min_length \ + --min_interval + --hop_size +``` +This is how dataset ASR processing is done using the command line(Only Chinese) +``` +python tools/damo_asr/cmd-asr.py "" +``` +ASR processing is performed through Faster_Whisper(ASR marking except Chinese) + +(No progress bar, may be time dependent due to GPU performance) +``` +python ./tools/damo_asr/WhisperASR.py -i -o -f -l +``` +A custom list save path is enabled ## Credits + + Special thanks to the following projects and contributors: - [ar-vits](https://github.com/innnky/ar-vits) From d55f63880b5a5c4864e31c02ff25b6cb5642f658 Mon Sep 17 00:00:00 2001 From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:21:42 +0800 Subject: [PATCH 4/6] Add files via upload --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f7de2ea..7018218 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. 
- [ ] better sovits base model (enhanced audio quality) - [ ] model mix -## (Optional) If you need, here will provide the command line operation mode, custom port and custom number of lines displayed per page +## (Optional) If you need, here will provide the command line operation mode Use the command line to open the WebUI for UVR5 ``` python tools/uvr5/webui.py "" From 9d2a136aab02f3ace34062ebaab59054551f3387 Mon Sep 17 00:00:00 2001 From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:25:19 +0800 Subject: [PATCH 5/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7018218..4d1ddac 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ python tools/damo_asr/cmd-asr.py " -o -f -l ``` From 4ad13a84551d0661f2236d85d66dd042565c6da0 Mon Sep 17 00:00:00 2001 From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:26:06 +0800 Subject: [PATCH 6/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4d1ddac..c0eda03 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ python tools/damo_asr/cmd-asr.py " -o -f -l ```