From 26c4709d336919bbad2993e9d8d567ecb9a1af5e Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:09:57 +0800
Subject: [PATCH 1/6] Add files via upload

python WhisperASR.py -i <input> -o <out_put> -f <file_name.list> -l <language>
---
 tools/damo_asr/WhisperASR.py | 42 ++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 tools/damo_asr/WhisperASR.py
diff --git a/tools/damo_asr/WhisperASR.py b/tools/damo_asr/WhisperASR.py
new file mode 100644
index 0000000..3b0a946
--- /dev/null
+++ b/tools/damo_asr/WhisperASR.py
@@ -0,0 +1,42 @@
+import os
+import argparse
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
+from glob import glob
+from faster_whisper import WhisperModel
+
+def main(input_folder, output_folder, output_filename, language):
+    """Transcribe every WAV under input_folder (recursively) with faster-whisper.
+
+    Writes one "path|LANGUAGE|text" line per file to output_folder/output_filename.
+    """
+    model = WhisperModel("large-v3", device="cuda", compute_type="float16")
+
+    os.makedirs(output_folder, exist_ok=True)
+    output_file = os.path.join(output_folder, output_filename)
+    with open(output_file, 'w', encoding='utf-8') as f:
+        for file in glob(os.path.join(input_folder, '**/*.wav'), recursive=True):
+            segments, _ = model.transcribe(file, beam_size=10, vad_filter=True,
+                                           vad_parameters=dict(min_silence_duration_ms=700), language=language)
+            # A clip can yield several segments, or none when VAD drops all of it.
+            text = "".join(segment.text for segment in segments).strip()
+            if not text:
+                continue  # nothing recognised; segments[0] used to raise IndexError
+            f.write(f"{file}|{language.upper()}|{text}\n")
+
+if __name__ == '__main__':
+    # Command-line entry point: collect the options and hand them to main().
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input_folder", type=str, required=True,
+                        help="Path to the folder containing WAV files.")
+    parser.add_argument("-o", "--output_folder", type=str, required=True,
+                        help="Output folder to store transcriptions.")
+    parser.add_argument("-f", "--output_filename", type=str, default="transcriptions.txt",
+                        help="Name of the output text file.")
+    # No `choices`: the old list ended in a literal `...` (Ellipsis) placeholder,
+    # which rejected every code except zh/en and leaked "Ellipsis" into --help.
+    parser.add_argument("-l", "--language", type=str, default='zh',
+                        help="Language code of the audio files, e.g. zh, en, ja.")
+
+    cmd = parser.parse_args()
+    main(cmd.input_folder, cmd.output_folder, cmd.output_filename, cmd.language)
\ No newline at end of file

From 1b4951b5dc85ddb97ab2e231c709fdac011cafb7 Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:10:22 +0800
Subject: [PATCH 2/6] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index d4800c8..6f62e57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,4 @@ psutil
 jieba_fast
 jieba
 LangSegment
+Faster_Whisper

From c69d93bbd69f0df843e1b417a3eaafa8408f9c8d Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:20:09 +0800
Subject: [PATCH 3/6] Add files via upload

---
 README.md | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/README.md b/README.md
index 958e314..f7de2ea 100644
--- a/README.md
+++ b/README.md
@@ -197,8 +197,40 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
   - [ ] better sovits base model (enhanced audio quality)
   - [ ] model mix
 
+## (Optional) If you need, here will provide the command line operation mode, custom port and custom number of lines displayed per page
+Use the command line to open the WebUI for UVR5
+```
+python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
+```
+If you can't open a browser, use the format below for UVR processing. This example uses mdxnet for audio processing:
+```
+python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision 
+```
+This is how the audio segmentation of the dataset is done using the command line
+```
+python audio_slicer.py \
+    --input_path "<path_to_original_audio_file_or_directory>" \
+    --output_root "<directory_where_subdivided_audio_clips_will_be_saved>" \
+    --threshold <volume_threshold> \
+    --min_length <minimum_duration_of_each_subclip> \
+    --min_interval <shortest_time_gap_between_adjacent_subclips> \
+    --hop_size <step_size_for_computing_volume_curve>
+```
+This is how dataset ASR processing is done using the command line (Chinese only)
+```
+python tools/damo_asr/cmd-asr.py "<Path to the directory containing input audio files>"
+```
+ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
+
+(No progress bar, may be time dependent due to GPU performance)
+```
+python ./tools/damo_asr/WhisperASR.py -i <input> -o <output> -f <file_name.list> -l <language>
+```
+A custom list save path is enabled
 ## Credits
 
+
+
 Special thanks to the following projects and contributors:
 
 - [ar-vits](https://github.com/innnky/ar-vits)

From d55f63880b5a5c4864e31c02ff25b6cb5642f658 Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:21:42 +0800
Subject: [PATCH 4/6] Add files via upload

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f7de2ea..7018218 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
   - [ ] better sovits base model (enhanced audio quality)
   - [ ] model mix
 
-## (Optional) If you need, here will provide the command line operation mode, custom port and custom number of lines displayed per page
+## (Optional) If you need, here will provide the command line operation mode
 Use the command line to open the WebUI for UVR5
 ```
 python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>

From 9d2a136aab02f3ace34062ebaab59054551f3387 Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:25:19 +0800
Subject: [PATCH 5/6] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7018218..4d1ddac 100644
--- a/README.md
+++ b/README.md
@@ -222,7 +222,7 @@ python tools/damo_asr/cmd-asr.py "<Path to the directory containing input audio
 ```
 ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
 
-(No progress bar, may be time dependent due to GPU performance)
+(Without progress bars, GPU performance may cause time delays)
 ```
 python ./tools/damo_asr/WhisperASR.py -i <input> -o <output> -f <file_name.list> -l <language>
 ```

From 4ad13a84551d0661f2236d85d66dd042565c6da0 Mon Sep 17 00:00:00 2001
From: Shadow <124238783+ShadowLoveElysia@users.noreply.github.com>
Date: Mon, 5 Feb 2024 18:26:06 +0800
Subject: [PATCH 6/6] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4d1ddac..c0eda03 100644
--- a/README.md
+++ b/README.md
@@ -222,7 +222,7 @@ python tools/damo_asr/cmd-asr.py "<Path to the directory containing input audio
 ```
 ASR processing is performed through Faster_Whisper(ASR marking except Chinese)
 
-(Without progress bars, GPU performance may cause time delays)
+(No progress bar is shown; processing time depends on GPU performance)
 ```
 python ./tools/damo_asr/WhisperASR.py -i <input> -o <output> -f <file_name.list> -l <language>
 ```