# Provenance: added by the patch "Add process_dataset.py"
# (commit 134098cc4babc9af2f428fb138c8b010e9601130)
# Author: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com>
# Date: Mon, 22 Jan 2024 01:09:10 +0800
# Summary: Support third-party dataset import.
# Target path: GPT_SoVITS/process_dataset.py

import os


# Supporting third-party datasets with the format where each audio file
# corresponds to a text file. For example, voice01.wav -> voice01.txt.
def convert_dataset(input_folder, output_file, language='zh') -> None:
    """Build a ``path|speaker|language|text`` list from wav/txt file pairs.

    Every ``*.wav`` file directly inside ``input_folder`` must have a
    transcript next to it with the same stem and a ``.txt`` extension.
    One line is written to ``output_file`` per audio file, in sorted
    filename order::

        <audio path>|<file stem>|<language>|<transcript without newlines>

    Args:
        input_folder: Directory holding the ``.wav`` / ``.txt`` pairs.
        output_file: Path of the list file to create (overwritten if it
            already exists). Written as UTF-8.
        language: Language tag placed in the third column of every line.

    Raises:
        FileNotFoundError: If ``input_folder`` does not exist or a
            ``.wav`` file has no matching ``.txt`` transcript. In that
            case ``output_file`` is left untouched.
    """
    # os.listdir() returns entries in arbitrary order; sort so the
    # generated list is reproducible across runs and platforms.
    audio_files = sorted(
        name for name in os.listdir(input_folder) if name.endswith('.wav')
    )

    # Collect every line before touching the output file, so a missing
    # transcript cannot leave a truncated or half-written list behind.
    lines = []
    for audio_file in audio_files:
        # splitext() strips only the final extension; str.replace() would
        # also rewrite any '.wav' that appears earlier in the file name.
        stem = os.path.splitext(audio_file)[0]
        audio_path = os.path.join(input_folder, audio_file)
        text_file = os.path.join(input_folder, stem + '.txt')

        # 'utf-8-sig' drops a leading BOM if present and otherwise decodes
        # exactly like 'utf-8', so the BOM never leaks into the text field.
        with open(text_file, 'r', encoding='utf-8-sig') as text_content:
            text = text_content.read().replace('\n', '')

        # The file stem doubles as the speaker name.
        lines.append(f'{audio_path}|{stem}|{language}|{text}\n')

    with open(output_file, 'w', encoding='utf-8') as output:
        output.writelines(lines)