Add process_dataset.py

Support third-party dataset import.
2026-06-05 05:48:14 +08:00 · 2024-01-22 01:09:10 +08:00 · 2024-01-22 01:09:10 +08:00 · 134098cc4b
commit 134098cc4b
parent 18f98f2ca5
1 changed files with 25 additions and 0 deletions
--- a/GPT_SoVITS/process_dataset.py
+++ b/GPT_SoVITS/process_dataset.py
@ -0,0 +1,25 @@
 import os
 # Supporting third-party datasets with the format where each audio file corresponds to a text file.
 # For example, voice01.wav -> voice01.txt.
 def convert_dataset(input_folder, output_file, language='zh'):
    audio_files = []
    for f in os.listdir(input_folder):
        if f.endswith('.wav'):
            audio_files.append(f)
    with open(output_file, 'w', encoding='utf-8') as output:
        for audio_file in audio_files:
            audio_path = os.path.join(input_folder, audio_file)
            text_file = os.path.join(input_folder, audio_file.replace('.wav', '.txt'))
            with open(text_file, 'r', encoding='utf-8') as text_content:
                text = text_content.read().replace('\n', '')
            speaker_name = os.path.splitext(audio_file)[0]
            output_line = f'{audio_path}|{speaker_name}|{language}|{text}\n'
            output.write(output_line)