From cc632b985d96cee9ccf844e1919b36759173e1a3 Mon Sep 17 00:00:00 2001 From: spicysama <122108331+AnyaCoder@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:43:32 +0800 Subject: [PATCH] Update dataset.py The pandas DataFrame loaded from the CSV file does not have columns named "item_name" and "semantic_audio"; switch to positional access via the "iloc" indexer, which is more accurate. --- GPT_SoVITS/AR/data/dataset.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPT_SoVITS/AR/data/dataset.py b/GPT_SoVITS/AR/data/dataset.py index 47adacc..b1ea69e 100644 --- a/GPT_SoVITS/AR/data/dataset.py +++ b/GPT_SoVITS/AR/data/dataset.py @@ -116,6 +116,7 @@ class Text2SemanticDataset(Dataset): phoneme_data_len = len(self.phoneme_data.keys()) print("semantic_data_len:", semantic_data_len) print("phoneme_data_len:", phoneme_data_len) + print(self.semantic_data) idx = 0 num_not_in = 0 num_deleted_bigger = 0 @@ -123,7 +124,7 @@ for i in range(semantic_data_len): # 先依次遍历 # get str - item_name = self.semantic_data["item_name"][i] + item_name = self.semantic_data.iloc[i,0] # print(self.phoneme_data) try: phoneme, word2ph, text = self.phoneme_data[item_name] @@ -133,7 +134,7 @@ num_not_in += 1 continue - semantic_str = self.semantic_data["semantic_audio"][i] + semantic_str = self.semantic_data.iloc[i,1] # get token list semantic_ids = [int(idx) for idx in semantic_str.split(" ")] # (T), 是否需要变成 (1, T) -> 不需要,因为需要求 len