diff --git a/GPT_SoVITS/AR/data/dataset.py b/GPT_SoVITS/AR/data/dataset.py
index 47adacc..b1ea69e 100644
--- a/GPT_SoVITS/AR/data/dataset.py
+++ b/GPT_SoVITS/AR/data/dataset.py
@@ -116,6 +116,7 @@ class Text2SemanticDataset(Dataset):
         phoneme_data_len = len(self.phoneme_data.keys())
         print("semantic_data_len:", semantic_data_len)
         print("phoneme_data_len:", phoneme_data_len)
+        print(self.semantic_data)
         idx = 0
         num_not_in = 0
         num_deleted_bigger = 0
@@ -123,7 +124,7 @@
         for i in range(semantic_data_len):
            # iterate through the entries one by one
            # get str
-            item_name = self.semantic_data["item_name"][i]
+            item_name = self.semantic_data.iloc[i,0]
            # print(self.phoneme_data)
            try:
                phoneme, word2ph, text = self.phoneme_data[item_name]
@@ -133,7 +134,7 @@
                num_not_in += 1
                continue

-            semantic_str = self.semantic_data["semantic_audio"][i]
+            semantic_str = self.semantic_data.iloc[i,1]
            # get token list
            semantic_ids = [int(idx) for idx in semantic_str.split(" ")]
            # (T); does it need to become (1, T)? -> no, because len is needed
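
The patch above swaps label-based column access (`self.semantic_data["item_name"]`, `self.semantic_data["semantic_audio"]`) for positional access via `DataFrame.iloc`, which keeps working even when the loaded semantic table's header row does not carry those exact column names. A minimal sketch of that difference, assuming the semantic data is a tab-separated table read with pandas; the sample rows and header handling below are illustrative only, not the project's actual loading code:

```python
# Sketch: label-based vs. positional DataFrame access (hypothetical sample data).
from io import StringIO

import pandas as pd

# A semantic table whose header matches the expected column names.
tsv = "item_name\tsemantic_audio\nutt_0001\t12 7 301 45\nutt_0002\t9 88 13\n"
df_named = pd.read_csv(StringIO(tsv), delimiter="\t")

# Label-based access works only when the header matches exactly.
print(df_named["item_name"][0])  # utt_0001

# The same data written without a header (or with different column names)
# breaks label-based access but not positional access.
tsv_no_header = "utt_0001\t12 7 301 45\nutt_0002\t9 88 13\n"
df_raw = pd.read_csv(StringIO(tsv_no_header), delimiter="\t", header=None)

# df_raw["item_name"] would raise KeyError; .iloc indexes by row/column position.
print(df_raw.iloc[0, 0])  # utt_0001
print(df_raw.iloc[0, 1])  # 12 7 301 45
```

The trade-off is that `.iloc[i, 0]` and `.iloc[i, 1]` silently assume the item name and semantic tokens are always the first and second columns, so the change trades a hard failure on mismatched headers for a dependence on column order.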