Update dataset.py

The pandas CSV file doesn't have columns named "item_name" and "semantic_audio", so switch to positional indexing with the `iloc` method, which is more reliable.
This commit is contained in:
spicysama 2024-01-17 19:43:32 +08:00 committed by GitHub
parent 90f940ed91
commit cc632b985d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -116,6 +116,7 @@ class Text2SemanticDataset(Dataset):
phoneme_data_len = len(self.phoneme_data.keys())
print("semantic_data_len:", semantic_data_len)
print("phoneme_data_len:", phoneme_data_len)
print(self.semantic_data)
idx = 0
num_not_in = 0
num_deleted_bigger = 0
@ -123,7 +124,7 @@ class Text2SemanticDataset(Dataset):
for i in range(semantic_data_len):
# 先依次遍历
# get str
item_name = self.semantic_data["item_name"][i]
item_name = self.semantic_data.iloc[i,0]
# print(self.phoneme_data)
try:
phoneme, word2ph, text = self.phoneme_data[item_name]
@ -133,7 +134,7 @@ class Text2SemanticDataset(Dataset):
num_not_in += 1
continue
semantic_str = self.semantic_data["semantic_audio"][i]
semantic_str = self.semantic_data.iloc[i,1]
# get token list
semantic_ids = [int(idx) for idx in semantic_str.split(" ")]
# (T), 是否需要变成 (1, T) -> 不需要,因为需要求 len