mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-06 03:57:44 +08:00
Update dataset.py
The pandas DataFrame loaded from the CSV file doesn't have keys called "item_name" and "semantic_text", so use positional indexing with "iloc" instead, which is more accurate.
This commit is contained in:
parent
90f940ed91
commit
cc632b985d
@ -116,6 +116,7 @@ class Text2SemanticDataset(Dataset):
|
||||
phoneme_data_len = len(self.phoneme_data.keys())
|
||||
print("semantic_data_len:", semantic_data_len)
|
||||
print("phoneme_data_len:", phoneme_data_len)
|
||||
print(self.semantic_data)
|
||||
idx = 0
|
||||
num_not_in = 0
|
||||
num_deleted_bigger = 0
|
||||
@ -123,7 +124,7 @@ class Text2SemanticDataset(Dataset):
|
||||
for i in range(semantic_data_len):
|
||||
# 先依次遍历
|
||||
# get str
|
||||
item_name = self.semantic_data["item_name"][i]
|
||||
item_name = self.semantic_data.iloc[i,0]
|
||||
# print(self.phoneme_data)
|
||||
try:
|
||||
phoneme, word2ph, text = self.phoneme_data[item_name]
|
||||
@ -133,7 +134,7 @@ class Text2SemanticDataset(Dataset):
|
||||
num_not_in += 1
|
||||
continue
|
||||
|
||||
semantic_str = self.semantic_data["semantic_audio"][i]
|
||||
semantic_str = self.semantic_data.iloc[i,1]
|
||||
# get token list
|
||||
semantic_ids = [int(idx) for idx in semantic_str.split(" ")]
|
||||
# (T), 是否需要变成 (1, T) -> 不需要,因为需要求 len
|
||||
|
Loading…
x
Reference in New Issue
Block a user