fix: remove copying first video frame as conditioning image

This commit is contained in:
OleehyO 2025-01-09 15:52:51 +00:00
parent 07766001f6
commit f6d722cec7

View File

@@ -139,7 +139,6 @@ class BaseI2VDataset(Dataset):
logger.info(f"Saved prompt embedding to {prompt_embedding_path}", main_process_only=False)
if encoded_video_path.exists():
# encoded_video = torch.load(encoded_video_path, weights_only=True)
encoded_video = load_file(encoded_video_path)["encoded_video"]
logger.debug(f"Loaded encoded video from {encoded_video_path}", main_process_only=False)
# shape of image: [C, H, W]
@@ -151,10 +150,6 @@ class BaseI2VDataset(Dataset):
# Current shape of frames: [F, C, H, W]
frames = self.video_transform(frames)
# Add image into the first frame.
# Note: **this operation may be model-specific** and may change in the future.
frames = torch.cat([image.unsqueeze(0), frames], dim=0)
# Convert to [B, C, F, H, W]
frames = frames.unsqueeze(0)
frames = frames.permute(0, 2, 1, 3, 4).contiguous()