From f6d722cec7a94fcae036549130918beb37fe4330 Mon Sep 17 00:00:00 2001 From: OleehyO Date: Thu, 9 Jan 2025 15:52:51 +0000 Subject: [PATCH] fix: remove copying first video frame as conditioning image --- finetune/datasets/i2v_dataset.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/finetune/datasets/i2v_dataset.py b/finetune/datasets/i2v_dataset.py index b26bb7f..bde0caa 100644 --- a/finetune/datasets/i2v_dataset.py +++ b/finetune/datasets/i2v_dataset.py @@ -139,7 +139,6 @@ class BaseI2VDataset(Dataset): logger.info(f"Saved prompt embedding to {prompt_embedding_path}", main_process_only=False) if encoded_video_path.exists(): - # encoded_video = torch.load(encoded_video_path, weights_only=True) encoded_video = load_file(encoded_video_path)["encoded_video"] logger.debug(f"Loaded encoded video from {encoded_video_path}", main_process_only=False) # shape of image: [C, H, W] @@ -151,10 +150,6 @@ class BaseI2VDataset(Dataset): # Current shape of frames: [F, C, H, W] frames = self.video_transform(frames) - # Add image into the first frame. - # Note, **this operation maybe model-specific**, and maybe change in the future. - frames = torch.cat([image.unsqueeze(0), frames], dim=0) - # Convert to [B, C, F, H, W] frames = frames.unsqueeze(0) frames = frames.permute(0, 2, 1, 3, 4).contiguous()