From 6c740579050efa68c17e7cdf59e33f1126c3d6fb Mon Sep 17 00:00:00 2001
From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com>
Date: Sat, 7 Feb 2026 05:15:41 +0530
Subject: [PATCH] Fix patch_size_t padding calculation for frame count alignment

The old code set ncopy to latent.shape[2] % patch_size_t, i.e. the
remainder itself, rather than the number of frames needed to reach the
next multiple of patch_size_t. With 11 latent frames and
patch_size_t = 4, it padded 3 frames (giving 14, still misaligned)
instead of 1 (giving 12), tripping the alignment assert. This happened
to work for patch_size_t == 2, where the remainder and the missing
count coincide, but breaks for any larger temporal patch size. Also
guard the concatenation so no zero-length repeat is performed when the
frame count is already aligned.
---
 finetune/models/cogvideox_t2v/lora_trainer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/finetune/models/cogvideox_t2v/lora_trainer.py b/finetune/models/cogvideox_t2v/lora_trainer.py
index 5f0ec1c..895e7cd 100644
--- a/finetune/models/cogvideox_t2v/lora_trainer.py
+++ b/finetune/models/cogvideox_t2v/lora_trainer.py
@@ -109,10 +109,12 @@ class CogVideoXT2VLoraTrainer(Trainer):
         patch_size_t = self.state.transformer_config.patch_size_t
         if patch_size_t is not None:
-            ncopy = latent.shape[2] % patch_size_t
+            remainder = latent.shape[2] % patch_size_t
+            ncopy = (patch_size_t - remainder) % patch_size_t
             # Copy the first frame ncopy times to match patch_size_t
-            first_frame = latent[:, :, :1, :, :]  # Get first frame [B, C, 1, H, W]
-            latent = torch.cat([first_frame.repeat(1, 1, ncopy, 1, 1), latent], dim=2)
+            if ncopy > 0:
+                first_frame = latent[:, :, :1, :, :]  # Get first frame [B, C, 1, H, W]
+                latent = torch.cat([first_frame.repeat(1, 1, ncopy, 1, 1), latent], dim=2)
 
             assert latent.shape[2] % patch_size_t == 0
 
         batch_size, num_channels, num_frames, height, width = latent.shape
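
A quick standalone sanity check (not part of the patch; synthetic tensor
shapes, with the padding logic copied out of the trainer into a helper
so the old and fixed paths can be compared side by side):

import torch

def pad_to_patch_size_t(latent: torch.Tensor, patch_size_t: int, fixed: bool) -> torch.Tensor:
    """Pad the frame axis of latent [B, C, F, H, W] by repeating the first frame."""
    if fixed:
        remainder = latent.shape[2] % patch_size_t
        ncopy = (patch_size_t - remainder) % patch_size_t  # frames still missing
    else:
        ncopy = latent.shape[2] % patch_size_t  # old code: the remainder itself
    if ncopy > 0:
        first_frame = latent[:, :, :1, :, :]  # [B, C, 1, H, W]
        latent = torch.cat([first_frame.repeat(1, 1, ncopy, 1, 1), latent], dim=2)
    return latent

latent = torch.randn(1, 16, 11, 2, 2)  # 11 frames, patch_size_t = 4
old = pad_to_patch_size_t(latent, 4, fixed=False)
new = pad_to_patch_size_t(latent, 4, fixed=True)
print(old.shape[2], old.shape[2] % 4)  # 14 2  -> the assert would fail
print(new.shape[2], new.shape[2] % 4)  # 12 0  -> aligned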