diff --git a/finetune/models/cogvideox_i2v/lora_trainer.py b/finetune/models/cogvideox_i2v/lora_trainer.py
index 793cf76..f830c08 100644
--- a/finetune/models/cogvideox_i2v/lora_trainer.py
+++ b/finetune/models/cogvideox_i2v/lora_trainer.py
@@ -115,7 +115,7 @@ class CogVideoXI2VLoraTrainer(Trainer):
 
         patch_size_t = self.state.transformer_config.patch_size_t
         if patch_size_t is not None:
-            ncopy = latent.shape[2] % patch_size_t
+            ncopy = (patch_size_t - latent.shape[2] % patch_size_t) % patch_size_t
             # Copy the first frame ncopy times to match patch_size_t
             first_frame = latent[:, :, :1, :, :]  # Get first frame [B, C, 1, H, W]
             latent = torch.cat([first_frame.repeat(1, 1, ncopy, 1, 1), latent], dim=2)
diff --git a/finetune/models/cogvideox_t2v/lora_trainer.py b/finetune/models/cogvideox_t2v/lora_trainer.py
index 5f0ec1c..410d4de 100644
--- a/finetune/models/cogvideox_t2v/lora_trainer.py
+++ b/finetune/models/cogvideox_t2v/lora_trainer.py
@@ -109,7 +109,7 @@ class CogVideoXT2VLoraTrainer(Trainer):
 
         patch_size_t = self.state.transformer_config.patch_size_t
         if patch_size_t is not None:
-            ncopy = latent.shape[2] % patch_size_t
+            ncopy = (patch_size_t - latent.shape[2] % patch_size_t) % patch_size_t
             # Copy the first frame ncopy times to match patch_size_t
             first_frame = latent[:, :, :1, :, :]  # Get first frame [B, C, 1, H, W]
             latent = torch.cat([first_frame.repeat(1, 1, ncopy, 1, 1), latent], dim=2)