From 455b44a7b58775cf7b43d6468c83dc33965ddab2 Mon Sep 17 00:00:00 2001
From: OleehyO
Date: Mon, 13 Jan 2025 11:53:13 +0000
Subject: [PATCH] chore: code cleanup and parameter optimization

- Remove redundant comments and debug information
- Adjust default parameters in training scripts
- Clean up code in lora_trainer and trainer implementations
---
 finetune/models/cogvideox_t2v/lora_trainer.py |  1 -
 finetune/train_zero_i2v.sh                    |  2 +-
 finetune/train_zero_t2v.sh                    |  2 +-
 finetune/trainer.py                           | 12 +-----------
 4 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/finetune/models/cogvideox_t2v/lora_trainer.py b/finetune/models/cogvideox_t2v/lora_trainer.py
index 6b38f1d..62582c8 100644
--- a/finetune/models/cogvideox_t2v/lora_trainer.py
+++ b/finetune/models/cogvideox_t2v/lora_trainer.py
@@ -197,7 +197,6 @@ class CogVideoXT2VLoraTrainer(Trainer):
             base_num_frames = num_frames
         else:
             base_num_frames = (num_frames + transformer_config.patch_size_t - 1) // transformer_config.patch_size_t
-        breakpoint()
         freqs_cos, freqs_sin = get_3d_rotary_pos_embed(
             embed_dim=transformer_config.attention_head_dim,
             crops_coords=None,
diff --git a/finetune/train_zero_i2v.sh b/finetune/train_zero_i2v.sh
index 9b0a977..2357a7e 100644
--- a/finetune/train_zero_i2v.sh
+++ b/finetune/train_zero_i2v.sh
@@ -49,7 +49,7 @@ SYSTEM_ARGS=(
 CHECKPOINT_ARGS=(
     --checkpointing_steps 10
     --checkpointing_limit 2
-    --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
+    # --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
 )
 
 # Validation Configuration
diff --git a/finetune/train_zero_t2v.sh b/finetune/train_zero_t2v.sh
index cc56ca1..80dbca3 100644
--- a/finetune/train_zero_t2v.sh
+++ b/finetune/train_zero_t2v.sh
@@ -48,7 +48,7 @@ SYSTEM_ARGS=(
 CHECKPOINT_ARGS=(
     --checkpointing_steps 10
     --checkpointing_limit 2
-    --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
+    # --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
 )
 
 # Validation Configuration
diff --git a/finetune/trainer.py b/finetune/trainer.py
index e5a4212..6e980af 100644
--- a/finetune/trainer.py
+++ b/finetune/trainer.py
@@ -758,16 +758,6 @@ class Trainer:
         self.accelerator.register_save_state_pre_hook(save_model_hook)
         self.accelerator.register_load_state_pre_hook(load_model_hook)
 
-    # def __maybe_save_checkpoint(self, global_step: int, must_save: bool = False):
-    #     if self.accelerator.distributed_type == DistributedType.DEEPSPEED or self.accelerator.is_main_process:
-    #         if must_save or global_step % self.args.checkpointing_steps == 0:
-    #             save_path = get_intermediate_ckpt_path(
-    #                 checkpointing_limit=self.args.checkpointing_limit,
-    #                 step=global_step,
-    #                 output_dir=self.args.output_dir,
-    #             )
-    #             self.accelerator.save_state(save_path, safe_serialization=True)
-
     def __maybe_save_checkpoint(self, global_step: int, must_save: bool = False):
         if self.accelerator.distributed_type == DistributedType.DEEPSPEED or self.accelerator.is_main_process:
             if must_save or global_step % self.args.checkpointing_steps == 0:
@@ -783,4 +773,4 @@ class Trainer:
         pipe_save_path.mkdir(parents=True, exist_ok=True)
         pipe.save_pretrained(pipe_save_path)
         del pipe
-        torch.cuda.empty_cache()
+        free_memory()