From 455b44a7b58775cf7b43d6468c83dc33965ddab2 Mon Sep 17 00:00:00 2001
From: OleehyO
Date: Mon, 13 Jan 2025 11:53:13 +0000
Subject: [PATCH] chore: code cleanup and parameter optimization

- Remove redundant comments and debug information
- Adjust default parameters in training scripts
- Clean up code in lora_trainer and trainer implementations
---
 finetune/models/cogvideox_t2v/lora_trainer.py |  1 -
 finetune/train_zero_i2v.sh                    |  2 +-
 finetune/train_zero_t2v.sh                    |  2 +-
 finetune/trainer.py                           | 12 +-----------
 4 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/finetune/models/cogvideox_t2v/lora_trainer.py b/finetune/models/cogvideox_t2v/lora_trainer.py
index 6b38f1d..62582c8 100644
--- a/finetune/models/cogvideox_t2v/lora_trainer.py
+++ b/finetune/models/cogvideox_t2v/lora_trainer.py
@@ -197,7 +197,6 @@ class CogVideoXT2VLoraTrainer(Trainer):
             base_num_frames = num_frames
         else:
             base_num_frames = (num_frames + transformer_config.patch_size_t - 1) // transformer_config.patch_size_t
-        breakpoint()
         freqs_cos, freqs_sin = get_3d_rotary_pos_embed(
             embed_dim=transformer_config.attention_head_dim,
             crops_coords=None,
diff --git a/finetune/train_zero_i2v.sh b/finetune/train_zero_i2v.sh
index 9b0a977..2357a7e 100644
--- a/finetune/train_zero_i2v.sh
+++ b/finetune/train_zero_i2v.sh
@@ -49,7 +49,7 @@ SYSTEM_ARGS=(
 CHECKPOINT_ARGS=(
     --checkpointing_steps 10
     --checkpointing_limit 2
-    --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
+    # --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
 )
 
 # Validation Configuration
diff --git a/finetune/train_zero_t2v.sh b/finetune/train_zero_t2v.sh
index cc56ca1..80dbca3 100644
--- a/finetune/train_zero_t2v.sh
+++ b/finetune/train_zero_t2v.sh
@@ -48,7 +48,7 @@ SYSTEM_ARGS=(
 CHECKPOINT_ARGS=(
     --checkpointing_steps 10
     --checkpointing_limit 2
-    --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
+    # --resume_from_checkpoint "/absolute/path/to/checkpoint_dir"  # if you want to resume from a checkpoint, otherwise, comment this line
 )
 
 # Validation Configuration
diff --git a/finetune/trainer.py b/finetune/trainer.py
index e5a4212..6e980af 100644
--- a/finetune/trainer.py
+++ b/finetune/trainer.py
@@ -758,16 +758,6 @@ class Trainer:
         self.accelerator.register_save_state_pre_hook(save_model_hook)
         self.accelerator.register_load_state_pre_hook(load_model_hook)
 
-    # def __maybe_save_checkpoint(self, global_step: int, must_save: bool = False):
-    #     if self.accelerator.distributed_type == DistributedType.DEEPSPEED or self.accelerator.is_main_process:
-    #         if must_save or global_step % self.args.checkpointing_steps == 0:
-    #             save_path = get_intermediate_ckpt_path(
-    #                 checkpointing_limit=self.args.checkpointing_limit,
-    #                 step=global_step,
-    #                 output_dir=self.args.output_dir,
-    #             )
-    #             self.accelerator.save_state(save_path, safe_serialization=True)
-
     def __maybe_save_checkpoint(self, global_step: int, must_save: bool = False):
         if self.accelerator.distributed_type == DistributedType.DEEPSPEED or self.accelerator.is_main_process:
             if must_save or global_step % self.args.checkpointing_steps == 0:
@@ -783,4 +773,4 @@ class Trainer:
         pipe_save_path.mkdir(parents=True, exist_ok=True)
         pipe.save_pretrained(pipe_save_path)
         del pipe
-        torch.cuda.empty_cache()
+        free_memory()