From 70c899f4441ceee4b255cc79791daea0ead76095 Mon Sep 17 00:00:00 2001
From: OleehyO
Date: Sun, 12 Jan 2025 08:50:15 +0000
Subject: [PATCH] chore: update default training configurations

---
Review notes (free text between the separator and the diffstat is not part
of the commit message):

* Dropped the change that pointed deepspeed_config_file at a path inside one
  developer's home directory (/home/lhy/code/CogVideo/...). The
  /absolute/path/to/your/deepspeed_config.yaml placeholder is kept, so the
  accelerate_config.yaml hunk and the diffstat are smaller than in the
  original commit. The post-image hash on that file's index line comes from
  the original commit and no longer matches the result.
* Line structure was rebuilt from a whitespace-collapsed copy. Blank context
  lines were placed to fit the original hunk line counts, and the 4-space
  indent in the shell hunks is assumed. If the context does not match, retry
  with `git apply --ignore-whitespace`.

 finetune/accelerate_config.yaml | 4 ++--
 finetune/train_zero_i2v.sh      | 4 ++--
 finetune/train_zero_t2v.sh      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/finetune/accelerate_config.yaml b/finetune/accelerate_config.yaml
index 5d99cf4..f1e2ca4 100644
--- a/finetune/accelerate_config.yaml
+++ b/finetune/accelerate_config.yaml
@@ -1,7 +1,7 @@
 compute_environment: LOCAL_MACHINE
 
-gpu_ids: "0,1,2,4"
-num_processes: 4 # should be the same as the number of GPUs
+gpu_ids: "0,1,2,3,4,5,6,7"
+num_processes: 8 # should be the same as the number of GPUs
 
 debug: false
 deepspeed_config:
diff --git a/finetune/train_zero_i2v.sh b/finetune/train_zero_i2v.sh
index 036761e..9b0a977 100644
--- a/finetune/train_zero_i2v.sh
+++ b/finetune/train_zero_i2v.sh
@@ -47,8 +47,8 @@ SYSTEM_ARGS=(
 
 # Checkpointing Configuration
 CHECKPOINT_ARGS=(
-    --checkpointing_steps 5
-    --checkpointing_limit 10
+    --checkpointing_steps 10
+    --checkpointing_limit 2
     --resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
 )
 
diff --git a/finetune/train_zero_t2v.sh b/finetune/train_zero_t2v.sh
index 75fb1d6..cc56ca1 100644
--- a/finetune/train_zero_t2v.sh
+++ b/finetune/train_zero_t2v.sh
@@ -46,8 +46,8 @@ SYSTEM_ARGS=(
 
 # Checkpointing Configuration
 CHECKPOINT_ARGS=(
-    --checkpointing_steps 5
-    --checkpointing_limit 10
+    --checkpointing_steps 10
+    --checkpointing_limit 2
     --resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
 )
 