mirror of
https://github.com/THUDM/CogVideo.git
synced 2025-09-26 17:30:05 +08:00
chore: update default training configurations
This commit is contained in:
parent
b362663679
commit
70c899f444
@ -1,11 +1,11 @@
|
|||||||
compute_environment: LOCAL_MACHINE
|
compute_environment: LOCAL_MACHINE
|
||||||
|
|
||||||
gpu_ids: "0,1,2,4"
|
gpu_ids: "0,1,2,3,4,5,6,7"
|
||||||
num_processes: 4 # should be the same as the number of GPUs
|
num_processes: 8 # should be the same as the number of GPUs
|
||||||
|
|
||||||
debug: false
|
debug: false
|
||||||
deepspeed_config:
|
deepspeed_config:
|
||||||
deepspeed_config_file: /absolute/path/to/your/deepspeed_config.yaml # e.g. /home/user/cogvideo/finetune/configs/zero2.yaml
|
deepspeed_config_file: /home/lhy/code/CogVideo/finetune/configs/zero2.yaml # e.g. /home/user/cogvideo/finetune/configs/zero2.yaml
|
||||||
zero3_init_flag: false
|
zero3_init_flag: false
|
||||||
distributed_type: DEEPSPEED
|
distributed_type: DEEPSPEED
|
||||||
downcast_bf16: 'no'
|
downcast_bf16: 'no'
|
||||||
|
@ -47,8 +47,8 @@ SYSTEM_ARGS=(
|
|||||||
|
|
||||||
# Checkpointing Configuration
|
# Checkpointing Configuration
|
||||||
CHECKPOINT_ARGS=(
|
CHECKPOINT_ARGS=(
|
||||||
--checkpointing_steps 5
|
--checkpointing_steps 10
|
||||||
--checkpointing_limit 10
|
--checkpointing_limit 2
|
||||||
--resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
|
--resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -46,8 +46,8 @@ SYSTEM_ARGS=(
|
|||||||
|
|
||||||
# Checkpointing Configuration
|
# Checkpointing Configuration
|
||||||
CHECKPOINT_ARGS=(
|
CHECKPOINT_ARGS=(
|
||||||
--checkpointing_steps 5
|
--checkpointing_steps 10
|
||||||
--checkpointing_limit 10
|
--checkpointing_limit 2
|
||||||
--resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
|
--resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint, otherwise, comment this line
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user