Mirror of https://github.com/THUDM/CogVideo.git (synced 2025-06-30 14:35:16 +08:00)
- Add SFT (Supervised Fine-Tuning) trainers for all model variants:
  - CogVideoX I2V and T2V
  - CogVideoX-1.5 I2V and T2V
- Add DeepSpeed ZeRO configuration files:
  - ZeRO-2 with and without CPU offload
  - ZeRO-3 with and without CPU offload
- Add base accelerate config for distributed training
- Update trainer.py to support SFT training mode

This enables full-parameter fine-tuning with memory-efficient distributed training using DeepSpeed ZeRO optimization.
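The ZeRO configuration files themselves are not reproduced on this page. As a rough sketch of the kind of settings a ZeRO-2-with-offload file typically contains (key names follow DeepSpeed's configuration schema; it is written in YAML to match the filenames referenced below, though DeepSpeed configs are more commonly JSON with the same keys, and the repository's actual zero2_offload.yaml may differ):

# Illustrative sketch only, not the repository's zero2_offload.yaml
bf16:
  enabled: true
zero_optimization:
  stage: 2                   # ZeRO-2: shard optimizer states and gradients across GPUs
  offload_optimizer:
    device: cpu              # keep optimizer states in CPU RAM; the "without offload" variant omits this block
    pin_memory: true
  overlap_comm: true
  contiguous_gradients: true
gradient_accumulation_steps: auto   # "auto" values are resolved by the Hugging Face integration at launch
gradient_clipping: auto
train_batch_size: auto
train_micro_batch_size_per_gpu: auto

The ZeRO-3 variants raise stage to 3 so that model parameters are sharded as well (optionally offloaded via an offload_param block), trading extra communication for a further reduction in per-GPU memory.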
The base accelerate config for distributed training (28 lines, 678 B, YAML):
compute_environment: LOCAL_MACHINE
# gpu_ids: "0" # 0,1,2,3,4,5,6,7
# num_processes: 1
gpu_ids: all
num_processes: 8
debug: false
deepspeed_config:
  deepspeed_config_file: /path/to/your/configs/zero2.yaml
  # deepspeed_config_file: /path/to/your/configs/zero2_offload.yaml
  # deepspeed_config_file: /path/to/your/configs/zero3.yaml
  # deepspeed_config_file: /path/to/your/configs/zero3_offload.yaml
  zero3_init_flag: false
distributed_type: DEEPSPEED
downcast_bf16: 'no'
enable_cpu_affinity: false
machine_rank: 0
main_training_function: main
num_machines: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
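Switching a run to ZeRO-3 only requires pointing deepspeed_config_file at one of the zero3 variants and, for very large models, optionally enabling zero3_init_flag so parameters are created directly in sharded form. A minimal sketch of the keys that change (everything else stays as above):

deepspeed_config:
  deepspeed_config_file: /path/to/your/configs/zero3.yaml   # or zero3_offload.yaml
  zero3_init_flag: true   # optional; initializes large models directly under ZeRO-3 partitioning

Training is then launched through Accelerate's CLI, for example: accelerate launch --config_file accelerate_config.yaml train.py, where accelerate_config.yaml is this file saved locally and train.py stands in for the repository's SFT entry point (both names are placeholders, not paths confirmed by this page).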