feat: add training launch scripts for I2V and T2V models

Add two shell scripts to simplify model training:
- accelerate_train_i2v.sh: Launch script for Image-to-Video training
- accelerate_train_t2v.sh: Launch script for Text-to-Video training

Both scripts provide comprehensive configurations for:
- Model settings
- Data pipeline
- Training parameters
- System resources
- Checkpointing
- Validation
This commit is contained in:
OleehyO 2025-01-01 14:53:45 +00:00
parent 26b87cd4ff
commit 6e79472417
2 changed files with 91 additions and 0 deletions

View File

@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Launch LoRA training for the CogVideoX1.5 Image-to-Video (I2V) model by
# running train.py through `accelerate launch`.
#
# Usage: run from the directory that contains train.py (it is referenced by a
# relative path), after replacing the "/path/to/..." placeholders below.
#
# Options are grouped into arrays rather than written as one long
# backslash-continued command. A comment line in the middle of a continued
# command terminates that command, so every option after the first section
# header would be executed as a separate (non-existent) command instead of
# being passed to train.py.

# Prevent tokenizer parallelism issues
export TOKENIZERS_PARALLELISM=false

########## Model Configuration ##########
MODEL_ARGS=(
  --model_path "THUDM/CogVideoX1.5-5B-I2V"
  --model_name "cogvideox1.5-i2v"
  --model_type "i2v"
  --training_type "lora"
)

########## Output Configuration ##########
OUTPUT_ARGS=(
  --output_dir "/path/to/output/dir"
  --report_to "tensorboard"
)

########## Data Configuration ##########
# NOTE(review): train_resolution looks like frames x height x width;
# confirm against train.py.
DATA_ARGS=(
  --data_root "/path/to/data/dir"
  --caption_column "prompt.txt"
  --video_column "videos.txt"
  --image_column "images.txt"
  --train_resolution "48x768x1360"
)

########## Training Configuration ##########
TRAIN_ARGS=(
  --train_epochs 10
  --batch_size 1
  --gradient_accumulation_steps 1
  --mixed_precision "bf16"
  --seed 42
)

########## System Configuration ##########
SYSTEM_ARGS=(
  --num_workers 8
  --pin_memory True
  --nccl_timeout 1800
)

########## Checkpointing Configuration ##########
CHECKPOINT_ARGS=(
  --checkpointing_steps 200
  --checkpointing_limit 10
)

########## Validation Configuration ##########
VALIDATION_ARGS=(
  --do_validation False
  --validation_dir "path/to/validation/dir"
  --validation_steps 400
  --validation_prompts "prompts.txt"
  --gen_fps 15
)

#######################################
# Launch training with accelerate, passing every option group in a single
# invocation.
# Globals:   MODEL_ARGS, OUTPUT_ARGS, DATA_ARGS, TRAIN_ARGS, SYSTEM_ARGS,
#            CHECKPOINT_ARGS, VALIDATION_ARGS (read)
# Arguments: none
# Returns:   exit status of `accelerate launch`
#######################################
launch_training() {
  accelerate launch train.py \
    "${MODEL_ARGS[@]}" \
    "${OUTPUT_ARGS[@]}" \
    "${DATA_ARGS[@]}" \
    "${TRAIN_ARGS[@]}" \
    "${SYSTEM_ARGS[@]}" \
    "${CHECKPOINT_ARGS[@]}" \
    "${VALIDATION_ARGS[@]}"
}

# Last command in the script, so its status becomes the script's exit status.
launch_training

View File

@ -0,0 +1,45 @@
#!/usr/bin/env bash
#
# Launch LoRA training for the CogVideoX1.5 Text-to-Video (T2V) model by
# running train.py through `accelerate launch`.
#
# Usage: run from the directory that contains train.py (it is referenced by a
# relative path), after replacing the "/path/to/..." placeholders below.
#
# Options are grouped into arrays rather than written as one long
# backslash-continued command. A comment line in the middle of a continued
# command terminates that command, so every option after the first section
# header would be executed as a separate (non-existent) command instead of
# being passed to train.py.

# Prevent tokenizer parallelism issues
export TOKENIZERS_PARALLELISM=false

########## Model Configuration ##########
MODEL_ARGS=(
  --model_path "THUDM/CogVideoX1.5-5B"
  --model_name "cogvideox1.5-t2v"
  --model_type "t2v"
  --training_type "lora"
)

########## Output Configuration ##########
OUTPUT_ARGS=(
  --output_dir "/path/to/output/dir"
  --report_to "tensorboard"
)

########## Data Configuration ##########
# NOTE(review): train_resolution looks like frames x height x width;
# confirm against train.py.
DATA_ARGS=(
  --data_root "/path/to/data/dir"
  --caption_column "prompt.txt"
  --video_column "videos.txt"
  --train_resolution "48x768x1360"
)

########## Training Configuration ##########
TRAIN_ARGS=(
  --train_epochs 10
  --batch_size 1
  --gradient_accumulation_steps 1
  --mixed_precision "bf16"
  --seed 42
)

########## System Configuration ##########
SYSTEM_ARGS=(
  --num_workers 8
  --pin_memory True
  --nccl_timeout 1800
)

########## Checkpointing Configuration ##########
CHECKPOINT_ARGS=(
  --checkpointing_steps 200
  --checkpointing_limit 10
)

########## Validation Configuration ##########
VALIDATION_ARGS=(
  --do_validation False
  --validation_dir "path/to/validation/dir"
  --validation_steps 400
  --validation_prompts "prompts.txt"
  --gen_fps 15
)

#######################################
# Launch training with accelerate, passing every option group in a single
# invocation.
# Globals:   MODEL_ARGS, OUTPUT_ARGS, DATA_ARGS, TRAIN_ARGS, SYSTEM_ARGS,
#            CHECKPOINT_ARGS, VALIDATION_ARGS (read)
# Arguments: none
# Returns:   exit status of `accelerate launch`
#######################################
launch_training() {
  accelerate launch train.py \
    "${MODEL_ARGS[@]}" \
    "${OUTPUT_ARGS[@]}" \
    "${DATA_ARGS[@]}" \
    "${TRAIN_ARGS[@]}" \
    "${SYSTEM_ARGS[@]}" \
    "${CHECKPOINT_ARGS[@]}" \
    "${VALIDATION_ARGS[@]}"
}

# Last command in the script, so its status becomes the script's exit status.
launch_training