Reorganize training script arguments

This commit is contained in:
OleehyO 2025-01-01 15:52:39 +00:00
parent 6ef15dd2a5
commit 48ad178818
2 changed files with 124 additions and 79 deletions

View File

@@ -3,44 +3,67 @@
# Prevent tokenizer parallelism issues # Prevent tokenizer parallelism issues
export TOKENIZERS_PARALLELISM=false export TOKENIZERS_PARALLELISM=false
# Launch training with accelerate # Model Configuration
accelerate launch train.py \ MODEL_ARGS=(
########## Model Configuration ########## --model_path "THUDM/CogVideoX1.5-5B-I2V"
--model_path "THUDM/CogVideoX1.5-5B-I2V" \ --model_name "cogvideox1.5-i2v"
--model_name "cogvideox1.5-i2v" \ --model_type "i2v"
--model_type "i2v" \ --training_type "lora"
--training_type "lora" \ )
########## Output Configuration ########## # Output Configuration
--output_dir "/path/to/output/dir" \ OUTPUT_ARGS=(
--report_to "tensorboard" \ --output_dir "/path/to/output/dir"
--report_to "tensorboard"
########## Data Configuration ########## )
--data_root "/path/to/data/dir" \
--caption_column "prompt.txt" \ # Data Configuration
--video_column "videos.txt" \ DATA_ARGS=(
--image_column "images.txt" \ --data_root "/path/to/data/dir"
--train_resolution "48x768x1360" \ --caption_column "prompt.txt"
--video_column "videos.txt"
########## Training Configuration ########## --image_column "images.txt"
--train_epochs 10 \ --train_resolution "80x768x1360"
--batch_size 1 \ )
--gradient_accumulation_steps 1 \
--mixed_precision "bf16" \ # Training Configuration
--seed 42 \ TRAIN_ARGS=(
--train_epochs 10
########## System Configuration ########## --batch_size 1
--num_workers 8 \ --gradient_accumulation_steps 1
--pin_memory True \ --mixed_precision "bf16"
--nccl_timeout 1800 \ --seed 42
)
########## Checkpointing Configuration ##########
--checkpointing_steps 200 \ # System Configuration
--checkpointing_limit 10 \ SYSTEM_ARGS=(
--num_workers 8
########## Validation Configuration ########## --pin_memory True
--do_validation False \ --nccl_timeout 1800
--validation_dir "path/to/validation/dir" \ )
--validation_steps 400 \
--validation_prompts "prompts.txt" \ # Checkpointing Configuration
CHECKPOINT_ARGS=(
--checkpointing_steps 200
--checkpointing_limit 10
)
# Validation Configuration
VALIDATION_ARGS=(
--do_validation False
--validation_dir "/path/to/validation/dir"
--validation_steps 400
--validation_prompts "prompts.txt"
--validation_images "images.txt"
--gen_fps 15 --gen_fps 15
)
# Combine all arguments and launch training
accelerate launch train.py \
"${MODEL_ARGS[@]}" \
"${OUTPUT_ARGS[@]}" \
"${DATA_ARGS[@]}" \
"${TRAIN_ARGS[@]}" \
"${SYSTEM_ARGS[@]}" \
"${CHECKPOINT_ARGS[@]}" \
"${VALIDATION_ARGS[@]}"

View File

@@ -3,43 +3,65 @@
# Prevent tokenizer parallelism issues # Prevent tokenizer parallelism issues
export TOKENIZERS_PARALLELISM=false export TOKENIZERS_PARALLELISM=false
# Launch training with accelerate # Model Configuration
accelerate launch train.py \ MODEL_ARGS=(
########## Model Configuration ########## --model_path "THUDM/CogVideoX1.5-5B"
--model_path "THUDM/CogVideoX1.5-5B" \ --model_name "cogvideox1.5-t2v"
--model_name "cogvideox1.5-t2v" \ --model_type "t2v"
--model_type "t2v" \ --training_type "lora"
--training_type "lora" \ )
########## Output Configuration ########## # Output Configuration
--output_dir "/path/to/output/dir" \ OUTPUT_ARGS=(
--report_to "tensorboard" \ --output_dir "/path/to/output/dir"
--report_to "tensorboard"
########## Data Configuration ########## )
--data_root "/path/to/data/dir" \
--caption_column "prompt.txt" \ # Data Configuration
--video_column "videos.txt" \ DATA_ARGS=(
--train_resolution "48x768x1360" \ --data_root "/path/to/data/dir"
--caption_column "prompt.txt"
########## Training Configuration ########## --video_column "videos.txt"
--train_epochs 10 \ --train_resolution "80x768x1360"
--batch_size 1 \ )
--gradient_accumulation_steps 1 \
--mixed_precision "bf16" \ # Training Configuration
--seed 42 \ TRAIN_ARGS=(
--train_epochs 10
########## System Configuration ########## --batch_size 1
--num_workers 8 \ --gradient_accumulation_steps 1
--pin_memory True \ --mixed_precision "bf16"
--nccl_timeout 1800 \ --seed 42
)
########## Checkpointing Configuration ##########
--checkpointing_steps 200 \ # System Configuration
--checkpointing_limit 10 \ SYSTEM_ARGS=(
--num_workers 8
########## Validation Configuration ########## --pin_memory True
--do_validation False \ --nccl_timeout 1800
--validation_dir "path/to/validation/dir" \ )
--validation_steps 400 \
--validation_prompts "prompts.txt" \ # Checkpointing Configuration
CHECKPOINT_ARGS=(
--checkpointing_steps 200
--checkpointing_limit 10
)
# Validation Configuration
VALIDATION_ARGS=(
--do_validation False
--validation_dir "/path/to/validation/dir"
--validation_steps 400
--validation_prompts "prompts.txt"
--gen_fps 15 --gen_fps 15
)
# Combine all arguments and launch training
accelerate launch train.py \
"${MODEL_ARGS[@]}" \
"${OUTPUT_ARGS[@]}" \
"${DATA_ARGS[@]}" \
"${TRAIN_ARGS[@]}" \
"${SYSTEM_ARGS[@]}" \
"${CHECKPOINT_ARGS[@]}" \
"${VALIDATION_ARGS[@]}"