Reorganize training script arguments

This commit is contained in:
OleehyO 2025-01-01 15:52:39 +00:00
parent 6ef15dd2a5
commit 48ad178818
2 changed files with 124 additions and 79 deletions

View File

@@ -3,44 +3,67 @@
# Prevent tokenizer parallelism issues. The variable is exported so that the
# processes started below inherit it.
export TOKENIZERS_PARALLELISM=false
# Launch training with accelerate.
# NOTE(review): this is the pre-change form of the invocation. The
# "##########" section headers sit between backslash-continued lines; a word
# that starts with '#' begins a comment running to the end of the line, so the
# first header ends the "accelerate launch train.py" command before any flag
# is passed, and each later group of flags is then read as a command of its
# own. The array-based form further down in this listing avoids that.
accelerate launch train.py \
########## Model Configuration ##########
--model_path "THUDM/CogVideoX1.5-5B-I2V" \
--model_name "cogvideox1.5-i2v" \
--model_type "i2v" \
--training_type "lora" \
########## Output Configuration ##########
--output_dir "/path/to/output/dir" \
--report_to "tensorboard" \
########## Data Configuration ##########
--data_root "/path/to/data/dir" \
--caption_column "prompt.txt" \
--video_column "videos.txt" \
--image_column "images.txt" \
--train_resolution "48x768x1360" \
########## Training Configuration ##########
--train_epochs 10 \
--batch_size 1 \
--gradient_accumulation_steps 1 \
--mixed_precision "bf16" \
--seed 42 \
########## System Configuration ##########
--num_workers 8 \
--pin_memory True \
--nccl_timeout 1800 \
########## Checkpointing Configuration ##########
--checkpointing_steps 200 \
--checkpointing_limit 10 \
########## Validation Configuration ##########
--do_validation False \
--validation_dir "path/to/validation/dir" \
--validation_steps 400 \
--validation_prompts "prompts.txt" \
# Model Configuration -- flags for train.py, appended one flag/value pair at a time
MODEL_ARGS=()
MODEL_ARGS+=(--model_path "THUDM/CogVideoX1.5-5B-I2V")
MODEL_ARGS+=(--model_name "cogvideox1.5-i2v")
MODEL_ARGS+=(--model_type "i2v")
MODEL_ARGS+=(--training_type "lora")
# Output Configuration -- flags for train.py, appended one flag/value pair at a time
OUTPUT_ARGS=()
OUTPUT_ARGS+=(--output_dir "/path/to/output/dir")
OUTPUT_ARGS+=(--report_to "tensorboard")
# Data Configuration -- flags for train.py, appended one flag/value pair at a time
DATA_ARGS=()
DATA_ARGS+=(--data_root "/path/to/data/dir")
DATA_ARGS+=(--caption_column "prompt.txt")
DATA_ARGS+=(--video_column "videos.txt")
DATA_ARGS+=(--image_column "images.txt")
DATA_ARGS+=(--train_resolution "80x768x1360")
# Training Configuration -- flags for train.py, appended one flag/value pair at a time
TRAIN_ARGS=()
TRAIN_ARGS+=(--train_epochs 10)
TRAIN_ARGS+=(--batch_size 1)
TRAIN_ARGS+=(--gradient_accumulation_steps 1)
TRAIN_ARGS+=(--mixed_precision "bf16")
TRAIN_ARGS+=(--seed 42)
# System Configuration -- flags for train.py, appended one flag/value pair at a time
SYSTEM_ARGS=()
SYSTEM_ARGS+=(--num_workers 8)
SYSTEM_ARGS+=(--pin_memory True)
SYSTEM_ARGS+=(--nccl_timeout 1800)
# Checkpointing Configuration -- flags for train.py, appended one flag/value pair at a time
CHECKPOINT_ARGS=()
CHECKPOINT_ARGS+=(--checkpointing_steps 200)
CHECKPOINT_ARGS+=(--checkpointing_limit 10)
# Validation Configuration -- flags for train.py, appended one flag/value pair at a time
VALIDATION_ARGS=()
VALIDATION_ARGS+=(--do_validation False)
VALIDATION_ARGS+=(--validation_dir "/path/to/validation/dir")
VALIDATION_ARGS+=(--validation_steps 400)
VALIDATION_ARGS+=(--validation_prompts "prompts.txt")
VALIDATION_ARGS+=(--validation_images "images.txt")
VALIDATION_ARGS+=(--gen_fps 15)
# Launch training: hand every argument group to train.py in a single call.
# Each array is expanded as "${NAME[@]}" so that every element arrives as its
# own word; the groups keep the order Model, Output, Data, Train, System,
# Checkpoint, Validation.
accelerate launch train.py \
  "${MODEL_ARGS[@]}" "${OUTPUT_ARGS[@]}" "${DATA_ARGS[@]}" \
  "${TRAIN_ARGS[@]}" "${SYSTEM_ARGS[@]}" \
  "${CHECKPOINT_ARGS[@]}" "${VALIDATION_ARGS[@]}"

View File

@@ -3,43 +3,65 @@
# Prevent tokenizer parallelism issues. The variable is exported so that the
# processes started below inherit it.
export TOKENIZERS_PARALLELISM=false
# Launch training with accelerate.
# NOTE(review): this is the pre-change form of the invocation. The
# "##########" section headers sit between backslash-continued lines; a word
# that starts with '#' begins a comment running to the end of the line, so the
# first header ends the "accelerate launch train.py" command before any flag
# is passed, and each later group of flags is then read as a command of its
# own. The array-based form further down in this listing avoids that.
accelerate launch train.py \
########## Model Configuration ##########
--model_path "THUDM/CogVideoX1.5-5B" \
--model_name "cogvideox1.5-t2v" \
--model_type "t2v" \
--training_type "lora" \
########## Output Configuration ##########
--output_dir "/path/to/output/dir" \
--report_to "tensorboard" \
########## Data Configuration ##########
--data_root "/path/to/data/dir" \
--caption_column "prompt.txt" \
--video_column "videos.txt" \
--train_resolution "48x768x1360" \
########## Training Configuration ##########
--train_epochs 10 \
--batch_size 1 \
--gradient_accumulation_steps 1 \
--mixed_precision "bf16" \
--seed 42 \
########## System Configuration ##########
--num_workers 8 \
--pin_memory True \
--nccl_timeout 1800 \
########## Checkpointing Configuration ##########
--checkpointing_steps 200 \
--checkpointing_limit 10 \
########## Validation Configuration ##########
--do_validation False \
--validation_dir "path/to/validation/dir" \
--validation_steps 400 \
--validation_prompts "prompts.txt" \
# Model Configuration -- flags for train.py, appended one flag/value pair at a time
MODEL_ARGS=()
MODEL_ARGS+=(--model_path "THUDM/CogVideoX1.5-5B")
MODEL_ARGS+=(--model_name "cogvideox1.5-t2v")
MODEL_ARGS+=(--model_type "t2v")
MODEL_ARGS+=(--training_type "lora")
# Output Configuration -- flags for train.py, appended one flag/value pair at a time
OUTPUT_ARGS=()
OUTPUT_ARGS+=(--output_dir "/path/to/output/dir")
OUTPUT_ARGS+=(--report_to "tensorboard")
# Data Configuration -- flags for train.py, appended one flag/value pair at a time
DATA_ARGS=()
DATA_ARGS+=(--data_root "/path/to/data/dir")
DATA_ARGS+=(--caption_column "prompt.txt")
DATA_ARGS+=(--video_column "videos.txt")
DATA_ARGS+=(--train_resolution "80x768x1360")
# Training Configuration -- flags for train.py, appended one flag/value pair at a time
TRAIN_ARGS=()
TRAIN_ARGS+=(--train_epochs 10)
TRAIN_ARGS+=(--batch_size 1)
TRAIN_ARGS+=(--gradient_accumulation_steps 1)
TRAIN_ARGS+=(--mixed_precision "bf16")
TRAIN_ARGS+=(--seed 42)
# System Configuration -- flags for train.py, appended one flag/value pair at a time
SYSTEM_ARGS=()
SYSTEM_ARGS+=(--num_workers 8)
SYSTEM_ARGS+=(--pin_memory True)
SYSTEM_ARGS+=(--nccl_timeout 1800)
# Checkpointing Configuration -- flags for train.py, appended one flag/value pair at a time
CHECKPOINT_ARGS=()
CHECKPOINT_ARGS+=(--checkpointing_steps 200)
CHECKPOINT_ARGS+=(--checkpointing_limit 10)
# Validation Configuration -- flags for train.py, appended one flag/value pair at a time
VALIDATION_ARGS=()
VALIDATION_ARGS+=(--do_validation False)
VALIDATION_ARGS+=(--validation_dir "/path/to/validation/dir")
VALIDATION_ARGS+=(--validation_steps 400)
VALIDATION_ARGS+=(--validation_prompts "prompts.txt")
VALIDATION_ARGS+=(--gen_fps 15)
# Launch training: hand every argument group to train.py in a single call.
# Each array is expanded as "${NAME[@]}" so that every element arrives as its
# own word; the groups keep the order Model, Output, Data, Train, System,
# Checkpoint, Validation.
accelerate launch train.py \
  "${MODEL_ARGS[@]}" "${OUTPUT_ARGS[@]}" "${DATA_ARGS[@]}" \
  "${TRAIN_ARGS[@]}" "${SYSTEM_ARGS[@]}" \
  "${CHECKPOINT_ARGS[@]}" "${VALIDATION_ARGS[@]}"