# NOTE(review): this patch was re-flowed from a whitespace-collapsed copy.
# Record boundaries follow the hunk headers (44/67 and 43/65 lines); the
# 4-space indentation inside the hunks is assumed - confirm against the
# target files before applying.
diff --git a/finetune/accelerate_train_i2v.sh b/finetune/accelerate_train_i2v.sh
index 372d2c4..ec3922e 100644
--- a/finetune/accelerate_train_i2v.sh
+++ b/finetune/accelerate_train_i2v.sh
@@ -3,44 +3,67 @@
 # Prevent tokenizer parallelism issues
 export TOKENIZERS_PARALLELISM=false
 
-# Launch training with accelerate
-accelerate launch train.py \
-    ########## Model Configuration ##########
-    --model_path "THUDM/CogVideoX1.5-5B-I2V" \
-    --model_name "cogvideox1.5-i2v" \
-    --model_type "i2v" \
-    --training_type "lora" \
-
-    ########## Output Configuration ##########
-    --output_dir "/path/to/output/dir" \
-    --report_to "tensorboard" \
-
-    ########## Data Configuration ##########
-    --data_root "/path/to/data/dir" \
-    --caption_column "prompt.txt" \
-    --video_column "videos.txt" \
-    --image_column "images.txt" \
-    --train_resolution "48x768x1360" \
-
-    ########## Training Configuration ##########
-    --train_epochs 10 \
-    --batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --mixed_precision "bf16" \
-    --seed 42 \
-
-    ########## System Configuration ##########
-    --num_workers 8 \
-    --pin_memory True \
-    --nccl_timeout 1800 \
-
-    ########## Checkpointing Configuration ##########
-    --checkpointing_steps 200 \
-    --checkpointing_limit 10 \
-
-    ########## Validation Configuration ##########
-    --do_validation False \
-    --validation_dir "path/to/validation/dir" \
-    --validation_steps 400 \
-    --validation_prompts "prompts.txt" \
+# Model Configuration
+MODEL_ARGS=(
+    --model_path "THUDM/CogVideoX1.5-5B-I2V"
+    --model_name "cogvideox1.5-i2v"
+    --model_type "i2v"
+    --training_type "lora"
+)
+
+# Output Configuration
+OUTPUT_ARGS=(
+    --output_dir "/path/to/output/dir"
+    --report_to "tensorboard"
+)
+
+# Data Configuration
+DATA_ARGS=(
+    --data_root "/path/to/data/dir"
+    --caption_column "prompt.txt"
+    --video_column "videos.txt"
+    --image_column "images.txt"
+    --train_resolution "80x768x1360"
+)
+
+# Training Configuration
+TRAIN_ARGS=(
+    --train_epochs 10
+    --batch_size 1
+    --gradient_accumulation_steps 1
+    --mixed_precision "bf16"
+    --seed 42
+)
+
+# System Configuration
+SYSTEM_ARGS=(
+    --num_workers 8
+    --pin_memory True
+    --nccl_timeout 1800
+)
+
+# Checkpointing Configuration
+CHECKPOINT_ARGS=(
+    --checkpointing_steps 200
+    --checkpointing_limit 10
+)
+
+# Validation Configuration
+VALIDATION_ARGS=(
+    --do_validation False
+    --validation_dir "/path/to/validation/dir"
+    --validation_steps 400
+    --validation_prompts "prompts.txt"
+    --validation_images "images.txt"
     --gen_fps 15
+)
+
+# Combine all arguments and launch training
+accelerate launch train.py \
+    "${MODEL_ARGS[@]}" \
+    "${OUTPUT_ARGS[@]}" \
+    "${DATA_ARGS[@]}" \
+    "${TRAIN_ARGS[@]}" \
+    "${SYSTEM_ARGS[@]}" \
+    "${CHECKPOINT_ARGS[@]}" \
+    "${VALIDATION_ARGS[@]}"
\ No newline at end of file
diff --git a/finetune/accelerate_train_t2v.sh b/finetune/accelerate_train_t2v.sh
index bdb0140..0d2b7f6 100644
--- a/finetune/accelerate_train_t2v.sh
+++ b/finetune/accelerate_train_t2v.sh
@@ -3,43 +3,65 @@
 # Prevent tokenizer parallelism issues
 export TOKENIZERS_PARALLELISM=false
 
-# Launch training with accelerate
-accelerate launch train.py \
-    ########## Model Configuration ##########
-    --model_path "THUDM/CogVideoX1.5-5B" \
-    --model_name "cogvideox1.5-t2v" \
-    --model_type "t2v" \
-    --training_type "lora" \
-
-    ########## Output Configuration ##########
-    --output_dir "/path/to/output/dir" \
-    --report_to "tensorboard" \
-
-    ########## Data Configuration ##########
-    --data_root "/path/to/data/dir" \
-    --caption_column "prompt.txt" \
-    --video_column "videos.txt" \
-    --train_resolution "48x768x1360" \
-
-    ########## Training Configuration ##########
-    --train_epochs 10 \
-    --batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --mixed_precision "bf16" \
-    --seed 42 \
-
-    ########## System Configuration ##########
-    --num_workers 8 \
-    --pin_memory True \
-    --nccl_timeout 1800 \
-
-    ########## Checkpointing Configuration ##########
-    --checkpointing_steps 200 \
-    --checkpointing_limit 10 \
-
-    ########## Validation Configuration ##########
-    --do_validation False \
-    --validation_dir "path/to/validation/dir" \
-    --validation_steps 400 \
-    --validation_prompts "prompts.txt" \
+# Model Configuration
+MODEL_ARGS=(
+    --model_path "THUDM/CogVideoX1.5-5B"
+    --model_name "cogvideox1.5-t2v"
+    --model_type "t2v"
+    --training_type "lora"
+)
+
+# Output Configuration
+OUTPUT_ARGS=(
+    --output_dir "/path/to/output/dir"
+    --report_to "tensorboard"
+)
+
+# Data Configuration
+DATA_ARGS=(
+    --data_root "/path/to/data/dir"
+    --caption_column "prompt.txt"
+    --video_column "videos.txt"
+    --train_resolution "80x768x1360"
+)
+
+# Training Configuration
+TRAIN_ARGS=(
+    --train_epochs 10
+    --batch_size 1
+    --gradient_accumulation_steps 1
+    --mixed_precision "bf16"
+    --seed 42
+)
+
+# System Configuration
+SYSTEM_ARGS=(
+    --num_workers 8
+    --pin_memory True
+    --nccl_timeout 1800
+)
+
+# Checkpointing Configuration
+CHECKPOINT_ARGS=(
+    --checkpointing_steps 200
+    --checkpointing_limit 10
+)
+
+# Validation Configuration
+VALIDATION_ARGS=(
+    --do_validation False
+    --validation_dir "/path/to/validation/dir"
+    --validation_steps 400
+    --validation_prompts "prompts.txt"
     --gen_fps 15
+)
+
+# Combine all arguments and launch training
+accelerate launch train.py \
+    "${MODEL_ARGS[@]}" \
+    "${OUTPUT_ARGS[@]}" \
+    "${DATA_ARGS[@]}" \
+    "${TRAIN_ARGS[@]}" \
+    "${SYSTEM_ARGS[@]}" \
+    "${CHECKPOINT_ARGS[@]}" \
+    "${VALIDATION_ARGS[@]}"
\ No newline at end of file