add 10 second comment

This commit is contained in:
zR 2024-11-08 22:31:39 +08:00
parent e43a7645fd
commit d8ee013842
3 changed files with 8 additions and 11 deletions

View File

@ -23,7 +23,7 @@ model:
params:
time_embed_dim: 512
elementwise_affine: True
num_frames: 81
num_frames: 81 # 81 for 5 seconds, 161 for 10 seconds
time_compressed_rate: 4
latent_width: 300
latent_height: 300

View File

@ -25,11 +25,10 @@ model:
network_config:
target: dit_video_concat.DiffusionTransformer
params:
# space_interpolation: 1.875
ofs_embed_dim: 512
time_embed_dim: 512
elementwise_affine: True
num_frames: 81
num_frames: 81 # 81 for 5 seconds, 161 for 10 seconds
time_compressed_rate: 4
latent_width: 300
latent_height: 300

View File

@ -1,16 +1,14 @@
args:
image2video: False # True for image2video, False for text2video
# image2video: True # True for image2video, False for text2video
latent_channels: 16
mode: inference
load: "{your CogVideoX SAT folder}/transformer" # This is for Full model without lora adapter
# load: "{your lora folder} such as zRzRzRzRzRzRzR/lora-disney-08-20-13-28" # This is for the model with a LoRA adapter
batch_size: 1
input_type: txt
input_file: configs/test.txt
sampling_image_size: [480, 720]
sampling_num_frames: 13 # Must be 13, 11 or 9
sampling_fps: 8
# fp16: True # For CogVideoX-2B
bf16: True # For CogVideoX-5B and CogVideoX-5B-I2V
output_dir: outputs/
sampling_image_size: [768, 1360] # remove this for I2V
sampling_num_frames: 22 # 42 for 10 seconds and 22 for 5 seconds
sampling_fps: 16
bf16: True
output_dir: outputs
force_inference: True