diff --git a/README.md b/README.md
index 5f562cc..a37458e 100644
--- a/README.md
+++ b/README.md
@@ -300,6 +300,8 @@ of the **CogVideoX** open-source model.
 cd inference
 # For Linux and Windows users
 python gradio_web_demo.py
+# or run 5b with pipe.enable_sequential_cpu_offload() and pipe.vae.enable_tiling()
+python3 gradio_web_demo.py --very-low-vram-5b
 
 # For macOS with Apple Silicon users, Intel not supported, this maybe 20x slower than RTX 4090
 PYTORCH_ENABLE_MPS_FALLBACK=1 python gradio_web_demo.py
diff --git a/inference/gradio_web_demo.py b/inference/gradio_web_demo.py
index 8204a8f..a9953b2 100644
--- a/inference/gradio_web_demo.py
+++ b/inference/gradio_web_demo.py
@@ -17,10 +17,18 @@ from diffusers.utils import export_to_video
 from datetime import datetime, timedelta
 from openai import OpenAI
 import moviepy.editor as mp
+import sys
+
 
 dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
+
+if '--very-low-vram-5b' in sys.argv:
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
+    pipe.enable_sequential_cpu_offload()
+    pipe.vae.enable_tiling()
+else:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
 
 os.makedirs("./output", exist_ok=True)
 os.makedirs("./gradio_tmp", exist_ok=True)
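
For context (not part of the patch), the low-VRAM path introduced above can be exercised outside Gradio with a minimal standalone sketch. The prompt and sampling parameters below are illustrative, not values required by this change; the two diffusers memory helpers, `enable_sequential_cpu_offload()` and `vae.enable_tiling()`, trade generation speed for a much lower peak VRAM footprint.

```python
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Load the 5B model without moving it to the GPU up front.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.float16)

# Stream weights to the GPU one submodule at a time (slow, but peak VRAM stays low)
# and decode the video latents in tiles instead of all at once.
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()

# Illustrative prompt and sampling settings.
video = pipe(
    prompt="A panda playing a guitar in a bamboo forest",
    num_frames=49,
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]

export_to_video(video, "output.mp4", fps=8)
```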