Add an option to run the Gradio web demo with very low VRAM

Rodrigo Antonio de Araujo 2024-08-29 11:44:11 -03:00
parent 3b4fa1d907
commit e70d90b0e6
2 changed files with 12 additions and 2 deletions

README.md

@@ -300,6 +300,8 @@ of the **CogVideoX** open-source model.
 cd inference
 # For Linux and Windows users
 python gradio_web_demo.py
+# or run 5b with pipe.enable_sequential_cpu_offload() and pipe.vae.enable_tiling()
+python3 inference/gradio_web_demo.py --very-low-vram-5b
 # For macOS with Apple Silicon users, Intel not supported; this may be ~20x slower than an RTX 4090
 PYTORCH_ENABLE_MPS_FALLBACK=1 python gradio_web_demo.py
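
Both calls named in the new flag are standard diffusers memory optimizations: enable_sequential_cpu_offload() keeps the weights in CPU RAM and streams each submodule to the GPU only while it runs, and vae.enable_tiling() decodes the latent video in tiles rather than in one pass, trading speed for a much lower peak VRAM footprint. A minimal standalone sketch of this low-VRAM path (the prompt, step count, and guidance scale below are illustrative, not from the commit):

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Load the 5B model without calling .to("cuda"): sequential CPU offload
# moves weights to the GPU one submodule at a time, and VAE tiling
# decodes the video in tiles, both reducing peak VRAM at some speed cost.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.float16)
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()

video = pipe(
    prompt="A panda playing a guitar in a bamboo forest",  # illustrative prompt
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]
export_to_video(video, "output.mp4", fps=8)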

inference/gradio_web_demo.py

@@ -17,8 +17,16 @@ from diffusers.utils import export_to_video
 from datetime import datetime, timedelta
 from openai import OpenAI
 import moviepy.editor as mp
+import sys
 
 dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
+if '--very-low-vram-5b' in sys.argv:
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
+    pipe.enable_sequential_cpu_offload()
+    pipe.vae.enable_tiling()
+else:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
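
Checking sys.argv directly keeps the patch to a few lines, but it bypasses argument parsing, so the flag gets no --help entry and a typo fails silently. A sketch of the same switch via argparse (the load_pipeline helper is hypothetical; the flag name and model IDs are from the commit):

import argparse
import torch
from diffusers import CogVideoXPipeline

def load_pipeline(very_low_vram_5b: bool, dtype=torch.float16):
    # Hypothetical helper: choose the 5B low-VRAM path or the default 2B path.
    if very_low_vram_5b:
        pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
        pipe.enable_sequential_cpu_offload()  # stream weights to the GPU submodule by submodule
        pipe.vae.enable_tiling()              # decode the latent video in tiles
        return pipe
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)

parser = argparse.ArgumentParser()
parser.add_argument("--very-low-vram-5b", action="store_true",
                    help="run CogVideoX-5b with sequential CPU offload and VAE tiling")
args = parser.parse_args()  # argparse exposes the flag as args.very_low_vram_5b
pipe = load_pipeline(args.very_low_vram_5b)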