Add an option to run the Gradio web demo with very low VRAM

Rodrigo Antonio de Araujo 2024-08-29 11:44:11 -03:00
parent 3b4fa1d907
commit e70d90b0e6
2 changed files with 12 additions and 2 deletions

README.md

@@ -300,6 +300,8 @@ of the **CogVideoX** open-source model.
 cd inference
 # For Linux and Windows users
 python gradio_web_demo.py
+# or run 5b with pipe.enable_sequential_cpu_offload() and pipe.vae.enable_tiling()
+python3 inference/gradio_web_demo.py --very-low-vram-5b
 # For macOS with Apple Silicon users, Intel not supported; this may be ~20x slower than an RTX 4090
 PYTORCH_ENABLE_MPS_FALLBACK=1 python gradio_web_demo.py
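
Both calls named in the new flag are standard diffusers memory optimizations: enable_sequential_cpu_offload() keeps the weights in CPU RAM and streams each submodule to the GPU only while it runs, and vae.enable_tiling() decodes the latent video in tiles rather than in one pass, trading speed for a much lower peak VRAM footprint. A minimal standalone sketch of this low-VRAM path (the prompt, step count, and guidance scale below are illustrative, not from the commit):

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Load the 5B model without calling .to("cuda"): sequential CPU offload
# moves weights to the GPU one submodule at a time, and VAE tiling
# decodes the video in tiles, both reducing peak VRAM at some speed cost.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.float16)
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()

video = pipe(
    prompt="A panda playing a guitar in a bamboo forest",  # illustrative prompt
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]
export_to_video(video, "output.mp4", fps=8)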

inference/gradio_web_demo.py

@@ -17,8 +17,16 @@ from diffusers.utils import export_to_video
 from datetime import datetime, timedelta
 from openai import OpenAI
 import moviepy.editor as mp
+import sys
 
 dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
+if '--very-low-vram-5b' in sys.argv:
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
+    pipe.enable_sequential_cpu_offload()
+    pipe.vae.enable_tiling()
+else:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
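
Checking sys.argv directly keeps the patch to a few lines, but it bypasses argument parsing, so the flag gets no --help entry and a typo fails silently. A sketch of the same switch via argparse (the load_pipeline helper is hypothetical; the flag name and model IDs are from the commit):

import argparse
import torch
from diffusers import CogVideoXPipeline

def load_pipeline(very_low_vram_5b: bool, dtype=torch.float16):
    # Hypothetical helper: choose the 5B low-VRAM path or the default 2B path.
    if very_low_vram_5b:
        pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
        pipe.enable_sequential_cpu_offload()  # stream weights to the GPU submodule by submodule
        pipe.vae.enable_tiling()              # decode the latent video in tiles
        return pipe
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)

parser = argparse.ArgumentParser()
parser.add_argument("--very-low-vram-5b", action="store_true",
                    help="run CogVideoX-5b with sequential CPU offload and VAE tiling")
args = parser.parse_args()  # argparse exposes the flag as args.very_low_vram_5b
pipe = load_pipeline(args.very_low_vram_5b)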