Mirror of https://github.com/THUDM/CogVideo.git, synced 2025-12-03 03:02:09 +08:00
Add an option to run gradio web demo with very low vram
commit e70d90b0e6
parent 3b4fa1d907
README.md

@@ -300,6 +300,8 @@ of the **CogVideoX** open-source model.
 cd inference
 # For Linux and Windows users
 python gradio_web_demo.py
+# or run the 5B model with pipe.enable_sequential_cpu_offload() and pipe.vae.enable_tiling()
+python3 inference/gradio_web_demo.py --very-low-vram-5b
 
 # For macOS with Apple Silicon; Intel is not supported, and this may be ~20x slower than an RTX 4090
 PYTORCH_ENABLE_MPS_FALLBACK=1 python gradio_web_demo.py
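For readers unfamiliar with the two diffusers calls named in the new README line: enable_sequential_cpu_offload() keeps the whole pipeline in CPU memory and streams one submodule at a time to the GPU during inference, and vae.enable_tiling() decodes the video latents in tiles rather than in one pass; both trade speed for a much lower peak VRAM. A minimal standalone sketch of the low-VRAM path (the prompt and output filename are illustrative; the float16 dtype follows the demo script):

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Load the 5B weights without .to("cuda"): sequential offload manages
# device placement itself, so the pipeline must stay on the CPU here.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.float16)
pipe.enable_sequential_cpu_offload()  # stream submodules to the GPU one at a time
pipe.vae.enable_tiling()              # decode latents tile by tile to cap peak VRAM

# Illustrative generation call; expect it to be much slower than a fully on-GPU run.
video = pipe(prompt="a panda playing a guitar in a bamboo forest").frames[0]
export_to_video(video, "output.mp4", fps=8)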
inference/gradio_web_demo.py

@@ -17,10 +17,18 @@ from diffusers.utils import export_to_video
 from datetime import datetime, timedelta
 from openai import OpenAI
 import moviepy.editor as mp
+import sys
+
 
 dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
+
+if '--very-low-vram-5b' in sys.argv:
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
+    pipe.enable_sequential_cpu_offload()
+    pipe.vae.enable_tiling()
+else:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
 
 os.makedirs("./output", exist_ok=True)
 os.makedirs("./gradio_tmp", exist_ok=True)
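A reviewer might note two things about the new branch: device is never assigned when --very-low-vram-5b is passed, so if any later code in gradio_web_demo.py still references device it would raise a NameError, and scanning sys.argv directly bypasses normal argument handling. A hedged sketch of an alternative that defines device on both paths and parses the flag with argparse (the flag name mirrors the commit; the argparse wiring is my assumption, not the commit's code):

import argparse
import torch
from diffusers import CogVideoXPipeline

parser = argparse.ArgumentParser()
parser.add_argument("--very-low-vram-5b", action="store_true",
                    help="run CogVideoX-5b with sequential CPU offload and VAE tiling")
args, _ = parser.parse_known_args()  # tolerate any other CLI args the demo takes

dtype = torch.float16
device = "cuda" if torch.cuda.is_available() else "cpu"  # defined on both paths

if args.very_low_vram_5b:
    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype)
    pipe.enable_sequential_cpu_offload()  # no .to(device): offload handles placement
    pipe.vae.enable_tiling()
else:
    pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)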