mirror of
https://github.com/THUDM/CogVideo.git
synced 2026-06-01 09:04:08 +08:00
feat(cli_demo): add native multi-GPU support via device_map
Add --device_map CLI argument to enable multi-GPU inference without
code modifications. This implements the functionality described in
existing comments at lines 92-93 and 146-150.
Changes:
- Add device_map parameter to generate_video() with options:
  - None (default): Uses sequential CPU offload (backward compatible)
  - 'balanced': Distributes model evenly across GPUs (recommended)
  - 'auto': Automatic device placement by accelerate
  - 'sequential': Fills GPUs one by one in order
- Conditionally enable CPU offload only when device_map is None
- Pass device_map to from_pretrained() for all three pipeline types:
  CogVideoXPipeline, CogVideoXImageToVideoPipeline, CogVideoXVideoToVideoPipeline
- Update module docstring with multi-GPU usage examples
- Add validation for device_map values
Backward compatibility:
- Default behavior unchanged (device_map=None uses CPU offload)
- All existing scripts work without modification
Usage:
# Multi-GPU with balanced distribution (recommended):
python cli_demo.py --prompt '...' --model_path THUDM/CogVideoX1.5-5b \
    --device_map balanced
# Multi-GPU with auto placement:
python cli_demo.py --prompt '...' --model_path THUDM/CogVideoX1.5-5b \
    --device_map auto
This commit is contained in:
parent
7a1af71545
commit
207020afea
@ -11,11 +11,24 @@ Running the Script:
|
|||||||
To run the script, use the following command with appropriate arguments:
|
To run the script, use the following command with appropriate arguments:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Single GPU (default behavior, uses CPU offload):
|
||||||
$ python cli_demo.py --prompt "A girl riding a bike." --model_path THUDM/CogVideoX1.5-5b --generate_type "t2v"
|
$ python cli_demo.py --prompt "A girl riding a bike." --model_path THUDM/CogVideoX1.5-5b --generate_type "t2v"
|
||||||
|
|
||||||
|
# Multi-GPU with balanced device mapping:
|
||||||
|
$ python cli_demo.py --prompt "A girl riding a bike." --model_path THUDM/CogVideoX1.5-5b --generate_type "t2v" --device_map balanced
|
||||||
|
|
||||||
|
# Multi-GPU with auto device mapping:
|
||||||
|
$ python cli_demo.py --prompt "A girl riding a bike." --model_path THUDM/CogVideoX1.5-5b --generate_type "t2v" --device_map auto
|
||||||
```
|
```
|
||||||
|
|
||||||
You can change `pipe.enable_sequential_cpu_offload()` to `pipe.enable_model_cpu_offload()` to speed up inference, but this will use more GPU memory
|
You can change `pipe.enable_sequential_cpu_offload()` to `pipe.enable_model_cpu_offload()` to speed up inference, but this will use more GPU memory
|
||||||
|
|
||||||
|
Multi-GPU Support:
|
||||||
|
- Use `--device_map balanced` to distribute the model evenly across available GPUs (recommended for inference)
|
||||||
|
- Use `--device_map auto` for automatic device placement by accelerate
|
||||||
|
- Use `--device_map sequential` to fill GPUs sequentially (useful for uneven memory)
|
||||||
|
- Default behavior (no --device_map) uses CPU offload for single-GPU setups
|
||||||
|
|
||||||
Additional options are available to specify the model path, guidance scale, number of inference steps, video generation type, and output paths.
|
Additional options are available to specify the model path, guidance scale, number of inference steps, video generation type, and output paths.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -48,6 +61,9 @@ RESOLUTION_MAP = {
|
|||||||
"cogvideox-2b": (480, 720),
|
"cogvideox-2b": (480, 720),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Valid device_map options for multi-GPU support
|
||||||
|
VALID_DEVICE_MAPS = {"auto", "balanced", "sequential"}
|
||||||
|
|
||||||
|
|
||||||
def generate_video(
|
def generate_video(
|
||||||
prompt: str,
|
prompt: str,
|
||||||
@ -66,6 +82,7 @@ def generate_video(
|
|||||||
generate_type: str = Literal["t2v", "i2v", "v2v"], # i2v: image to video, v2v: video to video
|
generate_type: str = Literal["t2v", "i2v", "v2v"], # i2v: image to video, v2v: video to video
|
||||||
seed: int = 42,
|
seed: int = 42,
|
||||||
fps: int = 16,
|
fps: int = 16,
|
||||||
|
device_map: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Generates a video based on the given prompt and saves it to the specified path.
|
Generates a video based on the given prompt and saves it to the specified path.
|
||||||
@ -86,11 +103,23 @@ def generate_video(
|
|||||||
- generate_type (str): The type of video generation (e.g., 't2v', 'i2v', 'v2v').·
|
- generate_type (str): The type of video generation (e.g., 't2v', 'i2v', 'v2v').·
|
||||||
- seed (int): The seed for reproducibility.
|
- seed (int): The seed for reproducibility.
|
||||||
- fps (int): The frames per second for the generated video.
|
- fps (int): The frames per second for the generated video.
|
||||||
|
- device_map (str): Device placement strategy for multi-GPU support. Options:
|
||||||
|
- None (default): Uses sequential CPU offload for single-GPU setups
|
||||||
|
- "balanced": Distributes model layers evenly across available GPUs (recommended)
|
||||||
|
- "auto": Automatic device placement by accelerate library
|
||||||
|
- "sequential": Fills GPUs one by one in order
|
||||||
|
|
||||||
|
Multi-GPU Usage Examples:
|
||||||
|
# Balanced distribution across GPUs (recommended):
|
||||||
|
generate_video(prompt="...", model_path="...", device_map="balanced")
|
||||||
|
|
||||||
|
# Automatic device placement:
|
||||||
|
generate_video(prompt="...", model_path="...", device_map="auto")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 1. Load the pre-trained CogVideoX pipeline with the specified precision (bfloat16).
|
# 1. Load the pre-trained CogVideoX pipeline with the specified precision (bfloat16).
|
||||||
# add device_map="balanced" in the from_pretrained function and remove the enable_model_cpu_offload()
|
# When device_map is specified, the model is distributed across multiple GPUs.
|
||||||
# function to use Multi GPUs.
|
# When device_map is None (default), CPU offload is enabled for single-GPU setups.
|
||||||
|
|
||||||
image = None
|
image = None
|
||||||
video = None
|
video = None
|
||||||
@ -115,13 +144,25 @@ def generate_video(
|
|||||||
)
|
)
|
||||||
height, width = desired_resolution
|
height, width = desired_resolution
|
||||||
|
|
||||||
|
# Validate device_map if provided
|
||||||
|
if device_map is not None and device_map not in VALID_DEVICE_MAPS:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid device_map '{device_map}'. Must be one of: {', '.join(sorted(VALID_DEVICE_MAPS))} or None"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build kwargs for from_pretrained - add device_map only when specified
|
||||||
|
load_kwargs = {"torch_dtype": dtype}
|
||||||
|
if device_map is not None:
|
||||||
|
load_kwargs["device_map"] = device_map
|
||||||
|
logging.info(f"\033[1mUsing device_map='{device_map}' for multi-GPU inference\033[0m")
|
||||||
|
|
||||||
if generate_type == "i2v":
|
if generate_type == "i2v":
|
||||||
pipe = CogVideoXImageToVideoPipeline.from_pretrained(model_path, torch_dtype=dtype)
|
pipe = CogVideoXImageToVideoPipeline.from_pretrained(model_path, **load_kwargs)
|
||||||
image = load_image(image=image_or_video_path)
|
image = load_image(image=image_or_video_path)
|
||||||
elif generate_type == "t2v":
|
elif generate_type == "t2v":
|
||||||
pipe = CogVideoXPipeline.from_pretrained(model_path, torch_dtype=dtype)
|
pipe = CogVideoXPipeline.from_pretrained(model_path, **load_kwargs)
|
||||||
else:
|
else:
|
||||||
pipe = CogVideoXVideoToVideoPipeline.from_pretrained(model_path, torch_dtype=dtype)
|
pipe = CogVideoXVideoToVideoPipeline.from_pretrained(model_path, **load_kwargs)
|
||||||
video = load_video(image_or_video_path)
|
video = load_video(image_or_video_path)
|
||||||
|
|
||||||
# If you're using with lora, add this code
|
# If you're using with lora, add this code
|
||||||
@ -141,13 +182,16 @@ def generate_video(
|
|||||||
pipe.scheduler.config, timestep_spacing="trailing"
|
pipe.scheduler.config, timestep_spacing="trailing"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. Enable CPU offload for the model.
|
# 3. Enable CPU offload for the model (only when not using multi-GPU device_map).
|
||||||
# turn off if you have multiple GPUs or enough GPU memory(such as H100) and it will cost less time in inference
|
# When device_map is specified, the model is already distributed across GPUs,
|
||||||
# and enable to("cuda")
|
# so CPU offload is not needed and would conflict with device placement.
|
||||||
# pipe.to("cuda")
|
if device_map is None:
|
||||||
|
# Single-GPU mode: use CPU offload to manage memory
|
||||||
|
# Turn off if you have multiple GPUs or enough GPU memory (such as H100)
|
||||||
|
# pipe.enable_model_cpu_offload()
|
||||||
|
pipe.enable_sequential_cpu_offload()
|
||||||
|
|
||||||
# pipe.enable_model_cpu_offload()
|
# VAE optimizations work in both single and multi-GPU modes
|
||||||
pipe.enable_sequential_cpu_offload()
|
|
||||||
pipe.vae.enable_slicing()
|
pipe.vae.enable_slicing()
|
||||||
pipe.vae.enable_tiling()
|
pipe.vae.enable_tiling()
|
||||||
|
|
||||||
@ -248,6 +292,13 @@ if __name__ == "__main__":
|
|||||||
"--dtype", type=str, default="bfloat16", help="The data type for computation"
|
"--dtype", type=str, default="bfloat16", help="The data type for computation"
|
||||||
)
|
)
|
||||||
parser.add_argument("--seed", type=int, default=42, help="The seed for reproducibility")
|
parser.add_argument("--seed", type=int, default=42, help="The seed for reproducibility")
|
||||||
|
parser.add_argument(
|
||||||
|
"--device_map",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
choices=["auto", "balanced", "sequential"],
|
||||||
|
help="Device placement strategy for multi-GPU inference. Options: 'balanced' (recommended, distributes evenly), 'auto' (automatic placement), 'sequential' (fills GPUs in order). Default: None (uses CPU offload for single-GPU)",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
dtype = torch.float16 if args.dtype == "float16" else torch.bfloat16
|
dtype = torch.float16 if args.dtype == "float16" else torch.bfloat16
|
||||||
@ -268,4 +319,5 @@ if __name__ == "__main__":
|
|||||||
generate_type=args.generate_type,
|
generate_type=args.generate_type,
|
||||||
seed=args.seed,
|
seed=args.seed,
|
||||||
fps=args.fps,
|
fps=args.fps,
|
||||||
|
device_map=args.device_map,
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user