Mirror of https://github.com/THUDM/CogVideo.git (synced 2025-06-18 14:39:17 +08:00)

Merge pull request #411 from glide-the/diffuser_params

Add new command line arguments for LoRA weights and prompt

Commit 6a16207321
@@ -57,6 +57,23 @@ def get_args():
         The formula for lora_scale is: lora_r / alpha.
         """,
     )
+    parser.add_argument(
+        "--lora_alpha",
+        type=int,
+        default=1,
+        help="""LoRA weights have a rank parameter, with the default for the 2B transformer set at 128 and the 5B transformer set at 256.
+        Together with the rank, this alpha value determines lora_scale, which scales the LoRA update
+        for stable learning and to prevent underflow. In the SAT training framework,
+        alpha is set to 1 by default. The higher the rank, the better the expressive capability,
+        but it requires more memory and training time; increasing the rank blindly isn't always better.
+        The formula for lora_scale is: lora_alpha / lora_r.
+        """,
+    )
+    parser.add_argument(
+        "--prompt",
+        type=str,
+        help="Text prompt describing the video to generate",
+    )
     parser.add_argument(
         "--output_dir",
         type=str,
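In other words, the scale applied to the LoRA weights is alpha divided by rank, which matches both the hard-coded 1/128 this change replaces (alpha 1, rank 128) and the lora_scaling value computed later in the script. A minimal sketch of the arithmetic, using the default rank and alpha values quoted in the help text above purely for illustration:

# Sketch of the lora_scale arithmetic described above (alpha / rank).
# The rank and alpha values are the defaults quoted in the argument help,
# used here only for illustration.
def lora_scale(lora_alpha: int, lora_r: int) -> float:
    return lora_alpha / lora_r

print(lora_scale(1, 128))  # 2B transformer defaults -> 0.0078125
print(lora_scale(1, 256))  # 5B transformer defaults -> 0.00390625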
@@ -69,17 +86,18 @@ def get_args():
 if __name__ == "__main__":
     args = get_args()
     pipe = CogVideoXPipeline.from_pretrained(args.pretrained_model_name_or_path, torch_dtype=torch.bfloat16).to(device)
-    pipe.load_lora_weights(args.lora_weights_path, weight_name="pytorch_lora_weights.safetensors", adapter_name="test_1")
-    pipe.fuse_lora(lora_scale=1/128)
+    pipe.load_lora_weights(args.lora_weights_path, weight_name="pytorch_lora_weights.safetensors", adapter_name="cogvideox-lora")
+    # pipe.fuse_lora(lora_scale=args.lora_alpha/args.lora_r, ['transformer'])
+    lora_scaling = args.lora_alpha / args.lora_r
+    pipe.set_adapters(["cogvideox-lora"], [lora_scaling])
+
 
     pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 
     os.makedirs(args.output_dir, exist_ok=True)
-    prompt="""In the heart of a bustling city, a young woman with long, flowing brown hair and a radiant smile stands out. She's donned in a cozy white beanie adorned with playful animal ears, adding a touch of whimsy to her appearance. Her eyes sparkle with joy as she looks directly into the camera, her expression inviting and warm. The background is a blur of activity, with indistinct figures moving about, suggesting a lively public space. The lighting is soft and diffused, casting a gentle glow on her face and highlighting her features. The overall mood is cheerful and vibrant, capturing a moment of happiness in the midst of urban life.
-    """
 
     latents = pipe(
-        prompt=prompt,
+        prompt=args.prompt,
         num_videos_per_prompt=1,
         num_inference_steps=50,
         num_frames=49,
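The switch from fuse_lora to set_adapters is the main behavioral change: fuse_lora bakes the LoRA delta into the base weights at a fixed scale, while set_adapters keeps the adapter separate and applies the alpha/rank scale at runtime, so it can be adjusted or disabled without reloading the model. Below is a minimal standalone sketch of that flow using the diffusers API; the model ID, LoRA directory, and device are placeholders for illustration, not values from this commit.

import torch
from diffusers import CogVideoXPipeline

# Placeholder model ID and LoRA path, for illustration only.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.bfloat16).to("cuda")
pipe.load_lora_weights(
    "path/to/lora_checkpoint_dir",
    weight_name="pytorch_lora_weights.safetensors",
    adapter_name="cogvideox-lora",
)

# Scale the adapter by alpha / rank (1 / 128 for the 2B defaults quoted above).
pipe.set_adapters(["cogvideox-lora"], [1 / 128])

# Because the adapter is not fused, its scale can be changed later, or the
# LoRA can be switched off entirely, without reloading the base weights.
pipe.set_adapters(["cogvideox-lora"], [0.5 / 128])
pipe.disable_lora()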