Update app_more_lora.py
app_more_lora.py (+51 -54)
@@ -1,70 +1,62 @@
+import types
+import random
+import spaces
+
 import torch
-
+import numpy as np
+from diffusers import AutoencoderKLWan, UniPCMultistepScheduler
 from diffusers.utils import export_to_video
-from transformers import CLIPVisionModel
 import gradio as gr
 import tempfile
-import spaces
 from huggingface_hub import hf_hub_download
-
-from
-import
+
+from src.pipeline_wan_nag import NAGWanPipeline
+from src.transformer_wan_nag import NagWanTransformer3DModel
+
+
+MOD_VALUE = 32
+DEFAULT_DURATION_SECONDS = 4
+DEFAULT_STEPS = 4
+DEFAULT_SEED = 2025
+DEFAULT_H_SLIDER_VALUE = 480
+DEFAULT_W_SLIDER_VALUE = 832
+NEW_FORMULA_MAX_AREA = 480.0 * 832.0
+
+SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
+SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
+MAX_SEED = np.iinfo(np.int32).max
+
+FIXED_FPS = 16
+MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 81
 
 DEFAULT_NAG_NEGATIVE_PROMPT = "Static, motionless, still, ugly, bad quality, worst quality, poorly drawn, low resolution, blurry, lack of details"
-# Base MODEL_ID (using original Wan model that's compatible with diffusers)
-MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
 
-
+
+MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
 LORA_REPO_ID = "Kijai/WanVideo_comfy"
 LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
 
-# Additional enhancement LoRAs for FusionX-like quality
-ACCVIDEO_LORA_REPO = "alibaba-pai/Wan2.1-Fun-Reward-LoRAs"
-MPS_LORA_FILENAME = "Wan2.1-MPS-Reward-LoRA.safetensors"
-
-# Load enhanced model components
-print("🚀 Loading FusionX Enhanced Wan2.1 I2V Model...")
-image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
 vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-pipe =
-    MODEL_ID, vae=vae,
+pipe = NAGWanPipeline.from_pretrained(
+    MODEL_ID, vae=vae, torch_dtype=torch.bfloat16
 )
-
-# FusionX optimized scheduler settings
-pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
 pipe.to("cuda")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-try:
-    # Load MPS Rewards LoRA (strength 0.7 as per FusionX)
-    mps_path = hf_hub_download(repo_id=ACCVIDEO_LORA_REPO, filename=MPS_LORA_FILENAME)
-    pipe.load_lora_weights(mps_path, adapter_name="mps_lora")
-    lora_adapters.append("mps_lora")
-    lora_weights.append(0.7)  # FusionX uses 0.7 for MPS
-    print("✅ MPS Rewards LoRA loaded (strength: 0.7)")
-except Exception as e:
-    print(f"⚠️ MPS LoRA not loaded: {e}")
-
-# Apply LoRA adapters if any were loaded
-if lora_adapters:
-    pipe.set_adapters(lora_adapters, adapter_weights=lora_weights)
-    pipe.fuse_lora()
-    print(f"🔥 FusionX Enhancement Applied: {len(lora_adapters)} LoRAs fused")
-else:
-    print("📝 No LoRAs loaded - using base Wan model")
+causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
+pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
+pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
+for name, param in pipe.transformer.named_parameters():
+    if "lora_B" in name:
+        if "blocks.0" in name:
+            param.data = param.data * 0.25
+pipe.fuse_lora()
+pipe.unload_lora_weights()
+
+pipe.transformer.__class__.attn_processors = NagWanTransformer3DModel.attn_processors
+pipe.transformer.__class__.set_attn_processor = NagWanTransformer3DModel.set_attn_processor
+pipe.transformer.__class__.forward = NagWanTransformer3DModel.forward
 
 examples = [
     ["A ginger cat passionately plays eletric guitar with intensity and emotion on a stage. The background is shrouded in deep darkness. Spotlights casts dramatic shadows.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
@@ -157,9 +149,14 @@ def generate_video_with_example(
 
 
 with gr.Blocks() as demo:
-    gr.Markdown('''# Normalized Attention Guidance (NAG) for fast 4 steps Wan2.1-T2V-14B with
+    gr.Markdown('''# Normalized Attention Guidance (NAG) for fast 4 steps Wan2.1-T2V-14B with CausVid LoRA
+NAG demos: [LTX Video Fast](https://huggingface.co/spaces/ChenDY/NAG_ltx-video-distilled), [FLUX.1-dev](https://huggingface.co/spaces/ChenDY/NAG_FLUX.1-dev), [FLUX.1-schnell](https://huggingface.co/spaces/ChenDY/NAG_FLUX.1-schnell)
+
 Implementation of [Normalized Attention Guidance](https://chendaryen.github.io/NAG.github.io/).
-
+
+[Paper](https://arxiv.org/abs/2505.21179), [GitHub](https://github.com/ChenDarYen/Normalized-Attention-Guidance), [ComfyUI](https://github.com/ChenDarYen/ComfyUI-NAG)
+
+[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors).
 ''')
 
     with gr.Row():
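
For orientation, a minimal, hypothetical sketch of how the reconfigured pipeline might be driven at inference time. The generation function itself is outside the hunks shown above, so everything below is an assumption rather than the committed implementation: in particular the `nag_negative_prompt` and `nag_scale` arguments of `NAGWanPipeline`, the interpretation of the third value in the `examples` rows, and the frame/size arithmetic built from the new constants.

# Hypothetical usage sketch (not part of this commit). Assumes it runs in the same
# module after the setup above, and that NAGWanPipeline.__call__ accepts the
# NAG-specific arguments shown here.
import torch

prompt = "A ginger cat passionately plays electric guitar on a stage."
duration_seconds = DEFAULT_DURATION_SECONDS

# Snap the target resolution to the model stride and clamp the frame count.
height = max(MOD_VALUE, (DEFAULT_H_SLIDER_VALUE // MOD_VALUE) * MOD_VALUE)
width = max(MOD_VALUE, (DEFAULT_W_SLIDER_VALUE // MOD_VALUE) * MOD_VALUE)
num_frames = int(round(duration_seconds * FIXED_FPS))
num_frames = max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, num_frames))

output = pipe(
    prompt=prompt,
    nag_negative_prompt=DEFAULT_NAG_NEGATIVE_PROMPT,  # assumed NAG-specific argument
    nag_scale=11.0,                                   # assumed; value taken from the examples rows
    height=height,
    width=width,
    num_frames=num_frames,
    num_inference_steps=DEFAULT_STEPS,
    guidance_scale=1.0,  # assumed; the CausVid-distilled model is normally run without real CFG
    generator=torch.Generator(device="cuda").manual_seed(DEFAULT_SEED),
)
export_to_video(output.frames[0], "output.mp4", fps=FIXED_FPS)

In the Space itself, these values presumably come from the Gradio controls that the new slider and frame constants parameterize.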
|