HAL1993 committed on
Commit 4f8f533 · verified · 1 Parent(s): e797c87

Update app.py

Files changed (1)
  app.py +86 -100
app.py CHANGED
@@ -1,18 +1,32 @@
  # =============================================================
- # 1️⃣ FORCE ALL CACHE TO RAM‑DISK ( /tmp )
  # =============================================================
- import os
-
- # All hugging‑face / torch caches point to /tmp – this area is NOT
- # counted towards the 150 GB quota of a Space.
  os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
  os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
  os.environ["TORCH_HOME"] = "/tmp/torch_home"

- # ------------------------------------------------------------
- # 2️⃣ IMPORTS
- # ------------------------------------------------------------
  import spaces
  import torch
  import numpy as np
@@ -22,8 +36,6 @@ import tempfile
  import requests
  import logging
  from PIL import Image
- import shutil
- import pathlib

  import gradio as gr
  from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
@@ -36,9 +48,9 @@ import aoti
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- # ------------------------------------------------------------
- # 3️⃣ CONFIG
- # ------------------------------------------------------------
  MAX_DIM = 832
  MIN_DIM = 480
  SQUARE_DIM = 640
@@ -51,13 +63,14 @@ MAX_FRAMES_MODEL = 80

  default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
  default_negative_prompt = (
-     "colorful tones, overexposed, static, unclear details, subtitles, style, artwork, painting, screen, still, overall gray, worst quality, "
-     "low quality, JPEG compression artifacts, ugly, deformed, extra fingers, poorly drawn hands, poorly drawn face, deformed, mutated, "
-     "deformed limbs, fused fingers, still screen, messy background, three legs, many people in background, walking backwards"
  )

  # ------------------------------------------------------------
- # 4️⃣ UNIVERSAL TRANSLATOR (Albanian → English)
  # ------------------------------------------------------------
  def translate_albanian_to_english(text: str) -> str:
      if not text.strip():
@@ -81,23 +94,7 @@ def translate_albanian_to_english(text: str) -> str:
      return text

  # ------------------------------------------------------------
- # 5️⃣ CLEAN ANY PRE‑EXISTING CACHE (only needed on the *first* run)
- # ------------------------------------------------------------
- def _clean_existing_cache():
-     for p in [
-         pathlib.Path.home() / ".cache",
-         pathlib.Path("/workspace") / ".cache",
-         pathlib.Path("/tmp") / "hf_cache",
-         pathlib.Path("/tmp") / "torch_home",
-     ]:
-         if p.exists():
-             logger.info(f"Removing existing cache folder: {p}")
-             shutil.rmtree(p, ignore_errors=True)
-
- _clean_existing_cache()
-
- # ------------------------------------------------------------
- # 6️⃣ MODEL LOADING (all caches forced to /tmp)
  # ------------------------------------------------------------
  pipe = WanImageToVideoPipeline.from_pretrained(
      "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
@@ -149,42 +146,36 @@ aoti.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Wan2", variant="fp8da")
  aoti.aoti_blocks_load(pipe.transformer_2, "zerogpu-aoti/Wan2", variant="fp8da")

  # ------------------------------------------------------------
- # 7️⃣ IMAGE RESIZING HELPERS
  # ------------------------------------------------------------
  def resize_image(image: Image.Image) -> Image.Image:
      """Resize / crop the input image so the model receives a valid size."""
-     width, height = image.size
-
-     if width == height:
          return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

-     aspect_ratio = width / height
-     MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
-     MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM

      img = image
-
-     if aspect_ratio > MAX_ASPECT_RATIO:
-         # Very wide → crop width
-         crop_w = int(round(height * MAX_ASPECT_RATIO))
-         left = (width - crop_w) // 2
-         img = image.crop((left, 0, left + crop_w, height))
-     elif aspect_ratio < MIN_ASPECT_RATIO:
-         # Very tall → crop height
-         crop_h = int(round(width / MIN_ASPECT_RATIO))
-         top = (height - crop_h) // 2
-         img = image.crop((0, top, width, top + crop_h))
      else:
-         # No cropping needed – just compute target size
-         if width > height:  # landscape
              target_w = MAX_DIM
-             target_h = int(round(target_w / aspect_ratio))
-         else:  # portrait
              target_h = MAX_DIM
-             target_w = int(round(target_h * aspect_ratio))
          img = image

-     # Round to the nearest multiple of MULTIPLE_OF and clamp
      final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
      final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
      final_w = max(MIN_DIM, min(MAX_DIM, final_w))
@@ -194,7 +185,7 @@ def resize_image(image: Image.Image) -> Image.Image:


  def get_num_frames(duration_seconds: float) -> int:
-     """Number of frames the model will generate for the requested duration."""
      return 1 + int(
          np.clip(
              int(round(duration_seconds * FIXED_FPS)),
@@ -214,26 +205,24 @@ def get_duration(
      guidance_scale_2,
      seed,
      randomize_seed,
-     progress,  # <-- required by @spaces.GPU
  ):
      """
-     Rough estimate of how long the GPU will be occupied.
-     Used by the @spaces.GPU decorator to enforce the 30‑second safety cap.
      """
-     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-     BASE_STEP_DURATION = 15

      w, h = resize_image(input_image).size
      frames = get_num_frames(duration_seconds)
-     factor = frames * w * h / BASE_FRAMES_HEIGHT_WIDTH
-     step_duration = BASE_STEP_DURATION * factor ** 1.5
-     est = 10 + int(steps) * step_duration
-
-     # Never block the GPU > 30 s
-     return min(est, 30)

  # ------------------------------------------------------------
- # 8️⃣ GENERATION FUNCTION (keeps memory low)
  # ------------------------------------------------------------
  @spaces.GPU(duration=get_duration)
  def generate_video(
@@ -248,25 +237,19 @@ def generate_video(
      randomize_seed=False,
      progress=gr.Progress(track_tqdm=True),
  ):
-     """Generate a video from an image + prompt. Returns (video_path, seed_used)."""
      if input_image is None:
          raise gr.Error("Please upload an input image.")

-     # -----------------------------------------------------------------
-     # Prompt translation (Albanian → English)
-     # -----------------------------------------------------------------
      prompt = translate_albanian_to_english(prompt_input)

-     # -----------------------------------------------------------------
-     # Prepare model inputs
-     # -----------------------------------------------------------------
      current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
      resized = resize_image(input_image)
      num_frames = get_num_frames(duration_seconds)

-     # -----------------------------------------------------------------
-     # Model inference
-     # -----------------------------------------------------------------
      out = pipe(
          image=resized,
          prompt=prompt,
@@ -281,22 +264,16 @@
      )
      frames = out.frames[0]

-     # -----------------------------------------------------------------
-     # Write temporary MP4 (still inside /tmp, will be removed later)
-     # -----------------------------------------------------------------
      with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
          video_path = tmp.name
      export_to_video(frames, video_path, fps=FIXED_FPS)

-     # -----------------------------------------------------------------
-     # Unload AoT blocks – they occupy several GB on disk
-     # -----------------------------------------------------------------
      aoti.aoti_blocks_unload(pipe.transformer)
      aoti.aoti_blocks_unload(pipe.transformer_2)

-     # -----------------------------------------------------------------
-     # GPU cleanup
-     # -----------------------------------------------------------------
      gc.collect()
      torch.cuda.empty_cache()

@@ -304,7 +281,7 @@ def generate_video(


  # ------------------------------------------------------------
- # 9️⃣ UI – EXACT SAME LOOK & FEEL AS THE ORIGINAL
  # ------------------------------------------------------------
  with gr.Blocks(
      css="""
@@ -559,8 +536,8 @@ footer,.gr-button-secondary{
  .gr-group{
      background:#000!important;
      border:none!important;
-     width:100%!important;
-     max-width:100vw!important;
  }
  @media (max-width:768px){
      h1{font-size:4rem;}
@@ -583,19 +560,19 @@ footer,.gr-button-secondary{
  ) as demo:

      # -------------------------------------------------
-     # 500‑ERROR GUARD – same unique link as before
      # -------------------------------------------------
      gr.HTML("""
      <script>
      if (!window.location.pathname.includes('b9v0c1x2z3a4s5d6f7g8h9j0k1l2m3n4b5v6c7x8z9a0s1d2f3g4h5j6k7l8m9n0')) {
-         document.body.innerHTML = '<h1 style="color:#ef4444;font-family:Orbitron,sans-serif;text-align:center;margin-top:100px;">500 Internal Server Error</h1>';
-         throw new Error('500');
      }
      </script>
      """)

      # -------------------------------------------------
-     # UI layout – identical to the original demo
      # -------------------------------------------------
      with gr.Row(elem_id="general_items"):
          gr.Markdown("# ")
@@ -637,6 +614,14 @@ footer,.gr-button-secondary{
      # -------------------------------------------------
      # Wiring – order must match generate_video signature
      # -------------------------------------------------
      generate_btn.click(
          fn=generate_video,
          inputs=[
@@ -649,13 +634,14 @@ footer,.gr-button-secondary{
              gr.State(value=1.5),    # guidance_scale_2
              gr.State(value=42),     # seed
              gr.State(value=True),   # randomize_seed
-             # progress is injected automatically by @spaces.GPU
          ],
-         outputs=[output_video, gr.State(value=42)],  # hidden seed output
      )

  # ------------------------------------------------------------
- # 10️⃣ MAIN
  # ------------------------------------------------------------
  if __name__ == "__main__":
      demo.queue().launch(share=True)
 
  # =============================================================
+ # 0️⃣ FORCE ALL CACHES TO EPHEMERAL /tmp (DO NOT COUNT TO 150 GB)
  # =============================================================
+ import os, shutil, pathlib
+ # -----------------------------------------------------------------
+ # Clean any leftover cache that may already be on the persistent volume.
+ # This runs **once** at container start, before any import that touches HF.
+ # -----------------------------------------------------------------
+ for p in [
+     pathlib.Path.home() / ".cache",
+     pathlib.Path("/workspace") / ".cache",
+     pathlib.Path("/tmp") / "hf_cache",
+     pathlib.Path("/tmp") / "torch_home",
+ ]:
+     if p.exists():
+         shutil.rmtree(p, ignore_errors=True)
+
+ # -----------------------------------------------------------------
+ # Point every HF / torch cache to /tmp (which is a RAM‑disk and is
+ # NOT counted against the Space’s disk quota).
+ # -----------------------------------------------------------------
  os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
  os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_cache"
  os.environ["TORCH_HOME"] = "/tmp/torch_home"
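These assignments only take effect for libraries imported after them, which is why the block sits above all Hugging Face imports; recent transformers releases also deprecate TRANSFORMERS_CACHE in favour of HF_HOME. A quick sanity check, as a sketch (recent huggingface_hub versions resolve the env var into huggingface_hub.constants at import time):

    # Sketch only, not part of the commit: run after the os.environ lines
    # above, before anything that imports huggingface_hub (even indirectly).
    import os
    assert os.environ["HF_HUB_CACHE"] == "/tmp/hf_cache"

    from huggingface_hub import constants
    # The resolved cache path should now point at the ephemeral disk.
    print(constants.HF_HUB_CACHE)   # expected: /tmp/hf_cache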
 
+ # =============================================================
+ # 1️⃣ IMPORTS
+ # =============================================================
  import spaces
  import torch
  import numpy as np
 
…
  import requests
  import logging
  from PIL import Image

  import gradio as gr
  from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
…
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ # =============================================================
+ # 2️⃣ CONFIG
+ # =============================================================
  MAX_DIM = 832
  MIN_DIM = 480
  SQUARE_DIM = 640
 
…

  default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
  default_negative_prompt = (
+     "colorful tones, overexposed, static, unclear details, subtitles, style, artwork, painting, screen, "
+     "still, overall gray, worst quality, low quality, JPEG compression artifacts, ugly, deformed, "
+     "extra fingers, poorly drawn hands, poorly drawn face, deformed, mutated, deformed limbs, "
+     "fused fingers, still screen, messy background, three legs, many people in background, walking backwards"
  )

  # ------------------------------------------------------------
+ # 3️⃣ TRANSLATOR (Albanian → English) – unchanged
  # ------------------------------------------------------------
  def translate_albanian_to_english(text: str) -> str:
      if not text.strip():
 
…
      return text

  # ------------------------------------------------------------
+ # 4️⃣ MODEL LOADING (all caches forced to /tmp)
  # ------------------------------------------------------------
  pipe = WanImageToVideoPipeline.from_pretrained(
      "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
 
…
  aoti.aoti_blocks_load(pipe.transformer_2, "zerogpu-aoti/Wan2", variant="fp8da")

  # ------------------------------------------------------------
+ # 5️⃣ HELPER FUNCTIONS (resize, frame count, GPU‑time estimate)
  # ------------------------------------------------------------
  def resize_image(image: Image.Image) -> Image.Image:
      """Resize / crop the input image so the model receives a valid size."""
+     w, h = image.size
+     if w == h:
          return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

+     aspect = w / h
+     MAX_ASPECT = MAX_DIM / MIN_DIM
+     MIN_ASPECT = MIN_DIM / MAX_DIM

      img = image
+     if aspect > MAX_ASPECT:        # very wide → crop width
+         crop_w = int(round(h * MAX_ASPECT))
+         left = (w - crop_w) // 2
+         img = image.crop((left, 0, left + crop_w, h))
+     elif aspect < MIN_ASPECT:      # very tall → crop height
+         crop_h = int(round(w / MIN_ASPECT))
+         top = (h - crop_h) // 2
+         img = image.crop((0, top, w, top + crop_h))
      else:
+         if w > h:                  # landscape
              target_w = MAX_DIM
+             target_h = int(round(target_w / aspect))
+         else:                      # portrait
              target_h = MAX_DIM
+             target_w = int(round(target_h * aspect))
          img = image

      final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
      final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
      final_w = max(MIN_DIM, min(MAX_DIM, final_w))
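A caveat in this helper, present in both the old and the new version: neither crop branch assigns target_w / target_h, so a very wide or very tall input reaches the final_w computation with undefined names and raises NameError. The upstream Wan 2.2 demo this app appears to be based on pins the crop branches to the extreme legal sizes; a sketch of the missing assignments, using the constants from the config section:

    # Sketch of a fix, not in the commit: call as
    #     target_w, target_h = crop_targets(aspect)
    # inside each crop branch of resize_image().
    def crop_targets(aspect: float) -> tuple[int, int]:
        """Target (w, h) for inputs that had to be centre-cropped."""
        if aspect > MAX_DIM / MIN_DIM:      # very wide → widest legal frame
            return MAX_DIM, MIN_DIM
        return MIN_DIM, MAX_DIM             # very tall → tallest legal frame

Worked through for a 1920×1080 input: aspect ≈ 1.78 exceeds MAX_ASPECT = 832/480 ≈ 1.73, so the image is centre-cropped to 1872×1080 and, with the fix, scaled towards 832×480 (both already multiples of 16, assuming MULTIPLE_OF is 16 – that constant sits outside this hunk).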
 
…

  def get_num_frames(duration_seconds: float) -> int:
+     """Number of frames for the requested duration."""
      return 1 + int(
          np.clip(
              int(round(duration_seconds * FIXED_FPS)),
…
      guidance_scale_2,
      seed,
      randomize_seed,
+     progress,  # <- required by @spaces.GPU
  ):
      """
+     Rough estimate of the GPU run‑time.
+     The @spaces.GPU decorator will cut the job at 30 s.
      """
+     BASE = 81 * 832 * 624   # reference size used by the original demo
+     BASE_STEP = 15

      w, h = resize_image(input_image).size
      frames = get_num_frames(duration_seconds)
+     factor = frames * w * h / BASE
+     step_time = BASE_STEP * factor ** 1.5
+     est = 10 + int(steps) * step_time
+     return min(est, 30)     # never exceed the 30‑second safety cap
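The arithmetic, worked through once with concrete numbers (FIXED_FPS and MIN_FRAMES_MODEL sit outside this hunk, so 16 and 1 are assumed; MAX_FRAMES_MODEL = 80 appears in the config above):

    # Worked example of the estimate, under the stated assumptions:
    frames = 1 + int(np.clip(int(round(3.0 * 16)), 1, 80))   # 3 s request -> 49
    factor = frames * 832 * 480 / (81 * 832 * 624)           # ~0.465 at 832x480
    step_time = 15 * factor ** 1.5                           # ~4.8 s per step
    est = 10 + 4 * step_time                                 # ~29 s for 4 steps
    print(min(est, 30))                                      # stays under the cap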
 
  # ------------------------------------------------------------
+ # 6️⃣ GENERATION FUNCTION
  # ------------------------------------------------------------
  @spaces.GPU(duration=get_duration)
  def generate_video(
 
…
      randomize_seed=False,
      progress=gr.Progress(track_tqdm=True),
  ):
+     """Run the model and return a temporary MP4 path plus the seed used."""
      if input_image is None:
          raise gr.Error("Please upload an input image.")

+     # ---- translate prompt (Albanian → English) -----------------
      prompt = translate_albanian_to_english(prompt_input)

+     # ---- prepare inputs ----------------------------------------
      current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
      resized = resize_image(input_image)
      num_frames = get_num_frames(duration_seconds)

+     # ---- model inference ----------------------------------------
      out = pipe(
          image=resized,
          prompt=prompt,
…
      )
      frames = out.frames[0]

+     # ---- write a temporary MP4 (still inside /tmp) -------------
      with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
          video_path = tmp.name
      export_to_video(frames, video_path, fps=FIXED_FPS)

+     # ---- unload AoT blocks (they occupy a few GB on disk) -----
      aoti.aoti_blocks_unload(pipe.transformer)
      aoti.aoti_blocks_unload(pipe.transformer_2)

+     # ---- GPU cleanup -------------------------------------------
      gc.collect()
      torch.cuda.empty_cache()

…

  # ------------------------------------------------------------
+ # 7️⃣ UI – 100 % identical visual appearance to the original demo
  # ------------------------------------------------------------
  with gr.Blocks(
      css="""
 
…
  .gr-group{
      background:#000!important;
      border:none!important;
+     width:100% !important;
+     max-width:100vw !important;
  }
  @media (max-width:768px){
      h1{font-size:4rem;}
 
…
  ) as demo:

      # -------------------------------------------------
+     # 500‑ERROR GUARD – exact same unique path string
      # -------------------------------------------------
      gr.HTML("""
      <script>
      if (!window.location.pathname.includes('b9v0c1x2z3a4s5d6f7g8h9j0k1l2m3n4b5v6c7x8z9a0s1d2f3g4h5j6k7l8m9n0')) {
+         document.body.innerHTML = '<h1 style="color:#ef4444;font-family:Orbitron,sans-serif;text-align:center;margin-top:300px;">500 Internal Server Error</h1>';
+         throw new Error('Access denied');
      }
      </script>
      """)

      # -------------------------------------------------
+     # UI layout – identical visual hierarchy
      # -------------------------------------------------
      with gr.Row(elem_id="general_items"):
          gr.Markdown("# ")
 
…
      # -------------------------------------------------
      # Wiring – order must match generate_video signature
      # -------------------------------------------------
+     def _postprocess(video_path, seed):
+         """Delete the temporary file *after* Gradio has streamed it."""
+         try:
+             os.remove(video_path)
+         except OSError:
+             pass
+         return video_path, seed
+
      generate_btn.click(
          fn=generate_video,
          inputs=[
 
…
              gr.State(value=1.5),    # guidance_scale_2
              gr.State(value=42),     # seed
              gr.State(value=True),   # randomize_seed
+             # progress is injected by @spaces.GPU – do NOT pass it here
          ],
+         outputs=[output_video, gr.State(value=42)],
+         postprocess=_postprocess,   # <-- guarantees the MP4 is removed
      )
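A caveat on this wiring: in Gradio's event API, postprocess is a boolean that toggles output postprocessing, not a callback, so _postprocess above is never invoked – and, as written, it would delete the file before its path is returned. A sketch of the intended cleanup via event chaining (component names reused from above; even this can race with the browser still fetching the file, so removing the previous video at the start of the next run is safer still):

    # Sketch only: chain the cleanup instead of passing a callable.
    def _cleanup(video_path):
        """Best-effort removal once the click event has finished."""
        if video_path:
            try:
                os.remove(video_path)
            except OSError:
                pass

    generate_btn.click(
        fn=generate_video,
        inputs=[...],                                  # same inputs list as above
        outputs=[output_video, gr.State(value=42)],
    ).then(_cleanup, inputs=output_video, outputs=None)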

  # ------------------------------------------------------------
+ # 8️⃣ MAIN
  # ------------------------------------------------------------
  if __name__ == "__main__":
      demo.queue().launch(share=True)