Update aduc_framework/managers/wan_manager.py
aduc_framework/managers/wan_manager.py
CHANGED
@@ -1,4 +1,5 @@
 # aduc_framework/managers/wan_manager.py
+# WanManager v0.0.1 (beta)
 
 import os
 import tempfile
@@ -17,20 +18,23 @@ from diffusers.utils.export_utils import export_to_video
 
 class WanManager:
     """
-
-
-
-
-
+    WanManager v0.0.1 (beta)
+    - image: first item (fixed weight 1.0) -> latent 0
+    - handle: second item (if present) -> latent 4, with handle_weight from the list
+    - last: last item -> final latent, with anchor_weight_last from the list
+    - Keeps the fused Lightning LoRA, FlowMatch Euler, device_map='auto' and the i2v contract.
+    - Fallback: if the pipeline does not support the new args, falls back to the original call without handle/weights.
     """
 
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
+    # Dimensions
     MAX_DIMENSION = 832
     MIN_DIMENSION = 480
     DIMENSION_MULTIPLE = 16
     SQUARE_SIZE = 480
 
+    # Video
     FIXED_FPS = 16
     MIN_FRAMES_MODEL = 8
     MAX_FRAMES_MODEL = 81
@@ -44,6 +48,7 @@ class WanManager:
     def __init__(self) -> None:
         print("Loading models into memory. This may take a few minutes...")
 
+        # i2v pipeline with two transformers (high/low noise)
         self.pipe = WanImageToVideoPipeline.from_pretrained(
             self.MODEL_ID,
             transformer=WanTransformer3DModel.from_pretrained(
@@ -60,10 +65,13 @@
             ),
             torch_dtype=torch.bfloat16,
         )
+
+        # Scheduler
         self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
             self.pipe.scheduler.config, shift=32.0
         )
 
+        # Lightning LoRA (fused)
         print("Applying 8-step Lightning LoRA...")
         try:
             self.pipe.load_lora_weights(
@@ -83,6 +91,7 @@
             print("Fusing LoRA weights into the main model...")
             self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
             self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
+
             self.pipe.unload_lora_weights()
             print("Lightning LoRA successfully fused. Model is ready for fast 8-step generation.")
         except Exception as e:
@@ -90,6 +99,8 @@
 
         print("All models loaded. Service is ready.")
 
+    # ===== Utils =====
+
     def process_image_for_video(self, image: Image.Image) -> Image.Image:
         width, height = image.size
         if width == height:
@@ -125,9 +136,11 @@
         left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
         return resized.crop((left, top, left + ref_width, top + ref_height))
 
+    # ===== API =====
+
     def generate_video_from_conditions(
         self,
-        images_condition_items: List[List[Any]],
+        images_condition_items: List[List[Any]],  # [[patch(Image), frame(int|str), weight(float)], ...]
         prompt: str,
         negative_prompt: Optional[str],
         duration_seconds: float,
@@ -139,25 +152,37 @@
         output_type: str = "np",
     ) -> Tuple[str, int]:
         """
-        Primeiro item
+        - First item: image (fixed weight 1.0) at latent 0.
+        - Second item (optional): handle at latent 4 with its weight from the list.
+        - Last item: last at the final latent with its weight from the list.
         """
         if not images_condition_items or len(images_condition_items) < 2:
             raise ValueError("Forneça ao menos dois itens (início e fim).")
 
-
-
+        items = images_condition_items
+
+        # image (fixed weight 1.0)
+        start_image = items[0][0]
 
-
-
-
+        # handle (second item, if present)
+        handle_image = items[1][0] if len(items) >= 3 else None
+        handle_weight = float(items[1][2]) if len(items) >= 3 and items[1][2] is not None else 1.0
+
+        # last (always the final item)
+        end_image = items[-1][0]
+        end_weight = float(items[-1][2]) if len(items[-1]) >= 3 and items[-1][2] is not None else 1.0
 
         if start_image is None or end_image is None:
             raise ValueError("As imagens inicial e final não podem ser vazias.")
         if not isinstance(start_image, Image.Image) or not isinstance(end_image, Image.Image):
             raise TypeError("Os 'patches' devem ser PIL.Image.")
+        if handle_image is not None and not isinstance(handle_image, Image.Image):
+            raise TypeError("O 'patch' do handle deve ser PIL.Image.")
 
         processed_start = self.process_image_for_video(start_image)
         processed_end = self.resize_and_crop_to_match(end_image, processed_start)
+        processed_handle = self.resize_and_crop_to_match(handle_image, processed_start) if handle_image is not None else None
+
         target_height, target_width = processed_start.height, processed_start.width
 
         num_frames = int(round(duration_seconds * self.FIXED_FPS))
@@ -167,8 +192,8 @@
         generator = torch.Generator().manual_seed(current_seed)
 
         call_kwargs = dict(
-            image=processed_start,
-            last_image=processed_end,
+            image=processed_start,  # latent 0 (implicit weight 1.0)
+            last_image=processed_end,  # last latent (adjustable weight)
             prompt=prompt,
             negative_prompt=negative_prompt if negative_prompt is not None else self.default_negative_prompt,
             height=target_height,
@@ -181,22 +206,28 @@
             output_type=output_type,
         )
 
-        result = None
         try:
-
-
-
-
-
-
+            if processed_handle is not None:
+                # handle at latent 4 with its list weight; last at the final latent with end_weight
+                result = self.pipe(
+                    **call_kwargs,
+                    handle_image=processed_handle,
+                    handle_weight=float(handle_weight),
+                    handle_latent_index=4,
+                    anchor_weight_last=float(end_weight),
+                )
+            else:
+                # no handle; only the last-frame weight
+                result = self.pipe(
+                    **call_kwargs,
+                    anchor_weight_last=float(end_weight),
+                )
         except TypeError:
-            print("[WanManager]
+            print("[WanManager] handle/anchor args não suportados; usando chamada padrão.")
             result = self.pipe(**call_kwargs)
 
         frames = result.frames[0]
-
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            video_path = tmp.name
         export_to_video(frames, video_path, fps=self.FIXED_FPS)
-
         return video_path, current_seed
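For orientation, a minimal usage sketch of the conditioning contract introduced by this change. It is not part of the commit: the image files, prompt, and duration are illustrative, and it assumes the constructor and the keyword parameters omitted from this diff keep their defaults; whether the handle/anchor kwargs are honored depends on the patched pipeline (otherwise the TypeError fallback in the diff runs the standard call).

# Usage sketch (illustrative only; assumes WanManager from this commit is importable
# and that parameters not shown in this diff have defaults).
from PIL import Image

from aduc_framework.managers.wan_manager import WanManager

manager = WanManager()  # loads the Wan 2.2 i2v pipeline and fuses the Lightning LoRA

# Item format: [patch (PIL.Image), frame (int | str), weight (float)]
items = [
    [Image.open("start.png"), 0, 1.0],    # first item -> latent 0, weight fixed at 1.0
    [Image.open("handle.png"), 32, 0.8],  # optional handle -> latent 4, weight from the list
    [Image.open("end.png"), 64, 1.0],     # last item -> final latent, weight from the list
]

video_path, seed = manager.generate_video_from_conditions(
    images_condition_items=items,
    prompt="a slow cinematic pan across the scene",
    negative_prompt=None,   # None falls back to the manager's default negative prompt
    duration_seconds=4.0,   # 4 s * FIXED_FPS (16) = 64 frames, within the 8-81 frame range
)
print(video_path, seed)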