Update managers/vae_manager.py
managers/vae_manager.py  +73 -72  CHANGED
@@ -1,90 +1,91 @@
-# vae_manager.py
-#
+# FILE: managers/vae_manager.py
+# DESCRIPTION: Singleton manager for VAE decoding operations, supporting dedicated GPU devices.

 import torch
 import contextlib
-import
-import subprocess
-import sys
-from pathlib import Path
-
-from huggingface_hub import logging
-
-
-logging.set_verbosity_error()
-logging.set_verbosity_warning()
-logging.set_verbosity_info()
-logging.set_verbosity_debug()
-
-
-
-
-DEPS_DIR = Path("/data")
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-if not LTX_VIDEO_REPO_DIR.exists():
-    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
-    run_setup()
-
-def add_deps_to_path():
-    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-    if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
-        sys.path.insert(0, repo_path)
-        print(f"[DEBUG] Repo added to sys.path: {repo_path}")
-
-add_deps_to_path()
-
-
-
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
-
+import logging

 class _SimpleVAEManager:
-
-
-
-
-
-    """
-        self.pipeline =
-        self.device = device
-        self.autocast_dtype =
+    """
+    Manages VAE decoding. It's designed to be aware that the VAE might reside
+    on a different GPU than the main generation pipeline (e.g., Transformer).
+    """
+    def __init__(self):
+        """Initializes the manager without a pipeline attached."""
+        self.pipeline = None
+        self.device = torch.device("cpu")  # Defaults to CPU until a device is attached.
+        self.autocast_dtype = torch.float32

     def attach_pipeline(self, pipeline, device=None, autocast_dtype=None):
+        """
+        Attaches the main pipeline and, crucially, stores the specific device
+        that this manager and its associated VAE should operate on.
+
+        Args:
+            pipeline: The main LTX video pipeline instance.
+            device (torch.device or str): The target device for VAE operations (e.g., 'cuda:1').
+            autocast_dtype (torch.dtype): The precision for torch.autocast.
+        """
         self.pipeline = pipeline
         if device is not None:
-            self.device = device
+            self.device = torch.device(device)
+            logging.info(f"[VAEManager] VAE device successfully set to: {self.device}")
         if autocast_dtype is not None:
             self.autocast_dtype = autocast_dtype

-
-
     @torch.no_grad()
     def decode(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
-
-
-
-
-
-
-
+        """
+        Decodes a latent tensor into a pixel tensor.
+
+        This method ensures that the decoding operation happens on the correct,
+        potentially dedicated, VAE device.
+
+        Args:
+            latent_tensor (torch.Tensor): The latents to decode, typically on the main device or CPU.
+            decode_timestep (float): The timestep for VAE decoding.
+
+        Returns:
+            torch.Tensor: The resulting pixel tensor, moved to the CPU for general use.
+        """
+        if self.pipeline is None:
+            raise RuntimeError("VAEManager: No pipeline has been attached. Call attach_pipeline() first.")
+        if not hasattr(self.pipeline, 'vae'):
+            raise AttributeError("VAEManager: The attached pipeline does not have a 'vae' attribute.")
+
+        # 1. Move the input latents to the dedicated VAE device. This is the critical step.
+        logging.debug(f"[VAEManager] Moving latents from {latent_tensor.device} to VAE device {self.device} for decoding.")
+        latent_tensor_on_vae_device = latent_tensor.to(self.device)
+
+        # 2. Get a reference to the VAE model (which is already on the correct device).
+        vae = self.pipeline.vae
+
+        # 3. Prepare other necessary tensors on the same VAE device.
+        num_items_in_batch = latent_tensor_on_vae_device.shape[0]
+        timestep_tensor = torch.tensor([decode_timestep] * num_items_in_batch, device=self.device)

-
+        # 4. Set up the autocast context for the target device type.
+        autocast_device_type = self.device.type
+        ctx = torch.autocast(
+            device_type=autocast_device_type,
+            dtype=self.autocast_dtype,
+            enabled=(autocast_device_type == 'cuda')
+        )
+
+        # 5. Perform the decoding operation within the autocast context.
         with ctx:
-
-
-
-
-            timestep=timestep_tensor,
-            vae_per_channel_normalize=True,
-        )
-
-        # Normalize to [0, 1] if the output is in [-1, 1]
-        if pixels.min() < 0:
-            pixels = (pixels.clamp(-1, 1) + 1.0) / 2.0
-        else:
-            pixels = pixels.clamp(0, 1)
-        return pixels
+            logging.debug(f"[VAEManager] Decoding latents with shape {latent_tensor_on_vae_device.shape} on {self.device}.")
+            # The VAE expects latents scaled by its scaling factor.
+            scaled_latents = latent_tensor_on_vae_device / vae.config.scaling_factor
+            pixels = vae.decode(scaled_latents, timesteps=timestep_tensor).sample

+        # 6. Post-process the output: normalize to [0, 1] range.
+        pixels = (pixels.clamp(-1, 1) + 1.0) / 2.0
+
+        # 7. Move the final pixel tensor to the CPU. This is a safe default, as subsequent
+        # operations like video saving or UI display typically expect CPU tensors.
+        logging.debug("[VAEManager] Decoding complete. Moving pixel tensor to CPU.")
+        return pixels.cpu()

-#
-vae_manager_singleton = _SimpleVAEManager()
+# Create a single, global instance of the manager to be used throughout the application.
+vae_manager_singleton = _SimpleVAEManager()
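
For reference, below is a minimal usage sketch of the new singleton. It is a sketch, not part of the commit: the stub pipeline and VAE, the 0.18215 scaling factor, and the import path are assumptions chosen so the flow can run without the real LTX-Video models.

# Hypothetical usage sketch for vae_manager_singleton (stub objects, not the real pipeline).
import types
import torch

from managers.vae_manager import vae_manager_singleton  # assumes the managers package is importable

class _StubVAE:
    """Hypothetical stand-in for the pipeline's VAE: decode() returns .sample in [-1, 1]."""
    config = types.SimpleNamespace(scaling_factor=0.18215)  # assumed value

    def decode(self, latents, timesteps=None):
        # Fake decode: collapse the latent channels into an RGB-like tensor in [-1, 1].
        pixels = torch.tanh(latents.mean(dim=1, keepdim=True)).repeat(1, 3, 1, 1)
        return types.SimpleNamespace(sample=pixels)

pipeline = types.SimpleNamespace(vae=_StubVAE())

# Attach with a dedicated VAE device; fall back to CPU when no second GPU is present.
vae_device = "cuda:1" if torch.cuda.device_count() > 1 else "cpu"
vae_manager_singleton.attach_pipeline(pipeline, device=vae_device, autocast_dtype=torch.bfloat16)

latents = torch.randn(1, 8, 32, 32)  # latents on the caller's device
pixels = vae_manager_singleton.decode(latents, decode_timestep=0.05)
print(pixels.shape, pixels.device)  # torch.Size([1, 3, 32, 32]) cpu, values in [0, 1]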
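
One behavioral change visible in the diff: step 6 normalizes unconditionally, whereas the old code only remapped when pixels.min() < 0, so the new version assumes the VAE always emits values in [-1, 1]. A quick check of that affine mapping:

# Step-6 normalization: clamp to [-1, 1], then map onto [0, 1].
import torch

x = torch.tensor([-1.5, -1.0, 0.0, 1.0, 1.5])  # raw decoder output, possibly out of range
y = (x.clamp(-1, 1) + 1.0) / 2.0               # same expression as in decode()
print(y)  # tensor([0.0000, 0.0000, 0.5000, 1.0000, 1.0000])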