jbilcke-hf
/

LTX-Video-0.9.1-HFIE

@@ -69,6 +69,70 @@ apply_dirty_hack_to_patch_file_extensions_and_bypass_filter("/repository")
 #logger.info("💡 Printing directory structure of ""/repository"":")
 #print_directory_structure("/repository")
 @dataclass
 class GenerationConfig:
     """Configuration for video generation"""
@@ -339,12 +403,12 @@ class EndpointHandler:
                 # Check if image-to-video generation is requested
                 if input_image:
-                    # Process base64 image
-                    if input_image.startswith('data:'):
-                        input_image = input_image.split(',', 1)[1]
-                    image_bytes = base64.b64decode(input_image)
-                    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-                    generation_kwargs["image"] = image
                     frames = self.image_to_video(**generation_kwargs).frames
                 else:
                     frames = self.text_to_video(**generation_kwargs).frames

 #logger.info("💡 Printing directory structure of ""/repository"":")
 #print_directory_structure("/repository")
def process_input_image(image_data: str, target_width: int, target_height: int) -> Image.Image:
    """
    Decode a base64 image and fit it to the target size.

    The image is flattened to RGB (transparent areas are composited on a
    white background), scaled so that it fully covers the target box while
    keeping its aspect ratio, and then center cropped to the target size.

    Args:
        image_data: Base64 encoded image data, optionally wrapped in a
            ``data:<mime>;base64,<payload>`` URI
        target_width: Desired width in pixels (must be positive)
        target_height: Desired height in pixels (must be positive)
    Returns:
        Processed RGB PIL Image of ``target_width`` x ``target_height`` pixels
    Raises:
        ValueError: If the target dimensions are not positive, or if the
            image data cannot be decoded or processed
    """
    try:
        if target_width <= 0 or target_height <= 0:
            raise ValueError(
                f"target dimensions must be positive, got {target_width}x{target_height}"
            )

        # Handle data URI format: keep only the payload after the first comma
        if image_data.startswith('data:'):
            image_data = image_data.split(',', 1)[1]

        # Decode base64
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes))

        # Flatten to RGB. Any mode carrying transparency (not only RGBA) is
        # composited on white, otherwise convert('RGB') would silently drop
        # the alpha channel and expose whatever colour lies underneath.
        has_alpha = image.mode in ('RGBA', 'LA', 'PA') or (
            image.mode == 'P' and 'transparency' in image.info
        )
        if has_alpha:
            rgba = image.convert('RGBA')
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[3])
            image = background
        elif image.mode != 'RGB':
            image = image.convert('RGB')

        orig_width, orig_height = image.size

        # Compare aspect ratios by cross-multiplication and derive the scaled
        # size with integer floor division. Going through float ratios and
        # int() truncation can land one pixel below the target, which makes
        # the crop box stick out of the image and pads the result with black.
        if orig_width * target_height > target_width * orig_height:
            # Image is wider than target: match heights, crop the sides
            new_height = target_height
            new_width = orig_width * target_height // orig_height
        else:
            # Image is taller than (or same shape as) target: match widths,
            # crop top and bottom
            new_width = target_width
            new_height = orig_height * target_width // orig_width

        # Resize image
        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Center crop to target dimensions
        left = (new_width - target_width) // 2
        top = (new_height - target_height) // 2
        return image.crop((left, top, left + target_width, top + target_height))
    except Exception as e:
        # Callers see a single exception type; keep the original cause chained
        raise ValueError(f"Failed to process input image: {str(e)}") from e
 @dataclass
 class GenerationConfig:
     """Configuration for video generation"""
                 # Check if image-to-video generation is requested
                 if input_image:
+                    processed_image = process_input_image(
+                        input_image,
+                        config.width,
+                        config.height
+                    )
+                    generation_kwargs["image"] = processed_image
                     frames = self.image_to_video(**generation_kwargs).frames
                 else:
                     frames = self.text_to_video(**generation_kwargs).frames