Multicentury-HTR-Demo

Sleeping

App Files Files Community

MikkoLipsanen committed 21 days ago

Commit

c99fa8d

verified ·

1 Parent(s): f097c5b

Update segmentation to use rfdetr model

Browse files

Files changed (1) hide show

segment_image.py +456 -316

segment_image.py CHANGED Viewed

@@ -1,344 +1,484 @@
-from huggingface_hub import hf_hub_download
 from shapely.validation import make_valid
 from shapely.geometry import Polygon
-from ultralytics import YOLO
-from PIL import Image
 import numpy as np
 import os
-from reading_order import OrderPolygons
 class SegmentImage:
-    """Class for segmenting document image regions and text lines."""
     def __init__(self,
-                line_model_path,
-                device,
-                line_iou=0.5,
-                region_iou=0.5,
-                line_overlap=0.5,
-                line_nms_iou=0.7,
-                region_nms_iou=0.3,
-                line_conf_threshold=0.25,
-                region_conf_threshold=0.25,
-                region_model_path=None,
-                order_regions=True,
-                region_half_precision=False,
-                line_half_precision=False):
-        # Path to text line detection model
-        self.line_model_path = line_model_path
-        # Path to text region detection model
-        self.region_model_path = region_model_path
-        # Defines the IoU threshold used in the non-maximum suppression (NMS) process to
-        # determine which prediction boxes should be suppressed or discarded based on their overlap with other boxes
-        self.line_nms_iou = line_nms_iou
-        self.region_nms_iou = region_nms_iou
-        # Defines the IoU threshold for text lines
         self.line_iou = line_iou
-        # Defines the IoU threshold for text regions
         self.region_iou = region_iou
-        # Defines the extent of line polygon overlap used for merging the polygons
-        self.line_overlap = line_overlap
-        # Defines confidence threshold for line detection
-        self.line_conf_threshold = line_conf_threshold
-        # Defines confidence threshold for region detection
-        self.region_conf_threshold = region_conf_threshold
-        # Defines the device to be used ('cpu', gpu '0', gpu '1' etc.)
-        self.device = device
-        # Defines whether a reading order is also estimated for the region detections
-        self.order_regions = order_regions
-        # Defines whether half precision (FP16) is used by the region and line prediction models
-        self.region_half_precision = region_half_precision
-        self.line_half_precision = line_half_precision
-        self.order_poly = OrderPolygons()
-        # Initialize segmentation model(s)
-        self.line_model = self.init_line_model()
-        if self.region_model_path:
-            self.region_model = self.init_region_model()
-    def init_line_model(self):
-        """Function for initializing the line detection model."""
-        try:
-            # Load the trained line detection model
-            cached_model_path = hf_hub_download(repo_id=self.line_model_path, filename="lines_20240827.pt")
-            line_model = YOLO(cached_model_path)
-            return line_model
-        except Exception as e:
-            print('Failed to load the line detection model: %s' % e)
-    def init_region_model(self):
-        """Function for initializing the region detection model."""
         try:
-            # Load the trained line detection model
-            cached_model_path = hf_hub_download(repo_id=self.region_model_path, filename="tuomiokirja_regions_04122023.pt")
-            region_model = YOLO(cached_model_path)
-            return region_model
         except Exception as e:
-            print('Failed to load the region detection model: %s' % e)
-    def get_region_ids(self, coords, max_min, classes, names, box_confs, img_shape):
-        """Function for creating unique id for each detected region."""
-        n = min(len(classes), len(coords))
-        res = []
-        for i in range(n):
-            # Creates a simple index-based id for each region
-            region_id = str(i)
-            # Extracts region name corresponding to the index
-            region_type = names[classes[i]]
-            poly_dict = {'coords': coords[i],
-                        'max_min': max_min[i],
-                        'class': str(classes[i]),
-                        'name': region_type,
-                        'conf': box_confs[i],
-                        'id': region_id,
-                        'img_shape': img_shape}
-            res.append(poly_dict)
-        return res
-    def get_max_min(self, polygons):
-        """Creates an array with the minimum and maximum
-        x and y values of the input polygons."""
-        n_rows = len(polygons)
-        xy_array = np.zeros([n_rows, 4])
-        for i, poly in enumerate(polygons):
-            x = [point[0] for point in poly]
-            y = [point[1] for point in poly]
-            if x:
-                xy_array[i,0] = max(x)
-                xy_array[i,1] = min(x)
-            if y:
-                xy_array[i,2] = max(y)
-                xy_array[i,3] = min(y)
-        return xy_array
-    def validate_polygon(self, polygon):
-        """"Function for testing and correcting the validity of polygons."""
         if len(polygon) > 2:
-            polygon = Polygon(polygon)
-            if not polygon.is_valid:
-                polygon = make_valid(polygon)
-            return polygon
         else:
             return None
-    def get_iou(self, poly1, poly2):
-        """Function for calculating Intersection over Union (IoU) values."""
-        # If the polygons don't intersect, IoU is 0
-        iou = 0
-        poly1 = self.validate_polygon(poly1)
-        poly2 = self.validate_polygon(poly2)
-        if poly1 and poly2:
-            if poly1.intersects(poly2):
-                # Calculates intersection of the 2 polygons
-                intersect = poly1.intersection(poly2).area
-                # Calculates union of the 2 polygons
-                uni = poly1.union(poly2)
-                # Calculates intersection over union
-                iou = intersect / uni.area
-        return iou
-    def merge_polygons(self, polygons, iou_threshold, overlap_threshold = None):
-        """Merges polygons that have an IoU value
-        above the given threshold."""
-        new_polygons = []
-        dropped = set()
-        # Loops over all input polygons and merges them if the
-        # IoU value is over the given threshold
-        for i in range(0, len(polygons)):
-            poly1 = self.validate_polygon(polygons[i])
-            merged = None
-            for j in range(i+1, len(polygons)):
-                poly2 = self.validate_polygon(polygons[j])
-                if poly1 and poly2:
-                    if poly1.intersects(poly2):
-                        overlap = False
-                        intersect = poly1.intersection(poly2)
-                        uni = poly1.union(poly2)
-                        # Calculates intersection over union
-                        iou = intersect.area / uni.area
-                        if overlap_threshold:
-                            overlap = intersect.area > (overlap_threshold * min(poly1.area, poly2.area))
-                        if (iou > iou_threshold) or overlap:
-                            if merged:
-                                # If there are multiple overlapping polygons
-                                # with IoU over the threshold, they are all merged together
-                                merged = uni.union(merged)
-                                dropped.add(j)
-                            else:
-                                merged = uni
-                                # Polygons that are merged together are dropped from
-                                # the list
-                                dropped.add(i)
-                                dropped.add(j)
-            if merged:
-                if merged.geom_type in ['GeometryCollection','MultiPolygon']:
-                    for geom in merged.geoms:
                         if geom.geom_type == 'Polygon':
-                            new_polygons.append(list(geom.exterior.coords))
-                elif merged.geom_type == 'Polygon':
-                    new_polygons.append(list(merged.exterior.coords))
-        res = [i for j, i in enumerate(polygons) if j not in dropped]
-        res += new_polygons
-        return res
-    def get_region_preds(self, img):
-        """Function for predicting text region coordinates."""
-        results = self.region_model.predict(source=img,
-                                            device=self.device,
-                                            conf=self.region_conf_threshold,
-                                            half=bool(self.region_half_precision),
-                                            iou=self.region_nms_iou)
-        results = results[0].cpu()
-        if results.masks:
-            # Extracts detected region polygons
-            coords = results.masks.xy
-            # Merge overlapping polygons
-            coords = self.merge_polygons(coords, self.region_iou)
-            # Maximum and minimum x and y axis values for detected polygons used for ordering the polygons
-            max_min = self.get_max_min(coords).tolist()
-            # Gets a list of the predicted class labels for detected regions
-            classes = results.boxes.cls.tolist()
-            # A dictionary with class ids as keys and class names as values
-            names = results.names
-            # Confidence values for detections
-            box_confs = results.boxes.conf.tolist()
-            # A tuple containing the shape of the original image
-            img_shape = results.orig_shape
-            res = self.get_region_ids(list(coords), max_min, classes, names, box_confs, img_shape)
-            return res
         else:
             return None
-    def get_line_preds(self, img):
-        """Function for predicting text line coordinates."""
-        results = self.line_model.predict(source=img,
-                                          device=self.device,
-                                          conf=self.line_conf_threshold,
-                                          half=bool(self.line_half_precision),
-                                          iou=self.line_nms_iou)
-        results = results[0].cpu()
-        if results.masks:
-            # Detected text line polygons
-            coords = results.masks.xy
-            # Merge overlapping polygons
-            coords = self.merge_polygons(coords, self.line_iou, self.line_overlap)
-            # Maximum and minimum x and y axis values for detected polygons
-            max_min = self.get_max_min(coords).tolist()
-            # Confidence values for detections
-            box_confs = results.boxes.conf.tolist()
-            res_dict = {'coords': list(coords), 'max_min': max_min, 'confs': box_confs}
-            return res_dict
-        else:
             return None
-    def get_dist(self, line_polygon, regions):
-        """Function for finding the closest region to the text line."""
-        dist, reg_id = 1000000, None
-        line_polygon = self.validate_polygon(line_polygon)
-        if line_polygon:
-            for region in regions:
-                # Calculates dictance between line and regions polygons
-                region_polygon = self.validate_polygon(region['coords'])
-                if region_polygon:
-                    line_reg_dist = line_polygon.distance(region_polygon)
-                    if line_reg_dist < dist:
-                        dist = line_reg_dist
-                        reg_id = region['id']
-        return reg_id
-    def get_line_regions(self, lines, regions):
-        """Function for connecting each text line to one region."""
-        lines_list = []
-        for i in range(len(lines['coords'])):
-            iou, reg_id, conf = 0, '', 0.0
-            max_min = [0.0, 0.0, 0.0, 0.0]
-            polygon = lines['coords'][i]
-            for region in regions:
-                line_reg_iou = self.get_iou(polygon, region['coords'])
-                if line_reg_iou > iou:
-                    iou = line_reg_iou
-                    reg_id = region['id']
-            # If line polygon does not intersect with any region, a distance metric is used for defining
-            # the region that the line belongs to
-            if iou == 0:
-                reg_id = self.get_dist(polygon, regions)
-            if (len(lines['max_min']) - 1) >= i:
-                max_min = lines['max_min'][i]
-            if (len(lines['confs']) - 1) >= i:
-                conf = lines['confs'][i]
-            new_line = {'polygon': polygon, 'reg_id': reg_id, 'max_min': max_min, 'conf': conf}
-            lines_list.append(new_line)
-        return lines_list
-    def order_regions_lines(self, lines, regions):
-        """Function for ordering line predictions inside each region."""
-        regions_with_rows = []
-        region_max_mins = []
-        for i, region in enumerate(regions):
-            line_max_mins = []
-            line_confs = []
-            line_polygons = []
-            for line in lines:
-                if line['reg_id'] == region['id']:
-                    line_max_mins.append(line['max_min'])
-                    line_confs.append(line['conf'])
-                    line_polygons.append(line['polygon'])
-            if line_polygons:
-                # If one or more lines are connected to a region, line order inside the region is defined
-                # and the predicted text lines are joined in the same python dict
-                line_order = self.order_poly.order(line_max_mins)
-                line_polygons = [line_polygons[i] for i in line_order]
-                line_confs = [line_confs[i] for i in line_order]
-                new_region = {'region_coords': region['coords'],
-                            'region_name': region['name'],
-                            'lines': line_polygons,
-                            'line_confs': line_confs,
-                            'region_conf': region['conf'],
-                            'img_shape': region['img_shape']}
-                region_max_mins.append(region['max_min'])
-                regions_with_rows.append(new_region)
-            else:
-                continue
-        # Creates an ordering of the detected regions based on their polygon coordinates
-        if self.order_regions:
-            region_order = self.order_poly.order(region_max_mins)
-            regions_with_rows = [regions_with_rows[i] for i in region_order]
-        return regions_with_rows
-    def get_default_region(self, image):
-        """Function for creating a default region if no regions are detected."""
-        w, h = image.size
-        region = {'coords': [[0.0, 0.0], [w, 0.0], [w, h], [0.0, h]],
-                        'max_min': [w, 0.0, h, 0.0],
-                        'class': '0',
-                        'name': "paragraph",
-                        'conf': 0.0,
-                        'id': '0',
-                        'img_shape': (h, w)}
-        return [region]
-    def get_segmentation(self, image):
-        """Segment input image into ordered text lines or ordered text regions and text lines."""
-        line_preds = self.get_line_preds(image)
-        if line_preds:
-            # If region detection model is defined, text regions and text lines are detected
-            region_preds = self.get_region_preds(image)
-            if not region_preds:
-                region_preds = self.get_default_region(image)
-                print(f'No regions detected from image {image}')
-            lines_with_regions = self.get_line_regions(line_preds, region_preds)
-            ordered_regions = self.order_regions_lines(lines_with_regions, region_preds)
-            return ordered_regions
         else:
-            print(f'No text lines detected from image {image}')
-            return None

+from typing import List, Tuple, Optional, Dict, Any
 from shapely.validation import make_valid
 from shapely.geometry import Polygon
+from rfdetr import RFDETRSegPreview
+from collections import defaultdict
 import numpy as np
+import cv2
 import os
+from image_processing import (
+    load_with_torchvision,
+    preprocess_resize_torch_transform,
+    upscale_bbox,
+    upscale_mask_opencv,
+    crop_line
+)
+from utils import get_default_region, get_line_regions, order_regions_lines
 class SegmentImage:
+    """
+    Document image segmentation for detecting text regions and lines.
+    Uses an RFDETR segmentation model to detect and extract text regions and lines
+    from document images. Includes polygon merging, validation, and ordering.
+    Args:
+        model_path: Path to the RFDETR segmentation model weights
+        max_size: Maximum dimension (height or width) for image preprocessing (default: 768)
+        confidence_threshold: Minimum confidence score for detections (default: 0.15, range: 0-1)
+        line_percentage_threshold: Minimum polygon area as fraction of image area for lines
+                                   (default: 7e-05, i.e., 0.007% of image)
+        region_percentage_threshold: Minimum polygon area as fraction of image area for regions
+                                     (default: 7e-05, i.e., 0.007% of image)
+        line_iou: IoU threshold for merging overlapping line polygons (default: 0.3, range: 0-1)
+        region_iou: IoU threshold for merging overlapping region polygons (default: 0.3, range: 0-1)
+        line_overlap_threshold: Area overlap ratio threshold for merging lines (default: 0.5, range: 0-1)
+        region_overlap_threshold: Area overlap ratio threshold for merging regions (default: 0.5, range: 0-1)
+        class_id_region: Class ID constant for identifying regions in segmentation model output
+        class_id_line: Class ID constant for identifying lines in segmentation model output
+        min_polygon_points: Minimum number of points to form a valid polygon
+    """
     def __init__(self,
+                model_path: str,
+                max_size: int = 768,
+                confidence_threshold: float = 0.15,
+                line_percentage_threshold: float = 7e-05,
+                region_percentage_threshold: float = 7e-05,
+                line_iou: float = 0.3,
+                region_iou: float = 0.3,
+                line_overlap_threshold: float = 0.5,
+                region_overlap_threshold: float = 0.5,
+                class_id_region: int = 1,
+                class_id_line: int = 2,
+                min_polygon_points: int = 3):
+        """
+        Store the segmentation settings and load the model.
+        Every argument is copied onto the instance under the same name. After
+        that the model path is checked on disk and init_model() is called, so
+        constructing the object loads the model immediately.
+        Raises:
+            FileNotFoundError: If model_path does not exist on disk
+            RuntimeError: Propagated from init_model() when the model cannot be initialized
+        """
+        # Path handed to RFDETRSegPreview as pretrain_weights in init_model()
+        self.model_path = model_path
+        # Passed as max_size to preprocess_resize_torch_transform() in get_segmentation()
+        self.max_size = max_size
+        # Passed as threshold to model.predict() in get_segmentation()
+        self.confidence_threshold = confidence_threshold
+        # Minimum polygon area (as a fraction of the mask area) kept by process_polygons()
+        self.line_percentage_threshold = line_percentage_threshold
+        self.region_percentage_threshold = region_percentage_threshold
+        # IoU thresholds forwarded to merge_polygons() for lines and regions
         self.line_iou = line_iou
         self.region_iou = region_iou
+        # Overlap-ratio thresholds forwarded to merge_polygons() for lines and regions
+        self.line_overlap_threshold = line_overlap_threshold
+        self.region_overlap_threshold = region_overlap_threshold
+        # Class ids used to split the model's masks into regions and lines
+        self.class_id_region = class_id_region
+        self.class_id_line = class_id_line
+        # Contours with fewer points than this are discarded in mask_to_polygon_cv2()
+        self.min_polygon_points = min_polygon_points
+        # Validate model path before attempting to load it, so a missing file
+        # is reported as FileNotFoundError rather than a generic load failure
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Model path does not exist: {self.model_path}")
+        self.init_model()
+    def init_model(self) -> None:
+        """
+        Load the RFDETR segmentation model and prepare it for inference.
+        Builds RFDETRSegPreview from self.model_path, calls its
+        optimize_for_inference() method and stores the result in self.model.
+        Raises:
+            RuntimeError: If loading or optimizing the model fails. The original
+                exception is chained as the cause so the root error stays visible
+                in the traceback.
+        """
         try:
+            self.model = RFDETRSegPreview(pretrain_weights=self.model_path)
+            self.model.optimize_for_inference()
+            print("✓ Segmentation model initialized successfully")
         except Exception as e:
+            # Re-raise under one exception type, keeping the underlying error as __cause__
+            raise RuntimeError(f'Failed to initialize segmentation model: {e}') from e
+    def validate_polygon(self, polygon: np.ndarray) -> Optional[Polygon]:
+        """
+        Build a Shapely geometry from polygon coordinates and repair it if invalid.
+        The coordinates are wrapped in a Shapely Polygon. If Shapely reports the
+        polygon as invalid, it is passed through make_valid() and that result is
+        returned instead.
+        Args:
+            polygon: Sequence of polygon coordinates; only len() and the Polygon
+                     constructor are applied to it (expected shape (N, 2))
+        Returns:
+            The (possibly repaired) Shapely geometry, or None when the input has
+            fewer than 3 points or when constructing/repairing it raised an
+            exception (a warning is printed in that case).
+            NOTE(review): make_valid() may return a multi-part geometry or a
+            collection rather than a single Polygon, so the Optional[Polygon]
+            annotation is likely narrower than the real return type - confirm.
+        """
+        # The point-count check is hard-coded here and does not read self.min_polygon_points
         if len(polygon) > 2:
+            try:
+                shapely_polygon = Polygon(polygon)
+                if not shapely_polygon.is_valid:
+                    shapely_polygon = make_valid(shapely_polygon)
+                return shapely_polygon
+            except Exception as e:
+                # Best effort: a polygon that cannot be built is reported and skipped
+                print(f"Warning: Failed to validate polygon: {e}")
+                return None
         else:
             return None
+    def merge_polygons(self,
+                      polygons: List[np.ndarray],
+                      polygon_iou: float,
+                      overlap_threshold: float) -> Tuple[List[np.ndarray], List[int]]:
+        """
+        Merge overlapping polygons by grouping them into connected components.
+        Every pair of valid, intersecting polygons is linked when its IoU is
+        strictly greater than polygon_iou, or otherwise (only if overlap_threshold
+        is greater than 0) when the intersection area divided by the smaller
+        polygon's area is strictly greater than overlap_threshold. Linked polygons
+        are grouped with a union-find structure and each group is replaced by the
+        Shapely union of its members.
+        Polygons that are not merged with anything are returned as the original
+        input objects; merged results are rebuilt from the exterior ring and cast
+        to int32. Polygons rejected by validate_polygon() are dropped.
+        Args:
+            polygons: List of polygon coordinate arrays, each with shape (N, 2)
+            polygon_iou: IoU threshold for merging (0-1)
+            overlap_threshold: Minimum area overlap ratio for merging (0-1);
+                               a value of 0 or less disables this criterion
+        Returns:
+            Tuple of:
+                - merged_polygons: List of polygon coordinate arrays, ordered by the
+                                  lowest input index found in each component
+                - polygon_mapping: List mapping each input polygon index to an output
+                                  polygon index (-1 if invalid/skipped)
+        """
+        n = len(polygons)
+        if n == 0:
+            return [], []
+        # Validate all polygons (entries are None for polygons that could not be used)
+        validated = [self.validate_polygon(p) for p in polygons]
+        # Union-find forest: initially every polygon is its own component root
+        parent = list(range(n))
+        def find(x: int) -> int:
+            """Find root of element x with path compression."""
+            if parent[x] != x:
+                parent[x] = find(parent[x])
+            return parent[x]
+        def union(x: int, y: int) -> None:
+            """Union two sets containing x and y."""
+            px, py = find(x), find(y)
+            if px != py:
+                parent[px] = py
+        # Link components by checking every unordered pair (i < j) for overlap
+        for i in range(n):
+            poly1 = validated[i]
+            if not poly1:
+                continue
+            for j in range(i + 1, n):
+                poly2 = validated[j]
+                if not poly2 or not poly1.intersects(poly2):
+                    continue
+                # Calculate intersection and union for IoU
+                intersection = poly1.intersection(poly2)
+                union_geom = poly1.union(poly2)
+                iou = intersection.area / union_geom.area if union_geom.area > 0 else 0
+                # Check merge criteria
+                should_merge = iou > polygon_iou
+                # If IoU threshold not met, check area overlap ratio; this catches a
+                # small polygon that lies mostly inside a much larger one
+                if not should_merge and overlap_threshold > 0:
+                    smaller_area = min(poly1.area, poly2.area)
+                    overlap_ratio = intersection.area / smaller_area if smaller_area > 0 else 0
+                    should_merge = overlap_ratio > overlap_threshold
+                # Merge polygons by updating union-find structure
+                if should_merge:
+                    union(i, j)
+        # Group polygons by their connected component (invalid polygons are left out)
+        components = defaultdict(list)
+        for i in range(n):
+            if validated[i]:
+                root = find(i)
+                components[root].append(i)
+        # Merge each connected component
+        merged_polygons = []
+        polygon_mapping = [-1] * n  # -1 indicates invalid/unmapped polygon
+        for root, indices in components.items():
+            # Index the first polygon produced by this component will occupy
+            output_idx = len(merged_polygons)
+            if len(indices) == 1:
+                # Single polygon, no merging needed; the original input array is kept
+                idx = indices[0]
+                merged_polygons.append(polygons[idx])
+                polygon_mapping[idx] = output_idx
+            else:
+                # Merge all polygons in this component using Shapely union
+                merged = validated[indices[0]]
+                for idx in indices[1:]:
+                    merged = merged.union(validated[idx])
+                # Extract polygon coordinates from merged geometry (exterior ring only)
+                if merged.geom_type == 'Polygon':
+                    # Single polygon result
+                    merged_polygons.append(
+                        np.array(merged.exterior.coords).astype(np.int32)
+                    )
+                    for idx in indices:
+                        polygon_mapping[idx] = output_idx
+                elif merged.geom_type in ['MultiPolygon', 'GeometryCollection']:
+                    # Multiple polygons resulted from merge (e.g., touching at single point);
+                    # non-Polygon parts of the result are ignored
+                    for geom in merged.geoms:
                         if geom.geom_type == 'Polygon':
+                            merged_polygons.append(
+                                np.array(geom.exterior.coords).astype(np.int32)
+                            )
+                    # Map all source polygons to first output polygon
+                    # NOTE(review): if none of the parts was a Polygon, nothing was
+                    # appended above, yet the sources still map to output_idx, which
+                    # then refers to the next component's polygon (or is out of range)
+                    for idx in indices:
+                        polygon_mapping[idx] = output_idx
+        return merged_polygons, polygon_mapping
+    def calculate_polygon_area(self, vertices: np.ndarray) -> float:
+        """
+        Return the area enclosed by a polygon, computed with the Shoelace formula.
+        The vertex list is treated as a closed ring: the last vertex is joined
+        back to the first. The absolute value is taken, so clockwise and
+        counter-clockwise vertex orders give the same result. The formula is
+        only meaningful for simple (non-self-intersecting) polygons.
+        Args:
+            vertices: Array of polygon coordinates with shape (N, 2)
+        Returns:
+            Area of the polygon in squared coordinate units (0 for a single point)
+        """
+        xs, ys = vertices[:, 0], vertices[:, 1]
+        # Cross-product sums over consecutive vertex pairs
+        forward = np.sum(xs[:-1] * ys[1:])
+        backward = np.sum(ys[:-1] * xs[1:])
+        # Add the closing edge from the last vertex back to the first
+        twice_signed_area = forward - backward + xs[-1] * ys[0] - ys[-1] * xs[0]
+        return 0.5 * np.abs(twice_signed_area)
+    def mask_to_polygon_cv2(self,
+                           mask: np.ndarray,
+                           original_shape: Tuple[int, int]) -> Tuple[List[np.ndarray], np.ndarray]:
+        """
+        Convert a segmentation mask into polygons scaled to the original image size.
+        External contours are extracted from the mask with OpenCV, contours with
+        fewer than self.min_polygon_points points are discarded, and the rest are
+        scaled from mask coordinates to original image coordinates. The area of
+        each polygon is measured in mask coordinates (before scaling) and reported
+        as a fraction of the mask area, for later filtering.
+        Args:
+            mask: Mask as numpy array; bool masks are converted to 0/255 uint8,
+                  any other dtype is cast to uint8 as is
+            original_shape: Tuple of (height, width) of original image
+        Returns:
+            Tuple of:
+                - scaled_polygons: List of integer coordinate arrays, each with shape
+                                  (N, 2), scaled to original size
+                - area_percentages: Array of polygon areas as fraction of mask area
+        """
+        # Ensure mask is uint8
+        if mask.dtype == bool:
+            mask_uint8 = mask.astype(np.uint8) * 255
         else:
+            mask_uint8 = mask.astype(np.uint8)
+        # Find external contours (only outer boundaries)
+        contours, _ = cv2.findContours(
+            mask_uint8,
+            cv2.RETR_EXTERNAL,
+            cv2.CHAIN_APPROX_SIMPLE
+        )
+        # Convert contours to (N, 2) polygons, filtering out degenerate contours.
+        # reshape(-1, 2) is used instead of squeeze() so that a one-point contour
+        # stays two-dimensional and every polygon has the same layout
+        polygons = [
+            contour.reshape(-1, 2)
+            for contour in contours
+            if len(contour) >= self.min_polygon_points
+        ]
+        # Scaling factors from mask to original image, computed once for all polygons
+        orig_height, orig_width = original_shape
+        mask_height, mask_width = mask.shape[:2]
+        scale = np.array([orig_width / mask_width, orig_height / mask_height])
+        mask_area = mask_height * mask_width
+        scaled_polygons = []
+        area_percentages = []
+        for poly in polygons:
+            # Area is measured on mask coordinates (before scaling)
+            area = self.calculate_polygon_area(poly)
+            area_percentages.append(area / mask_area if mask_area > 0 else 0)
+            # Scale polygon coordinates to original image size and snap to whole pixels
+            scaled_polygons.append(np.round(poly * scale).astype(int))
+        return scaled_polygons, np.array(area_percentages)
+    def process_polygons(self,
+                        poly_masks: np.ndarray,
+                        image_shape: Tuple[int, int],
+                        percentage_threshold: float,
+                        overlap_threshold: float,
+                        iou_threshold: float) -> Tuple[List[np.ndarray], List[Tuple[int, int, int, int]]]:
+        """
+        Extract polygons from segmentation masks, filter by area, and merge overlapping ones.
+        Each mask is converted to polygons with mask_to_polygon_cv2(), polygons whose
+        area fraction is not strictly greater than percentage_threshold are dropped,
+        and the remainder is merged with merge_polygons(). A bounding box is then
+        computed for every merged polygon.
+        The two returned lists are always the same length and index-aligned: an
+        empty merged polygon, for which no bounding box can be computed, is left
+        out of both lists.
+        Args:
+            poly_masks: Iterable of segmentation masks from model
+            image_shape: Tuple of (height, width) of original image
+            percentage_threshold: Minimum polygon area as fraction of mask area
+            overlap_threshold: Minimum overlap ratio for merging polygons
+            iou_threshold: Minimum IoU for merging polygons
+        Returns:
+            Tuple of:
+                - merged_polygons: List of polygon coordinate arrays
+                - merged_max_mins: List of bounding boxes as (xmin, ymin, xmax, ymax) tuples
+            Both lists are empty when nothing passes the area filter.
+        """
+        all_polygons = []
+        all_area_percentages = []
+        # Extract polygons from all masks
+        for mask in poly_masks:
+            polygons, area_percentages = self.mask_to_polygon_cv2(
+                mask=mask,
+                original_shape=image_shape
+            )
+            all_polygons.extend(polygons)
+            all_area_percentages.extend(area_percentages)
+        # Filter polygons by minimum area threshold
+        filtered_polygons = [
+            poly
+            for poly, area_percentage in zip(all_polygons, all_area_percentages)
+            if area_percentage > percentage_threshold
+        ]
+        if not filtered_polygons:
+            return [], []
+        # Merge overlapping polygons
+        merged_polygons, _ = self.merge_polygons(
+            filtered_polygons,
+            iou_threshold,
+            overlap_threshold
+        )
+        # Calculate bounding boxes, keeping polygons and boxes in lockstep so that
+        # callers can safely zip the two lists together
+        # NOTE(review): boxes are ordered (xmin, ymin, xmax, ymax) - confirm that the
+        # ordering helpers consuming 'max_min' expect this layout
+        kept_polygons = []
+        merged_max_mins = []
+        for poly in merged_polygons:
+            if len(poly) == 0:
+                continue
+            xmin, ymin = np.min(poly, axis=0)
+            xmax, ymax = np.max(poly, axis=0)
+            kept_polygons.append(poly)
+            merged_max_mins.append((xmin, ymin, xmax, ymax))
+        return kept_polygons, merged_max_mins
+    def get_segmentation(self, image) -> Optional[List[Dict[str, Any]]]:
+        """
+        Detect text lines and regions in a document image and return them ordered.
+        Resizes the image, runs the segmentation model, splits the predicted masks
+        into lines and regions by class id, converts each group to merged polygons
+        with process_polygons(), links lines to regions with get_line_regions() and
+        orders the result with order_regions_lines().
+        Args:
+            image: Input image. Its height and width are read from image.shape[0]
+                   and image.shape[1], so an object exposing .shape is required.
+                   NOTE(review): a PIL Image has no .shape attribute - confirm the
+                   expected type against the callers and
+                   preprocess_resize_torch_transform().
+        Returns:
+            The value returned by order_regions_lines(), or None if the model
+            prediction raised an exception or no line polygons were found.
+        """
+        image_shape = (image.shape[0], image.shape[1])
+        # Preprocess image (resize for model input)
+        preprocessed_image = preprocess_resize_torch_transform(
+            image,
+            max_size=self.max_size
+        )
+        # Run segmentation model; a failed prediction is reported and yields None
+        try:
+            detections = self.model.predict(
+                preprocessed_image,
+                threshold=self.confidence_threshold
+            )
+        except Exception as e:
+            print(f"Error during segmentation prediction: {e}")
             return None
+        # Separate line and region masks by class ID
+        # NOTE(review): detections.mask is indexed directly; if the model can return
+        # no mask array (None), this raises outside the try block above - confirm
+        line_mask = detections.mask[detections.class_id == self.class_id_line]
+        region_mask = detections.mask[detections.class_id ==  self.class_id_region]
+        # Process line polygons
+        merged_line_polygons, merged_line_max_mins = self.process_polygons(
+            line_mask,
+            image_shape,
+            self.line_percentage_threshold,
+            self.line_overlap_threshold,
+            self.line_iou
+        )
+        # Process region polygons (done before the no-lines check below)
+        merged_region_polygons, merged_region_max_mins = self.process_polygons(
+            region_mask,
+            image_shape,
+            self.region_percentage_threshold,
+            self.region_overlap_threshold,
+            self.region_iou
+        )
+        # If no lines detected, return None
+        if not merged_line_polygons:
+            print('No text lines detected from image.')
             return None
+        # Prepare line predictions dictionary (no confidence values are included)
+        line_preds = {
+            'coords': merged_line_polygons,
+            'max_min': merged_line_max_mins
+        }
+        # Prepare region predictions (or use default if none detected)
+        if merged_region_polygons:
+            region_preds = []
+            for num, (region_polygon, region_max_min) in enumerate(
+                zip(merged_region_polygons, merged_region_max_mins)
+            ):
+                # Region ids are the polygon's position as a string; every region
+                # is labelled 'paragraph' regardless of what the model predicted
+                region_preds.append({
+                    'coords': region_polygon,
+                    'id': str(num),
+                    'max_min': region_max_min,
+                    'name': 'paragraph',
+                    'img_shape': image_shape
+                })
         else:
+            # No regions detected, create default region covering entire image
+            region_preds = get_default_region(image_shape=image_shape)
+        # Associate lines with their containing regions
+        lines_connected_to_regions = get_line_regions(
+            lines=line_preds,
+            regions=region_preds
+        )
+        # Order lines within regions for proper reading sequence
+        ordered_lines = order_regions_lines(
+            lines=lines_connected_to_regions,
+            regions=region_preds
+        )
+        return ordered_lines