jskvrna committed on
Commit 1904e97 · 1 Parent(s): e8f3517

Improves 3D wireframe prediction and extraction


Refactors the wireframe prediction pipeline to improve the
accuracy and robustness of 3D wireframe extraction from images.

This involves:
- Incorporating camera intrinsics (K), rotation (R), and
translation (t) matrices for more accurate point projections
(see the sketch after this list).
- Implementing depth fitting and sparse depth retrieval for
improved depth estimation.
- Adding a mechanism to filter occluded ground truth vertices
for more accurate visibility determination.
- Refining point cloud segmentation and filtering to extract
relevant features.
- Improving COLMAP point cloud visualization by colorizing apex/eave points.
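
For orientation, a minimal, hypothetical sketch of the two mechanisms the bullets above describe: projecting world points into an image with K, R, t and testing them against a fitted dense depth map, plus a median-based scale fit between dense and sparse depth. The function names (project_and_check_occlusion, fit_depth_scale), the tolerance value, and the exact conventions are illustrative assumptions, not the code in this commit; the real logic lives in predict.py and the hoho2025 helpers.

import numpy as np

def project_and_check_occlusion(points_w, K, R, t, depth_fitted, tol=0.1):
    # World -> camera: X_c = R @ X_w + t, then the pinhole projection in K.
    H, W = depth_fitted.shape
    pts_cam = (R @ points_w.T + t.reshape(3, 1)).T            # (N, 3)
    uv = np.full((len(points_w), 2), -1.0)
    visible = np.zeros(len(points_w), dtype=bool)
    for i, (x, y, z) in enumerate(pts_cam):
        if z <= 0:                                            # behind the camera
            continue
        u = K[0, 0] * x / z + K[0, 2]
        v = K[1, 1] * y / z + K[1, 2]
        ui, vi = int(round(u)), int(round(v))
        if 0 <= ui < W and 0 <= vi < H:
            uv[i] = (u, v)
            # Visible iff the vertex is not behind the surface stored in the map.
            visible[i] = z <= depth_fitted[vi, ui] + tol
    return uv, visible

def fit_depth_scale(depth_dense, depth_sparse, mask):
    # Assumed behaviour of the robust scale fit: pick k so that k * dense depth
    # matches the sparse COLMAP depth in the median sense over masked pixels.
    valid = mask & (depth_sparse > 0) & (depth_dense > 0)
    if not np.any(valid):
        return 1.0, depth_dense
    k = np.median(depth_sparse[valid] / depth_dense[valid])
    return k, k * depth_dense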

Files changed (3)
  1. predict.py +621 -6
  2. train.py +2 -3
  3. visu.py +59 -0
predict.py CHANGED
@@ -1,7 +1,14 @@
1
  import numpy as np
2
  from typing import Tuple, List
3
- from hoho2025.example_solutions import empty_solution, read_colmap_rec, get_vertices_and_edges_from_segmentation, create_3d_wireframe_single_image, merge_vertices_3d, prune_not_connected, prune_too_far
4
  from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
5
 
6
  def convert_entry_to_human_readable(entry):
7
  out = {}
@@ -15,11 +22,377 @@ def convert_entry_to_human_readable(entry):
15
  out['__key__'] = entry['order_id']
16
  return out
17
 
18
  def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
19
  """
20
  Predict 3D wireframe from a dataset entry.
21
  """
22
  good_entry = convert_entry_to_human_readable(entry)
23
  vert_edge_per_image = {}
24
  for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
25
  good_entry['depth'],
@@ -29,17 +402,42 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
29
  good_entry['image_ids'],
30
  good_entry['ade'] # Added ade20k segmentation
31
  )):
32
- colmap_rec = good_entry['colmap_binary']
33
  K = np.array(K)
34
  R = np.array(R)
35
  t = np.array(t)
 
36
  # Resize gestalt segmentation to match depth map size
37
  depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
38
  gest_seg = gest.resize(depth_size)
39
  gest_seg_np = np.array(gest_seg).astype(np.uint8)
40
 
41
  # Get 2D vertices and edges first
42
- vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=10.)
43
 
44
  # Check if we have enough to proceed
45
  if (len(vertices) < 2) or (len(connections) < 1):
@@ -49,19 +447,236 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
49
 
50
  # Call the refactored function to get 3D points
51
  vertices_3d = create_3d_wireframe_single_image(
52
- vertices, connections, depth, colmap_rec, img_id, ade_seg
53
  )
 
54
  # Store original 2D vertices, connections, and computed 3D points
55
  vert_edge_per_image[i] = vertices, connections, vertices_3d
56
 
57
  # Merge vertices from all images
58
  all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.5)
59
  all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d, keep_largest=False)
60
- all_3d_vertices_clean, connections_3d_clean = prune_too_far(all_3d_vertices_clean, connections_3d_clean, colmap_rec, th = 4.0)
61
 
62
-
63
  if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
64
  print (f'Not enough vertices or connections in the 3D vertices')
65
  return empty_solution()
66
 
67
  return all_3d_vertices_clean, connections_3d_clean
1
  import numpy as np
2
  from typing import Tuple, List
3
+ from hoho2025.example_solutions import empty_solution, read_colmap_rec, get_vertices_and_edges_from_segmentation, get_house_mask, fit_scale_robust_median, get_uv_depth, merge_vertices_3d, prune_not_connected, prune_too_far
4
  from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
5
+ from PIL import Image, ImageDraw
6
+ from visu import save_gestalt_with_proj, draw_crosses_on_image
7
+ import os
8
+ import pycolmap
9
+ from PIL import Image as PImage
10
+ import cv2
11
+ import open3d as o3d
12
 
13
  def convert_entry_to_human_readable(entry):
14
  out = {}
 
22
  out['__key__'] = entry['order_id']
23
  return out
24
 
25
+ def get_gt_vertices_and_edges(entry, i, depth, colmap_rec, k, r, t, img_id, ade_seg):
26
+ depth_fitted, depth_sparse, found_sparse, col_img = get_fitted_dense_depth(depth, colmap_rec, img_id, ade_seg, k, r, t)
27
+
28
+ #old_k, old_r, old_t = k.copy(), r.copy(), t.copy()
29
+
30
+ #k = col_img.camera.calibration_matrix()
31
+ #world_to_cam = np.eye(4)
32
+ #world_to_cam = col_img.cam_from_world.matrix()
33
+ #r = world_to_cam[:3, :3]
34
+ #t = world_to_cam[:3, 3]
35
+
36
+ wf_vertices = np.array(entry['wf_vertices'])
37
+ wf_edges = entry['wf_edges']
38
+
39
+ # Project world frame vertices into the current image
40
+ if wf_vertices.shape[0] > 0:
41
+ # Transform vertices to camera coordinates
42
+ wf_vertices_cam = (r @ wf_vertices.T) + t.reshape(3, 1)
43
+ # Project to image plane
44
+ wf_vertices_img_homogeneous = k @ wf_vertices_cam
45
+ # Convert to 2D pixel coordinates
46
+ wf_vertices_img = wf_vertices_img_homogeneous[:2, :] / wf_vertices_img_homogeneous[2, :]
47
+ projected_gt_vertices_2d = wf_vertices_img.T
48
+
49
+ # Initialize lists to store corresponding depth values from depth maps
50
+ gt_projected_depth_fitted_values = []
51
+ gt_projected_depth_sparse_values = []
52
+
53
+ # Get dimensions of the depth maps for bounds checking
54
+ # Assuming depth_fitted and depth_sparse have the same dimensions
55
+ map_height, map_width = depth_fitted.shape
56
+
57
+ for idx in range(projected_gt_vertices_2d.shape[0]):
58
+ # Get the 2D projected coordinates (x, y)
59
+ px, py = projected_gt_vertices_2d[idx]
60
+
61
+ # Round to nearest integer to use as indices for the depth maps
62
+ ix, iy = int(round(px)), int(round(py))
63
+
64
+ # Get corresponding depth_fitted value
65
+ if 0 <= iy < map_height and 0 <= ix < map_width:
66
+ gt_projected_depth_fitted_values.append(depth_fitted[iy, ix])
67
+ else:
68
+ # Projected point is outside the depth map bounds
69
+ gt_projected_depth_fitted_values.append(np.nan)
70
+
71
+ # Get corresponding depth_sparse value
72
+ if 0 <= iy < map_height and 0 <= ix < map_width: # Assuming same dimensions for depth_sparse
73
+ gt_projected_depth_sparse_values.append(depth_sparse[iy, ix])
74
+ else:
75
+ # Projected point is outside the depth map bounds
76
+ gt_projected_depth_sparse_values.append(np.nan)
77
+
78
+ # Determine occlusion status for each ground truth vertex
79
+ occlusion_status = [] # True if occluded, False otherwise
80
+
81
+ # This block executes only if there were ground truth vertices to begin with.
82
+ # wf_vertices_cam and projected_gt_vertices_2d would have been computed.
83
+ # gt_projected_depth_fitted_values list has one entry per vertex.
84
+ if wf_vertices.shape[0] > 0:
85
+ # These are the Z-coordinates (depths) of the original 3D wf_vertices
86
+ # when transformed into the camera's coordinate system.
87
+ # This is effectively the "true" depth of each vertex from the camera.
88
+ gt_vertices_depth_in_camera_system = wf_vertices_cam[2, :]
89
+
90
+ for idx in range(projected_gt_vertices_2d.shape[0]):
91
+ true_depth_of_vertex = gt_vertices_depth_in_camera_system[idx]
92
+
93
+ # This is the depth value read from the (dense) depth_fitted map
94
+ # at the 2D projection of the current wf_vertex.
95
+ depth_from_fitted_map = gt_projected_depth_fitted_values[idx]
96
+
97
+ # A vertex is considered occluded if its true depth is greater than
98
+ # the depth of the surface recorded in the depth_fitted map.
99
+ # This means the vertex is behind the observed surface.
100
+ # Comparisons against NaN evaluate to False, so if depth_from_fitted_map is
101
+ # NaN (the vertex projected outside the depth map bounds) the vertex is not
102
+ # marked as occluded by the map.
103
+ if np.isnan(true_depth_of_vertex) or true_depth_of_vertex > depth_from_fitted_map + 200.:
104
+ occlusion_status.append(True) # Vertex is occluded
105
+ else:
106
+ occlusion_status.append(False) # Vertex is not occluded or out of map bounds
107
+
108
+ if wf_vertices.shape[0] > 0:
109
+ # Filter vertices based on occlusion status
110
+ visible_vertices_indices = [idx for idx, occluded in enumerate(occlusion_status) if not occluded]
111
+
112
+ # Create a mapping from old vertex indices to new (filtered) vertex indices
113
+ old_to_new_indices_map = {old_idx: new_idx for new_idx, old_idx in enumerate(visible_vertices_indices)}
114
+
115
+ # Filter the projected_gt_vertices_2d and transform to the new structure
116
+ new_wf_vertices = []
117
+ if projected_gt_vertices_2d.shape[0] > 0: # Ensure projected_gt_vertices_2d is not empty
118
+ for idx in visible_vertices_indices:
119
+ xy_coords = projected_gt_vertices_2d[idx]
120
+ new_wf_vertices.append({'xy': xy_coords, 'type': 'apex'})
121
+ wf_vertices = new_wf_vertices
122
+
123
+ # Filter the edges
124
+ # An edge is kept if both its vertices are in the visible_vertices_indices list
125
+ visible_edges = []
126
+ for edge_start, edge_end in wf_edges:
127
+ if edge_start in old_to_new_indices_map and edge_end in old_to_new_indices_map:
128
+ # Remap to new indices
129
+ visible_edges.append((old_to_new_indices_map[edge_start], old_to_new_indices_map[edge_end]))
130
+ wf_edges = visible_edges
131
+ else:
132
+ # If there are no original vertices, wf_vertices should be an empty list
133
+ wf_vertices = []
134
+ wf_edges = []
135
+
136
+ wf_vertices_3d_visible = np.empty((0, 3))
137
+ original_gt_3d_vertices = np.array(entry['wf_vertices'])
138
+
139
+ # Check if there were original vertices and if occlusion_status was computed for them
140
+ if original_gt_3d_vertices.shape[0] > 0 and len(occlusion_status) == original_gt_3d_vertices.shape[0]:
141
+ # Determine indices of visible vertices based on occlusion_status
142
+ # occlusion_status is True if occluded, False otherwise. We want not occluded.
143
+ visible_indices = [idx for idx, occluded_flag in enumerate(occlusion_status) if not occluded_flag]
144
+
145
+ if visible_indices: # If the list of visible_indices is not empty
146
+ wf_vertices_3d_visible = original_gt_3d_vertices[visible_indices]
147
+ # If no original_gt_3d_vertices, or if all are occluded (visible_indices is empty),
148
+ # or if occlusion_status length doesn't match (which implies an issue earlier, but defensively handled),
149
+ # wf_vertices_3d_visible will remain the initialized np.empty((0, 3)).
150
+
151
+ return wf_vertices, wf_edges, wf_vertices_3d_visible
152
+
153
+ def project_vertices_to_3d(uv: np.ndarray, depth_vert: np.ndarray, col_img: pycolmap.Image, K, R, t) -> np.ndarray:
154
+ """
155
+ Projects 2D vertex coordinates with associated depths to 3D world coordinates.
156
+
157
+ Parameters
158
+ ----------
159
+ uv : np.ndarray
160
+ (N, 2) array of 2D vertex coordinates (u, v).
161
+ depth_vert : np.ndarray
162
+ (N,) array of depth values for each vertex.
163
+ col_img : pycolmap.Image
164
+ Matching COLMAP image (kept for reference; the projection below uses K, R, t).
+ K, R, t : np.ndarray
+ Camera intrinsics (3x3), rotation (3x3) and translation (3,) defining the
+ world-to-camera transform.
+
165
+ Returns
166
+ -------
167
+ vertices_3d : np.ndarray
168
+ (N, 3) array of vertex coordinates in 3D world space.
169
+ """
170
+ # Backproject to 3D local camera coordinates
171
+ xy_local = np.ones((len(uv), 3))
172
+ #k = col_img.camera.calibration_matrix()
173
+ k = K
174
+ xy_local[:, 0] = (uv[:, 0] - k[0, 2]) / k[0, 0]
175
+ xy_local[:, 1] = (uv[:, 1] - k[1, 2]) / k[1, 1]
176
+ # Get the 3D vertices
177
+ vertices_3d_local = xy_local * depth_vert[...,None]
178
+
179
+ # Build the world-to-camera transform, then invert it to get camera-to-world
180
+ world_to_cam = np.eye(4)
181
+ world_to_cam[:3, :3] = R
182
+ world_to_cam[:3, 3] = t.reshape(3)
183
+ #world_to_cam[:3] = col_img.cam_from_world.matrix()
184
+ cam_to_world = np.linalg.inv(world_to_cam)
185
+
186
+ # Transform local 3D points to world coordinates
187
+ vertices_3d_homogeneous = cv2.convertPointsToHomogeneous(vertices_3d_local)
188
+ vertices_3d = cv2.transform(vertices_3d_homogeneous, cam_to_world)
189
+ vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
190
+ return vertices_3d
191
+
192
+ def get_fitted_dense_depth(depth, colmap_rec, img_id, ade20k_seg, K, R, t):
193
+ """
194
+ Gets sparse depth from COLMAP, computes a house mask, fits dense depth to sparse
195
+ depth within the mask, and returns the fitted dense depth.
196
+
197
+ Parameters
198
+ ----------
199
+ depth : np.ndarray
200
+ Initial dense depth map (H, W).
201
+ colmap_rec : pycolmap.Reconstruction
202
+ COLMAP reconstruction data.
203
+ img_id : str
204
+ Identifier for the current image within the COLMAP reconstruction.
205
+ ade20k_seg : PIL.Image
206
+ ADE20k segmentation map for the image.
207
+ K : np.ndarray
208
+ Camera intrinsic matrix (3x3).
209
+ R : np.ndarray
210
+ Camera rotation matrix (3x3).
211
+ t : np.ndarray
212
+ Camera translation vector (3,).
213
+
214
+ Returns
215
+ -------
216
+ depth_fitted : np.ndarray
217
+ Dense depth map scaled and shifted to align with sparse depth within the house mask (H, W).
218
+ depth_sparse : np.ndarray
219
+ The sparse depth map obtained from COLMAP (H, W).
220
+ found_sparse : bool
221
+ True if sparse depth points were found for this image, False otherwise.
222
+ """
223
+ depth_np = np.array(depth) / 1000. # Convert mm to meters if needed
224
+ depth_sparse, found_sparse, col_img = get_sparse_depth_custom(colmap_rec, img_id, depth_np, K, R, t)
225
+ #print(depth_sparse.sum())
226
+ #depth_sparse, found_sparse, col_img = get_sparse_depth(colmap_rec, img_id, depth_np)
227
+
228
+ if not found_sparse:
229
+ print(f'No sparse depth found for image {img_id}')
230
+ # Return original (meter-scaled) depth if no sparse data
231
+ return depth_np, np.zeros_like(depth_np), False, None
232
+
233
+ # Get house mask to focus fitting on relevant areas
234
+ house_mask = get_house_mask(ade20k_seg)
235
+
236
+ # Fit dense depth to sparse depth (scale only), using only points within the house mask
237
+ k, depth_fitted = fit_scale_robust_median(depth_np, depth_sparse, validity_mask=house_mask)
238
+ print(f"Fitted depth scale k={k:.4f} for image {img_id}")
239
+ #depth_fitted = depth_np# * house_mask.astype(np.float32)
240
+ depth_sparse = depth_sparse# * house_mask.astype(np.float32)
241
+ return depth_fitted, depth_sparse, True, col_img
242
+
243
+ def get_sparse_depth_custom(colmap_rec, img_id_substring, depth, K, R, t):
244
+ """
245
+ Return a sparse depth map for the COLMAP image whose name contains
246
+ `img_id_substring`. The output has the same (H, W) shape as `depth`,
247
+ where only the projected 3D points get a depth > 0, else 0.
248
+ Uses provided K, R, t for projection instead of COLMAP's image projection.
249
+ """
250
+ H, W = depth.shape
251
+
252
+ # 1) Find the matching COLMAP image to get its associated 3D points
253
+ # This part remains to identify which 3D points are relevant for this image view
254
+ found_img = None
255
+ for img_id_c, col_img_obj in colmap_rec.images.items(): # Renamed col_img to col_img_obj to avoid conflict
256
+ if img_id_substring in col_img_obj.name:
257
+ found_img = col_img_obj
258
+ break
259
+ if found_img is None:
260
+ print(f"Image substring {img_id_substring} not found in COLMAP.")
261
+ return np.zeros((H, W), dtype=np.float32), False, None
262
+
263
+ # 2) Gather 3D points that this image sees (according to COLMAP)
264
+ points_xyz_world = []
265
+ for pid, p3D in colmap_rec.points3D.items():
266
+ if found_img.has_point3D(pid):
267
+ points_xyz_world.append(p3D.xyz) # world coords
268
+ if not points_xyz_world:
269
+ print(f"No 3D points associated with {found_img.name} in COLMAP.")
270
+ return np.zeros((H, W), dtype=np.float32), False, found_img # Return found_img for consistency
271
+
272
+ points_xyz_world = np.array(points_xyz_world) # (N, 3)
273
+
274
+ # 3) Project points_xyz_world to camera coordinates using R, t
275
+ # points_cam = R @ points_xyz_world.T + t.reshape(3,1)
276
+ # points_cam = points_cam.T (N,3)
277
+ # More robustly:
278
+ points_xyz_world_h = np.hstack((points_xyz_world, np.ones((points_xyz_world.shape[0], 1)))) # (N, 4)
279
+
280
+ # World to Camera transformation matrix
281
+ world_to_cam_mat = np.eye(4)
282
+ world_to_cam_mat[:3, :3] = R
283
+ world_to_cam_mat[:3, 3] = t.flatten()
284
+
285
+ points_cam_h = (world_to_cam_mat @ points_xyz_world_h.T).T # (N, 4)
286
+ points_cam = points_cam_h[:, :3] / points_cam_h[:, 3, np.newaxis] # (N, 3) in camera coordinates
287
+
288
+ uv = []
289
+ z_vals = []
290
+
291
+ for i in range(points_cam.shape[0]):
292
+ p_cam = points_cam[i]
293
+
294
+ # Project to image plane using K
295
+ # p_img_h = K @ p_cam
296
+ # u = p_img_h[0] / p_img_h[2]
297
+ # v = p_img_h[1] / p_img_h[2]
298
+ # z = p_cam[2]
299
+
300
+ # Ensure p_cam[2] (depth) is positive
301
+ if p_cam[2] <= 0: # Point is behind or on the camera plane
302
+ continue
303
+
304
+ # Project to image plane using K
305
+ # K is [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
306
+ u_i = (K[0, 0] * p_cam[0] / p_cam[2]) + K[0, 2]
307
+ v_i = (K[1, 1] * p_cam[1] / p_cam[2]) + K[1, 2]
308
+
309
+ u_i_int = int(round(u_i))
310
+ v_i_int = int(round(v_i))
311
+
312
+ # Check in-bounds
313
+ if 0 <= u_i_int < W and 0 <= v_i_int < H:
314
+ uv.append((u_i_int, v_i_int))
315
+ z_vals.append(p_cam[2]) # Depth is the Z coordinate in camera space
316
+
317
+ if not uv:
318
+ print(f"No points projected into image bounds for {img_id_substring} using K,R,t.")
319
+ return np.zeros((H, W), dtype=np.float32), False, found_img
320
+
321
+ uv = np.array(uv, dtype=int) # shape (M,2)
322
+ z_vals = np.array(z_vals) # shape (M,)
323
+
324
+ depth_out = np.zeros((H, W), dtype=np.float32)
325
+ # Ensure z_vals are positive before assignment, though already checked
326
+ valid_depth_mask = z_vals > 0
327
+ if np.any(valid_depth_mask):
328
+ depth_out[uv[valid_depth_mask, 1], uv[valid_depth_mask, 0]] = z_vals[valid_depth_mask]
329
+
330
+ return depth_out, True, found_img
331
+
332
+
333
+ def create_3d_wireframe_single_image(vertices: List[dict],
334
+ connections: List[Tuple[int, int]],
335
+ depth: PImage,
336
+ colmap_rec: pycolmap.Reconstruction,
337
+ img_id: str,
338
+ ade_seg: PImage,
339
+ K, R, t) -> np.ndarray:
340
+ """
341
+ Processes a single image view to generate 3D vertex coordinates from existing 2D vertices/edges.
342
+
343
+ Parameters
344
+ ----------
345
+ vertices : List[dict]
346
+ List of 2D vertex dictionaries (e.g., {"xy": (x, y), "type": ...}).
347
+ connections : List[Tuple[int, int]]
348
+ List of 2D edge connections (indices into the vertices list).
349
+ depth : PIL.Image
350
+ Initial dense depth map as a PIL Image.
351
+ colmap_rec : pycolmap.Reconstruction
352
+ COLMAP reconstruction data.
353
+ img_id : str
354
+ Identifier for the current image within the COLMAP reconstruction.
355
+ ade_seg : PIL.Image
356
+ ADE20k segmentation map for the image.
+ K, R, t : np.ndarray
+ Camera intrinsics (3x3), rotation (3x3) and translation (3,) for this view.
357
+
358
+ Returns
359
+ -------
360
+ vertices_3d : np.ndarray
361
+ (N, 3) array of vertex coordinates in 3D world space.
362
+ Returns an empty array if processing fails (e.g., missing sparse depth).
363
+ """
364
+ # Check if initial vertices/connections are valid
365
+ if (len(vertices) < 2) or (len(connections) < 1):
366
+ # This case should ideally be handled before calling, but good to double check.
367
+ print(f'Warning: create_3d_wireframe_single_image called with insufficient vertices/connections for image {img_id}')
368
+ return np.empty((0, 3))
369
+
370
+ # Get fitted dense depth and sparse depth
371
+ depth_fitted, depth_sparse, found_sparse, col_img = get_fitted_dense_depth(
372
+ depth, colmap_rec, img_id, ade_seg, K, R, t
373
+ )
374
+
375
+ # Get UV coordinates and depth for each vertex
376
+ uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, 10)
377
+
378
+ # Backproject to 3D
379
+ vertices_3d = project_vertices_to_3d(uv, depth_vert, col_img, K, R, t)
380
+
381
+ return vertices_3d
382
+
383
+
384
  def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
385
  """
386
  Predict 3D wireframe from a dataset entry.
387
  """
388
  good_entry = convert_entry_to_human_readable(entry)
389
+ colmap_rec = good_entry['colmap_binary']
390
+
391
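+ # Reset every COLMAP point to black; points that project into the apex/eave masks are re-colored red per image below, purely for the visualization.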
+ colmap_pcloud = []
392
+ for i, p3D in colmap_rec.points3D.items():
393
+ p3D.color = np.array([0, 0, 0])
394
+ colmap_pcloud.append(p3D)
395
+
396
  vert_edge_per_image = {}
397
  for i, (gest, depth, K, R, t, img_id, ade_seg) in enumerate(zip(good_entry['gestalt'],
398
  good_entry['depth'],
 
402
  good_entry['image_ids'],
403
  good_entry['ade'] # Added ade20k segmentation
404
  )):
405
+ # Convert camera parameters to numpy arrays
406
  K = np.array(K)
407
  R = np.array(R)
408
  t = np.array(t)
409
+
410
  # Resize gestalt segmentation to match depth map size
411
  depth_size = (np.array(depth).shape[1], np.array(depth).shape[0]) # W, H
412
  gest_seg = gest.resize(depth_size)
413
  gest_seg_np = np.array(gest_seg).astype(np.uint8)
414
 
415
+ pcloud_segmented, pcloud_idxs = extract_segmented_pcloud(gest_seg_np, colmap_rec, img_id, ade_seg, depth, K=K, R=R, t=t)
416
+ for pid, p3D in colmap_rec.points3D.items():
417
+ if pid in pcloud_idxs:
418
+ p3D.color = np.array([255, 0, 0])
419
+
420
  # Get 2D vertices and edges first
421
+ vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=20.)
422
+
423
+ gt_verts = []
424
+ #gt_verts, gt_connects, gt_verts3d = get_gt_vertices_and_edges(good_entry, i, depth, colmap_rec, K, R, t, img_id, ade_seg)
425
+ #vertices, connections = gt_verts, gt_connects
426
+
427
+ if False:
428
+ gest.save(f'gestalt/{img_id}.png')
429
+ # Save ADE20k segmentation
430
+ # ade_seg is already a PIL Image
431
+ try:
432
+ ade_seg.save(f'ade_segmentations/{img_id}_ade.png')
433
+ except Exception as e:
434
+ print(f"Could not save ADE segmentation for {img_id}: {e}")
435
+ save_gestalt_with_proj(gest_seg_np, gt_verts, img_id)
436
+ # Define a local helper function to draw crosses and save the image
437
+
438
+ # Draw crosses on the ADE segmentation image and save it
439
+ # 'vertices' here refers to gt_verts
440
+ draw_crosses_on_image(ade_seg, vertices, f'crosses_{img_id}.png', color=(0, 0, 0), size=5)
441
 
442
  # Check if we have enough to proceed
443
  if (len(vertices) < 2) or (len(connections) < 1):
 
447
 
448
  # Call the refactored function to get 3D points
449
  vertices_3d = create_3d_wireframe_single_image(
450
+ vertices, connections, depth, colmap_rec, img_id, ade_seg, K, R, t
451
  )
452
+ #vertices_3d = gt_verts3d
453
  # Store original 2D vertices, connections, and computed 3D points
454
  vert_edge_per_image[i] = vertices, connections, vertices_3d
455
+
456
+ # Visualize colored COLMAP point cloud with Open3D
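+ # Note: draw_geometries opens a blocking window; close it to continue processing.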
457
+
458
+ # Create Open3D point cloud from COLMAP reconstruction
459
+ pcd = o3d.geometry.PointCloud()
460
+
461
+ # Extract points and colors
462
+ points = []
463
+ colors = []
464
+ for p3D in colmap_rec.points3D.values():
465
+ points.append(p3D.xyz)
466
+ # Normalize color to [0,1] range for Open3D
467
+ colors.append(p3D.color / 255.0)
468
+
469
+ if points:
470
+ pcd.points = o3d.utility.Vector3dVector(np.array(points))
471
+ pcd.colors = o3d.utility.Vector3dVector(np.array(colors))
472
+
473
+ # Visualize the point cloud
474
+ o3d.visualization.draw_geometries([pcd], window_name="COLMAP Point Cloud")
475
 
476
  # Merge vertices from all images
477
  all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.5)
478
  all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d, keep_largest=False)
479
+ all_3d_vertices_clean, connections_3d_clean = prune_too_far(all_3d_vertices_clean, connections_3d_clean, colmap_rec, th = 1.5)
480
 
 
481
  if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
482
  print (f'Not enough vertices or connections in the 3D vertices')
483
  return empty_solution()
484
 
485
  return all_3d_vertices_clean, connections_3d_clean
486
+
487
+
488
+ def extract_segmented_pcloud(gest_seg_np, colmap_rec, img_id_substring, ade_seg, depth, K=None, R=None, t=None):
489
+ """
490
+ Build apex and eave-end masks from the gestalt segmentation and return the
491
+ COLMAP 3D points (and their ids) that project inside those masks and the house mask.
492
+ """
493
+ #--------------------------------------------------------------------------------
494
+ # Step A: Collect apex and eave_end vertices
495
+ #--------------------------------------------------------------------------------
496
+ if not isinstance(gest_seg_np, np.ndarray):
497
+ gest_seg_np = np.array(gest_seg_np)
498
+
499
+ # Apex
500
+ apex_color = np.array(gestalt_color_mapping['apex'])
501
+ apex_mask = cv2.inRange(gest_seg_np, apex_color-10, apex_color+10)
502
+
503
+ # Eave end
504
+ eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
505
+ eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-10, eave_end_color+10)
506
+
507
+ # Combined mask for apex and eave_end
508
+ combined_mask = cv2.bitwise_or(apex_mask, eave_end_mask)
509
+
510
+ H, W = gest_seg_np.shape[:2]
511
+
512
+ # 1) Find the matching COLMAP image to get its associated 3D points
513
+ # This part remains to identify which 3D points are relevant for this image view
514
+ found_img = None
515
+ for img_id_c, col_img_obj in colmap_rec.images.items(): # Renamed col_img to col_img_obj to avoid conflict
516
+ if img_id_substring in col_img_obj.name:
517
+ found_img = col_img_obj
518
+ break
519
+ if found_img is None:
520
+ print(f"Image substring {img_id_substring} not found in COLMAP.")
521
+ return np.empty((0, 3)), np.empty((0,), dtype=int)  # image not found, nothing to segment
522
+
523
+ # 2) Gather 3D points that this image sees (according to COLMAP)
524
+ points_xyz_world = []
525
+ points_idxs = []
526
+ for pid, p3D in colmap_rec.points3D.items():
527
+ if found_img.has_point3D(pid):
528
+ points_xyz_world.append(p3D.xyz) # world coords
529
+ points_idxs.append(pid)
530
+ if not points_xyz_world:
531
+ print(f"No 3D points associated with {found_img.name} in COLMAP.")
532
+ return np.empty((0, 3)), np.empty((0,), dtype=int)  # no 3D points for this view
533
+
534
+ points_xyz_world = np.array(points_xyz_world) # (N, 3)
535
+ points_idxs = np.array(points_idxs) # (N,)
536
+
537
+ # 3) Project points_xyz_world to camera coordinates using R, t
538
+ # points_cam = R @ points_xyz_world.T + t.reshape(3,1)
539
+ # points_cam = points_cam.T (N,3)
540
+ # More robustly:
541
+ points_xyz_world_h = np.hstack((points_xyz_world, np.ones((points_xyz_world.shape[0], 1)))) # (N, 4)
542
+
543
+ # World to Camera transformation matrix
544
+ world_to_cam_mat = np.eye(4)
545
+ world_to_cam_mat[:3, :3] = R
546
+ world_to_cam_mat[:3, 3] = t.flatten()
547
+
548
+ points_cam_h = (world_to_cam_mat @ points_xyz_world_h.T).T # (N, 4)
549
+ points_cam = points_cam_h[:, :3] / points_cam_h[:, 3, np.newaxis] # (N, 3) in camera coordinates
550
+
551
+ uv = []
552
+ valid_indices = [] # Track which original points are valid
553
+
554
+ for i in range(points_cam.shape[0]):
555
+ p_cam = points_cam[i]
556
+
557
+ # Ensure p_cam[2] (depth) is positive
558
+ if p_cam[2] <= 0:
559
+ continue
560
+
561
+ # Project to image plane using K
562
+ u_i = (K[0, 0] * p_cam[0] / p_cam[2]) + K[0, 2]
563
+ v_i = (K[1, 1] * p_cam[1] / p_cam[2]) + K[1, 2]
564
+
565
+ u_i_int = int(round(u_i))
566
+ v_i_int = int(round(v_i))
567
+
568
+ # Check in-bounds
569
+ if 0 <= u_i_int < W and 0 <= v_i_int < H:
570
+ uv.append((u_i_int, v_i_int))
571
+ valid_indices.append(i) # Store original index
572
+
573
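+ # Second projection pass using COLMAP's own camera model, for comparison with the K/R/t projection above (uv_colmap is not used further below).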
+ uv_colmap = []
574
+ valid_indices_colmap = []
575
+ for i, xyz in enumerate(points_xyz_world):
576
+ proj = found_img.project_point(xyz) # returns (u, v) in image coords or None
577
+ if proj is not None:
578
+ u_i, v_i = proj
579
+ u_i = int(round(u_i))
580
+ v_i = int(round(v_i))
581
+ # Check in-bounds
582
+ if 0 <= u_i < W and 0 <= v_i < H:
583
+ uv_colmap.append((u_i, v_i))
584
+ valid_indices_colmap.append(i) # Store original index
585
+
586
+ if not uv:
587
+ print(f"No points projected into image bounds for {img_id_substring} using K,R,t.")
588
+ return np.empty((0, 3)), np.empty((0,), dtype=int)  # nothing projected into this view
589
+
590
+ house_mask = get_house_mask(ade_seg)
591
+
592
+ uv = np.array(uv, dtype=int)
593
+ valid_indices = np.array(valid_indices)
594
+
595
+ # Filter points that fall within the apex or eave_end masks
596
+ filtered_points_xyz = []
597
+ filtered_point_idxs = []
598
+
599
+ for i, (u, v) in enumerate(uv):
600
+ # Check if this projected point falls within the combined maskvalid_indices
601
+ if combined_mask[v, u] > 0 and house_mask[v, u] > 0:
602
+ original_idx = valid_indices[i] # Get original index
603
+ filtered_points_xyz.append(points_xyz_world[original_idx])
604
+ filtered_point_idxs.append(points_idxs[original_idx])
605
+
606
+ filtered_points_xyz = np.array(filtered_points_xyz) if filtered_points_xyz else np.empty((0, 3))
607
+ filtered_point_idxs = np.array(filtered_point_idxs) if filtered_point_idxs else np.empty((0,))
608
+
609
+ '''
610
+ depth_fitted, depth_sparse, _, col_img = get_fitted_dense_depth(depth, colmap_rec, img_id_substring, ade_seg, K, R, t)
611
+
612
+ # Segment the depth_fitted to get points in apex/eave_end regions
613
+ segmented_points_3d = []
614
+
615
+ # Get coordinates where the combined mask is active
616
+ mask_coords = np.where(combined_mask > 0)
617
+ v_coords, u_coords = mask_coords
618
+
619
+ # Also apply house mask for additional filtering
620
+ house_coords = np.where(house_mask > 0)
621
+ house_v, house_u = house_coords
622
+
623
+ # Find intersection of combined_mask and house_mask
624
+ valid_mask = np.logical_and(combined_mask > 0, house_mask > 0)
625
+ valid_coords = np.where(valid_mask)
626
+ v_valid, u_valid = valid_coords
627
+
628
+ if len(v_valid) > 0:
629
+ # Get depth values at these coordinates
630
+ depth_values = depth_fitted[v_valid, u_valid]
631
+
632
+ # Filter out zero or invalid depth values
633
+ valid_depth_mask = depth_values > 0
634
+ if np.any(valid_depth_mask):
635
+ u_final = u_valid[valid_depth_mask]
636
+ v_final = v_valid[valid_depth_mask]
637
+ depth_final = depth_values[valid_depth_mask]
638
+
639
+ # Create UV coordinates for backprojection
640
+ uv_depth = np.column_stack((u_final, v_final))
641
+
642
+ # Backproject to 3D world coordinates
643
+ segmented_points_3d = project_vertices_to_3d(uv_depth, depth_final, col_img, K, R, t)
644
+ '''
645
+ segmented_points_3d = []
646
+
647
+ # Visualize with the segmented depth points in blue
648
+ pcd_all = o3d.geometry.PointCloud()
649
+ pcd_filtered = o3d.geometry.PointCloud()
650
+ pcd_depth = o3d.geometry.PointCloud()
651
+
652
+ # All points in gray
653
+ all_points = []
654
+ all_colors = []
655
+ for p3D in colmap_rec.points3D.values():
656
+ all_points.append(p3D.xyz)
657
+ all_colors.append([0.5, 0.5, 0.5]) # Gray color
658
+
659
+ if all_points:
660
+ pcd_all.points = o3d.utility.Vector3dVector(np.array(all_points))
661
+ pcd_all.colors = o3d.utility.Vector3dVector(np.array(all_colors))
662
+
663
+ # Filtered COLMAP points in red
664
+ if len(filtered_points_xyz) > 0:
665
+ pcd_filtered.points = o3d.utility.Vector3dVector(filtered_points_xyz)
666
+ pcd_filtered.colors = o3d.utility.Vector3dVector(np.full((len(filtered_points_xyz), 3), [1.0, 0.0, 0.0]))
667
+
668
+ # Segmented depth points in blue
669
+ if len(segmented_points_3d) > 0:
670
+ pcd_depth.points = o3d.utility.Vector3dVector(segmented_points_3d)
671
+ pcd_depth.colors = o3d.utility.Vector3dVector(np.full((len(segmented_points_3d), 3), [0.0, 0.0, 1.0]))
672
+
673
+ # Visualize all point clouds
674
+ geometries = [pcd_all]
675
+ if len(filtered_points_xyz) > 0:
676
+ geometries.append(pcd_filtered)
677
+ if len(segmented_points_3d) > 0:
678
+ geometries.append(pcd_depth)
679
+
680
+ o3d.visualization.draw_geometries(geometries, window_name=f"Combined Point Cloud - {img_id_substring}")
681
+
682
+ return filtered_points_xyz, filtered_point_idxs
train.py CHANGED
@@ -23,16 +23,15 @@ show_visu = False
23
 
24
  idx = 0
25
  for a in ds['train']:
26
- colmap = read_colmap_rec(a['colmap_binary'])
27
-
28
  #plot_all_modalities(a)
29
-
30
  try:
31
  pred_vertices, pred_edges = predict_wireframe(a)
32
  except:
33
  pred_vertices, pred_edges = empty_solution()
34
 
35
  if show_visu:
 
36
  pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
37
  wireframe = plot_wireframe_local(None, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
38
  wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
 
23
 
24
  idx = 0
25
  for a in ds['train']:
 
 
26
  #plot_all_modalities(a)
27
+ #pred_vertices, pred_edges = predict_wireframe(a)
28
  try:
29
  pred_vertices, pred_edges = predict_wireframe(a)
30
  except:
31
  pred_vertices, pred_edges = empty_solution()
32
 
33
  if show_visu:
34
+ colmap = read_colmap_rec(a['colmap_binary'])
35
  pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
36
  wireframe = plot_wireframe_local(None, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
37
  wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
visu.py CHANGED
@@ -5,6 +5,7 @@ import pycolmap
5
  import tempfile,zipfile
6
  import io
7
  import open3d as o3d
 
8
 
9
  def _plotly_rgb_to_normalized_o3d_color(color_val) -> list[float]:
10
  """
@@ -28,6 +29,64 @@ def _plotly_rgb_to_normalized_o3d_color(color_val) -> list[float]:
28
  return [c/255.0 for c in color_val]
29
  raise ValueError(f"Unsupported color type for Open3D conversion: {type(color_val)}. Expected string or 3-element tuple/list.")
30
 
31
 
32
  def plot_reconstruction_local(
33
  fig: go.Figure,
 
5
  import tempfile,zipfile
6
  import io
7
  import open3d as o3d
8
+ from PIL import Image, ImageDraw
9
 
10
  def _plotly_rgb_to_normalized_o3d_color(color_val) -> list[float]:
11
  """
 
29
  return [c/255.0 for c in color_val]
30
  raise ValueError(f"Unsupported color type for Open3D conversion: {type(color_val)}. Expected string or 3-element tuple/list.")
31
 
32
+ def draw_crosses_on_image(image_pil, vertices_data, output_file_path, size=5, color=(0, 0, 0)):
33
+ """
34
+ Draws crosses on a PIL Image at specified vertex locations and saves it.
35
+ Args:
36
+ image_pil (PIL.Image.Image): The image to draw on.
37
+ vertices_data (list): List of dictionaries, each with an 'xy' key
38
+ holding [x, y] coordinates.
39
+ output_file_path (str): Path to save the modified image.
40
+ size (int): Size of the cross arms.
41
+ color (tuple): RGB color for the cross.
42
+ """
43
+ # Work on a copy to avoid modifying the original image
44
+ img_to_draw_on = image_pil.copy()
45
+ drawer = ImageDraw.Draw(img_to_draw_on)
46
+
47
+ for vert_info in vertices_data:
48
+ if 'xy' in vert_info:
49
+ x, y = vert_info['xy']
50
+ # Ensure coordinates are integers for drawing
51
+ x_int, y_int = int(round(x)), int(round(y))
52
+
53
+ # Draw horizontal line
54
+ drawer.line([(x_int - size, y_int), (x_int + size, y_int)], fill=color, width=1)
55
+ # Draw vertical line
56
+ drawer.line([(x_int, y_int - size), (x_int, y_int + size)], fill=color, width=1)
57
+
58
+ img_to_draw_on.save(output_file_path)
59
+
60
+ def save_gestalt_with_proj(gest_seg_np, gt_verts, img_id):
61
+ # Convert gest_seg_np (which is a numpy array) to a PIL Image
62
+ # Assuming gest_seg_np is a 2D grayscale or a 3-channel RGB image
63
+ if gest_seg_np.ndim == 2:
64
+ img_to_draw_on = Image.fromarray(gest_seg_np, mode='L')
65
+ elif gest_seg_np.ndim == 3 and gest_seg_np.shape[2] == 3:
66
+ img_to_draw_on = Image.fromarray(gest_seg_np, mode='RGB')
67
+ else:
68
+ # Fallback or error handling if the format is unexpected
69
+ # For simplicity, let's assume it can be converted directly or handle specific cases
70
+ img_to_draw_on = Image.fromarray(gest_seg_np.astype(np.uint8))
71
+
72
+ # Ensure the image is in a mode that allows color drawing (e.g., RGB)
73
+ if img_to_draw_on.mode == 'L':
74
+ img_to_draw_on = img_to_draw_on.convert('RGB')
75
+
76
+ draw = ImageDraw.Draw(img_to_draw_on)
77
+ cross_size = 5 # Size of the cross arms
78
+ cross_color = (0, 0, 0) # Black color for the cross
79
+
80
+ for vert_dict in gt_verts:
81
+ x, y = vert_dict['xy']
82
+ # Draw horizontal line of the cross
83
+ draw.line([(x - cross_size, y), (x + cross_size, y)], fill=cross_color, width=1)
84
+ # Draw vertical line of the cross
85
+ draw.line([(x, y - cross_size), (x, y + cross_size)], fill=cross_color, width=1)
86
+
87
+ # Save the image with drawn crosses
88
+ # You might want to use a different filename or path
89
+ img_to_draw_on.save(f'gestalt_cross/{img_id}.png')
90
 
91
  def plot_reconstruction_local(
92
  fig: go.Figure,