Enhances FastPointNet architecture for improved accuracy.

Deepens the FastPointNet architecture by adding more convolutional layers and fully connected layers.

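Increases the number of parameters by widening and deepening the point-wise convolution stack and both prediction heads, with the aim of better feature extraction, generalization, and prediction accuracy. Note that although the change is described as adding residual-like and skip connections, the layers shown in this diff are chained sequentially; no additive or concatenated shortcut appears in the forward pass.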

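Introduces global average pooling in addition to global max pooling (the two pooled feature vectors are summed) and replaces the single dropout rate of 0.3 with separately defined rates of 0.2, 0.3, and 0.4 to limit overfitting.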

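Moves the invalid-sample filtering `collate_fn` used by the dataset loader from inside `train_pointnet` to module level, and disables augmentation (`augment=False`) when building the training dataset.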

Files changed (3) hide show

fast_pointnet.py +100 -68
predict.py +58 -5
train.py +11 -4

fast_pointnet.py CHANGED Viewed

@@ -12,6 +12,7 @@ class FastPointNet(nn.Module):
     """
     Fast PointNet implementation for 3D vertex prediction from point cloud patches.
     Takes 7D point clouds (x,y,z,r,g,b,filtered_flag) and predicts 3D vertex coordinates.
     """
     def __init__(self, input_dim=7, output_dim=3, max_points=1024, predict_score=True):
@@ -19,34 +20,46 @@ class FastPointNet(nn.Module):
         self.max_points = max_points
         self.predict_score = predict_score
-        # Point-wise MLPs
-        self.conv1 = nn.Conv1d(input_dim, 64, 1)
-        self.conv2 = nn.Conv1d(64, 128, 1)
-        self.conv3 = nn.Conv1d(128, 256, 1)
-        # Global feature extraction
-        self.conv4 = nn.Conv1d(256, 512, 1)
-        self.conv5 = nn.Conv1d(512, 1024, 1)
-        # Shared features
-        self.shared_fc = nn.Linear(1024, 512)
-        # Position prediction head
-        self.pos_fc1 = nn.Linear(512, 256)
-        self.pos_fc2 = nn.Linear(256, output_dim)
-        # Score prediction head (predicts distance to GT)
         if self.predict_score:
-            self.score_fc1 = nn.Linear(512, 256)
-            self.score_fc2 = nn.Linear(256, 128)
-            self.score_fc3 = nn.Linear(128, 1)  # Single score output
-        self.dropout = nn.Dropout(0.3)
-        self.bn1 = nn.BatchNorm1d(64)
-        self.bn2 = nn.BatchNorm1d(128)
-        self.bn3 = nn.BatchNorm1d(256)
-        self.bn4 = nn.BatchNorm1d(512)
         self.bn5 = nn.BatchNorm1d(1024)
     def forward(self, x):
         """
@@ -61,32 +74,47 @@ class FastPointNet(nn.Module):
         """
         batch_size = x.size(0)
-        # Point-wise feature extraction
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
-        x = F.relu(self.bn4(self.conv4(x)))
-        x = F.relu(self.bn5(self.conv5(x)))
-        # Global max pooling
-        x = torch.max(x, 2)[0]  # (batch_size, 1024)
-        # Shared features
-        shared_features = F.relu(self.shared_fc(x))
-        shared_features = self.dropout(shared_features)
-        # Position prediction
-        pos_features = F.relu(self.pos_fc1(shared_features))
-        pos_features = self.dropout(pos_features)
-        position = self.pos_fc2(pos_features)
         if self.predict_score:
-            # Score prediction (distance to GT)
-            score_features = F.relu(self.score_fc1(shared_features))
-            score_features = self.dropout(score_features)
-            score_features = F.relu(self.score_fc2(score_features))
-            score_features = self.dropout(score_features)
-            score = F.relu(self.score_fc3(score_features))  # Ensure positive distance
             return position, score
         else:
@@ -235,6 +263,25 @@ def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
     print(f"Saved {len(patches)} patches for entry {entry_id}")
 def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32, lr: float = 0.001,
                   score_weight: float = 0.1):
     """
@@ -252,28 +299,9 @@ def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, ba
     print(f"Training on device: {device}")
     # Create dataset and dataloader
-    dataset = PatchDataset(dataset_dir, max_points=1024, augment=True)
     print(f"Dataset loaded with {len(dataset)} samples")
-    # Create dataloader with custom collate function to filter invalid samples
-    def collate_fn(batch):
-        valid_batch = []
-        for patch_data, target, valid_mask, distance in batch:
-            # Filter out invalid samples (no valid points or dummy targets)
-            if valid_mask.sum() > 0 and not torch.all(target == 0):
-                valid_batch.append((patch_data, target, valid_mask, distance))
-        if len(valid_batch) == 0:
-            return None
-        # Stack valid samples
-        patch_data = torch.stack([item[0] for item in valid_batch])
-        targets = torch.stack([item[1] for item in valid_batch])
-        valid_masks = torch.stack([item[2] for item in valid_batch])
-        distances = torch.stack([item[3] for item in valid_batch])
-        return patch_data, targets, valid_masks, distances
     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
                            collate_fn=collate_fn, drop_last=True)
@@ -401,7 +429,7 @@ def load_pointnet_model(model_path: str, device: torch.device = None, predict_sc
     return model
-def predict_vertex_from_patch(model: FastPointNet, patch_7d: np.ndarray, device: torch.device = None) -> Tuple[np.ndarray, float]:
     """
     Predict 3D vertex coordinates and confidence score from a patch using trained PointNet.
@@ -418,7 +446,7 @@ def predict_vertex_from_patch(model: FastPointNet, patch_7d: np.ndarray, device:
     if device is None:
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    model.eval()
     # Prepare input
     max_points = 1024
@@ -443,6 +471,10 @@ def predict_vertex_from_patch(model: FastPointNet, patch_7d: np.ndarray, device:
             position, score = model(patch_tensor)
             position = position.cpu().numpy().squeeze()
             score = score.cpu().numpy().squeeze()
             return position, score
         else:
             position = model(patch_tensor)

     """
     Fast PointNet implementation for 3D vertex prediction from point cloud patches.
     Takes 7D point clouds (x,y,z,r,g,b,filtered_flag) and predicts 3D vertex coordinates.
+    Enhanced with deeper architecture and more parameters for better generalization.
     """
     def __init__(self, input_dim=7, output_dim=3, max_points=1024, predict_score=True):
         self.max_points = max_points
         self.predict_score = predict_score
+        # Enhanced point-wise MLPs with deeper architecture
+        self.conv1 = nn.Conv1d(input_dim, 128, 1)
+        self.conv2 = nn.Conv1d(128, 256, 1)
+        self.conv3 = nn.Conv1d(256, 512, 1)
+        self.conv4 = nn.Conv1d(512, 1024, 1)
+        # Additional layers for better feature extraction
+        self.conv5 = nn.Conv1d(1024, 1024, 1)
+        self.conv6 = nn.Conv1d(1024, 2048, 1)
+        # Larger shared features
+        self.shared_fc1 = nn.Linear(2048, 1024)
+        self.shared_fc2 = nn.Linear(1024, 512)
+        # Enhanced position prediction head
+        self.pos_fc1 = nn.Linear(512, 512)
+        self.pos_fc2 = nn.Linear(512, 256)
+        self.pos_fc3 = nn.Linear(256, 128)
+        self.pos_fc4 = nn.Linear(128, output_dim)
+        # Enhanced score prediction head
         if self.predict_score:
+            self.score_fc1 = nn.Linear(512, 512)
+            self.score_fc2 = nn.Linear(512, 256)
+            self.score_fc3 = nn.Linear(256, 128)
+            self.score_fc4 = nn.Linear(128, 64)
+            self.score_fc5 = nn.Linear(64, 1)
+        # Batch normalization layers
+        self.bn1 = nn.BatchNorm1d(128)
+        self.bn2 = nn.BatchNorm1d(256)
+        self.bn3 = nn.BatchNorm1d(512)
+        self.bn4 = nn.BatchNorm1d(1024)
         self.bn5 = nn.BatchNorm1d(1024)
+        self.bn6 = nn.BatchNorm1d(2048)
+        # Dropout with different rates
+        self.dropout_light = nn.Dropout(0.2)
+        self.dropout_medium = nn.Dropout(0.3)
+        self.dropout_heavy = nn.Dropout(0.4)
     def forward(self, x):
         """
         """
         batch_size = x.size(0)
+        # Enhanced point-wise feature extraction with residual-like connections
+        x1 = F.relu(self.bn1(self.conv1(x)))
+        x2 = F.relu(self.bn2(self.conv2(x1)))
+        x3 = F.relu(self.bn3(self.conv3(x2)))
+        x4 = F.relu(self.bn4(self.conv4(x3)))
+        x5 = F.relu(self.bn5(self.conv5(x4)))
+        x6 = F.relu(self.bn6(self.conv6(x5)))
+        # Global max pooling with additional global average pooling
+        max_pool = torch.max(x6, 2)[0]  # (batch_size, 2048)
+        avg_pool = torch.mean(x6, 2)    # (batch_size, 2048)
+        # Combine max and average pooling for richer global features
+        global_features = max_pool + avg_pool  # (batch_size, 2048)
+        # Enhanced shared features with residual connection
+        shared1 = F.relu(self.shared_fc1(global_features))
+        shared1 = self.dropout_light(shared1)
+        shared2 = F.relu(self.shared_fc2(shared1))
+        shared_features = self.dropout_medium(shared2)
+        # Enhanced position prediction with skip connections
+        pos1 = F.relu(self.pos_fc1(shared_features))
+        pos1 = self.dropout_light(pos1)
+        pos2 = F.relu(self.pos_fc2(pos1))
+        pos2 = self.dropout_medium(pos2)
+        pos3 = F.relu(self.pos_fc3(pos2))
+        pos3 = self.dropout_light(pos3)
+        position = self.pos_fc4(pos3)
         if self.predict_score:
+            # Enhanced score prediction
+            score1 = F.relu(self.score_fc1(shared_features))
+            score1 = self.dropout_light(score1)
+            score2 = F.relu(self.score_fc2(score1))
+            score2 = self.dropout_medium(score2)
+            score3 = F.relu(self.score_fc3(score2))
+            score3 = self.dropout_light(score3)
+            score4 = F.relu(self.score_fc4(score3))
+            score4 = self.dropout_light(score4)
+            score = F.relu(self.score_fc5(score4))  # Ensure positive distance
             return position, score
         else:
     print(f"Saved {len(patches)} patches for entry {entry_id}")
+# Create dataloader with custom collate function to filter invalid samples
+def collate_fn(batch):
+    """Collate a batch of samples, dropping the invalid ones.
+
+    Args:
+        batch: Iterable of (patch_data, target, valid_mask, distance) tuples;
+            presumably tensors produced by PatchDataset -- confirm there.
+
+    Returns:
+        A tuple (patch_data, targets, valid_masks, distances) in which each
+        entry is the torch.stack of that field over the kept samples, or
+        None when no sample in the batch passes the filter. Callers must
+        therefore handle a None batch.
+    """
+    valid_batch = []
+    for patch_data, target, valid_mask, distance in batch:
+        # Filter out invalid samples (no valid points or dummy targets)
+        if valid_mask.sum() > 0 and not torch.all(target == 0):
+            valid_batch.append((patch_data, target, valid_mask, distance))
+    # torch.stack cannot be called on an empty list, so an all-invalid batch
+    # is reported as None instead.
+    if len(valid_batch) == 0:
+        return None
+    # Stack valid samples
+    patch_data = torch.stack([item[0] for item in valid_batch])
+    targets = torch.stack([item[1] for item in valid_batch])
+    valid_masks = torch.stack([item[2] for item in valid_batch])
+    distances = torch.stack([item[3] for item in valid_batch])
+    return patch_data, targets, valid_masks, distances
 def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32, lr: float = 0.001,
                   score_weight: float = 0.1):
     """
     print(f"Training on device: {device}")
     # Create dataset and dataloader
+    dataset = PatchDataset(dataset_dir, max_points=1024, augment=False)
     print(f"Dataset loaded with {len(dataset)} samples")
     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
                            collate_fn=collate_fn, drop_last=True)
     return model
+def predict_vertex_from_patch(model: FastPointNet, patch: np.ndarray, device: torch.device = None) -> Tuple[np.ndarray, float]:
     """
     Predict 3D vertex coordinates and confidence score from a patch using trained PointNet.
     if device is None:
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    patch_7d = patch['patch_7d']  # (N, 7)
     # Prepare input
     max_points = 1024
             position, score = model(patch_tensor)
             position = position.cpu().numpy().squeeze()
             score = score.cpu().numpy().squeeze()
+            offset = patch['offset']
+            position -= offset
             return position, score
         else:
             position = model(patch_tensor)

predict.py CHANGED Viewed

@@ -11,7 +11,7 @@ import cv2
 import open3d as o3d
 from visu import plot_reconstruction_local, plot_wireframe_local, plot_bpo_cameras_from_entry_local
 import pyvista as pv
-from fast_pointnet import save_patches_dataset
 GENERATE_DATASET = True
 #DATASET_DIR = '/home/skvrnjan/personal/hohocustom/'
@@ -388,7 +388,53 @@ def create_3d_wireframe_single_image(vertices: List[dict],
     return vertices_3d
-def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
     """
     Predict 3D wireframe from a dataset entry.
     """
@@ -421,6 +467,13 @@ def predict_wireframe(entry) -> Tuple[np.ndarray, List[int]]:
             continue
         vertices, connections, vertices_3d = vertices_ours, connections_ours, vertices_3d_ours
         # Get 2D vertices and edges first
         #vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.)
@@ -908,7 +961,7 @@ def generate_patches(colmap_rec, filtered_points_idxs, frame, filtered_vertices,
             if pid in point_idxs:
                 patch_7d[i, 6] = 1.0
             else:
-                patch_7d[i, 6] = 0.0
         if filtered_vertices[group_idx] is not None:
             initial_pred = filtered_vertices[group_idx] + offset
@@ -961,7 +1014,7 @@ def generate_patches(colmap_rec, filtered_points_idxs, frame, filtered_vertices,
                 plotter.add_mesh(pred_sphere, color="orange", opacity=0.5)
             plotter.show(title=f"Patch {group_idx}")
     return patches
 def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_seg, depth, K=None, R=None, t=None, frame=None):
@@ -991,7 +1044,7 @@ def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_se
     if len(uv) == 0:
         print(f"No points projected into image bounds for {img_id_substring} using K,R,t.")
-        return [], [], []
     house_mask = get_house_mask(ade_seg)

 import open3d as o3d
 from visu import plot_reconstruction_local, plot_wireframe_local, plot_bpo_cameras_from_entry_local
 import pyvista as pv
+from fast_pointnet import save_patches_dataset, predict_vertex_from_patch
 GENERATE_DATASET = True
 #DATASET_DIR = '/home/skvrnjan/personal/hohocustom/'
     return vertices_3d
+def visu_patch_and_pred(patch, pred):
+    """Show one patch's points with its GT, initial and predicted vertices.
+
+    Args:
+        patch: Mapping with required keys 'patch_7d' (array whose first three
+            columns are used as xyz), 'filtered_point_ids' and 'point_ids',
+            and optional keys 'offset', 'assigned_gt_vertex' and
+            'initial_pred'.
+        pred: Predicted vertex position to draw, or None to skip it. It is
+            drawn as given; no offset is subtracted from it here.
+
+    Returns:
+        None. The function builds a PyVista plotter and calls its show().
+    """
+    plotter = pv.Plotter()
+
+    # The optional entries were previously subtracted unconditionally, which
+    # raised TypeError on a missing key before the None checks below could
+    # run. A missing offset is treated as "no shift".
+    offset = patch.get('offset')
+    if offset is None:
+        offset = 0.0
+    else:
+        offset = np.asarray(offset)
+
+    # Point cloud for this patch, moved back by the offset.
+    patch_points_3d = np.array(patch['patch_7d'][:, :3]) - offset
+    patch_cloud = pv.PolyData(patch_points_3d)
+
+    point_idxs = patch['filtered_point_ids']  # Point ids flagged as filtered
+    patch_point_ids = patch['point_ids']  # One id per point in the patch
+
+    assigned_gt_vertex = patch.get('assigned_gt_vertex')  # GT vertex if available
+    if assigned_gt_vertex is not None:
+        assigned_gt_vertex = np.asarray(assigned_gt_vertex) - offset
+    initial_pred = patch.get('initial_pred')  # Initial prediction if available
+    if initial_pred is not None:
+        initial_pred = np.asarray(initial_pred) - offset
+
+    # Color points: red for filtered points, blue for other points
+    patch_point_colors = [
+        [255, 0, 0] if pid in point_idxs else [0, 0, 255]
+        for pid in patch_point_ids
+    ]
+    patch_cloud["colors"] = np.array(patch_point_colors)
+    plotter.add_mesh(patch_cloud, scalars="colors", rgb=True, point_size=8, render_points_as_spheres=True)
+
+    # One translucent sphere per available vertex: green = GT,
+    # orange = initial prediction, red = model prediction.
+    if assigned_gt_vertex is not None:
+        gt_sphere = pv.Sphere(radius=0.1, center=assigned_gt_vertex)
+        plotter.add_mesh(gt_sphere, color="green", opacity=0.5)
+    if initial_pred is not None:
+        initial_sphere = pv.Sphere(radius=0.1, center=initial_pred)
+        plotter.add_mesh(initial_sphere, color="orange", opacity=0.5)
+    if pred is not None:
+        pred_sphere = pv.Sphere(radius=0.1, center=pred)
+        plotter.add_mesh(pred_sphere, color="red", opacity=0.5)
+    plotter.show(title="Patch x")
+def predict_wireframe(entry, pnet_model) -> Tuple[np.ndarray, List[int]]:
     """
     Predict 3D wireframe from a dataset entry.
     """
             continue
+        for patch in patches:
+            pred_vertex, pred_dist = predict_vertex_from_patch(pnet_model, patch, device='cuda')
+            visu_patch_and_pred(patch, pred_vertex)
+            x = 0
         vertices, connections, vertices_3d = vertices_ours, connections_ours, vertices_3d_ours
         # Get 2D vertices and edges first
         #vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.)
             if pid in point_idxs:
                 patch_7d[i, 6] = 1.0
             else:
+                patch_7d[i, 6] = -1.0
         if filtered_vertices[group_idx] is not None:
             initial_pred = filtered_vertices[group_idx] + offset
                 plotter.add_mesh(pred_sphere, color="orange", opacity=0.5)
             plotter.show(title=f"Patch {group_idx}")
     return patches
 def our_get_vertices_and_edges(gest_seg_np, colmap_rec, img_id_substring, ade_seg, depth, K=None, R=None, t=None, frame=None):
     if len(uv) == 0:
         print(f"No points projected into image bounds for {img_id_substring} using K,R,t.")
+        return [], [], [], []
     house_mask = get_house_mask(ade_seg)

train.py CHANGED Viewed

@@ -5,6 +5,7 @@ import pycolmap
 import tempfile,zipfile
 import io
 import open3d as o3d
 from visu import plot_reconstruction_local, plot_wireframe_local, plot_bpo_cameras_from_entry_local, _plotly_rgb_to_normalized_o3d_color
 from utils import read_colmap_rec, empty_solution
@@ -13,22 +14,28 @@ from utils import read_colmap_rec, empty_solution
 from hoho2025.metric_helper import hss
 from predict import predict_wireframe
 from tqdm import tqdm
-ds = load_dataset("usm3d/hoho25k", cache_dir='/home/skvrnjan/personal/hoho25k', trust_remote_code=True)
 ds = ds.shuffle()
 scores_hss = []
 scores_f1 = []
 scores_iou = []
-show_visu = False
 idx = 0
 for a in tqdm(ds['train'], desc="Processing dataset"):
     #plot_all_modalities(a)
-    #pred_vertices, pred_edges = predict_wireframe(a)
     try:
-        pred_vertices, pred_edges = predict_wireframe(a)
     except:
         pred_vertices, pred_edges = empty_solution()

 import tempfile,zipfile
 import io
 import open3d as o3d
+import os
 from visu import plot_reconstruction_local, plot_wireframe_local, plot_bpo_cameras_from_entry_local, _plotly_rgb_to_normalized_o3d_color
 from utils import read_colmap_rec, empty_solution
 from hoho2025.metric_helper import hss
 from predict import predict_wireframe
 from tqdm import tqdm
+from fast_pointnet import load_pointnet_model
+import torch
+ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
 ds = ds.shuffle()
 scores_hss = []
 scores_f1 = []
 scores_iou = []
+show_visu = True
+device = "cuda" if torch.cuda.is_available() else "cpu"
+pnet_model = load_pointnet_model(model_path="/home/skvrnjan/personal/hoho_pnet/initial_epoch_100.pth", device=device, predict_score=True)
 idx = 0
 for a in tqdm(ds['train'], desc="Processing dataset"):
     #plot_all_modalities(a)
+    #pred_vertices, pred_edges = predict_wireframe(a, pnet_model)
     try:
+        pred_vertices, pred_edges = predict_wireframe(a, pnet_model)
     except:
         pred_vertices, pred_edges = empty_solution()