# Process the 'usm3d/hoho25k' dataset. For each sample:
#   1. Read the COLMAP reconstruction from the sample's binary blob.
#   2. Extract 3D point coordinates and their per-point colors.
#   3. Pair them with the ground-truth wireframe (vertices + edges).
#   4. Skip samples whose output file already exists or that have no 3D points.
#   5. Save point cloud, colors, ground truth, and sample id as a pickle.
# The dataset is shuffled first; a running count of saved samples is printed.

from datasets import load_dataset
from hoho2025.viz3d import *
import os
import numpy as np
import pickle
from utils import read_colmap_rec
from tqdm import tqdm

# NOTE(review): cache_dir is an empty placeholder — fill in a real path before
# running (an empty string makes load_dataset cache relative to the CWD).
ds = load_dataset("usm3d/hoho25k", cache_dir="", trust_remote_code=True)
ds = ds.shuffle()

# NOTE(review): output_dir is an empty placeholder — os.makedirs("") raises
# FileNotFoundError; set a real directory before running.
output_dir = ""
os.makedirs(output_dir, exist_ok=True)

counter = 0  # number of samples successfully written this run
for sample in tqdm(ds['train'], desc="Processing dataset"):
    order_id = sample['order_id']
    output_file = os.path.join(output_dir, f'sample_{order_id}.pkl')

    # Check for an existing output BEFORE parsing the COLMAP binary:
    # the existence test is cheap, the reconstruction parse is not.
    if os.path.exists(output_file):
        continue

    # Reconstruct the COLMAP model and pull its 3D point map (id -> point).
    colmap = read_colmap_rec(sample['colmap_binary'])
    points3d = colmap.points3D
    if not points3d:
        # Nothing reconstructed for this sample -> nothing worth saving.
        continue

    # Materialize the point list once and build (N, 3) coordinate and
    # color arrays in a single pass each (the original iterated .values()
    # twice).
    points = list(points3d.values())
    point_coords = np.array([p.xyz for p in points])
    point_colors = np.array([p.color for p in points])

    sample_data = {
        'point_cloud': point_coords,
        'point_colors': point_colors,
        'gt_vertices': np.array(sample['wf_vertices']),
        'gt_connections': np.array(sample['wf_edges']),
        'sample_id': order_id,
    }
    with open(output_file, 'wb') as f:
        pickle.dump(sample_data, f)
    counter += 1

print(f"Generated {counter} samples in {output_dir}")