File size: 2,238 Bytes
33113fd
 
 
 
 
 
 
 
 
 
 
01b1bb1
 
 
 
 
 
 
 
 
 
9518589
 
01b1bb1
 
 
9518589
01b1bb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33113fd
9518589
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# This script processes the 'usm3d/hoho25k' dataset.
# For each sample in the dataset, it performs the following steps:
# 1. Reads COLMAP reconstruction data.
# 2. Extracts 3D point coordinates and their corresponding colors.
# 3. Retrieves ground truth wireframe vertices and edges.
# 4. Skips processing if the output file already exists or if no 3D points are found.
# 5. Saves the extracted point cloud, colors, ground truth data, and sample ID
#    into a pickle file in a specified output directory.
# The script shuffles the dataset before processing and keeps track of
# the number of samples successfully processed and saved.
#
from datasets import load_dataset
from hoho2025.viz3d import *
import os
import numpy as np
import pickle

from utils import read_colmap_rec

from tqdm import tqdm

# Load the hoho25k dataset (COLMAP binaries + wireframe ground truth per sample).
ds = load_dataset("usm3d/hoho25k", cache_dir="<CACHE_DIR_PLACEHOLDER>", trust_remote_code=True)
#ds = load_dataset("usm3d/hoho25k", cache_dir="<ALTERNATIVE_CACHE_DIR_PLACEHOLDER>", trust_remote_code=True)
ds = ds.shuffle()

# Create output directory
output_dir = "<OUTPUT_DIR_PLACEHOLDER>"
os.makedirs(output_dir, exist_ok=True)

counter = 0  # samples successfully written in this run
for a in tqdm(ds['train'], desc="Processing dataset"):
    order_id = a['order_id']

    # Skip already-exported samples BEFORE parsing the COLMAP binary:
    # read_colmap_rec is the expensive step, so checking first makes
    # resumed/repeated runs cheap instead of re-parsing every record.
    output_file = os.path.join(output_dir, f'sample_{order_id}.pkl')
    if os.path.exists(output_file):
        continue

    colmap = read_colmap_rec(a['colmap_binary'])

    # Extract point cloud from COLMAP; skip reconstructions with no 3D points.
    points3d = colmap.points3D
    if len(points3d) == 0:
        continue

    # Convert to numpy arrays — materialize the values once instead of
    # iterating the dict view twice.
    points = list(points3d.values())
    point_coords = np.array([p.xyz for p in points])
    point_colors = np.array([p.color for p in points])

    # Ground truth wireframe: 3D vertices and vertex-index edge pairs.
    gt_vertices = np.array(a['wf_vertices'])
    gt_connections = np.array(a['wf_edges'])

    # Bundle everything downstream code needs into one pickle per sample.
    sample_data = {
        'point_cloud': point_coords,
        'point_colors': point_colors,
        'gt_vertices': gt_vertices,
        'gt_connections': gt_connections,
        'sample_id': order_id
    }

    with open(output_file, 'wb') as f:
        pickle.dump(sample_data, f)

    counter += 1

print(f"Generated {counter} samples in {output_dir}")