jskvrna
/

hoho

Model card Files Files and versions

hoho / generate_pcloud_dataset.py

jskvrna's picture

Final submission code

9518589 6 months ago

history blame contribute delete

2.24 kB

	# This script processes the 'usm3d/hoho25k' dataset.
	# For each sample in the dataset, it performs the following steps:
	# 1. Reads COLMAP reconstruction data.
	# 2. Extracts 3D point coordinates and their corresponding colors.
	# 3. Retrieves ground truth wireframe vertices and edges.
	# 4. Skips processing if the output file already exists or if no 3D points are found.
	# 5. Saves the extracted point cloud, colors, ground truth data, and sample ID
	# into a pickle file in a specified output directory.
	# The script shuffles the dataset before processing and keeps track of
	# the number of samples successfully processed and saved.
	#
	from datasets import load_dataset
	from hoho2025.viz3d import *
	import os
	import numpy as np
	import pickle

	from utils import read_colmap_rec

	from tqdm import tqdm

	# Load the dataset and shuffle it so samples are visited in random order.
	# NOTE(review): the unseeded shuffle combined with the skip-if-exists check
	# below looks intended to let several runs share one output directory -
	# confirm with the author.
	ds = load_dataset("usm3d/hoho25k", cache_dir="<CACHE_DIR_PLACEHOLDER>", trust_remote_code=True)
	#ds = load_dataset("usm3d/hoho25k", cache_dir="<ALTERNATIVE_CACHE_DIR_PLACEHOLDER>", trust_remote_code=True)
	ds = ds.shuffle()

	# Create output directory
	output_dir = "<OUTPUT_DIR_PLACEHOLDER>"
	os.makedirs(output_dir, exist_ok=True)

	# Number of samples written during this run (skipped samples are not counted).
	counter = 0
	for a in tqdm(ds['train'], desc="Processing dataset"):
	    order_id = a['order_id']

	    # Decide whether to skip BEFORE parsing the COLMAP data: the output path
	    # depends only on the order id, so already-exported samples cost nothing.
	    output_file = os.path.join(output_dir, f'sample_{order_id}.pkl')
	    if os.path.exists(output_file):
	        continue

	    # Extract point cloud from COLMAP
	    colmap = read_colmap_rec(a['colmap_binary'])
	    points3d = colmap.points3D
	    if len(points3d) == 0:
	        # Nothing to export for a reconstruction without 3D points.
	        continue

	    # Convert to numpy arrays. The points are materialised once so that
	    # coordinates and colors are guaranteed to be in the same order.
	    points = list(points3d.values())
	    point_coords = np.array([point.xyz for point in points])
	    point_colors = np.array([point.color for point in points])

	    # Get ground truth data
	    gt_vertices = np.array(a['wf_vertices'])
	    gt_connections = np.array(a['wf_edges'])

	    # Save the data
	    sample_data = {
	        'point_cloud': point_coords,
	        'point_colors': point_colors,
	        'gt_vertices': gt_vertices,
	        'gt_connections': gt_connections,
	        'sample_id': order_id,
	    }

	    # Write to a process-specific temporary file and rename it into place.
	    # Because an existing output file is treated as "done" above, a partially
	    # written pickle left behind by an interrupted run would otherwise be
	    # skipped forever; os.replace publishes the file only once it is complete.
	    tmp_file = f'{output_file}.{os.getpid()}.tmp'
	    try:
	        with open(tmp_file, 'wb') as f:
	            pickle.dump(sample_data, f)
	        os.replace(tmp_file, output_file)
	    finally:
	        # After a successful replace the temp file no longer exists, so this
	        # only cleans up after a failed or interrupted write.
	        if os.path.exists(tmp_file):
	            os.remove(tmp_file)

	    counter += 1

	print(f"Generated {counter} samples in {output_dir}")