init project
app.py
CHANGED
@@ -45,7 +45,7 @@ pe3r = Models(device)
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
-                                 transparent_cams=False
+                                 transparent_cams=False):
     assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
     pts3d = to_numpy(pts3d)
     imgs = to_numpy(imgs)
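This hunk only clips the signature of _convert_scene_output_to_glb, but the function's job is the GLB export that the rest of the diff re-renders. A minimal sketch of a trimesh-based point-cloud export in that style; all names below are illustrative stand-ins, not the repo's exact code:

    # Illustrative sketch only: export an (N, 3) point array with matching
    # colors to a .glb file, roughly what a dust3r-style demo does.
    import trimesh

    def pointcloud_to_glb(outfile, pts, cols):
        scene = trimesh.Scene()
        # PointCloud takes flat vertex/color arrays; colors are RGB(A).
        scene.add_geometry(trimesh.PointCloud(pts.reshape(-1, 3),
                                              colors=cols.reshape(-1, 3)))
        scene.export(file_obj=outfile)  # format inferred from the .glb suffix
        return outfile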
@@ -87,7 +87,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     return outfile
 
 # # @spaces.GPU(duration=180)
-def get_3D_model_from_scene(outdir,
+def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
                             clean_depth=False, transparent_cams=False, cam_size=0.05):
     """
     extract 3D_model (glb file) from a reconstructed scene
@@ -245,7 +245,7 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform
+def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
     sam_mask=[]
     img_area = original_size[0] * original_size[1]
 
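get_mask_from_img_sam1 pairs a YOLOv8 detector with a box-promptable SAM variant (MobileSAMv2). The repo's wrapper is not shown in this diff; the same pattern written against the public ultralytics and segment-anything APIs, as an assumption-laden sketch:

    # Assumption-laden sketch: detector boxes -> box-prompted SAM masks, using
    # the public ultralytics / segment-anything APIs as stand-ins.
    from ultralytics import YOLO
    from segment_anything import SamPredictor, sam_model_registry

    def boxes_to_masks(image_rgb):
        boxes = YOLO("yolov8n.pt")(image_rgb)[0].boxes.xyxy.cpu().numpy()
        predictor = SamPredictor(sam_model_registry["vit_b"](checkpoint="sam_vit_b.pth"))
        predictor.set_image(image_rgb)  # uint8 HxWx3, RGB
        masks = []
        for box in boxes:
            m, _, _ = predictor.predict(box=box, multimask_output=False)
            masks.append(m[0])  # one boolean HxW mask per detection
        return masks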
@@ -298,7 +298,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
     return ret_mask
 
 @torch.no_grad
-def get_cog_feats(images
+def get_cog_feats(images):
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
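The pe3r.sam2 wrapper is initialized from in-memory frames here; the upstream facebookresearch/sam2 video predictor follows the same three-step pattern (init_state, add_new_mask on frame 0, propagate_in_video). A sketch assuming a frames directory and checkpoint on disk, with first_frame_masks as a hypothetical seed:

    # Sketch of the SAM2 video-predictor pattern used in get_cog_feats: seed
    # masks on the first frame, then propagate them through remaining frames.
    import torch
    from sam2.build_sam import build_sam2_video_predictor

    predictor = build_sam2_video_predictor("sam2_hiera_l.yaml", "sam2_hiera_large.pt")
    with torch.inference_mode():
        state = predictor.init_state(video_path="frames_dir")  # dir of JPEG frames
        for obj_id, mask in enumerate(first_frame_masks):      # hypothetical SAM1 seeds
            predictor.add_new_mask(inference_state=state, frame_idx=0,
                                   obj_id=obj_id, mask=mask)
        for frame_idx, obj_ids, mask_logits in predictor.propagate_in_video(state):
            per_frame_masks = (mask_logits > 0.0).cpu()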
@@ -309,7 +309,7 @@ def get_cog_feats(images, device):
     np_images = images.np_images
     np_images_size = images.np_images_size
 
-    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform
+    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform)
     for mask in sam1_masks:
         _, _, _ = pe3r.sam2.add_new_mask(
             inference_state=inference_state,
@@ -331,7 +331,7 @@ def get_cog_feats(images, device):
         if out_frame_idx == 0:
             continue
 
-        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform
+        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
 
         for sam1_mask in sam1_masks:
             flg = 1
@@ -434,7 +434,7 @@ def get_cog_feats(images, device):
     return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
 
 @spaces.GPU(duration=180)
-def get_reconstructed_scene(outdir,
+def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
                             as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
                             scenegraph_type, winsize, refid):
     """
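The signature edits in this commit follow one rule: every parameter left on a @spaces.GPU entry point must map to a Gradio input (outdir is pre-bound later with functools.partial), while device and silent appear to move to module scope. A sketch of that ZeroGPU pattern, with illustrative names:

    # Sketch of the HF Spaces ZeroGPU pattern this hunk relies on: the
    # decorated function receives only Gradio inputs; device lives at
    # module scope.
    import spaces
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"

    @spaces.GPU(duration=180)  # GPU is attached only while this call runs
    def reconstruct(filelist, niter):
        ...  # models, device, silent are read from globals, not arguments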
@@ -447,7 +447,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
     images = Images(filelist=filelist, device=device)
 
     # try:
-    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images
+    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images)
     imgs = load_images(images, rev_cog_seg_maps, size=512, verbose=not silent)
     # except Exception as e:
     #     rev_cog_seg_maps = []
@@ -495,7 +495,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
         print(e)
 
 
-    outfile = get_3D_model_from_scene(outdir,
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
                                       clean_depth, transparent_cams, cam_size)
 
     # also return rgb, depth and confidence imgs
@@ -519,21 +519,21 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
 
     return scene, outfile, imgs
 
-
-
-
+@spaces.GPU(duration=180)
+def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
+                             mask_sky, clean_depth, transparent_cams, cam_size):
 
-
-
-
-
-
-
-
-
-
-
-
+    texts = [text]
+    inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
+    inputs = {key: value.to(device) for key, value in inputs.items()}
+    with torch.no_grad():
+        text_feats = pe3r.siglip.get_text_features(**inputs)
+    text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
+    scene.render_image(text_feats, threshold)
+    scene.ori_imgs = scene.rendered_imgs
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
+                                      clean_depth, transparent_cams, cam_size)
+    return outfile
 
 
 def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
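The new get_3D_object_from_scene embeds the query with SigLIP, unit-normalizes it, and lets scene.render_image threshold per-point similarity against it. The text side, reproduced standalone with transformers (the checkpoint id is an assumption; SigLIP expects "max_length" padding, hence the flag):

    # Standalone sketch of the SigLIP text path used above. The model name is
    # an assumption; pe3r.siglip may load a different checkpoint.
    import torch
    from transformers import AutoTokenizer, SiglipModel

    model = SiglipModel.from_pretrained("google/siglip-base-patch16-224")
    tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224")

    inputs = tokenizer(text=["a red backpack"], padding="max_length", return_tensors="pt")
    with torch.no_grad():
        text_feats = model.get_text_features(**inputs)
    text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
    # point_feats: hypothetical (N, D) unit-normalized per-point features;
    # mask = (point_feats @ text_feats.T).squeeze(-1) > threshold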
@@ -558,9 +558,9 @@ def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
 
 
 with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
-    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname
-
-
+    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
+    model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
+    get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
 
     with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
         # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
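The three partials exist because Gradio passes only component values into a callback; functools.partial pre-binds the temp directory as the leading outdir argument. For instance:

    # Why the partials: callers supply only the remaining arguments.
    import functools

    def get_model(outdir, scene, min_conf_thr):
        print(outdir, scene, min_conf_thr)

    model_from_scene_fun = functools.partial(get_model, "/tmp/pe3r_demo")
    model_from_scene_fun("scene0", 3)  # == get_model("/tmp/pe3r_demo", "scene0", 3)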
@@ -622,32 +622,32 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
                                       mask_sky, clean_depth, transparent_cams, cam_size,
                                       scenegraph_type, winsize, refid],
                               outputs=[scene, outmodel, outgallery])
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        min_conf_thr.release(fn=model_from_scene_fun,
+                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                     clean_depth, transparent_cams, cam_size],
+                             outputs=outmodel)
+        cam_size.change(fn=model_from_scene_fun,
+                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                clean_depth, transparent_cams, cam_size],
+                        outputs=outmodel)
+        as_pointcloud.change(fn=model_from_scene_fun,
+                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                     clean_depth, transparent_cams, cam_size],
+                             outputs=outmodel)
+        mask_sky.change(fn=model_from_scene_fun,
+                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                clean_depth, transparent_cams, cam_size],
+                        outputs=outmodel)
+        clean_depth.change(fn=model_from_scene_fun,
+                           inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                   clean_depth, transparent_cams, cam_size],
+                           outputs=outmodel)
+        transparent_cams.change(model_from_scene_fun,
+                                inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                        clean_depth, transparent_cams, cam_size],
+                                outputs=outmodel)
+        find_btn.click(fn=get_3D_object_from_scene_fun,
+                       inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
+                               clean_depth, transparent_cams, cam_size],
+                       outputs=outmodel)
     demo.launch(show_error=True, share=None, server_name=None, server_port=None)
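All seven new listeners share one idea: the reconstructed scene lives in gradio.State, so display-only controls re-export the GLB without re-running inference. A minimal self-contained sketch of that wiring (a Textbox stands in for the app's 3D output component):

    # Minimal sketch of the wiring pattern added here: cached scene state,
    # re-rendered whenever a display control changes.
    import gradio

    def rerender(scene, cam_size):
        return f"render of {scene} at cam_size={cam_size}"  # stand-in for a GLB path

    with gradio.Blocks() as demo:
        scene = gradio.State(None)
        cam_size = gradio.Slider(0.001, 0.1, value=0.05, label="cam_size")
        outmodel = gradio.Textbox()
        cam_size.release(fn=rerender, inputs=[scene, cam_size], outputs=outmodel)

    demo.launch()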
|