init project
app.py
CHANGED
@@ -45,7 +45,7 @@ pe3r = Models(device)
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
-                                 transparent_cams=False
+                                 transparent_cams=False):
     assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
     pts3d = to_numpy(pts3d)
     imgs = to_numpy(imgs)
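This hunk only clips the signature of _convert_scene_output_to_glb, but the function's job is the GLB export that the rest of the diff re-renders. A minimal sketch of a trimesh-based point-cloud export in that style; all names below are illustrative stand-ins, not the repo's exact code:

    # Illustrative sketch only: export an (N, 3) point array with matching
    # colors to a .glb file, roughly what a dust3r-style demo does.
    import trimesh

    def pointcloud_to_glb(outfile, pts, cols):
        scene = trimesh.Scene()
        # PointCloud takes flat vertex/color arrays; colors are RGB(A).
        scene.add_geometry(trimesh.PointCloud(pts.reshape(-1, 3),
                                              colors=cols.reshape(-1, 3)))
        scene.export(file_obj=outfile)  # format inferred from the .glb suffix
        return outfile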
@@ -87,7 +87,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     return outfile
 
 # # @spaces.GPU(duration=180)
-def get_3D_model_from_scene(outdir,
+def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
                             clean_depth=False, transparent_cams=False, cam_size=0.05):
     """
     extract 3D_model (glb file) from a reconstructed scene
@@ -245,7 +245,7 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform
+def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
     sam_mask=[]
     img_area = original_size[0] * original_size[1]
 
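get_mask_from_img_sam1 pairs a YOLOv8 detector with a box-promptable SAM variant (MobileSAMv2). The repo's wrapper is not shown in this diff; the same pattern written against the public ultralytics and segment-anything APIs, as an assumption-laden sketch:

    # Assumption-laden sketch: detector boxes -> box-prompted SAM masks, using
    # the public ultralytics / segment-anything APIs as stand-ins.
    from ultralytics import YOLO
    from segment_anything import SamPredictor, sam_model_registry

    def boxes_to_masks(image_rgb):
        boxes = YOLO("yolov8n.pt")(image_rgb)[0].boxes.xyxy.cpu().numpy()
        predictor = SamPredictor(sam_model_registry["vit_b"](checkpoint="sam_vit_b.pth"))
        predictor.set_image(image_rgb)  # uint8 HxWx3, RGB
        masks = []
        for box in boxes:
            m, _, _ = predictor.predict(box=box, multimask_output=False)
            masks.append(m[0])  # one boolean HxW mask per detection
        return masks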
@@ -298,7 +298,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
     return ret_mask
 
 @torch.no_grad
-def get_cog_feats(images
+def get_cog_feats(images):
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
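The pe3r.sam2 wrapper is initialized from in-memory frames here; the upstream facebookresearch/sam2 video predictor follows the same three-step pattern (init_state, add_new_mask on frame 0, propagate_in_video). A sketch assuming a frames directory and checkpoint on disk, with first_frame_masks as a hypothetical seed:

    # Sketch of the SAM2 video-predictor pattern used in get_cog_feats: seed
    # masks on the first frame, then propagate them through remaining frames.
    import torch
    from sam2.build_sam import build_sam2_video_predictor

    predictor = build_sam2_video_predictor("sam2_hiera_l.yaml", "sam2_hiera_large.pt")
    with torch.inference_mode():
        state = predictor.init_state(video_path="frames_dir")  # dir of JPEG frames
        for obj_id, mask in enumerate(first_frame_masks):      # hypothetical SAM1 seeds
            predictor.add_new_mask(inference_state=state, frame_idx=0,
                                   obj_id=obj_id, mask=mask)
        for frame_idx, obj_ids, mask_logits in predictor.propagate_in_video(state):
            per_frame_masks = (mask_logits > 0.0).cpu()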
@@ -309,7 +309,7 @@ def get_cog_feats(images, device):
     np_images = images.np_images
     np_images_size = images.np_images_size
 
-    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform
+    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform)
     for mask in sam1_masks:
         _, _, _ = pe3r.sam2.add_new_mask(
             inference_state=inference_state,
@@ -331,7 +331,7 @@ def get_cog_feats(images, device):
         if out_frame_idx == 0:
             continue
 
-        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform
+        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
 
         for sam1_mask in sam1_masks:
             flg = 1
@@ -434,7 +434,7 @@ def get_cog_feats(images, device):
     return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
 
 @spaces.GPU(duration=180)
-def get_reconstructed_scene(outdir,
+def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
                             as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
                             scenegraph_type, winsize, refid):
     """
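The signature edits in this commit follow one rule: every parameter left on a @spaces.GPU entry point must map to a Gradio input (outdir is pre-bound later with functools.partial), while device and silent appear to move to module scope. A sketch of that ZeroGPU pattern, with illustrative names:

    # Sketch of the HF Spaces ZeroGPU pattern this hunk relies on: the
    # decorated function receives only Gradio inputs; device lives at
    # module scope.
    import spaces
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"

    @spaces.GPU(duration=180)  # GPU is attached only while this call runs
    def reconstruct(filelist, niter):
        ...  # models, device, silent are read from globals, not arguments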
@@ -447,7 +447,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
     images = Images(filelist=filelist, device=device)
 
     # try:
-    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images
+    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images)
     imgs = load_images(images, rev_cog_seg_maps, size=512, verbose=not silent)
     # except Exception as e:
     #     rev_cog_seg_maps = []
@@ -495,7 +495,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
         print(e)
 
 
-    outfile = get_3D_model_from_scene(outdir,
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
                                       clean_depth, transparent_cams, cam_size)
 
     # also return rgb, depth and confidence imgs
@@ -519,21 +519,21 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
 
     return scene, outfile, imgs
 
-
-
-
+@spaces.GPU(duration=180)
+def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
+                             mask_sky, clean_depth, transparent_cams, cam_size):
 
-
-
-
-
-
-
-
-
-
-
-
+    texts = [text]
+    inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
+    inputs = {key: value.to(device) for key, value in inputs.items()}
+    with torch.no_grad():
+        text_feats = pe3r.siglip.get_text_features(**inputs)
+    text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
+    scene.render_image(text_feats, threshold)
+    scene.ori_imgs = scene.rendered_imgs
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
+                                      clean_depth, transparent_cams, cam_size)
+    return outfile
 
 
 def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
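The new get_3D_object_from_scene embeds the query with SigLIP, unit-normalizes it, and lets scene.render_image threshold per-point similarity against it. The text side, reproduced standalone with transformers (the checkpoint id is an assumption; SigLIP expects "max_length" padding, hence the flag):

    # Standalone sketch of the SigLIP text path used above. The model name is
    # an assumption; pe3r.siglip may load a different checkpoint.
    import torch
    from transformers import AutoTokenizer, SiglipModel

    model = SiglipModel.from_pretrained("google/siglip-base-patch16-224")
    tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224")

    inputs = tokenizer(text=["a red backpack"], padding="max_length", return_tensors="pt")
    with torch.no_grad():
        text_feats = model.get_text_features(**inputs)
    text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
    # point_feats: hypothetical (N, D) unit-normalized per-point features;
    # mask = (point_feats @ text_feats.T).squeeze(-1) > threshold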
@@ -558,9 +558,9 @@ def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
 
 
 with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
-    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname
-
-
+    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
+    model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
+    get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
 
     with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
         # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
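The three partials exist because Gradio passes only component values into a callback; functools.partial pre-binds the temp directory as the leading outdir argument. For instance:

    # Why the partials: callers supply only the remaining arguments.
    import functools

    def get_model(outdir, scene, min_conf_thr):
        print(outdir, scene, min_conf_thr)

    model_from_scene_fun = functools.partial(get_model, "/tmp/pe3r_demo")
    model_from_scene_fun("scene0", 3)  # == get_model("/tmp/pe3r_demo", "scene0", 3)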
@@ -622,32 +622,32 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
                                       mask_sky, clean_depth, transparent_cams, cam_size,
                                       scenegraph_type, winsize, refid],
                               outputs=[scene, outmodel, outgallery])
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        min_conf_thr.release(fn=model_from_scene_fun,
+                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                     clean_depth, transparent_cams, cam_size],
+                             outputs=outmodel)
+        cam_size.change(fn=model_from_scene_fun,
+                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                clean_depth, transparent_cams, cam_size],
+                        outputs=outmodel)
+        as_pointcloud.change(fn=model_from_scene_fun,
+                             inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                     clean_depth, transparent_cams, cam_size],
+                             outputs=outmodel)
+        mask_sky.change(fn=model_from_scene_fun,
+                        inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                clean_depth, transparent_cams, cam_size],
+                        outputs=outmodel)
+        clean_depth.change(fn=model_from_scene_fun,
+                           inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                   clean_depth, transparent_cams, cam_size],
+                           outputs=outmodel)
+        transparent_cams.change(model_from_scene_fun,
+                                inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                        clean_depth, transparent_cams, cam_size],
+                                outputs=outmodel)
+        find_btn.click(fn=get_3D_object_from_scene_fun,
+                       inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
+                               clean_depth, transparent_cams, cam_size],
+                       outputs=outmodel)
     demo.launch(show_error=True, share=None, server_name=None, server_port=None)
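All seven new listeners share one idea: the reconstructed scene lives in gradio.State, so display-only controls re-export the GLB without re-running inference. A minimal self-contained sketch of that wiring (a Textbox stands in for the app's 3D output component):

    # Minimal sketch of the wiring pattern added here: cached scene state,
    # re-rendered whenever a display control changes.
    import gradio

    def rerender(scene, cam_size):
        return f"render of {scene} at cam_size={cam_size}"  # stand-in for a GLB path

    with gradio.Blocks() as demo:
        scene = gradio.State(None)
        cam_size = gradio.Slider(0.001, 0.1, value=0.05, label="cam_size")
        outmodel = gradio.Textbox()
        cam_size.release(fn=rerender, inputs=[scene, cam_size], outputs=outmodel)

    demo.launch()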
|