Cloned from stabilityai/TripoSR

Browse files

Files changed (6) hide show

.gitattributes +2 -0
README.md +56 -0
config.yaml +38 -0
figures/input800.mp4 +3 -0
figures/output_examples.mp4 +3 -0
model.ckpt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+figures/output_examples.mp4 filter=lfs diff=lfs merge=lfs -text
+figures/input800.mp4 filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+---
+datasets:
+- allenai/objaverse
+tags:
+- 3d
+extra_gated_fields:
+  Name: text
+  Email: text
+  Country: text
+  Organization or Affiliation: text
+  I ALLOW Stability AI to email me about new model releases: checkbox
+license: mit
+pipeline_tag: image-to-3d
+---
+> Try our new model: **SF3D** with several improvements such as faster generation and more game-ready assets.
+>
+> The model is available [here](https://huggingface.co/stabilityai/stable-fast-3d) and we also have a [demo](https://huggingface.co/spaces/stabilityai/stable-fast-3d).
+# TripoSR
+![](figures/input800.mp4)
+TripoSR is a fast and feed-forward 3D generative model developed in collaboration between Stability AI and Tripo AI.
+## Model Details
+### Model Description
+We closely follow the [LRM](https://arxiv.org/abs/2311.04400) network architecture for the model design, where TripoSR incorporates a series of technical advancements over the LRM model in terms of data curation as well as model and training improvements. For more technical details and evaluations, please refer to [our tech report](https://arxiv.org/abs/2403.02151).
+* **Developed by**: [Stability AI](https://stability.ai/), [Tripo AI](https://tripo3d.ai/)
+* **Model type**: Feed-forward 3D reconstruction from a single image
+* **License**: MIT
+* **Hardware**: We trained `TripoSR` for 5 days on 22 GPU nodes, each with 8 A100 40GB GPUs.
+### Model Sources
+* **Repository**: https://github.com/VAST-AI-Research/TripoSR
+* **Tech report**: https://arxiv.org/abs/2403.02151
+* **Demo**: https://huggingface.co/spaces/stabilityai/TripoSR
+### Training Dataset
+We use renders from the [Objaverse](https://objaverse.allenai.org/objaverse-1.0) dataset, utilizing our enhanced rendering method that more closely replicates the distribution of images found in the real world, significantly improving our model’s ability to generalize. We selected a carefully curated subset of the Objaverse dataset for the training data, which is available under the CC-BY license.
+## Usage
+* For usage instructions, please refer to our [TripoSR GitHub repository](https://github.com/VAST-AI-Research/TripoSR)
+* You can also try it in [our gradio demo](https://huggingface.co/spaces/stabilityai/TripoSR)
+### Misuse, Malicious Use, and Out-of-Scope Use
+The model should not be used to intentionally create or disseminate 3D models that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.

config.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+cond_image_size: 512
+image_tokenizer_cls: tsr.models.tokenizers.image.DINOSingleImageTokenizer
+image_tokenizer:
+  pretrained_model_name_or_path: "facebook/dino-vitb16"
+tokenizer_cls: tsr.models.tokenizers.triplane.Triplane1DTokenizer
+tokenizer:
+  plane_size: 32
+  num_channels: 1024
+backbone_cls: tsr.models.transformer.transformer_1d.Transformer1D
+backbone:
+  in_channels: ${tokenizer.num_channels}
+  num_attention_heads: 16
+  attention_head_dim: 64
+  num_layers: 16
+  cross_attention_dim: 768
+post_processor_cls: tsr.models.network_utils.TriplaneUpsampleNetwork
+post_processor:
+  in_channels: 1024
+  out_channels: 40
+decoder_cls: tsr.models.network_utils.NeRFMLP
+decoder:
+  in_channels: 120 # 3 * 40
+  n_neurons: 64
+  n_hidden_layers: 9
+  activation: silu
+renderer_cls: tsr.models.nerf_renderer.TriplaneNeRFRenderer
+renderer:
+  radius: 0.87 # slightly larger than 0.5 * sqrt(3)
+  feature_reduction: concat
+  density_activation: exp
+  density_bias: -1.0
+  num_samples_per_ray: 128

figures/input800.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2da16c42c2e03139e297d6930bcff7a99c595cc100b545861519793c662500f
+size 148042

figures/output_examples.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c63a5fe7afea93549cc412fab612a1c2e5a46844fa75c5ff4eee892b9d3bbc4e
+size 2425685

model.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:429e2c6b22a0923967459de24d67f05962b235f79cde6b032aa7ed2ffcd970ee
+size 1677246742