| cond_image_size: 512 | |
| image_tokenizer_cls: tsr.models.tokenizers.image.DINOSingleImageTokenizer | |
| image_tokenizer: | |
| pretrained_model_name_or_path: "facebook/dino-vitb16" | |
| tokenizer_cls: tsr.models.tokenizers.triplane.Triplane1DTokenizer | |
| tokenizer: | |
| plane_size: 32 | |
| num_channels: 1024 | |
| backbone_cls: tsr.models.transformer.transformer_1d.Transformer1D | |
| backbone: | |
| in_channels: ${tokenizer.num_channels} | |
| num_attention_heads: 16 | |
| attention_head_dim: 64 | |
| num_layers: 16 | |
| cross_attention_dim: 768 | |
| post_processor_cls: tsr.models.network_utils.TriplaneUpsampleNetwork | |
| post_processor: | |
| in_channels: 1024 | |
| out_channels: 40 | |
| decoder_cls: tsr.models.network_utils.NeRFMLP | |
| decoder: | |
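| in_channels: 120 # 3 * 40: three plane features of post_processor.out_channels (40) each, concatenated (renderer.feature_reduction: concat) | |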
| n_neurons: 64 | |
| n_hidden_layers: 9 | |
| activation: silu | |
| renderer_cls: tsr.models.nerf_renderer.TriplaneNeRFRenderer | |
| renderer: | |
| radius: 0.87 # slightly larger than 0.5 * sqrt(3) ~= 0.866 | |
| feature_reduction: concat | |
| density_activation: exp | |
| density_bias: -1.0 | |
| num_samples_per_ray: 128 |