Training UNets
Browse files- checkpoint/iter_000500/unet/config.json +73 -0
 - checkpoint/iter_000500/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_001000/unet/config.json +73 -0
 - checkpoint/iter_001000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_001500/unet/config.json +73 -0
 - checkpoint/iter_001500/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_002000/unet/config.json +73 -0
 - checkpoint/iter_002000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_004000/unet/config.json +73 -0
 - checkpoint/iter_004000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_006000/unet/config.json +73 -0
 - checkpoint/iter_006000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_008000/unet/config.json +73 -0
 - checkpoint/iter_008000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_010000/unet/config.json +73 -0
 - checkpoint/iter_010000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_012000/unet/config.json +73 -0
 - checkpoint/iter_012000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_014000/unet/config.json +73 -0
 - checkpoint/iter_014000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_016000/unet/config.json +73 -0
 - checkpoint/iter_016000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/iter_018000/unet/config.json +73 -0
 - checkpoint/iter_018000/unet/diffusion_pytorch_model.bin +3 -0
 - checkpoint/latest/iter_018000 +0 -0
 - checkpoint/latest/trainer.ckpt +3 -0
 - checkpoint/latest/unet/config.json +73 -0
 - checkpoint/latest/unet/diffusion_pytorch_model.bin +3 -0
 
    	
        checkpoint/iter_000500/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_000500/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:8c96483444cac22957d47a8017aa32fc20146c97665eb7ecb3fe3c1419f62eed
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_001000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_001000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:84f8820a70c7d3ed9e682b5a5840096090693023158d41cc04127adb92d4f65b
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_001500/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_001500/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:e32b2654051e00fbcd65ff7a59639ccf254d5fa7f0f850789680f8b32df2fc59
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_002000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_002000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:b85975fa552bd31467dd755442d766ffcdfba73570daa8e0009dd0fa9ed8f10f
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_004000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_004000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:ff15f98c690152121dc5730b1f21e02aee95ff5356f37a519dd3b4b314ae1874
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_006000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_006000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:29c60d53e3c7df2813482be020460fab99d0b1680d0a9824dd708a2f81f3a007
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_008000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_008000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:d1918f056a7fd7e6db0b89a70a79a3486c05f0064102c6fe2179d18218a5e14a
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_010000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_010000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:74e13b113cf12d2ae2afe8d07c07877ec1c977d6ee85244d18e71017c6b12081
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_012000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_012000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:dbf72754b219bdd5a4857190bb23cc3b3bd07b92071bf6ae3dab3529aafb7294
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_014000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_014000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:0c1f7b2fc46ec24021a6e2a18d78a1bbfcf8d204502c8c0d572a0aba5fbce63a
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_016000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_016000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:cf871632e510d2bf29dabf85db45f2a5dd8f3de1b0d32e2eee8a12a646366a3b
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/iter_018000/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/iter_018000/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:4128ef2e097bdd5c27925c4700f4fd6e6bdf8ee5060bc906871d43e3452a6b2b
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     | 
    	
        checkpoint/latest/iter_018000
    ADDED
    
    | 
         
            File without changes
         
     | 
    	
        checkpoint/latest/trainer.ckpt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:873a6328eb2de10a3821f41ef1c57211004661b595702be32d5ea89a65a386a6
         
     | 
| 3 | 
         
            +
            size 6928220088
         
     | 
    	
        checkpoint/latest/unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNet2DConditionModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.31.0",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "/home/pedro/code/hf/diffusers/marigold-segmentation/checkpoints/stable-diffusion-2/unet",
         
     | 
| 5 | 
         
            +
              "act_fn": "silu",
         
     | 
| 6 | 
         
            +
              "addition_embed_type": null,
         
     | 
| 7 | 
         
            +
              "addition_embed_type_num_heads": 64,
         
     | 
| 8 | 
         
            +
              "addition_time_embed_dim": null,
         
     | 
| 9 | 
         
            +
              "attention_head_dim": [
         
     | 
| 10 | 
         
            +
                5,
         
     | 
| 11 | 
         
            +
                10,
         
     | 
| 12 | 
         
            +
                20,
         
     | 
| 13 | 
         
            +
                20
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "attention_type": "default",
         
     | 
| 16 | 
         
            +
              "block_out_channels": [
         
     | 
| 17 | 
         
            +
                320,
         
     | 
| 18 | 
         
            +
                640,
         
     | 
| 19 | 
         
            +
                1280,
         
     | 
| 20 | 
         
            +
                1280
         
     | 
| 21 | 
         
            +
              ],
         
     | 
| 22 | 
         
            +
              "center_input_sample": false,
         
     | 
| 23 | 
         
            +
              "class_embed_type": null,
         
     | 
| 24 | 
         
            +
              "class_embeddings_concat": false,
         
     | 
| 25 | 
         
            +
              "conv_in_kernel": 3,
         
     | 
| 26 | 
         
            +
              "conv_out_kernel": 3,
         
     | 
| 27 | 
         
            +
              "cross_attention_dim": 1024,
         
     | 
| 28 | 
         
            +
              "cross_attention_norm": null,
         
     | 
| 29 | 
         
            +
              "down_block_types": [
         
     | 
| 30 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 31 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 32 | 
         
            +
                "CrossAttnDownBlock2D",
         
     | 
| 33 | 
         
            +
                "DownBlock2D"
         
     | 
| 34 | 
         
            +
              ],
         
     | 
| 35 | 
         
            +
              "downsample_padding": 1,
         
     | 
| 36 | 
         
            +
              "dropout": 0.0,
         
     | 
| 37 | 
         
            +
              "dual_cross_attention": false,
         
     | 
| 38 | 
         
            +
              "encoder_hid_dim": null,
         
     | 
| 39 | 
         
            +
              "encoder_hid_dim_type": null,
         
     | 
| 40 | 
         
            +
              "flip_sin_to_cos": true,
         
     | 
| 41 | 
         
            +
              "freq_shift": 0,
         
     | 
| 42 | 
         
            +
              "in_channels": 8,
         
     | 
| 43 | 
         
            +
              "layers_per_block": 2,
         
     | 
| 44 | 
         
            +
              "mid_block_only_cross_attention": null,
         
     | 
| 45 | 
         
            +
              "mid_block_scale_factor": 1,
         
     | 
| 46 | 
         
            +
              "mid_block_type": "UNetMidBlock2DCrossAttn",
         
     | 
| 47 | 
         
            +
              "norm_eps": 1e-05,
         
     | 
| 48 | 
         
            +
              "norm_num_groups": 32,
         
     | 
| 49 | 
         
            +
              "num_attention_heads": null,
         
     | 
| 50 | 
         
            +
              "num_class_embeds": null,
         
     | 
| 51 | 
         
            +
              "only_cross_attention": false,
         
     | 
| 52 | 
         
            +
              "out_channels": 4,
         
     | 
| 53 | 
         
            +
              "projection_class_embeddings_input_dim": null,
         
     | 
| 54 | 
         
            +
              "resnet_out_scale_factor": 1.0,
         
     | 
| 55 | 
         
            +
              "resnet_skip_time_act": false,
         
     | 
| 56 | 
         
            +
              "resnet_time_scale_shift": "default",
         
     | 
| 57 | 
         
            +
              "reverse_transformer_layers_per_block": null,
         
     | 
| 58 | 
         
            +
              "sample_size": 96,
         
     | 
| 59 | 
         
            +
              "time_cond_proj_dim": null,
         
     | 
| 60 | 
         
            +
              "time_embedding_act_fn": null,
         
     | 
| 61 | 
         
            +
              "time_embedding_dim": null,
         
     | 
| 62 | 
         
            +
              "time_embedding_type": "positional",
         
     | 
| 63 | 
         
            +
              "timestep_post_act": null,
         
     | 
| 64 | 
         
            +
              "transformer_layers_per_block": 1,
         
     | 
| 65 | 
         
            +
              "up_block_types": [
         
     | 
| 66 | 
         
            +
                "UpBlock2D",
         
     | 
| 67 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 68 | 
         
            +
                "CrossAttnUpBlock2D",
         
     | 
| 69 | 
         
            +
                "CrossAttnUpBlock2D"
         
     | 
| 70 | 
         
            +
              ],
         
     | 
| 71 | 
         
            +
              "upcast_attention": false,
         
     | 
| 72 | 
         
            +
              "use_linear_projection": true
         
     | 
| 73 | 
         
            +
            }
         
     | 
    	
        checkpoint/latest/unet/diffusion_pytorch_model.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:4128ef2e097bdd5c27925c4700f4fd6e6bdf8ee5060bc906871d43e3452a6b2b
         
     | 
| 3 | 
         
            +
            size 3463933622
         
     |