{ "_class_name": "AutoencoderVQ", "_quantizer_name": "VQuantizer", "in_channels": 3, "latent_channels": 256, "layers_per_block": 4, "norm_num_groups": 32, "out_channels": 3, "sample_size": 1024, "num_vq_embeddings": 131072, "vq_embed_dim": 256, "temporal_stride": 1, "spatial_stride": 16, "attn_down_block": true, "attn_up_block": true, "decoder_dtype": "bfloat16", "block_out_channels": [ 256, 256, 512, 512, 1024 ] }