{
  "_class_name": "AutoencoderVQ",
  "_quantizer_name": "VQuantizer",
  "in_channels": 3,
  "latent_channels": 256,
  "layers_per_block": 4,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 1024,
  "num_vq_embeddings": 131072,
  "vq_embed_dim": 256,
  "temporal_stride": 1,
  "spatial_stride": 16,
  "attn_down_block": true,
  "attn_up_block": true,
  "decoder_dtype": "bfloat16",
  "block_out_channels": [
    256,
    256,
    512,
    512,
    1024
  ]
}