{
  "modality_dims": {
    "clip_l": 768,
    "clip_g": 1280,
    "t5_xl_l": 2048,
    "t5_xl_g": 2048
  },
  "modality_seq_lens": {
    "clip_l": 77,
    "clip_g": 77,
    "t5_xl_l": 512,
    "t5_xl_g": 512
  },
  "binding_config": {
    "clip_l": {
      "t5_xl_l": 0.3
    },
    "clip_g": {
      "t5_xl_g": 0.3
    },
    "t5_xl_l": {},
    "t5_xl_g": {}
  },
  "latent_dim": 2048,
  "seq_len": 77,
  "encoder_layers": 3,
  "decoder_layers": 3,
  "hidden_dim": 1024,
  "dropout": 0.1,
  "fusion_strategy": "adaptive_cantor",
  "fusion_heads": 8,
  "fusion_dropout": 0.1,
  "cantor_depth": 8,
  "cantor_local_window": 3,
  "alpha_init": 1.0,
  "beta_init": 0.3,
  "alpha_lr_scale": 0.1,
  "beta_lr_scale": 1.0,
  "beta_kl": 0.1,
  "beta_reconstruction": 1.0,
  "beta_cross_modal": 0.05,
  "beta_alpha_regularization": 0.01,
  "recon_type": "mse",
  "modality_recon_weights": {
    "clip_l": 1.0,
    "clip_g": 1.0,
    "t5_xl_l": 0.3,
    "t5_xl_g": 0.3
  },
  "use_kl_annealing": true,
  "kl_anneal_epochs": 10,
  "kl_start_beta": 0.0,
  "batch_size": 8,
  "num_epochs": 100,
  "learning_rate": 0.0001,
  "weight_decay": 1e-05,
  "gradient_clip": 1.0,
  "use_scheduler": true,
  "scheduler_type": "cosine",
  "num_samples": 10000,
  "synthetic_ratio": 0.15,
  "checkpoint_dir": "./checkpoints_lyra_adaptive_cantor",
  "save_every": 1000,
  "keep_last_n": 3,
  "hf_repo": "AbstractPhil/vae-lyra-xl-adaptive-cantor",
  "push_to_hub": true,
  "push_every": 2000,
  "auto_load_from_hub": true,
  "use_wandb": false,
  "wandb_project": "vae-lyra-adaptive-cantor",
  "wandb_entity": null,
  "log_every": 50,
  "device": "cuda",
  "mixed_precision": true,
  "seed": 42,
  "num_workers": 0
}