Spaces:
Running
Running
| arch: | |
| H_cycles: 2 | |
| H_layers: 4 | |
| L_cycles: 2 | |
| L_layers: 4 | |
| expansion: 4 | |
| halt_exploration_prob: 0.1 | |
| halt_max_steps: 16 | |
| hidden_size: 512 | |
| loss: | |
| loss_type: stablemax_cross_entropy | |
| name: losses@ACTLossHead | |
| name: hrm.hrm_act_v1@HierarchicalReasoningModel_ACTV1 | |
| num_heads: 8 | |
| pos_encodings: rope | |
| puzzle_emb_ndim: 128 | |
| beta1: 0.9 | |
| beta2: 0.95 | |
| checkpoint_every_eval: true | |
| checkpoint_path: checkpoints/Abstract_optimizer_processed ACT-torch/HierarchicalReasoningModel_ACTV1 ambrosial-orca | |
| data_path: data/abstract_optimizer_processed | |
| epochs: 20000 | |
| eval_interval: 1000 | |
| eval_save_outputs: [] | |
| global_batch_size: 16 | |
| lr: 0.0001 | |
| lr_min_ratio: 1.0 | |
| lr_warmup_steps: 2000 | |
| project_name: Abstract_optimizer_processed ACT-torch | |
| puzzle_emb_lr: 0.01 | |
| puzzle_emb_weight_decay: 0.1 | |
| run_name: HierarchicalReasoningModel_ACTV1 ambrosial-orca | |
| seed: 0 | |
| weight_decay: 0.1 | |