|
|
arch: |
|
|
H_cycles: 3 |
|
|
H_layers: 0 |
|
|
L_cycles: 6 |
|
|
L_layers: 2 |
|
|
expansion: 4 |
|
|
ffn_hybrid: true |
|
|
ffn_hybrid_gate: scalar |
|
|
ffn_kron_chunk_k: 16 |
|
|
ffn_kron_chunk_t: 4096 |
|
|
ffn_kron_in_factors: &id001 !!python/object:omegaconf.listconfig.ListConfig |
|
|
_content: |
|
|
- !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: 0 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: &id002 !!python/name:typing.Any '' |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- &id003 !!python/name:builtins.dict '' |
|
|
_parent: *id001 |
|
|
_val: 32 |
|
|
- !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: 1 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id001 |
|
|
_val: 16 |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_in_factors |
|
|
key_type: &id006 !!python/name:builtins.int '' |
|
|
object_type: &id007 !!python/name:builtins.list '' |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: &id004 !!python/object:omegaconf.dictconfig.DictConfig |
|
|
_content: |
|
|
H_cycles: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: H_cycles |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 3 |
|
|
H_layers: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: H_layers |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 0 |
|
|
L_cycles: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: L_cycles |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 6 |
|
|
L_layers: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: L_layers |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 2 |
|
|
expansion: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: expansion |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 4 |
|
|
ffn_hybrid: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_hybrid |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: true |
|
|
ffn_hybrid_gate: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_hybrid_gate |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: scalar |
|
|
ffn_kron_chunk_k: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_chunk_k |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 16 |
|
|
ffn_kron_chunk_t: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_chunk_t |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 4096 |
|
|
ffn_kron_in_factors: *id001 |
|
|
ffn_kron_out_factors: &id005 !!python/object:omegaconf.listconfig.ListConfig |
|
|
_content: |
|
|
- !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: 0 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id005 |
|
|
_val: 48 |
|
|
- !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: 1 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id005 |
|
|
_val: 32 |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_out_factors |
|
|
key_type: *id006 |
|
|
object_type: *id007 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
ffn_kron_share_gate_up: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_share_gate_up |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: true |
|
|
ffn_kron_terms: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_kron_terms |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 96 |
|
|
ffn_rank: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_rank |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 128 |
|
|
ffn_type: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ffn_type |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: mlp |
|
|
forward_dtype: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: forward_dtype |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: bfloat16 |
|
|
halt_exploration_prob: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: halt_exploration_prob |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 0.1 |
|
|
halt_max_steps: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: halt_max_steps |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 16 |
|
|
hidden_size: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: hidden_size |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 512 |
|
|
loss: &id008 !!python/object:omegaconf.dictconfig.DictConfig |
|
|
_content: |
|
|
loss_type: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: loss_type |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id008 |
|
|
_val: stablemax_cross_entropy |
|
|
name: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: name |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id008 |
|
|
_val: losses@ACTLossHead |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: loss |
|
|
key_type: *id002 |
|
|
object_type: *id003 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
mixer: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: mixer |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: attention |
|
|
mlp_t: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: mlp_t |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: true |
|
|
name: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: name |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1 |
|
|
no_ACT_continue: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: no_ACT_continue |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: true |
|
|
num_heads: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: num_heads |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 8 |
|
|
pos_encodings: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: pos_encodings |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: rope |
|
|
puzzle_emb_len: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: puzzle_emb_len |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: 16 |
|
|
puzzle_emb_ndim: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: puzzle_emb_ndim |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id004 |
|
|
_val: ${.hidden_size} |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: arch |
|
|
key_type: *id002 |
|
|
object_type: *id003 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: &id009 !!python/object:omegaconf.dictconfig.DictConfig |
|
|
_content: |
|
|
arch: *id004 |
|
|
beta1: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: beta1 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0.9 |
|
|
beta2: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: beta2 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0.95 |
|
|
checkpoint_every_eval: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: checkpoint_every_eval |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: true |
|
|
data_paths: &id010 !!python/object:omegaconf.listconfig.ListConfig |
|
|
_content: |
|
|
- !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: 0 |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id010 |
|
|
_val: data/sudoku-extreme-1k-aug-1000 |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: data_paths |
|
|
key_type: *id006 |
|
|
object_type: *id007 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
data_paths_test: !!python/object:omegaconf.listconfig.ListConfig |
|
|
_content: [] |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: data_paths_test |
|
|
key_type: *id006 |
|
|
object_type: *id007 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
ema: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ema |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: true |
|
|
ema_rate: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: ema_rate |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0.999 |
|
|
epochs: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: epochs |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 50000 |
|
|
eval_interval: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: eval_interval |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 5000 |
|
|
evaluators: !!python/object:omegaconf.listconfig.ListConfig |
|
|
_content: [] |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: evaluators |
|
|
key_type: *id006 |
|
|
object_type: *id007 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
freeze_weights: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: freeze_weights |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: false |
|
|
global_batch_size: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: global_batch_size |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 768 |
|
|
lr: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: lr |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0.0001 |
|
|
lr_min_ratio: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: lr_min_ratio |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 1.0 |
|
|
lr_warmup_steps: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: lr_warmup_steps |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 2000 |
|
|
min_eval_interval: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: min_eval_interval |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0 |
|
|
puzzle_emb_lr: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: puzzle_emb_lr |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0.0001 |
|
|
puzzle_emb_weight_decay: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: puzzle_emb_weight_decay |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 1.0 |
|
|
run_name: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: run_name |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: pretrain_hybrid_sudoku |
|
|
seed: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: seed |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 0 |
|
|
weight_decay: !!python/object:omegaconf.nodes.AnyNode |
|
|
_metadata: !!python/object:omegaconf.base.Metadata |
|
|
flags: {} |
|
|
flags_root: false |
|
|
key: weight_decay |
|
|
object_type: null |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
- *id003 |
|
|
_parent: *id009 |
|
|
_val: 1.0 |
|
|
_metadata: !!python/object:omegaconf.base.ContainerMetadata |
|
|
element_type: *id002 |
|
|
flags: |
|
|
struct: true |
|
|
flags_root: false |
|
|
key: null |
|
|
key_type: *id002 |
|
|
object_type: *id003 |
|
|
optional: true |
|
|
ref_type: *id002 |
|
|
resolver_cache: !!python/object/apply:collections.defaultdict |
|
|
args: |
|
|
- *id003 |
|
|
dictitems: |
|
|
now: |
|
|
? !!python/tuple |
|
|
- '%H-%M-%S' |
|
|
: 09-39-56 |
|
|
? !!python/tuple |
|
|
- '%Y-%m-%d' |
|
|
: '2025-10-11' |
|
|
_parent: null |
|
|
ffn_kron_out_factors: *id005 |
|
|
ffn_kron_share_gate_up: true |
|
|
ffn_kron_terms: 96 |
|
|
ffn_rank: 128 |
|
|
ffn_type: mlp |
|
|
forward_dtype: bfloat16 |
|
|
halt_exploration_prob: 0.1 |
|
|
halt_max_steps: 16 |
|
|
hidden_size: 512 |
|
|
loss: |
|
|
loss_type: stablemax_cross_entropy |
|
|
name: losses@ACTLossHead |
|
|
mixer: attention |
|
|
mlp_t: true |
|
|
name: recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1 |
|
|
no_ACT_continue: true |
|
|
num_heads: 8 |
|
|
pos_encodings: rope |
|
|
puzzle_emb_len: 16 |
|
|
puzzle_emb_ndim: 512 |
|
|
beta1: 0.9 |
|
|
beta2: 0.95 |
|
|
checkpoint_every_eval: true |
|
|
checkpoint_path: checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_hybrid_sudoku |
|
|
data_paths: |
|
|
- data/sudoku-extreme-1k-aug-1000 |
|
|
data_paths_test: [] |
|
|
ema: true |
|
|
ema_rate: 0.999 |
|
|
epochs: 50000 |
|
|
eval_interval: 5000 |
|
|
eval_save_outputs: [] |
|
|
evaluators: [] |
|
|
freeze_weights: false |
|
|
global_batch_size: 768 |
|
|
load_checkpoint: null |
|
|
lr: 0.0001 |
|
|
lr_min_ratio: 1.0 |
|
|
lr_warmup_steps: 2000 |
|
|
min_eval_interval: 0 |
|
|
project_name: Sudoku-extreme-1k-aug-1000-ACT-torch |
|
|
puzzle_emb_lr: 0.0001 |
|
|
puzzle_emb_weight_decay: 1.0 |
|
|
run_name: pretrain_hybrid_sudoku |
|
|
seed: 0 |
|
|
weight_decay: 1.0 |
|
|
|