diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..17829bc56640e98554c8fe439d3ed2c5c05f5bbf --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9152cd6bbb46fef658f48645ee6e23ea164ca837939dc4c4472075f850cff6 +size 25187350 diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b69df5c0685e8abef4b15640ad8f80288739b9f8 --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a39fe2b27eb1705884374389839c276e3e277d48903d17482d810f46a368ae +size 25187350 diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb397914e60836ac57599d68223b417d2e4b500 --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5161845a4ca972062849f1023167b1d4bf37a764a8fe8a602bd782a3fc39be3 +size 25187350 diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e5b08df472b22a0d12d72eec6e6ca3d0fdb848a --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218b3717735f0cbc6bc72c5946422136f80d8b29f44ec4061edf648f2409d014 +size 25187350 diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb9512fb6ff9ebdc1e841198b433682fcd94d09d --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58a7a54c6e79c03326d5b013a815a3b9d2f74970d97dd8858bff2730b464b4c +size 25187350 diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa5d1670a90b8a99a4b0f0a72446b70de17eb099 --- /dev/null +++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3c72811397cdcf999e03d2ee8955d0712917fbd4662c390ef8e3c5a43a4895 +size 25187350 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad65d539b047ae1c4bd721241a2049ed5b3eaf2 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4942fe15e99f94e489e241702f3604c71e544c763732a0f73ea9d06404ec12e6 +size 25220118 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..16600c13928cfc6f426a32128853042b836f9351 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ecbc76304e9f267e2704d53589c7bce90f07433db6f5b828bad733d36bd358d +size 25220118 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..90fc72d5a05de054485506bf1bf155b3405e0f29 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40e0430372222dd534492312ef443e7a5f6e1522b8c5c6e3f947169c9da1ebb +size 25220118 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdc3b14f3acaaa4f8b5f0c8e093ad7aa4e396d39 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a38db0a1a6556f0131ec4c8e9cd7127422ce43147b27e88f371c14de95a5e1b +size 25220118 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a99897aff41591800373769372c455606f313a65 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb6735797c3de308bc16ca32206b86e11a53193df3b3b7e4b00a4617d9b3e0a +size 25220118 diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..32d0f67ad46ccde2a5e61595f97cf397675ba2c1 --- /dev/null +++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c630784e70a267b96d3d53bc0bd511be38fe9c4f62ccb1977f97edf4b0ff9a +size 25220118 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a003937a509859cb5c720faefb551a944fbd479 --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5a5fa9af76200111d6217ac3624e53c9becb7e360746e46aa0f5d584312a91 +size 25203487 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1d3bf376a3345d66d8863b7827ea4b51dcd3270 --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d6acc935243f87fb597097bd4c4d522c1e7c7dca47e4a395dcd4ec7baff786 +size 25203487 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b91d666846fdd7be2b7233e108cf5c11a80656 --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5304a167b6359fb73c4520ff0623820cf5fe4f59a8eb03279c34d52779fcb3 +size 25203487 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..57eedbef6070b2654818d0bacc6777d25e1e252c --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30728e6484a5b66e9b64e0fdc73384fa894e23eb471bfd1988876d4409d483cc +size 25203487 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fb2814e7ebace37c9fc5e264e870a19edb0ba04 --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9b389961a271e75436332200cdbb201cddfb8a6d15d0d8846abcd61e9f466b +size 25203487 diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b88309ce5ae23fc3edc6c948e1d2159357e81878 --- /dev/null +++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774d4df00cfaad33152c4143fca2e2c0e8589d5ff17a99ce74b91e820227f834 +size 25203487 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0b9aabfec173d98f4fb6702a5d25a34ec655f86 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c056e07b96a75af6cd31ab961e24fc82e49114ef0227814a5582f831012e26e +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..10df8cd741868ea1295ef65bd106655e44b9e887 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81fed4d21ff131d65a405e3b6f88a55a64db798a8a7f3edca27eade8db266d00 +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..242d34670cf852de96e992746b5bd427edcafaae --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json @@ -0,0 +1,53 @@ +{ + "trainer": { + "trainer_class": "MatryoshkaBatchTopKTrainer", + "dict_class": "MatryoshkaBatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 384, + "seed": 0, + "activation_dim": 768, + "dict_size": 4096, + "group_fractions": [ + 0.03125, + 0.0625, + 0.125, + 0.25, + 0.53125 + ], + "group_weights": [ + 0.2, + 0.2, + 0.2, + 0.2, + 0.2 + ], + "group_sizes": [ + 128, + 256, + 512, + 1024, + 2176 + ], + "k": 40, + "device": "cuda:0", + "layer": 8, + "lm_name": "EleutherAI/pythia-160m-deduped", + "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1", + "submodule_name": "resid_post_layer_8" + }, + "buffer": { + "d_submodule": 768, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..610e37fd8add9a9bb0b42fcd10818d8cfd6820be --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.362171837777803, "l1_loss": 78.93377662427497, "l0": 39.87090717662465, "frac_variance_explained": 0.9211916851274895, "cossim": 0.9494949272184661, "l2_ratio": 0.9577325889558503, "relative_reconstruction_bias": 1.0056351567759658, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.990205157886852, "loss_zero": 12.187079458525687, "frac_recovered": 0.9570405320687727, "frac_alive": 0.996337890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf82e5ac95d5f9feff8a82060edbdb7358fae753 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b752ea230a4c9f51a78ca7c08c6fbc203b092c314513ec3fb2b31cf540ef53 +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..436f63fb621de4fb94a041df8208c35062f885ea --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json @@ -0,0 +1,53 @@ +{ + "trainer": { + "trainer_class": "MatryoshkaBatchTopKTrainer", + "dict_class": "MatryoshkaBatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 384, + "seed": 0, + "activation_dim": 768, + "dict_size": 4096, + "group_fractions": [ + 0.03125, + 0.0625, + 0.125, + 0.25, + 0.53125 + ], + "group_weights": [ + 0.2, + 0.2, + 0.2, + 0.2, + 0.2 + ], + "group_sizes": [ + 128, + 256, + 512, + 1024, + 2176 + ], + "k": 80, + "device": "cuda:0", + "layer": 8, + "lm_name": "EleutherAI/pythia-160m-deduped", + "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2", + "submodule_name": "resid_post_layer_8" + }, + "buffer": { + "d_submodule": 768, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..255e836b5c6a70e3d9ab97bd2c53ffe51ef5a3ae --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.5154667334123095, "l1_loss": 130.4948748964252, "l0": 79.62615203857422, "frac_variance_explained": 0.9379528291297682, "cossim": 0.9607100306135236, "l2_ratio": 0.9691500952749541, "relative_reconstruction_bias": 1.0058996894142844, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.842034011176138, "loss_zero": 12.187079458525687, "frac_recovered": 0.9736035881620465, "frac_alive": 0.9912109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9e04487e271c0612884933091c9b1923391f395 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e984acf7f7b7c1fd93666aa91cd2e32a3592025f68a89d30305b8badf3483ebf +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..98f116c015475956fb0dc8e7626801916bf5d7d4 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json @@ -0,0 +1,53 @@ +{ + "trainer": { + "trainer_class": "MatryoshkaBatchTopKTrainer", + "dict_class": "MatryoshkaBatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 384, + "seed": 0, + "activation_dim": 768, + "dict_size": 4096, + "group_fractions": [ + 0.03125, + 0.0625, + 0.125, + 0.25, + 0.53125 + ], + "group_weights": [ + 0.2, + 0.2, + 0.2, + 0.2, + 0.2 + ], + "group_sizes": [ + 128, + 256, + 512, + 1024, + 2176 + ], + "k": 160, + "device": "cuda:0", + "layer": 8, + "lm_name": "EleutherAI/pythia-160m-deduped", + "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3", + "submodule_name": "resid_post_layer_8" + }, + "buffer": { + "d_submodule": 768, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e4832634e20ab973aa975afd4a2311c8eef473d --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.389582446127227, "l1_loss": 193.1514032537287, "l0": 159.14285000887784, "frac_variance_explained": 0.9577755133310953, "cossim": 0.9736721605965586, "l2_ratio": 0.9815338091416792, "relative_reconstruction_bias": 1.003865700779539, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7269490740515967, "loss_zero": 12.187079458525687, "frac_recovered": 0.9865131269801747, "frac_alive": 0.94580078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e813562ced3cd3208d6fc2d3ef09563b995a77f --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b16c1393d2ec50c976f03c6fe7f556474bc9b588f80d462df84cb607d4ae052 +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ba758370ba7db50104bd668df9d45c814d015a7 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json @@ -0,0 +1,53 @@ +{ + "trainer": { + "trainer_class": "MatryoshkaBatchTopKTrainer", + "dict_class": "MatryoshkaBatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 384, + "seed": 0, + "activation_dim": 768, + "dict_size": 4096, + "group_fractions": [ + 0.03125, + 0.0625, + 0.125, + 0.25, + 0.53125 + ], + "group_weights": [ + 0.2, + 0.2, + 0.2, + 0.2, + 0.2 + ], + "group_sizes": [ + 128, + 256, + 512, + 1024, + 2176 + ], + "k": 320, + "device": "cuda:0", + "layer": 8, + "lm_name": "EleutherAI/pythia-160m-deduped", + "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4", + "submodule_name": "resid_post_layer_8" + }, + "buffer": { + "d_submodule": 768, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3776c200f774dc38adbd212227ff5cb9ffcade5f --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.9152750463196724, "l1_loss": 339.17472700639206, "l0": 318.46138416637075, "frac_variance_explained": 0.9781012733777364, "cossim": 0.9863574920278607, "l2_ratio": 0.9901740713552996, "relative_reconstruction_bias": 1.0028689196615508, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.655328530253786, "loss_zero": 12.187079458525687, "frac_recovered": 0.9944809982270906, "frac_alive": 0.57763671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..efab7a0d4669efd95b0d4e3fa3a1c7192ad7f275 --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec31077dc93e01e06f3dcf61a60ef59f241cfbcefa4cd171a5bbefbc19de41a +size 25187597 diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..56abe515ec5de95c27e65875d4301cdc3c542a5a --- /dev/null +++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json @@ -0,0 +1,53 @@ +{ + "trainer": { + "trainer_class": "MatryoshkaBatchTopKTrainer", + "dict_class": "MatryoshkaBatchTopKSAE", + "lr": 0.0003, + "steps": 244140, + "auxk_alpha": 0.03125, + "warmup_steps": 1000, + "decay_start": 195312, + "threshold_beta": 0.999, + "threshold_start_step": 1000, + "top_k_aux": 384, + "seed": 0, + "activation_dim": 768, + "dict_size": 4096, + "group_fractions": [ + 0.03125, + 0.0625, + 0.125, + 0.25, + 0.53125 + ], + "group_weights": [ + 0.2, + 0.2, + 0.2, + 0.2, + 0.2 + ], + "group_sizes": [ + 128, + 256, + 512, + 1024, + 2176 + ], + "k": 640, + "device": "cuda:0", + "layer": 8, + "lm_name": "EleutherAI/pythia-160m-deduped", + "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5", + "submodule_name": "resid_post_layer_8" + }, + "buffer": { + "d_submodule": 768, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad625881a32363059977e3dcab26e216d2a20b06 --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fbfed2bd6a4e0cb3a0dc451f2f48781f0d2430dd8a2c4aa016e9912b55d8099 +size 25186984 diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6425181efeac5dbe9b7e2ab7da11fbb96a6db48d --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d5ed8467a1385130ee9ce63e034511e54e60478737435b9c3b33b62d274076 +size 25186984 diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..858a5aa7ac9f16df381aec43632b8185a9d67bf6 --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b893965a3bd9f7a065d28a233cbe1102b9d77fc20b35f392ec244e6c8d2cd0 +size 25186984 diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab36617c90a5ed07c7ed442465a5756092772cad --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99290b592d7e34ed4bf67ae86078dde28a61aa6fc71fdf36374b39d67bf577fe +size 25186984 diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4ba1077f9c51c29bd9f63d29ef34ac540374268 --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed3f186a191764d3e9af5276e8a81b2fcb888063fb4aba30423260469298228 +size 25186984 diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..33c842d4c02f7d6e4e5798aa8d04a8656bb3c84e --- /dev/null +++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7cc64def02d43e0445231babf172338c9cbce22b25bb0e4ad8b565a0636ee4f +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0388c7c4f681a9828a2d48f322ff6fd7af46d9e --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3548a791eed29cf0a2545c5eb894a9637c617ecef47724b5531cd13cf3c1c783 +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f30e6c2762cfe3569f236126aa78826a263b976 --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a3642060da855b1df65b37dd08dcc0f6f1f9391ff66449d11714a477f7585c +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9327a36643fb52f7c600f734afaa0a3fd3e7bd51 --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8509bc8a09a9a68fc11c2a03b3780eb6ae432c55ff28991fca0c7e25fa83c925 +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcb4e4a8ba997ae7caf67f5e7577fabd23671b5e --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb77b85c9327f7711a15fbdaf115caee1e78e782194db6997e17f736b89bc63 +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..09f0ec4f1b1031e4489ba16eab55997c445a9021 --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807df4e63d0c206d8b4891c6ff84849c28161c13a8d7c272506f9cf3c2cbbaae +size 25186984 diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..877e66818bd007f91ebe84f664200829dd940076 --- /dev/null +++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39e81c75160b19613fb8d959f57ad63118391a2751c3d80737f86be45d79bae +size 25186984 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..416ed7db041d6d226b2575e4c5d36fe8d1bd9a6e --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ea57ec3bd6667ade40f4f0aca0e896e14fe0a529c1594c2d7127565e7f726ce +size 25187350 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6407795f5ac9bf164850ed6920d1d50977d1ebff --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8704d61cc34d1c88bc3b9cf8d0f7b856e8eb7c5d2c09ecd9020ec1e28c5daa +size 25187350 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ab707689ac53da955cb258737567c8771866fec --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b84cd2a1361674efa24209db17a9bdb112e4a1b4a81c45eba205cad13722d3e9 +size 25187350 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..93f6b7bccaf6b8189b5aa70f604438928e0b1bfc --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920b611a07e9da38dd08b36fca39f442ccdf64aaca75604d9b6ff830b2e6db9d +size 25187350 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4f122168592da2110a651ff11654015c9c2967d --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61ff96e653b38d80fa209cda7058a259193f736e28b979f6d4c020842dd0fea4 +size 25187350 diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..78dea2fc7145ccb41ce8e912a5ce2f13c18b8749 --- /dev/null +++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49563ce615b5df2709df8599512e69955bed2aea00eeea911c0598c4bc8a0d83 +size 25187350