adamkarvonen commited on
Commit
e8ce53c
·
verified ·
1 Parent(s): 98bb07a

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  2. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  3. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  4. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  5. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  6. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  7. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  8. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  9. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  10. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  11. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  12. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  13. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  14. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  15. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  16. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  17. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  18. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  19. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  20. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  21. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +53 -0
  22. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
  23. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  24. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +53 -0
  25. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
  26. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  27. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +53 -0
  28. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
  29. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  30. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +53 -0
  31. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json +1 -0
  32. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  33. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json +53 -0
  34. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  35. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  36. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  37. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  38. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  39. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  40. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  41. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  42. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  43. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  44. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  45. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  46. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  47. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  48. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  49. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  50. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9152cd6bbb46fef658f48645ee6e23ea164ca837939dc4c4472075f850cff6
3
+ size 25187350
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6a39fe2b27eb1705884374389839c276e3e277d48903d17482d810f46a368ae
3
+ size 25187350
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5161845a4ca972062849f1023167b1d4bf37a764a8fe8a602bd782a3fc39be3
3
+ size 25187350
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218b3717735f0cbc6bc72c5946422136f80d8b29f44ec4061edf648f2409d014
3
+ size 25187350
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d58a7a54c6e79c03326d5b013a815a3b9d2f74970d97dd8858bff2730b464b4c
3
+ size 25187350
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3c72811397cdcf999e03d2ee8955d0712917fbd4662c390ef8e3c5a43a4895
3
+ size 25187350
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4942fe15e99f94e489e241702f3604c71e544c763732a0f73ea9d06404ec12e6
3
+ size 25220118
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ecbc76304e9f267e2704d53589c7bce90f07433db6f5b828bad733d36bd358d
3
+ size 25220118
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40e0430372222dd534492312ef443e7a5f6e1522b8c5c6e3f947169c9da1ebb
3
+ size 25220118
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a38db0a1a6556f0131ec4c8e9cd7127422ce43147b27e88f371c14de95a5e1b
3
+ size 25220118
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb6735797c3de308bc16ca32206b86e11a53193df3b3b7e4b00a4617d9b3e0a
3
+ size 25220118
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72c630784e70a267b96d3d53bc0bd511be38fe9c4f62ccb1977f97edf4b0ff9a
3
+ size 25220118
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5a5fa9af76200111d6217ac3624e53c9becb7e360746e46aa0f5d584312a91
3
+ size 25203487
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d6acc935243f87fb597097bd4c4d522c1e7c7dca47e4a395dcd4ec7baff786
3
+ size 25203487
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5304a167b6359fb73c4520ff0623820cf5fe4f59a8eb03279c34d52779fcb3
3
+ size 25203487
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30728e6484a5b66e9b64e0fdc73384fa894e23eb471bfd1988876d4409d483cc
3
+ size 25203487
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df9b389961a271e75436332200cdbb201cddfb8a6d15d0d8846abcd61e9f466b
3
+ size 25203487
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774d4df00cfaad33152c4143fca2e2c0e8589d5ff17a99ce74b91e820227f834
3
+ size 25203487
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c056e07b96a75af6cd31ab961e24fc82e49114ef0227814a5582f831012e26e
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81fed4d21ff131d65a405e3b6f88a55a64db798a8a7f3edca27eade8db266d00
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 40,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 7.362171837777803, "l1_loss": 78.93377662427497, "l0": 39.87090717662465, "frac_variance_explained": 0.9211916851274895, "cossim": 0.9494949272184661, "l2_ratio": 0.9577325889558503, "relative_reconstruction_bias": 1.0056351567759658, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.990205157886852, "loss_zero": 12.187079458525687, "frac_recovered": 0.9570405320687727, "frac_alive": 0.996337890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b752ea230a4c9f51a78ca7c08c6fbc203b092c314513ec3fb2b31cf540ef53
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 80,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.5154667334123095, "l1_loss": 130.4948748964252, "l0": 79.62615203857422, "frac_variance_explained": 0.9379528291297682, "cossim": 0.9607100306135236, "l2_ratio": 0.9691500952749541, "relative_reconstruction_bias": 1.0058996894142844, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.842034011176138, "loss_zero": 12.187079458525687, "frac_recovered": 0.9736035881620465, "frac_alive": 0.9912109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e984acf7f7b7c1fd93666aa91cd2e32a3592025f68a89d30305b8badf3483ebf
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 160,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.389582446127227, "l1_loss": 193.1514032537287, "l0": 159.14285000887784, "frac_variance_explained": 0.9577755133310953, "cossim": 0.9736721605965586, "l2_ratio": 0.9815338091416792, "relative_reconstruction_bias": 1.003865700779539, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7269490740515967, "loss_zero": 12.187079458525687, "frac_recovered": 0.9865131269801747, "frac_alive": 0.94580078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b16c1393d2ec50c976f03c6fe7f556474bc9b588f80d462df84cb607d4ae052
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 320,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.9152750463196724, "l1_loss": 339.17472700639206, "l0": 318.46138416637075, "frac_variance_explained": 0.9781012733777364, "cossim": 0.9863574920278607, "l2_ratio": 0.9901740713552996, "relative_reconstruction_bias": 1.0028689196615508, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.655328530253786, "loss_zero": 12.187079458525687, "frac_recovered": 0.9944809982270906, "frac_alive": 0.57763671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec31077dc93e01e06f3dcf61a60ef59f241cfbcefa4cd171a5bbefbc19de41a
3
+ size 25187597
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 640,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbfed2bd6a4e0cb3a0dc451f2f48781f0d2430dd8a2c4aa016e9912b55d8099
3
+ size 25186984
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d5ed8467a1385130ee9ce63e034511e54e60478737435b9c3b33b62d274076
3
+ size 25186984
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b893965a3bd9f7a065d28a233cbe1102b9d77fc20b35f392ec244e6c8d2cd0
3
+ size 25186984
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99290b592d7e34ed4bf67ae86078dde28a61aa6fc71fdf36374b39d67bf577fe
3
+ size 25186984
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed3f186a191764d3e9af5276e8a81b2fcb888063fb4aba30423260469298228
3
+ size 25186984
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7cc64def02d43e0445231babf172338c9cbce22b25bb0e4ad8b565a0636ee4f
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3548a791eed29cf0a2545c5eb894a9637c617ecef47724b5531cd13cf3c1c783
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a3642060da855b1df65b37dd08dcc0f6f1f9391ff66449d11714a477f7585c
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8509bc8a09a9a68fc11c2a03b3780eb6ae432c55ff28991fca0c7e25fa83c925
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb77b85c9327f7711a15fbdaf115caee1e78e782194db6997e17f736b89bc63
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:807df4e63d0c206d8b4891c6ff84849c28161c13a8d7c272506f9cf3c2cbbaae
3
+ size 25186984
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39e81c75160b19613fb8d959f57ad63118391a2751c3d80737f86be45d79bae
3
+ size 25186984
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea57ec3bd6667ade40f4f0aca0e896e14fe0a529c1594c2d7127565e7f726ce
3
+ size 25187350
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b8704d61cc34d1c88bc3b9cf8d0f7b856e8eb7c5d2c09ecd9020ec1e28c5daa
3
+ size 25187350
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84cd2a1361674efa24209db17a9bdb112e4a1b4a81c45eba205cad13722d3e9
3
+ size 25187350
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920b611a07e9da38dd08b36fca39f442ccdf64aaca75604d9b6ff830b2e6db9d
3
+ size 25187350
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ff96e653b38d80fa209cda7058a259193f736e28b979f6d4c020842dd0fea4
3
+ size 25187350