jburtoft commited on
Commit
d19e6f4
·
verified ·
1 Parent(s): 54e707b

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +22 -0
  2. neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/llama/meta-llama/Llama-3.2-1B/59041c9140e83957fcc2.json +77 -0
  3. neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/llama/meta-llama/Llama-3.2-1B/96440af3992c61db1915.json +77 -0
  4. neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/qwen2/Qwen/Qwen2.5-7B/6fc75d9e400329069010.json +71 -0
  5. neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/qwen2/arcee-ai/arcee-lite/aaad74b827582e109166.json +70 -0
  6. neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/compile_flags.json +1 -0
  7. neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.done +0 -0
  8. neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.hlo_module.pb +3 -0
  9. neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.neff +3 -0
  10. neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/compile_flags.json +1 -0
  11. neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.done +0 -0
  12. neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.hlo_module.pb +3 -0
  13. neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.neff +3 -0
  14. neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/compile_flags.json +1 -0
  15. neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.done +0 -0
  16. neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.hlo_module.pb +3 -0
  17. neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.neff +3 -0
  18. neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/compile_flags.json +1 -0
  19. neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.done +0 -0
  20. neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.hlo_module.pb +3 -0
  21. neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.neff +3 -0
  22. neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/wrapped_neff.hlo +3 -0
  23. neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/compile_flags.json +1 -0
  24. neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.done +0 -0
  25. neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.hlo_module.pb +3 -0
  26. neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.neff +3 -0
  27. neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/wrapped_neff.hlo +3 -0
  28. neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/compile_flags.json +1 -0
  29. neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.done +0 -0
  30. neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.hlo_module.pb +3 -0
  31. neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.neff +3 -0
  32. neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/compile_flags.json +1 -0
  33. neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.done +0 -0
  34. neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.hlo_module.pb +3 -0
  35. neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.neff +3 -0
  36. neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/compile_flags.json +1 -0
  37. neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.done +0 -0
  38. neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.hlo_module.pb +3 -0
  39. neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.neff +3 -0
  40. neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/compile_flags.json +1 -0
  41. neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.done +0 -0
  42. neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.hlo_module.pb +3 -0
  43. neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.neff +3 -0
  44. neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/compile_flags.json +1 -0
  45. neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.done +0 -0
  46. neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.hlo_module.pb +3 -0
  47. neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.neff +3 -0
  48. neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/compile_flags.json +1 -0
  49. neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/model.done +0 -0
  50. neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/model.hlo_module.pb +3 -0
.gitattributes CHANGED
@@ -11867,3 +11867,25 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_14611821751025129524+bad9cf09/model.neff
11867
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_1798959443618413102+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
11868
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_393045707851385331+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
11869
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4395987120667329067+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11867
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_1798959443618413102+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
11868
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_393045707851385331+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
11869
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4395987120667329067+bad9cf09/model.neff filter=lfs diff=lfs merge=lfs -text
11870
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
11871
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
11872
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
11873
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
11874
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
11875
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
11876
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
11877
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text
11878
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text
11879
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
11880
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text
11881
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.neff filter=lfs diff=lfs merge=lfs -text
11882
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
11883
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
11884
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_e5fef74c2bea22a04c7c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
11885
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_e5fef74c2bea22a04c7c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
11886
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_15640073450153027801+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
11887
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_17272134439322064525+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
11888
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_17799376403849510564+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
11889
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_18274421132528031246+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
11890
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7645671890323173873+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
11891
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_9669004554499106932+f7f529f3/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/llama/meta-llama/Llama-3.2-1B/59041c9140e83957fcc2.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.2-1B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Llama-3.2-1B",
26
+ "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 2048,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 2048,
42
+ "neuronxcc_version": "2.20.9961.0+0acef03a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 2048,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/llama/meta-llama/Llama-3.2-1B/96440af3992c61db1915.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.2-1B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Llama-3.2-1B",
26
+ "checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 1,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 2048,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 2048,
42
+ "neuronxcc_version": "2.20.9961.0+0acef03a",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 2048,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 1,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/qwen2/Qwen/Qwen2.5-7B/6fc75d9e400329069010.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-7B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-7B",
24
+ "checkpoint_revision": "d149729398750b98c0af14eb82c78cfe92750796",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 2048,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 2048,
40
+ "neuronxcc_version": "2.20.9961.0+0acef03a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 2048,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 28,
60
+ "num_hidden_layers": 28,
61
+ "num_key_value_heads": 4,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 131072,
66
+ "tie_word_embeddings": false,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 152064
71
+ }
neuronxcc-2.20.9961.0+0acef03a/0_REGISTRY/0.3.0/qwen2/arcee-ai/arcee-lite/aaad74b827582e109166.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "arcee-ai/arcee-lite",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "arcee-ai/arcee-lite",
24
+ "checkpoint_revision": "c5cb9c38be16b64757f785f0df36dca87f76d5e2",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 2048,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 2048,
40
+ "neuronxcc_version": "2.20.9961.0+0acef03a",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 2048,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 12,
60
+ "num_hidden_layers": 28,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": null,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_sliding_window": false,
69
+ "vocab_size": 151936
70
+ }
neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e97945c7aa39207e4c0573a12e7c0daf41de18887466041dae687748e11602
3
+ size 679803
neuronxcc-2.20.9961.0+0acef03a/MODULE_0a113de31eae6e0cd5ea+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8755b3ca0b48f0875b7ef368ce891da56963170f4f8b46e352718100990ee252
3
+ size 3636224
neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db744394a0c423f4f3bd9aeed587f1626c4bd8c310867545e7cf7ccc6a912860
3
+ size 371739
neuronxcc-2.20.9961.0+0acef03a/MODULE_21fb9cbd6bc31f93ffc7+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd98bac47a28a2d1126fcfca9ad2579a775033448ce80b1c0350e5702f5100c
3
+ size 9657344
neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a74c0049d74cb0cc9d967d1d7ea350ba916c89acfbfe54e18c0f36696e0c719
3
+ size 660175
neuronxcc-2.20.9961.0+0acef03a/MODULE_244d0637c526ac81b414+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e891ff7e08be3d14ee7a833ea2d46a1de88887f6e44c7d103a45b6af959964f
3
+ size 10732544
neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950d4b81fcbc51f2ee05e4b54ec72c34ed6a614a09777678ab8256ccadf4c25a
3
+ size 740840
neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe618c73e74cdd8db0825b2bc1429797edc127378610f6efc354a4275dc62f1
3
+ size 6145024
neuronxcc-2.20.9961.0+0acef03a/MODULE_3634540e2564ed96b5da+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:160f04d4186c56abdcafb441db29b46e6a05932c324492e3b7e8f82c35db298a
3
+ size 6339591
neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d391b1d7dbf4f034f49282f26255219468054f0a8333e9e7b2990521de02e2a0
3
+ size 388806
neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da7c1e95569880d633b22e37a0a3595d9d7b5512a76703ee264afc7a406f047
3
+ size 2335744
neuronxcc-2.20.9961.0+0acef03a/MODULE_48e1dbda70c7b45843ee+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:106b77e6679e9a0a7723043bf22964feffa0545658a41eccf5953697cb730d34
3
+ size 2407399
neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03509621db932b2ad273e9eb6035e68292df17c21e9eb99d8f09d22c5cc22926
3
+ size 69007
neuronxcc-2.20.9961.0+0acef03a/MODULE_52f62927e2aef61c8a2a+ae6a382b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f595fde6155be56d50d6da408103d6a443f33bc13593725934a650d1f79c547
3
+ size 1352704
neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a026f3c0afa9452f8e28cdfbad2e7e345e697bb30fd481c536111993679da0
3
+ size 201977
neuronxcc-2.20.9961.0+0acef03a/MODULE_7e3a1a26312c81c65e8f+ae6a382b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e92eb3fef01485419a56e993bff077ed797984730707b5730e493864719624
3
+ size 6554624
neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b3c5395792211a3272bef80039b605b36847cd4a86941f65c417d19b17a1b4
3
+ size 343143
neuronxcc-2.20.9961.0+0acef03a/MODULE_a7bb15d22b2a18c55870+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5776d9905351a073af4bfce4191a08faa6247cece91a4368db3e10ab67929cd2
3
+ size 20204544
neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3df471f965045c386c6e8db1b96baa6e68fd5adbc463b3ff4ee9e534efee4e
3
+ size 71540
neuronxcc-2.20.9961.0+0acef03a/MODULE_b42ea471c6d81a15b0b3+ae6a382b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d54349cbc656b3fae72730ffaeadb9a4d2ed25de24bb152402b20385fd1dd0
3
+ size 2663424
neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options='--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27fc08e1e4d7a8468ed18e98af82fedc774d2582c19d7152128f4ce8b8e3d2b
3
+ size 177861
neuronxcc-2.20.9961.0+0acef03a/MODULE_d22c269383bdc0775b95+ae6a382b/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55bd9e2f6f0168634b093fec4cfc765432ecb8c69add8da6f44065ee3f22cdf4
3
+ size 1557504
neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.20.9961.0+0acef03a/MODULE_d48baee12d51947bcd73+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e82cf9e4b37a6d040b934497f5bd95de4d2bc09d8d1fd9321190908082ef08
3
+ size 417093