diff --git a/.gitattributes b/.gitattributes index 7c5252018c7e2f9dd87c5f2e7469223715384ecf..edb08d91aa9974515a43ce0e1cad50fef8ffd04a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10715,3 +10715,113 @@ neuronxcc-2.15.143.0+e39249ad/MODULE_dd7fe4a299fa94e55211+39f12043/model.neff fi neuronxcc-2.15.143.0+e39249ad/MODULE_f275897da2e206eb43ea+39f12043/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.20.9961.0+0acef03a/MODULE_c9338aa2ab68700f103b/model.neuron filter=lfs diff=lfs merge=lfs -text neuronxcc-2.20.9961.0+0acef03a/MODULE_9bfa84a5867ce08f6ac5/model.neuron filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json new file mode 100644 index 0000000000000000000000000000000000000000..fd12ff811849aae9ed0997f872dbd528b2fe8868 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json new file mode 100644 index 0000000000000000000000000000000000000000..f5dade93cdada572b9ae2418da295edbc2c88b4a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json new file mode 100644 index 0000000000000000000000000000000000000000..83d8cea621fad22d8b7bcdbfdcdcc4e4a7d90ef5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json new file mode 100644 index 0000000000000000000000000000000000000000..c03a02387a9ad638fba287be5c55b0b6ab479626 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json new file mode 100644 index 0000000000000000000000000000000000000000..f12ee49f0dd693f05ed6605f1374f20ae8b54156 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json new file mode 100644 index 0000000000000000000000000000000000000000..ee74469435dceb0bb6eedd0cb763c23d789634d2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/d06188fe8e87222dbd8c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/d06188fe8e87222dbd8c.json new file mode 100644 index 0000000000000000000000000000000000000000..cd67355cd94837ce784f3efc433cea2eb407c578 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/d06188fe8e87222dbd8c.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a338625e976e1cdcce16.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a338625e976e1cdcce16.json new file mode 100644 index 0000000000000000000000000000000000000000..08f34e58b60afd5e6cc18ce44ea17587f8824d46 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a338625e976e1cdcce16.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a7b0618b75c2e747c4a7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a7b0618b75c2e747c4a7.json new file mode 100644 index 0000000000000000000000000000000000000000..0de9629205460521b9f9a44c961d5ea759d10621 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a7b0618b75c2e747c4a7.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json new file mode 100644 index 0000000000000000000000000000000000000000..87b1a027a96e5d8783e3437f85a65c62d0430ed5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json new file mode 100644 index 0000000000000000000000000000000000000000..dda88db0d6348e1b20c343de4e62099675fec141 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json new file mode 100644 index 0000000000000000000000000000000000000000..927e0bb44c095f35b9794af8102ac3260e35909e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json new file mode 100644 index 0000000000000000000000000000000000000000..a6252a542b8a3c778fb48ee53e9cccc08dd60178 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json new file mode 100644 index 0000000000000000000000000000000000000000..7aef121f689ea9a2b0baad3b9faecedcb8b7d187 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json new file mode 100644 index 0000000000000000000000000000000000000000..2e4e1b57d7ce1a7ea7a190593c99e499c675de94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json new file mode 100644 index 0000000000000000000000000000000000000000..c7b80fade1d5f71cc6e3afa870009d600a106d1d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/51d387c8436423c80830.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/51d387c8436423c80830.json new file mode 100644 index 0000000000000000000000000000000000000000..7bc0b1dd7d30e4d0132a8fd865db3263f131f897 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/51d387c8436423c80830.json @@ -0,0 +1,94 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 32768, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 32768, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/edef092decee45a6b77d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/edef092decee45a6b77d.json new file mode 100644 index 0000000000000000000000000000000000000000..8a386be0abf9c5930098e94bc390f0ed2bcd3d86 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/edef092decee45a6b77d.json @@ -0,0 +1,94 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json new file mode 100644 index 0000000000000000000000000000000000000000..7e41d87b085735c84f1b1cf427bae022b6786e2a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json @@ -0,0 +1,76 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json new file mode 100644 index 0000000000000000000000000000000000000000..139fd609e49ee66e03c6455833fea0358d0b1621 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json @@ -0,0 +1,76 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json new file mode 100644 index 0000000000000000000000000000000000000000..046484e8b62a7a469f92a6a5fad4b5ca3904d582 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json @@ -0,0 +1,76 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json new file mode 100644 index 0000000000000000000000000000000000000000..f4195ac7167b4e9d3da781f879c6c59cec2547e2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json new file mode 100644 index 0000000000000000000000000000000000000000..907b458f60ec0d45d3ad57b038b33075a545221f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json new file mode 100644 index 0000000000000000000000000000000000000000..a4723b6b61b298ede97c3a72e7cb32b25170b919 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev4", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json new file mode 100644 index 0000000000000000000000000000000000000000..dbafcc03048a833e143b779df690888d97829a7c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json new file mode 100644 index 0000000000000000000000000000000000000000..1980ca61437357627b75dea01d99dc35babe62e8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/0cfa36114f700208376a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/0cfa36114f700208376a.json new file mode 100644 index 0000000000000000000000000000000000000000..b20a4e6fa0c97c44ebb24bcf39ac9baeaf91b4c1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/0cfa36114f700208376a.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/1144207432b18b97200f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/1144207432b18b97200f.json new file mode 100644 index 0000000000000000000000000000000000000000..f7a7742c1510988f0698e132643a0b46247bb463 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/1144207432b18b97200f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/132e478ab06dfd6f996e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/132e478ab06dfd6f996e.json new file mode 100644 index 0000000000000000000000000000000000000000..332595c9ce238c9a0fa6425f1cf93826a344a7aa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/132e478ab06dfd6f996e.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/430083c6aa3b306e22e0.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/430083c6aa3b306e22e0.json new file mode 100644 index 0000000000000000000000000000000000000000..bb0f0e32c35597b6a87a3d17ddbbbd6408f7ae23 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/430083c6aa3b306e22e0.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/53e5296dda9b797224f5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/53e5296dda9b797224f5.json new file mode 100644 index 0000000000000000000000000000000000000000..cbfb11cebc5414fc6f3cd641a65d4c26c2b82551 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/53e5296dda9b797224f5.json @@ -0,0 +1,67 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json new file mode 100644 index 0000000000000000000000000000000000000000..e572c04e5a95ab0bbd0f54e6443cbce69b7ee62c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json new file mode 100644 index 0000000000000000000000000000000000000000..b4f9c859c2b506bd7ba73496a1fe9fa37d2e69c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0042866b0b29ca346a04.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0042866b0b29ca346a04.json new file mode 100644 index 0000000000000000000000000000000000000000..48b1b9ef57282cc2974726465938d0d615d2dce2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0042866b0b29ca346a04.json @@ -0,0 +1,68 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f19cc6cfed252cbe80.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f19cc6cfed252cbe80.json new file mode 100644 index 0000000000000000000000000000000000000000..dd349466da5e02bf8190cb250db2011d26b9634d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f19cc6cfed252cbe80.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/235e6b665a70c7f3b88b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/235e6b665a70c7f3b88b.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc5bc21910c466bfa4a70f45b26a531ebcc1f94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/235e6b665a70c7f3b88b.json @@ -0,0 +1,69 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/54998903c673b03ab682.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/54998903c673b03ab682.json new file mode 100644 index 0000000000000000000000000000000000000000..0aeb79614dd14071d6e0df331a1886663c5a6e5d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/54998903c673b03ab682.json @@ -0,0 +1,69 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/559712f03e12a7d3db9d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/559712f03e12a7d3db9d.json new file mode 100644 index 0000000000000000000000000000000000000000..ebe26fccacffe900958631ec2bac3e8d1ff8d11c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/559712f03e12a7d3db9d.json @@ -0,0 +1,68 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/62c9b979338956707306.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/62c9b979338956707306.json new file mode 100644 index 0000000000000000000000000000000000000000..bb4f591bbaa1cf67190ed09d1fca5b8c28086360 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/62c9b979338956707306.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/7b9ae8b155ce16ab1f81.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/7b9ae8b155ce16ab1f81.json new file mode 100644 index 0000000000000000000000000000000000000000..e3d1165597e7a9af044a31041c63272289cd05db --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/7b9ae8b155ce16ab1f81.json @@ -0,0 +1,68 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/8eae465f9a28e71d02c7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/8eae465f9a28e71d02c7.json new file mode 100644 index 0000000000000000000000000000000000000000..8de11dbf80a1a6d418046375014b5513319660ac --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/8eae465f9a28e71d02c7.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/94b3194811206ba2227e.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/94b3194811206ba2227e.json new file mode 100644 index 0000000000000000000000000000000000000000..7367d771b8d81f46465f9fcceb4033b65f0014a3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/94b3194811206ba2227e.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json new file mode 100644 index 0000000000000000000000000000000000000000..77fe8af0483de1817d078cf7b80ce5fd70b271a8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/c6f1e5861bd12b93b78f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/c6f1e5861bd12b93b78f.json new file mode 100644 index 0000000000000000000000000000000000000000..c96487bd98a1691f50a0e6f672eca2e759a3f15b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/c6f1e5861bd12b93b78f.json @@ -0,0 +1,69 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d03410f237213137456b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d03410f237213137456b.json new file mode 100644 index 0000000000000000000000000000000000000000..8ea84900fd7cabf162f42aece5a3e83d35469d5b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d03410f237213137456b.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e15ce75e921fd9551605.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e15ce75e921fd9551605.json new file mode 100644 index 0000000000000000000000000000000000000000..de13124c18e8636d136067f187da9aeff330869f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e15ce75e921fd9551605.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f70dc623c263d5d225a1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f70dc623c263d5d225a1.json new file mode 100644 index 0000000000000000000000000000000000000000..48f487c1969b9da23a0ae7a6e6a6828aa4e3146d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f70dc623c263d5d225a1.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f98ea9d9fe79ee8c6c52.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f98ea9d9fe79ee8c6c52.json new file mode 100644 index 0000000000000000000000000000000000000000..940e949b585e01c64143ee2e12dc426948fac121 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f98ea9d9fe79ee8c6c52.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json new file mode 100644 index 0000000000000000000000000000000000000000..8517d13d2422c5aad6b8aed073178cd6c423b361 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json new file mode 100644 index 0000000000000000000000000000000000000000..14482d4628cdb44db3c81944fb20ac91309c5ed4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/a112f725c89793c1c195.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/2ff0e00d254551dc9f8f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/2ff0e00d254551dc9f8f.json new file mode 100644 index 0000000000000000000000000000000000000000..a69ece6b27cf2be2588c9f48109b6e728d3e82fb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/2ff0e00d254551dc9f8f.json @@ -0,0 +1,68 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/73ff888b74fe5e8b202b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/73ff888b74fe5e8b202b.json new file mode 100644 index 0000000000000000000000000000000000000000..6a8a08b93fc137337363428f3572508d33ed6f53 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/73ff888b74fe5e8b202b.json @@ -0,0 +1,70 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/94c6857ebcf2f229024b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/94c6857ebcf2f229024b.json new file mode 100644 index 0000000000000000000000000000000000000000..59ff6c25090b5f71fad0d98f000dd6f3df1a6a54 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/94c6857ebcf2f229024b.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/9c4d80cc33f2dbb825ac.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/9c4d80cc33f2dbb825ac.json new file mode 100644 index 0000000000000000000000000000000000000000..b77c0e6b27d799ab2177a0f6f1a90b11ef32868c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/9c4d80cc33f2dbb825ac.json @@ -0,0 +1,67 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/e55bcb903e51e66312d2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/e55bcb903e51e66312d2.json new file mode 100644 index 0000000000000000000000000000000000000000..73aa64fdc3947161e335ea82234213dd56fd5e60 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/e55bcb903e51e66312d2.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json new file mode 100644 index 0000000000000000000000000000000000000000..cd532485caeb751c5b621708323662038216c604 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/9a2b918af52c9bfa3d18.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json new file mode 100644 index 0000000000000000000000000000000000000000..d032e0ed068694ff383f074bbb31e3f10590d43d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/phi3/yujiepan/phi-4-tiny-random/ec6c870f3d2f7c1e202f.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B-Instruct/b82f205ce639617b2486.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B-Instruct/b82f205ce639617b2486.json new file mode 100644 index 0000000000000000000000000000000000000000..d0d0f0a8cc154f2112e46ce943a454da7de7a979 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B-Instruct/b82f205ce639617b2486.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B-Instruct", + "checkpoint_revision": "7ae557604adf67be50417f59c2c2f167def9a775", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float32", + "tp_degree": 24 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/1001cb40169f602f416d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/1001cb40169f602f416d.json new file mode 100644 index 0000000000000000000000000000000000000000..61c9f1c5ddabffbce762c1eafd1c6a0d3a6d2353 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/1001cb40169f602f416d.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/149443e088fd21e13be1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/149443e088fd21e13be1.json new file mode 100644 index 0000000000000000000000000000000000000000..0c311a975a55173bb8b1652fc9bcd21e85921db4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/149443e088fd21e13be1.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/61bfd675ca3b6db5c3e1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/61bfd675ca3b6db5c3e1.json new file mode 100644 index 0000000000000000000000000000000000000000..791d9e44cea03b0bf62b18706c8b70325af4f226 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/61bfd675ca3b6db5c3e1.json @@ -0,0 +1,84 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/69772d57962d7e14e1de.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/69772d57962d7e14e1de.json new file mode 100644 index 0000000000000000000000000000000000000000..69457250ec6d7db3941c34b81883abeacdc71995 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/69772d57962d7e14e1de.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/70af726a0b0d8d549022.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/70af726a0b0d8d549022.json new file mode 100644 index 0000000000000000000000000000000000000000..b9b4eea993604882391d32ea8987269dfc621581 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/70af726a0b0d8d549022.json @@ -0,0 +1,89 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/7cc4248409d33bf1789c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/7cc4248409d33bf1789c.json new file mode 100644 index 0000000000000000000000000000000000000000..85621b4687c7e1d558db49d458f2e0bef8baaf8e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/7cc4248409d33bf1789c.json @@ -0,0 +1,88 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/86b00083a46fd91feecf.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/86b00083a46fd91feecf.json new file mode 100644 index 0000000000000000000000000000000000000000..563e4a9830d9c74b57d9f5a9fe959fd4b7e61673 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/86b00083a46fd91feecf.json @@ -0,0 +1,90 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/9f5467601278d23fa26c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/9f5467601278d23fa26c.json new file mode 100644 index 0000000000000000000000000000000000000000..3777edb03fe31b5d9bc596f3486a7ac66280ad42 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/9f5467601278d23fa26c.json @@ -0,0 +1,93 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/ccb2fd63bba05f247c75.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/ccb2fd63bba05f247c75.json new file mode 100644 index 0000000000000000000000000000000000000000..6a2dfc888c983ae40440fbded4027878245a7c1c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/ccb2fd63bba05f247c75.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/db688c6b524b86487e7f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/db688c6b524b86487e7f.json new file mode 100644 index 0000000000000000000000000000000000000000..e1379fb375cd318d522f08ccda395dff286be42f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/db688c6b524b86487e7f.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/f672420258867c52f0e9.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/f672420258867c52f0e9.json new file mode 100644 index 0000000000000000000000000000000000000000..c040d1c242353807e702afb04aeeb20cdc58f88c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/Qwen/Qwen2.5-0.5B/f672420258867c52f0e9.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json new file mode 100644 index 0000000000000000000000000000000000000000..ae9da4d5a952d707a1b2494323293162086a5ac1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/92c163c890a351c20ef2.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json new file mode 100644 index 0000000000000000000000000000000000000000..813de212ad71ec43d9f08e50877d26cea0fc100a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/c40c1f41852a249bf072.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/28d961eb1ce9c8c0cb77.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/28d961eb1ce9c8c0cb77.json new file mode 100644 index 0000000000000000000000000000000000000000..238057b2db759f843590afb20974de5b8ca95ace --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/28d961eb1ce9c8c0cb77.json @@ -0,0 +1,95 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/74e47bd5c123873a87bb.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/74e47bd5c123873a87bb.json new file mode 100644 index 0000000000000000000000000000000000000000..e8f84a16e0d7bbcccd27ae39782a2dab23df4db3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/74e47bd5c123873a87bb.json @@ -0,0 +1,96 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/97a156beffd6dba83aac.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/97a156beffd6dba83aac.json new file mode 100644 index 0000000000000000000000000000000000000000..a3089cf2c27ef1a14feaedefe689652e5218a53c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/97a156beffd6dba83aac.json @@ -0,0 +1,92 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/e2e6df4e0d01752ba5dc.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/e2e6df4e0d01752ba5dc.json new file mode 100644 index 0000000000000000000000000000000000000000..5b5a4ac587eded3163c40830a9d24bcc7f636830 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/e2e6df4e0d01752ba5dc.json @@ -0,0 +1,89 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/f0a00c1fd3f5965be402.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/f0a00c1fd3f5965be402.json new file mode 100644 index 0000000000000000000000000000000000000000..26cf3cf68e7cfca6cadc253d12d954baa46c9f7e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3/Qwen/Qwen3-0.6B/f0a00c1fd3f5965be402.json @@ -0,0 +1,93 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-0.6B", + "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json new file mode 100644 index 0000000000000000000000000000000000000000..48f54fdeaff316647d40a8cc9e63ed9e515af356 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/1e59d16658e0e31e411c.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json new file mode 100644 index 0000000000000000000000000000000000000000..0aac5df9b0d624ffb646a23d3208023c41daa49b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/398949106549ec3188cd.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2463293535fc22b31c01.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2463293535fc22b31c01.json new file mode 100644 index 0000000000000000000000000000000000000000..de83488628fadad260c95a4b3dca01185101a541 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2463293535fc22b31c01.json @@ -0,0 +1,142 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2c82c86db68352b22fbc.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2c82c86db68352b22fbc.json new file mode 100644 index 0000000000000000000000000000000000000000..6d54e00360460874633c377c489e03f2913cadcc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/2c82c86db68352b22fbc.json @@ -0,0 +1,139 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/3a9131ad50b2885c33f7.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/3a9131ad50b2885c33f7.json new file mode 100644 index 0000000000000000000000000000000000000000..4df2c5f90ee0e9faf178e566c83966f8e5e6c66e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/3a9131ad50b2885c33f7.json @@ -0,0 +1,136 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/7cf30abbc0e808430a49.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/7cf30abbc0e808430a49.json new file mode 100644 index 0000000000000000000000000000000000000000..87901f64d1afa08c3cfb6bfce15aadb54c38ca09 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/7cf30abbc0e808430a49.json @@ -0,0 +1,143 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "qkv_kernel_enabled": false, + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/b1b32acc3589021da626.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/b1b32acc3589021da626.json new file mode 100644 index 0000000000000000000000000000000000000000..3e2bd7cce8c23cd61b9802f7910a1a8a8bfa7448 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/smollm3/HuggingFaceTB/SmolLM3-3B/b1b32acc3589021da626.json @@ -0,0 +1,140 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev5", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/818a7a532a97af6efe8a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/818a7a532a97af6efe8a.json new file mode 100644 index 0000000000000000000000000000000000000000..86780ecaa3f2eed8a31a9df702aa9c33cd03c4a4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/818a7a532a97af6efe8a.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4042e0b5aa52d546c37f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4042e0b5aa52d546c37f.json new file mode 100644 index 0000000000000000000000000000000000000000..d60cd86b29dc25baf1e22efcfff7374cdb0cae54 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4042e0b5aa52d546c37f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4bd7f8e0c01deb46ba79.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4bd7f8e0c01deb46ba79.json new file mode 100644 index 0000000000000000000000000000000000000000..9c576642fead90c0d394d3c4ef01b5f9245a2256 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/4bd7f8e0c01deb46ba79.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/5882f2cc50223918592f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/5882f2cc50223918592f.json new file mode 100644 index 0000000000000000000000000000000000000000..2ac2f637e64b880de6b08414d2a4901639d78c1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/5882f2cc50223918592f.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "n_active_tokens": 128, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 128, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/a70dd044fd7d561d10b9.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/a70dd044fd7d561d10b9.json new file mode 100644 index 0000000000000000000000000000000000000000..43a737a60c4631ff1ac61ac91617464903cdf023 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/a70dd044fd7d561d10b9.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "n_active_tokens": 512, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 512, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 1 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/aac31f1235c58084cfb4.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/aac31f1235c58084cfb4.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cebf8d9a629f2e4eb9ad61c939b5e983658b0d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/llamafactory/tiny-random-Llama-3/aac31f1235c58084cfb4.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/15750e03022435b7ad98.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/15750e03022435b7ad98.json new file mode 100644 index 0000000000000000000000000000000000000000..9d60b2c3391a20857fa3b6d7829a94655d563833 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/15750e03022435b7ad98.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/3cdfc62041f90379cae4.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/3cdfc62041f90379cae4.json new file mode 100644 index 0000000000000000000000000000000000000000..20524e8f0e04e114e521dfc0a9c407b210b71592 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/3cdfc62041f90379cae4.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/4ffce14c3e4b5833461b.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/4ffce14c3e4b5833461b.json new file mode 100644 index 0000000000000000000000000000000000000000..bb1907ef36dcda2ed746ab01c93da32e31054a1b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama/unsloth/Llama-3.2-1B-Instruct/4ffce14c3e4b5833461b.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/53100a81701818c9be88.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/53100a81701818c9be88.json new file mode 100644 index 0000000000000000000000000000000000000000..1f04ded78d7c6424dcd43f445726fb47da0302b7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/53100a81701818c9be88.json @@ -0,0 +1,221 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c958eda2360716f886e4.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c958eda2360716f886e4.json new file mode 100644 index 0000000000000000000000000000000000000000..0136042f9dd3484ebd68c867f612c44cd726b085 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c958eda2360716f886e4.json @@ -0,0 +1,221 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/585b1c34b9a47c8c0a73.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/585b1c34b9a47c8c0a73.json new file mode 100644 index 0000000000000000000000000000000000000000..40a2fcb344d9c4c7ec8da973f3b6df1f2f903285 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/tiny-random/llama-4/585b1c34b9a47c8c0a73.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/5f857d9157904414fa94.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/5f857d9157904414fa94.json new file mode 100644 index 0000000000000000000000000000000000000000..7b8966b58df25e84cfa06f13ecf1fc7ce244d86b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/mixtral/dacorvo/Mixtral-tiny/5f857d9157904414fa94.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/0a7bdba3ae715b1104b0.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/0a7bdba3ae715b1104b0.json new file mode 100644 index 0000000000000000000000000000000000000000..05fa40a980ed03cd18b7cde964ae5ea197536a06 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/phi3/yujiepan/phi-4-tiny-random/0a7bdba3ae715b1104b0.json @@ -0,0 +1,60 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/2480bb1f2c8c4a3a4264.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/2480bb1f2c8c4a3a4264.json new file mode 100644 index 0000000000000000000000000000000000000000..595ff7cfbc7f926910e34f1f9e880e6877369f5d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/2480bb1f2c8c4a3a4264.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/73f0e7212f3cdfc3cc65.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/73f0e7212f3cdfc3cc65.json new file mode 100644 index 0000000000000000000000000000000000000000..9c554bdb4d521a7a27d029717cdeb341fa4e526a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/73f0e7212f3cdfc3cc65.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/f2b9d1e266275db9dc7c.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/f2b9d1e266275db9dc7c.json new file mode 100644 index 0000000000000000000000000000000000000000..135bf9979c026cd5f533ca70f9566efb77b907d3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/Qwen/Qwen2.5-0.5B/f2b9d1e266275db9dc7c.json @@ -0,0 +1,83 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/e271d5a60f8605a46324.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/e271d5a60f8605a46324.json new file mode 100644 index 0000000000000000000000000000000000000000..cfdc2a5cdb2e81d8f9288dcb7d4183ff7d357f67 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen2/yujiepan/qwen2.5-128k-tiny-random/e271d5a60f8605a46324.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/2cb35a1a6e81559d0dc1.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/2cb35a1a6e81559d0dc1.json new file mode 100644 index 0000000000000000000000000000000000000000..b5b261a178941bd3fcf025bc7a9a5fdfaae1e621 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/2cb35a1a6e81559d0dc1.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.hlo_module.pb index ff2add9228e02876f3420c76e3459481645507f4..782b82bf9a4a4ebf93378bc32956f4660440a70a 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4b6d089df781e3ab0dfe911812cbb200f2dc1d358cc2d49abd01f7222601b25 +oid sha256:52e2ac447c53f64b1d6c2ffa89ba1c66587cb89fa087c23a7f9e266d3953fca4 size 497603 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.neff index af08051da20c68315921439b2e9724d9fbe8630e..fd9e391547b485b90e4741b8eb08a9113ef01a14 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9cdaa16cfce3a4b868994f3a379ad2cc60becae6b12accc37ba0c6e7b5b89ad +oid sha256:7d38dfb588e27e16a260ab31ca8eff6551c9655002dbe77ebf52baf8f9396af8 size 31345664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..53a9d64f53d0249d49fc89061208951aeb04d676 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7dae1ac13e262e1a7f9472b6a31e792e9c78b1214495916d9e83666910cb83 +size 567258 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7ac84ec41a091402fff380bd0e60fa9d8922ed8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6d4d1dc08dd89019640186d5368b1a308f0bd285a59f3f2e2a97e2bfc50b89 +size 14961664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e823b548ff4a25e5aa182a3bfa868a05cc5eb44 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c63a63d4f561b4fc6ea1377d3430f999ebe0df743b56092d0c3af2dee81cc1 +size 81847 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a8e8989c4a42dae3594cae5cd35447898653bc12 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ccfee2ee8f6b88953e50ad3be95f533c690497ca155745a86d03acf0c844b5 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d74e0273370c25beb8c5a2ae29249a7bde7c7a49 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754dac85dc7c9ebbf2b62320d22d0a7bda6c83ce940930516311115bf0dca9cb +size 74753 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e2b899182aac2484da441b64207a2d47a56e5a27 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132cf771557e2993484f5217ea9b98f08e244adfd78a5a5d0b09f86bfe2b7d9d +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0872920689da856579af29c259aed4fe69d1352d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25906d7457ccddc5ebffe556434461a7e222ecf7fd71cb060099e05cd9f90ba2 +size 316031 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..51723992d01807c9bc97d9b00d82c9c8ef974b94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beff4a7ab7f70afbb24a85c88ce24e5bc5cfae6de236e3f9686176defedd5222 +size 81016 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b9b9fed467f794f14cc7c8b744794e221262f21 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc275f95550b0e1be60dc7ed4bbc81233eab5133b3b0af0f576bae41fda91887 +size 297984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b0b587cdc952b98ba9c0d4e77d4fc6442fe857e0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62347f65c836de46507f6affd655cd1f15290142541de20acc56feac1fcef987 +size 308436 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..39c2c734c0c9a1cf027e392c557504b6f5a34ff2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a42e5d8e63342a8132a9098c06cceb024a7467006f87a370d54b32c22639217 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f89153b1953a4299dfe4f0ec2ce50fb52e5fab05 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10118825694329555156+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e50431e6e3ec3e6317acc7e994afa9cf40ee1210 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86311f9f0a00aedc439bf6b1a7e72130115305331deaa6d802b2852f929f1b25 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19703429879a7f3bc5d59e7b5079982adb34d518 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10149811978412508127+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7fcef3c41c0d6edeb3832dd09ad315e34d1c3b05 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3772c9b53cf2c833d0d1c07c90e6b645617324394ad76cd3936137a547c93adf +size 1144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ec3109636bd5d62ae10022a8f5031cda60cfbf0 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10449065838075029877+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e03ec74b4e69de91c04658dca659380a83905b03 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d7647959b5592dd250b2a9f5c68330b6f6bca38bd34f9fb881d2958e03f881 +size 83591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b35d7b683f8a0782b0ab61cd27129d580f5af38 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e76aff9b7791f1118cf7da468310c32943d4a8a828a53d19775178041d8deb +size 707584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bc7dba47c95bebece02d0288cd8935858c400394 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae138f6fc3e7b0cb4fdf2453c4cd8579723d427253d4f54a890b46ff7e1df6a +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e2d87dbe4210d24e144091592315aa159015a48c Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1054760819228113117+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..262465c49748bb91f8bc2cacb35de9ccb0cce94c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48cdf21fbdf4fdcb05b1a22e95aee811e40354d8145c54198b487140629ca097 +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d28fe3a295751f72cdec335e8a2040ecc21a2f35 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10665707039599941067+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3bbccb1c0369f1c21fb06543a454b91e3c62a475 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac431d89ac5d753c6c7948e2d74f830fde1dab9bf7d7cc89d1878ad7a29689a +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd31b5d2291ed05782d345c505f4be131938aeea Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_10855235522929658999+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccd2f94f5984921811e989fbb95e83d9518803ce --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3e5d8a329342c24bbba2d50035e229e31a2376adb622342e9d86956cd7408a +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7a66ae2692072c7206a3a84b8050a92ca6be1be Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1090664928465068038+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4ddaad102731278721b6f783da50c45d424260f1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a896dad3fa2c0fa6a2db99ab90c4b5edba1ae9ccb09ee00f41e99c511b14541 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da7c433129dedcbc0a0c53169537632b5e9d0c4e Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11202137614579807680+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2cf74a4202b1ae1308ad55ccd45438c72e8c1749 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092b80157d518351dfb8d5dd0c0b1dde38d23e902bee644d29f7666f5d3f4892 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1fc47775867e1ec9a550692914354285691c624f Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11277182249750227945+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4edd5d9a069278486ab901ecd35a433607127fe1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b2fbc4b8ef776e0b5bc8f2a97a91e3775a1451e3b54eec5a1bcb8b37460595 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..25bd96ef0478d5500baf6d1a039c638093bb681a Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11434367459596307593+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b102a1d0de592826766b33432a3aa79162be57f1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3428f502a287da892ae564e12a0364d236abb2b133c6a7d79b419f19582c424 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..27e2704fef8d14dd88b7a4326e6cf9a713b1948c Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11620898142083431840+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78b7516c43435cb9705f91c72e2d3fcb300b3da8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e7bb1a2ae698c9cb728cbc2763b153dd03fa2b5747fbfb418d4989c502a885 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4ee52cb351bf48393554fffb5e143662e5add8ed Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11819691844502964612+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bba865dbec7e95ea91468c2d5a97c49df528617 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50844827e4f02bbc6f1a8f5cced9c8cb36e9855929bdc3fd3d5d5b1491ff2fee +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0df361c37c9d06191ca712fd756bc16a90ac3e27 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_11949607074455821922+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e512d9b350f71d61580af9624c8e618457f80f6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dac28e3f7ebe7579d25eedc525ab122ad6859dcd448c798929190baa40b7b67 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b7cb0d7bb4fa52abf86ba0d64e3198d02cef0df4 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12291785342055497735+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..86186414e79e4a5c24b2629e86e20de265e29b46 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a3873aec9c3d0922d30abefbf62110abe7dc6b32896dbcd91c23420428ece2 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..76ab8878677379e8a1ec80e65f3563c7acc09a2c Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_12858209690589865209+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..906bc6541e7964254a83672ac6149f3c99f7e406 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab3f9c95ee2facf77840c5a7d904d47fa5a46a3357a9d58ac551123c76dcd8f +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c34c852675a9a8faf47c344c2361e882bb3fe2e2 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13298849504190979618+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f283430fe535f609f836db5e2010a8f5441f690b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc8ff919893cccee58620b063b799fbf3548c59f0c1d6840e4e1391d9c31a5e +size 2861 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7cff05637a8bf7e97027d0ec918592ef01bd11a6 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13708481322180332252+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5b665fd26512856f41d9ff5f4c28cdc774168db2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead735220105699b2334a57958d25734e9bd86cb5ba67dad31e66e9c3ab5d204 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..124407dbd7717c8483a52240ee469262f15ed4ba Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13837596457087810475+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..62e96717329b0f35eba8936a3baf1c3da9e38f0e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f6ad81d3bb99d5b3f0fde132e512790135e2299ae8666dd544405693fd15df +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..71e44888e6a1f6c7c6caefacd65ac4a30a37e8d5 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_13856630914759037234+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcb949f8c35940d8f604c37d029a7419395cb738 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719f8aa3529c519d75245493496aa89e47dc178486a6333dd3d93a573d444f9d +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4854186dd84ba90e3bad6922f35c0d6ecd3cb072 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15261586862706909689+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a1fb038ac2a04f0a549497dca45b236bf444fb28 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7b797e842693cce6c65cbe434bd9d3e09bf967a3623fff8b14ba5a1bfb0590 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4558c5215051e79dc812d7ff44b036622655d2fc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15294796227089169965+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14aa67860cc05058197eb0d397a115fb67ef92b6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8910c0e31207508f5190f2b1bada1d933456fe8cba5adae89ee1c0f6fb8ab34 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ac153364ade2c08a03377bbb97c0dd230d974890 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15383404534416318704+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1f615ae21bde0e83462d58e60e93cc7036fa4311 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b453d9412b470017be7adc71575130eacf5a7c8277a6b4424e0751ceecaf33df +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9946abee35ad2701c02c5ae84ce0958dd7798ecc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15704081915593163079+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..509e50c2b377d4012d29dc2dcbcc87f7a2562cd9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6011200a05558b8d896f11050898479276a557413676de70326fa9810afbacd4 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2e858e6059eb6212f6cb9b24d7756d6abfdaa9b Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1587399457649203969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4788a09773007b0b2bb5281caa239cd71647d43b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11732d15caa2f5c6a00fa8bd3618ffb07a86a7a6fb22ebc991e3510ddc16e02e +size 581174 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..41566e2c859bea3ba39f7e7aca1fbffacf70102e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4033a05b6dd4f9ba42d199d7dfd153a2130faaa7ba4483d2f63aba23b7a82fb6 +size 15094784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9287944d40128c1ec671204681b511e75f25b60 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e0bc5a3c7d16f319499949a174246d04450f8cc2080d819fa3aa4bceecd678 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..35284706e21dbfe2cd9996cf69edd1085a914182 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16774151007614265487+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7f57ca2ba84f1cb98fa7fa791dc6ac57e243838 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cfc19f1b309d6a8d5805d5f4d2996e55655c88bed387674b3a369598c40518c +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3c6d10388cd293fcdb7a28d6f0203b0f30699e6c Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16963083935060167100+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6f597d61e71506d05a43bbc7f786e50e1f8b90e3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a1066be3228bbe47e1228ad7b3defbc6877df254c70d9c3772ed60075b1a15 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f84af9cd830acb23b46d165c8edd23fde1e412b1 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17127091050305702058+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..77d5f743792bb4c45c37d075ca8d2819322eba9f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe848e2380679bb760f4392c5e43fc5915960275d98ec31e95b0d48cf433369 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93f81fb08d08b3ea9c47cbc6316bb477cb54bf90 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1729176184418632850+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cac2f593364a0d848768cef979d0ec266d748289 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a6eb22f0b96f5eed0d5990710ddf13d86bc77a63c7b69828d7056ee66fb0cb +size 498526 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7df1ff631b0b36acee82808c5456fc219bb4a96a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f992ce1ac61902555c28228adf7e398266d423bb9a7bf80e89e5529ed4b9bb7b +size 4250624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7d1c4f6767a01ae80f4d152cc28352723ca9dfb8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7752379c4a6681e6d736b9466fc50ed1167744b11c729dc3ad44fdee1021a38 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c433638ec9803cd6d76284d78bb934668e185b9 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17313106528817084385+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cedcae31cd2e003e183f6c87d0d439039ef8b01e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d761528ce6886a8b55d0c5ea938e8bedc2b6e7a1632afaae7f26549f37e571 +size 1369 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ef08330fb1f7895036af3f36fc914f96e447ac87 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17369777196536189124+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d1a6a25662366245e230217e58aec1ec02b33934 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:928fcca6423e3e233e6fec41fe75a3628dbeba9b4abc2fef942b0976f42880d7 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..236572121e67db8368bb787f2c4dba95d8d8a1fc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17428083197400802541+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c4111cbd1e8e50de49dda93d637f2c5f0b943eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c211fc5277964b7bbc67d57ccdf857ac3e17cb508c34045db4b2935ed8aabaf +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8f8ae5147fa9d8cf7b30f6b3103c3a04c622d2a0 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17557006053795692186+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7dd9349c009c754bd318a617a382b45a148a8fbf --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c172e3fd432c1b7bbc9a605173a704ae4851f588357c1ac13919d74cfbfb7c +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de6431698b400435af16ff0d69168be10f003454 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17563204255823942492+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71eace50b412cccce65fe73ba7507243777c5a1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d265c14a4e669b74aaa8e5d9578247f2de04d1d5210e6a981c95368c8c87a026 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a799099075a086d4cb10b1af0bb95d65f7a3fa2e Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17663242194127898969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d560aa7836266076df3bf4ce364a30646e0838a3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4243920bcc2c2a24a52dfa8987ab9473deb4d78a4670a61445d473982e64db7b +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d7357512ccf5dd500a4626d09f4e2e27bbdb570 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_17820036763001301806+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5cbe3dba6566c3bebbbee6980d17a189ee78ab42 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bcafe3bd7cb039293067c828a6cf4bdf13c979e1a00b27bea63fed4f037178 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b6d1123082c030f0e2cd70ddef09fde015aea58 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18065335321178308811+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab5ff6e160a511c3acbe0fe420597dee642311e1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293d0ac96beb67db77da03ff32a557fca6828445dcca222378824d2306810592 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7f3765ae5c5ab9ca205c91ed9b41946c56041c5 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_18132732170084255670+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a8b628f067282995d1808d9eb01cdea8c5f46db8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5d3b10dccea6a1eec02de6e69bac32eb5630d91f157e89a094cf17435c8def +size 75228 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..264426fc6dba1cc128e904344e0e2c8a2a0c60eb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64697e47cf65f8690d1768429dcb3f27a59dc878533117db2d62aa3ae3e9cea +size 287744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5a5028970cbcad7ffa3271b7be2dfc2267e44f33 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2315923d55bce245c9bb23f44f0eca66311860cc0118984556c17ec9c9dec093 +size 295551 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b24bdafabe78078d997f7762fc81e9ee28330bf0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c66551e445c34b0e221ecb2b2b7303796c4e0eb8ca338b4c297bebe314fed43 +size 69881 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d405efcca707df47ebc2e3b4766cb7d9193f2fd8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876faeaea6b0587c3ff8663d3114f70d0484e607ab88e735a275ad01c223cbc9 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d53df88607849568ff47fb5188b7a42dd02b03db --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ebfa9813b63ed97dbe6925f3ba4e5a94ea93bd0ad008070dc32430f87d9bd72 +size 247874 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3168ffbdbf1762cd60c4ce40e547f04290f350f5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab014cbfaedd6071dcaa1fca0d83ce2f63df59982e5dfabe27f4e74caadd4179 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ed1b22e505daee2fe09fbb4721a2d061ab347e15 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2019660303511744503+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..29070f34e6acb8001ae6132eb85d14c6e5aeb256 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe33969857c35b10d949a92acee4f59fd0a6b47fea748c3c54e7eeb8a3953d6e +size 573043 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d6700cbe2142e1394c66a77f9acff74a2d70f8cc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a9c2de0feb308d68e3f6223c99901a93c1c18b245887597848193718bb0ee8 +size 1444864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6c301fb8ca67daa9e27d48473ee9568642050bb8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f21f00ed2b5e9d2659f12c7ae9501d7306ae67f3ae20e37c1149ca452b179da +size 1589890 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7696047ef5a6a0a1fcdc117db4fe2ca5fcbab181 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f545ae7d060322f5989444c0528b27ec2689a53b32d9d11cd53376fa63d3a8e1 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..53494d0e64df4873f756fe655ab06328996ecf86 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_212973925893596393+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..db398d5c9a94bb4ca339a847669d6a4ab5af55ca --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b09a1b5945c04edd50d854098e0642f2885f6f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cfa8e742c4893766c3b63b6971e73469c20ed6ed959007a32f73a5d6e66751 +size 81550 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log new file mode 100644 index 0000000000000000000000000000000000000000..45f9851f4cf89dc04c7fa261dc4ed280a0fa5c86 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+4f4b0bdf/model.log @@ -0,0 +1,116 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.hlo_module.pb', '--output', '/tmp/nxd_model/token_generation_model/_tp0_bk0/model.MODULE_2180fcda61d340fd5708+4f4b0bdf.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2--vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt', '--enable-internal-neff-wrapper', '--verbose=35']: usage: neuronx-cc [-h] [--compatible-mode | --no-compatible-mode] [--disable-expensive-checks | --no-disable-expensive-checks] + [--fp16-bilinear-upsampling | --no-fp16-bilinear-upsampling] [--tensor-no-opt-pass [TENSOR_NO_OPT_PASS]] [--set-tensors-no-opt [SET_TENSORS_NO_OPT]] + [--model-specific-opt [MODEL_SPECIFIC_OPT]] [--statebuffer-scratch-size-in-bytes [STATEBUFFER_SCRATCH_SIZE_IN_BYTES]] [--target-mm-vec-size [TARGET_MM_VEC_SIZE]] + [--disable-global-redundant-load-elimination | --no-disable-global-redundant-load-elimination] [--avoid-loop-reduce | --no-avoid-loop-reduce] + [--disable-partition-vectorization | --no-disable-partition-vectorization] + [--disable-tiling-of-non-overlapping-mem-access | --no-disable-tiling-of-non-overlapping-mem-access] [--weight-coalescing-threshold [WEIGHT_COALESCING_THRESHOLD]] + [--static-weights | --no-static-weights] [--tensor-layout-p-order [TENSOR_LAYOUT_P_ORDER]] [--tensor-layout-b-order [TENSOR_LAYOUT_B_ORDER]] + [--tensor-layout-f-order [TENSOR_LAYOUT_F_ORDER]] [--fp32-cast [FP32_CAST]] [--enable-replication | --no-enable-replication] + [--use-inferentia-hwm | --no-use-inferentia-hwm] [--hbm-scratchpad-page-size-in-bytes [HBM_SCRATCHPAD_PAGE_SIZE_IN_BYTES]] + [--enable-tensorized-spiller | --no-enable-tensorized-spiller] [--disable-spill-free-kernels | --no-disable-spill-free-kernels] + [--enable-smt-allocator | --no-enable-smt-allocator] [--nki-manual-allocation | --no-nki-manual-allocation] [--enable-softmax-kernel | --no-enable-softmax-kernel] + [--softmax-division-delay | --no-softmax-division-delay] [--accumulate-on-alu-dtype | --no-accumulate-on-alu-dtype] + [--enable-shard-axis-verifier | --no-enable-shard-axis-verifier] [--non-local-tripcount-threshold [NON_LOCAL_TRIPCOUNT_THRESHOLD]] + [--force-non-local-tensors [FORCE_NON_LOCAL_TENSORS]] [--force-concat-to-non-local | --no-force-concat-to-non-local] + [--force-all-matmult-input-non-local | --no-force-all-matmult-input-non-local] [--large-1d-tensor-threshold [LARGE_1D_TENSOR_THRESHOLD]] [--dump-after [DUMP_AFTER]] + [--dump-path [DUMP_PATH]] [--dump-files | --no-dump-files] [--save-weights | --no-save-weights] [--dump-nki | --no-dump-nki] + [--auto-reduce-crash | --no-auto-reduce-crash] [--debug-mode | --no-debug-mode] [--profile-pass [PROFILE_PASS]] [--rollback-pass [ROLLBACK_PASS]] + [--skip-pass [SKIP_PASS]] [--debug-pass [DEBUG_PASS]] [--max-prefetch-size-in-bytes [MAX_PREFETCH_SIZE_IN_BYTES]] + [--max-indirect-dma-prefetch-size-in-bytes [MAX_INDIRECT_DMA_PREFETCH_SIZE_IN_BYTES]] [--max-statebuffer-tile-size-in-bytes [MAX_STATEBUFFER_TILE_SIZE_IN_BYTES]] + [--max-computation-tile-size [MAX_COMPUTATION_TILE_SIZE]] [--max-local-tensor-tile-size-in-bytes [MAX_LOCAL_TENSOR_TILE_SIZE_IN_BYTES]] + [--max-prefetch-buffer-size-in-bytes [MAX_PREFETCH_BUFFER_SIZE_IN_BYTES]] [--enable-trivial-dmacopy-transpose | --no-enable-trivial-dmacopy-transpose] + [--enable-dmacopy-transpose | --no-enable-dmacopy-transpose] [--target-arithmetic-intensity [TARGET_ARITHMETIC_INTENSITY]] + [--disable-experimental-addr-calc | --no-disable-experimental-addr-calc] [--pool-buffer-size [POOL_BUFFER_SIZE]] [--disable-new-scatter | --no-disable-new-scatter] + [--enable-stream-transpose | --no-enable-stream-transpose] [--enable-transpose-reduce | --no-enable-transpose-reduce] + [--enable-transpose-batchnormstats2 | --no-enable-transpose-batchnormstats2] [--force-transpose-batchnormstats2 | --no-force-transpose-batchnormstats2] + [--mm-transpose-type [MM_TRANSPOSE_TYPE]] [--enable-fp32-mm-transpose | --no-enable-fp32-mm-transpose] [--disable-dma-cast | --no-disable-dma-cast] + [--enable-8bit-tensorcopy-cast | --no-enable-8bit-tensorcopy-cast] [--min-allreduce-tile-size-in-byte [MIN_ALLREDUCE_TILE_SIZE_IN_BYTE]] + [--min-allgather-tile-size-in-byte [MIN_ALLGATHER_TILE_SIZE_IN_BYTE]] [--max-inflight-allreduce [MAX_INFLIGHT_ALLREDUCE]] + [--max-dma-access-free-depth [MAX_DMA_ACCESS_FREE_DEPTH]] [--dve-bn-stats-paritition-max-elements [DVE_BN_STATS_PARITITION_MAX_ELEMENTS]] + [--max-batch-norm-reduction-size [MAX_BATCH_NORM_REDUCTION_SIZE]] [--spmd | --no-spmd] [--prioritize-minimize-transpose | --no-prioritize-minimize-transpose] + [--enable-ccop-compute-overlap | --no-enable-ccop-compute-overlap] [--enable-fine-grained-ccop-compute-overlap | --no-enable-fine-grained-ccop-compute-overlap] + [--fine-grained-ccop-compute-channels-per-ccop [FINE_GRAINED_CCOP_COMPUTE_CHANNELS_PER_CCOP]] + [--enable-dse-after-mask-propagation | --no-enable-dse-after-mask-propagation] [--enable-dge-on-io-dma | --no-enable-dge-on-io-dma] + [--enable-dge-on-spill-reload-dma | --no-enable-dge-on-spill-reload-dma] [--enable-dge-on-indirect-dma | --no-enable-dge-on-indirect-dma] + [--enable-dge-on-vector-indirect-dma | --no-enable-dge-on-vector-indirect-dma] [--enable-dge-on-dst-reduce | --no-enable-dge-on-dst-reduce] + [--enable-scalar-dge-vectorization | --no-enable-scalar-dge-vectorization] [--enable-dram-to-dram-transpose | --no-enable-dram-to-dram-transpose] + [--run-pg-layout-and-tiling | --no-run-pg-layout-and-tiling] [--disable-delinearize-io-tensors | --no-disable-delinearize-io-tensors] + [--delinearize-tensor-maximum-rank [DELINEARIZE_TENSOR_MAXIMUM_RANK]] [--delinearize-min-dim-size [DELINEARIZE_MIN_DIM_SIZE]] + [--delinearize-maximum-loop-depth [DELINEARIZE_MAXIMUM_LOOP_DEPTH]] [--big-tensor-threshold-one-d-memcpy [BIG_TENSOR_THRESHOLD_ONE_D_MEMCPY]] + [--disable-degraded-fusion | --no-disable-degraded-fusion] [--disable-tensor-op-io-reshape | --no-disable-tensor-op-io-reshape] + [--disable-non-compatible-tensor-op-io-reshape | --no-disable-non-compatible-tensor-op-io-reshape] [--dont-delinearize-tensor | --no-dont-delinearize-tensor] + [--disable-single-row-matmult | --no-disable-single-row-matmult] [--disable-single-column-matmult | --no-disable-single-column-matmult] + [--enable-penguin-mac-count | --no-enable-penguin-mac-count] [--min-tc-threshold [MIN_TC_THRESHOLD]] + [--disable-dropout-pattern-match | --no-disable-dropout-pattern-match] [--set-dropout-rate-as-keep | --no-set-dropout-rate-as-keep] + [--enable-advanced-delinearization | --no-enable-advanced-delinearization] [--keep-rng-tensor-op | --no-keep-rng-tensor-op] + [--big-tensor-threshold-one-d [BIG_TENSOR_THRESHOLD_ONE_D]] [--bir-json-version [BIR_JSON_VERSION]] [--dump-ccop-axes-group-graph | --no-dump-ccop-axes-group-graph] + [--cnn-training-model | --no-cnn-training-model] [--enable-all-reduce-axes-as-par | --no-enable-all-reduce-axes-as-par] + [--enable-pag-based-layout-analysis | --no-enable-pag-based-layout-analysis] [--enable-tiling-visualization | --no-enable-tiling-visualization] + [--enable-edge-dump | --no-enable-edge-dump] [--override-pg-tile-size [OVERRIDE_PG_TILE_SIZE]] [--enable-p-to-pp-broadcast | --no-enable-p-to-pp-broadcast] + [--partial-loop-fusion-max-iter [PARTIAL_LOOP_FUSION_MAX_ITER]] [--cast-to-round | --no-cast-to-round] [--keep-remat-dma-transpose | --no-keep-remat-dma-transpose] + [--disable-lower-transpose-to-shuffle | --no-disable-lower-transpose-to-shuffle] [--disable-bitcasted-transpose | --no-disable-bitcasted-transpose] + [--enable-bitcasted-transpose-all | --no-enable-bitcasted-transpose-all] [--enable-saturation-convert | --no-enable-saturation-convert] + [--max-tiling-permutation [MAX_TILING_PERMUTATION]] [--loop-order-heuristic [LOOP_ORDER_HEURISTIC]] [--disable-max-stride-tiling | --no-disable-max-stride-tiling] + [--flatten-single-column-dma | --no-flatten-single-column-dma] [--keep-builtins [KEEP_BUILTINS]] [--experimental-gpsimd-library [EXPERIMENTAL_GPSIMD_LIBRARY]] + [--internal_dynamic_dma_scratch_size_per_partition [INTERNAL_DYNAMIC_DMA_SCRATCH_SIZE_PER_PARTITION]] + [--internal-allow-rmsnorm-cascaded-reduce | --no-internal-allow-rmsnorm-cascaded-reduce] [--softmax-epsilon [SOFTMAX_EPSILON]] + [--max-dma-duplication [MAX_DMA_DUPLICATION]] [--max-weight-rewrite-permutation [MAX_WEIGHT_REWRITE_PERMUTATION]] + [--log-tiling-bottleneck-info | --no-log-tiling-bottleneck-info] [--inst-count-limit [INST_COUNT_LIMIT]] [--macro-instance-limit [MACRO_INSTANCE_LIMIT]] + [--always-transpose | --no-always-transpose] [--enable-prefetch-block-tensors | --no-enable-prefetch-block-tensors] + [--max-dma-legalization-permutation [MAX_DMA_LEGALIZATION_PERMUTATION]] [--disable-vectorize-dge-dma | --vectorize-dge-dma] + [--eager-tkg-vectorize-dma | --no-eager-tkg-vectorize-dma] [--no-fine-grained-cc-spill | --no-no-fine-grained-cc-spill] + [--layout-complexity-warning-threshold [LAYOUT_COMPLEXITY_WARNING_THRESHOLD]] [--partition const dim candidate threshold [PARTITION CONST DIM CANDIDATE THRESHOLD]] + [--run-layout-viewer | --no-run-layout-viewer] [--non-local-num-loadstores-threshold [NON_LOCAL_NUM_LOADSTORES_THRESHOLD]] + [--disable-degraded-flatten-axes | --no-disable-degraded-flatten-axes] [--use-accurate-reduce-cost-model | --no-use-accurate-reduce-cost-model] + [--visualize-detailed-pag-graph | --no-visualize-detailed-pag-graph] [--visualize-simplified-pag-graph | --no-visualize-simplified-pag-graph] + [--visualize-undecided-cc-graph | --no-visualize-undecided-cc-graph] [--disable-prefer-par-on-non-broadcast | --no-disable-prefer-par-on-non-broadcast] + [--cycle-based-layout-solution-size-threshold [CYCLE_BASED_LAYOUT_SOLUTION_SIZE_THRESHOLD]] + [--split-ucc-tensor-size-threshold-in-bytes [SPLIT_UCC_TENSOR_SIZE_THRESHOLD_IN_BYTES]] [--minimum-legal-par-tripcount [MINIMUM_LEGAL_PAR_TRIPCOUNT]] + [--operator-fution-split-ratio [OPERATOR_FUTION_SPLIT_RATIO]] [--keep-tensor-names | --no-keep-tensor-names] [--show-scalar-values | --no-show-scalar-values] + [--one-tensor-per-line | --no-one-tensor-per-line] [--no-ssa-style | --no-no-ssa-style] [--no-collapse-like-dims | --no-no-collapse-like-dims] + [--keep-offloaded-mem-intrinsics | --no-keep-offloaded-mem-intrinsics] [--no-color-terminal | --no-no-color-terminal] + [--dump-sharding-decision-graph | --no-dump-sharding-decision-graph] [--shard-axes [SHARD_AXES]] + [--experimental-sharding-propagation | --no-experimental-sharding-propagation] [--mem-bound-ratio-for-mm-sharding [MEM_BOUND_RATIO_FOR_MM_SHARDING]] + [--enable-lower-shard-axis-before-fusion | --no-enable-lower-shard-axis-before-fusion] [--enable-nki-attention-kernel | --no-enable-nki-attention-kernel] + [--enable-software-pipelining | --no-enable-software-pipelining] [--internal-lnc-pad-sendrecv | --no-internal-lnc-pad-sendrecv] + [--enable-send-recv-cce | --no-enable-send-recv-cce] [--use-ilp-layout-search | --no-use-ilp-layout-search] + [--set-nki-shard-on-producer-consumer | --no-set-nki-shard-on-producer-consumer] + [--insert-offloaded-transpose-dma-free-threshold [INSERT_OFFLOADED_TRANSPOSE_DMA_FREE_THRESHOLD]] [--enable-cast-in-select | --no-enable-cast-in-select] + [--delinear-contract-dim | --no-delinear-contract-dim] [--vectorize-partitions | --no-vectorize-partitions] + [--internal-disable-double-row-gen3 | --no-internal-disable-double-row-gen3] [--internal-autotune | --no-internal-autotune] + [--internal-autotune-config [INTERNAL_AUTOTUNE_CONFIG]] [--internal-autotune-subprocess [INTERNAL_AUTOTUNE_SUBPROCESS]] + [--internal-autotune-extraction-process [INTERNAL_AUTOTUNE_EXTRACTION_PROCESS]] [--tf-dma-size-in-bytes [TF_DMA_SIZE_IN_BYTES]] + [--tf-low-memory-pressure-threshold [TF_LOW_MEMORY_PRESSURE_THRESHOLD]] [--enable-isl-in-injective-check | --no-enable-isl-in-injective-check] + [--enable-symbolic-memory-pressure-estimation-tf | --no-enable-symbolic-memory-pressure-estimation-tf] + [--allow-ccrank-axis-tritium-fusion | --no-allow-ccrank-axis-tritium-fusion] + [--internal-autotune-tritium-use-more-tripcounts | --no-internal-autotune-tritium-use-more-tripcounts] + [--internal-autotune-tritium-only-with-id [INTERNAL_AUTOTUNE_TRITIUM_ONLY_WITH_ID]] [--vectorize-strided-dma | --no-vectorize-strided-dma] + [--profile-smt | --no-profile-smt] [--number-of-devices [NUMBER_OF_DEVICES]] [--cc-pipeline-tiling-factor [CC_PIPELINE_TILING_FACTOR]] + [--no-cc-pipeline-tiling-for-fsdp | --no-no-cc-pipeline-tiling-for-fsdp] [--cc-pipeline-tiling-for-fsdp-only | --no-cc-pipeline-tiling-for-fsdp-only] + [--experimental-convolution-kernel-match | --no-experimental-convolution-kernel-match] [--disable-inline-cast | --no-disable-inline-cast] + [--disable-affine-select | --no-disable-affine-select] [--profile-memory-pressure | --no-profile-memory-pressure] + [--report-n-lowest-utilization [REPORT_N_LOWEST_UTILIZATION]] [--vectorize-direct-dma | --no-vectorize-direct-dma] + [--log-top-n-latency-dmas [LOG_TOP_N_LATENCY_DMAS]] [--low-psum-usage-threshold [LOW_PSUM_USAGE_THRESHOLD]] + [--warn-parallelism-threshold [WARN_PARALLELISM_THRESHOLD]] [--disable-square-matmul | --no-disable-square-matmul] + [--disable-vector-transpose | --no-disable-vector-transpose] [--disable-software-replication | --no-disable-software-replication] + [--internal-disable-fma-on-ios | --no-internal-disable-fma-on-ios] [--nki-dl | --no-nki-dl] [--disable-tiling-allreduce | --no-disable-tiling-allreduce] + [--annotate-no-spill-hint | --no-annotate-no-spill-hint] [--print-nki | --no-print-nki] [--nki-debug-mode | --no-nki-debug-mode] + [--ccop-bucketing | --no-ccop-bucketing] [--fp32-cast-input-tensors | --no-fp32-cast-input-tensors] [--enable-tritium-loopfusion | --no-enable-tritium-loopfusion] + [--enable-ternary-fission | --no-enable-ternary-fission] [--disable-insert-implicit-shard-axis | --no-disable-insert-implicit-shard-axis] + [--enable-hoist-wlo-all-gather | --no-enable-hoist-wlo-all-gather] [--enable-hoist-fsdp-collectives | --no-enable-hoist-fsdp-collectives] + [--disable-concat-delinearizer | --no-disable-concat-delinearizer] [--enable-aliasing-dependency-verifier | --no-enable-aliasing-dependency-verifier] + [--enable-must-alias-to-iobuffer | --no-enable-must-alias-to-iobuffer] [--disable-partition-locality-tiling | --no-disable-partition-locality-tiling] + [--enable-memory-pressure-driven-loop-fusion | --no-enable-memory-pressure-driven-loop-fusion] [--legalize-tensor-tensor-op | --no-legalize-tensor-tensor-op] + [--layout-transform-heuristic [LAYOUT_TRANSFORM_HEURISTIC]] [--disable-bir-codegen-loadstore | --no-disable-bir-codegen-loadstore] + [--dump-tensorizer-bir-json | --no-dump-tensorizer-bir-json] [--disable-rank-id-rewriting | --no-disable-rank-id-rewriting] + [--vectorization-size [VECTORIZATION_SIZE]] [--atol [ATOL]] [--rtol [RTOL]] [--save-locals | --no-save-locals] + [--no-simplify-before-simulation | --no-no-simplify-before-simulation] [--correct-precision-mode | --no-correct-precision-mode] + [--dont-verify-after-all | --no-dont-verify-after-all] [--disable-debug-info-dump | --no-disable-debug-info-dump] [--run-pass-list [RUN_PASS_LIST]] + [--dump-pass-list [DUMP_PASS_LIST]] [--dump-pass-list-and-exit | --no-dump-pass-list-and-exit] [--print-stats | --no-print-stats] + [--run-simulator-after [RUN_SIMULATOR_AFTER]] [--enable-peephole-inst-combine | --no-enable-peephole-inst-combine] + [--enable-repartitioning | --no-enable-repartitioning] [--no-ccop-barrier | --no-no-ccop-barrier] + [--enable-iobuffer-to-must-alias | --no-enable-iobuffer-to-must-alias] [--custom-script [CUSTOM_SCRIPT]] [--enable-bir-converter [ENABLE_BIR_CONVERTER]] + [--custom-compute [CUSTOM_COMPUTE]] [--enable-bircodegen-unroll [ENABLE_BIRCODEGEN_UNROLL]] [--fuse-param-to-neff | --no-fuse-param-to-neff] + [--only-compile-subgraph [ONLY_COMPILE_SUBGRAPH]] [--model-type-transformer | --no-model-type-transformer] [--model-type-cnn-training | --no-model-type-cnn-training] + [--distribution-type-llm-training | --no-distribution-type-llm-training] [--num-neuroncores-per-sengine [NUM_NEURONCORES_PER_SENGINE]] +neuronx-cc: error: argument --cc-pipeline-tiling-factor: invalid int value: '2--vectorize-strided-dma' diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..631c21bd4f2b45ad54395106e6d57bb16abec40b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72e93d823b706748e43edc828dece2fb7b6a98a58e5bbfff127ff47325c1884 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74a019caea04b4042e61ca278a808edb49558ce9 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_219872634856243073+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b9c0683022c9822f27eeadc90ce9a23cd9fdb909 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73521137a0627d1cbcf3276af2044ea2e025b43384d5ba149c1ee9f28e06ae23 +size 88353 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f50b2cf11504f78ce82f970fe655ca35ee86d92a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66d4b1a0323238c3a55ab0dc7f54b938f398ac7b79ed19aed77d248df0ddc12 +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d357aff629b30b4aa881a7a0cb6691b3d9b46f14 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e65fb7f1a7ade84bee7263415a7c03f3ea40431bd2e36430b339a378445c4d +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9323b34ac91dcbd57fea02913c4c4a0864422405 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2213217598519524012+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b2c7302cf528c77276ef717f2e1674456569c85 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f66cf7db42deebdd2e40a2d8ffe21cb944546844f0da4d13c53d3cc1b9a5d9 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e9c1846b3eef86959da95c349278d28653186e3 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_22971814024490892+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a297a9b52f229a9d9ced2a6a4f747d96301ab5dd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc4ab0cc5bb46d9eae97062bb084835dd03a2cc877999bbadd1a338e38874e2 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d10d381d0ef1ad8b021a54e1b1a9e9dd0dce3fd Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2312007082139657764+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1d8e0cf5e2571668f7d7cee05675933afe117313 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a7f9c47700367d81a831d44663a9ac8f3dc98eceed9c60caf63caddab83be0 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..09a35f290e199cc84c5a3e3146d2786653688f60 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2629259027206010953+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..99527613aac38a9ccf298d52516de8e3f952863d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4991138b51beb424eda5df96a2eacae9bd7006b90f569b37a2ade355cfede5d0 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd4df58aed1b16573ea72ae9a274e8b6da6a906d Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_28619913331587313+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..66da87c89d00861f29b565c285a8c6e930f2efac --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793f2df54b945f68bc082b1e306b1a2fe648fa0e3da939c30cfad4a53718b44f +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cbbe79363f9dfae450bddc13bd6de98c49b62918 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2891572966407268505+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/model.neff index 99d05194538b98a7645a8d4b22f39aca365734cc..f0bca55acaf985f48688bc71b117eb04eb39fe6e 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e91897d7e182db481db20a167fa26449d101b3447eebe99c4f8aea88b14b0a93 +oid sha256:d696c6e1c6a435e974c340b8aeea0e33de2b79717cca902b329b8af43cde4048 size 4977664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/wrapped_neff.hlo index b05a40e5c9d905fd633bd247a2218d2c5a2f69d4..046b08a6d73ae052c79d5cf0b87c42ac4bd2b8a3 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_29c699a35eb0b8dda4d5+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa0f0cdfd33f14b3d65e28e1a97bd48a3fe3ab5964f83e8e92dd6251b1e9163c +oid sha256:16b0dc07f8cdc682944017bd5092bef88f0434e6d2ebe457ac93e7c81be5774e size 5134205 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.hlo_module.pb index 53e6c1ea9eb6ba3979f5354d81db9ec6306c9754..b1301e1978bb740fc8c68e603fe2b8bcf2ddb281 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bad8cce0aef7eefd85d29624a01be2a07ce8aa67e20dacbd17a14186ea0daef +oid sha256:547b24ae1b39d13b71277a5fc16204490d540a8e25c8f73e779a7a808a3dd1fa size 430182 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.neff index df700ca3b3956e06e0cb7c5cf2004db51fa1581f..0a5662c36a60aef8d1162db7fa2059d6b9e1344b 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c4be9f40c1db625d9c6dfa4d66746efdcb10ec00260f6623a94a751b5a16d3e +oid sha256:47a57a75086bc877641d3bb7f6c2c96061248289c618c5e500ede12e0ee9d836 size 31120384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c823fa111edc95abe8679fbc97086cc3fe23ead6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d664bbe9f4d888b6b4e31c0bc005106534ce8920ba7889965d36cb9c4c07107 +size 406153 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6d496175438ea4924676a5211093a48282ee0000 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5756af92e739bad8c1539c160e52d359edacec6e90b98c44a8b8e946017fcf +size 23604224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ff70e52348a3e6e3f090e7da0f720a10d3a9355 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e68080a74eeee69a4cca47ffb58ac94d4475079ee317556d3c4985d658030a7 +size 52641 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..15afaa396aa5f5f7fce31d6894ab0e9447fa7497 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1faffbfaeb70a15963aaa15126906cc088054168366d15b32f56ff2235d63f96 +size 185344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f94150fe135004484c18b0f1fd8f652f905ff5e2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75310b79ede900f855f7d33db734190e3d8b1f8c38de1d620c7660449b8abde +size 195539 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e01c4291c9bd46d805c1ac8d99a7501923af1414 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed64af1de021c762ba16eda1c7ae28f24f316a384351f1f4aaba2e5c119d33c7 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa511f9c4bdb54a630aff9aa648eac0a252b88bb Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3010567314445873136+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d4b89f91e315545bef4f00317f4270b884f6297a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b48addb1bc3d932e8c31773aad6ad55403341cbb47e87889c773528ac3a0c7 +size 581174 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6b04876207d0894f1231fc25cd4af321bde63f86 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1709a073dd962ffd51f1deb4073bf181d321d8f33a8527cac577e57dbce4031e +size 2724864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..88d214ff1dc9f2e95ca012bddcf0bdf50fac0e8d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210ec47218d2f9175c88442e04911929a0d59767cd11e88a600b5f7456924061 +size 588374 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..061338531e68c94010ad760b8013636d2addeae9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea05809fef4aa7b0e9fa8b0f792d3213c9311a9d3596b550c30ad0dce35dfd07 +size 1209344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2cf45fd53d8cb7523cb2af22df69d9d4be5e5d7a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d516c0670b2ae7a13894debac59ce93272afced1dcfd99676a316f94837a5d5 +size 1354255 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f6abc0c89c3f16e78b3869f4813496b26c2b7f03 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abadfcaa9eedb2f185ed24c904b658ca490e28180dad7f310939be00405a3b78 +size 2861 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ce1bfca19713e24460a829bd4fc26f841a4c6e48 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3274027993637618360+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2e001b1e998ad609ac00a61566013878fa450772 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21cb4011c120f03305205b6cbd16aa9aaa3c4d3bb8d862fe6904606cbcaa652 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e38fb945f5ebb69e35b7254a4aaa1770a09e84e9 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3296693401617587065+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c8d892b5e1b7919d439c3785e8a20da90b763ba --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227368435c1e8ffe2b46e4b822f1332a212bf4b5c866ef534ea2b225694482c3 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd2dc03e15cf0bd3d9fa93a9bb5beb14c3844465 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3584083482496600619+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bfed03255b01b35299ebc637711a8d58c05ec462 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5918ed79a90b94d93d230fe9d9dece2ea3674361398f695e39ea12bfcb7708 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..be5b0ee1e3375299af6ade04ac833bdec572bd27 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3677608697266465166+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..52a840c9dce2ac86d66b1b83342314024acde28e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1de91ef640d5eb13a673dfdc9be9045617c4aaaedb9e477fd8814ca7018e94 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6de9517f4390ecbec781c554337dacb1a32a6e5 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3726318503065748382+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..096fc4363852567724093d958f6244ccbdd7bb10 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8799d2408fc1465ce0b6131489977ad21e63e23fb124c534cd3f2a643466a2c +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d017f25652d596572bc2827e6c4568878a4e57f Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3733582788604956194+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.hlo_module.pb index 3a937948727ebd33b6e71c467d98be772f9f4205..5ee8001d0129cf64929c75afa235cc6d8f47e115 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:004a842a6a8f26ded0c2a3c6975d382b36cecd0007fc63d53f39e61f0285c1e8 +oid sha256:15d80f68cffd4942710961a2986779fbc3d28a33f9945875fcd0d503d25c6d75 size 1037449 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.neff index 975ce27d16f059714def1027cd9e0bda00ae20f6..3a25633198e3f9243edb6f591c2193a1bafac275 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d2c3f962dd9545304d485267b9dec3f68a33752177089d19035c550496e9d99 +oid sha256:8965232110cbae756f3f06e1060c457108b9643e1e119a95cab1bb688e79f3b2 size 6626304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f843b77ce5aca3d08dddad1907ddb8fcfcdafeaa --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4cd8c9bb30b80bbae17c37817e07cd72864c7431ffe820db04cc90175ae6c79 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1f111324f84432a3ba54d04f0bf6ed600d5936ff Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3973195210839298171+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cfd0db6ced9555f4ed2586c5c4bbfe6f088a1528 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91462061b8dd8d78b36005464d30793234a3b8d0d65025605c7a747756879de4 +size 88814 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ce506297fdd0fb18f276fc7e12214ace7408615f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fccf83e1f68a07690baf4dba83c9edb5a4a0dcb4f9b868628e780b46605ac229 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4fe7e3baa2bf15ee32bd3598d83e71bd5ba05c3d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36538c0f0163c51c100fe984419b77741cb531f8398a3964395440434a82cad2 +size 63656 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2992d524d69c73d7d528b7c17fa44d9a08f03d4c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4f53713a26144868fa7cc36691ac32c154c015ab7fb14386dbb5f14e1aa435 +size 318464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..45a15c2b2d9db06c7f04d66cd9b4f2310b9ed477 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b93861fc70f22c6400c8523b59472675ee8994ba602ffd3ae398ddfae4fe189 +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..22a7de79a2bb5595d7a8014d3611c27aa4424b95 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4029365533398324907+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4ce37aac5f1fde5dfe97deacd2f2efcf2156bf06 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db4125f63f109313509b125a9e78c5ebf9f0f81584fd65adc5d09a5b3cb0e4c +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1eade5f76e45e97e5e6252cfdf10e02d22ee773e Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4102306456011547160+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7087c149ab10d3e6623e4141b6ec36b9d4b463f0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039e9d48f7f34906d77484e66a1bb4877dc8c917213d93a22dbd116de1187039 +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6970e65f20d81dea61e66fb2827d4bc167b81ca6 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_410348709543574224+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2959084c44ba40564baec63edeba3d000e03a90b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c60d97a0eac8ef405f96ada66e7ee30e756a3edeae863d715384e398a52542 +size 78361 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..8a105e5ff4a6c1344b5773d39b8b50ef1e0b7607 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_42997433666d65b8817e+ed72d204/model.log @@ -0,0 +1 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_42997433666d65b8817e+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_42997433666d65b8817e+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: 2025-09-04T14:21:48Z [XTP002] Number of instructions (7387296) is over the threshold (5000000). Tiling could potentially do a better job. - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ed1574fb23309367c7a33ef2b9dd26cef56326ef --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fd38fc8debaca681c6e8b774eed44b00d15ed3ae3a91a59bfbd7787ee2dfe0 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89e24a099fd00af11a6144b4d66d6c693fc8b7d8 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4475477153125485388+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0e99b73455431d4ffbc13231cd3aa086c125d5e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f845b504c0b19d058472e862c0b64990746c7410873d9b166dc87bbf767b086f +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5917b95c3d098694b420fee28f033963c57a283c Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4500629659777983660+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7a53155c3a843137a7789da264b465d33d564550 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb66bf82d28c84e4a5b045ecdf4b1020fcfe7a56c6081899742b495d7e5e81af +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c90c4b79fb2f8de55fdea0f2e029a59e6abb5bd Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4597534921574083181+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..784f0110ba100a90eeb593ff1f371b0082fb5213 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fd7987a874632f4273335e8533a6eb70e4723fe24ff54c239377415003ef49 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f2d95e05bf505b4bc03372c5d34c3295b1e8a00f Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4845384173285952555+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4be9c9441c70a6497381a14a66cf92d2a238a41c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c677c7bc44e425031add4e8c2ee5035a5d8e3bfa4a20b9206a9280479719d9f2 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..558b6e1843201702c01b1bfaf5f3d9d4028270bf Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_497689108418605143+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ee8fcd549dd62c0e5567ae6151b1245b771ac9ae --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a69d231302f1ace099640b628626bd426e927268210c9431a728eefc96c597d +size 77505 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1d8b3904e45054a8157db1857f950534015fe13c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b822e8a1392f8fbcacb0c80b92a975c2aa1f72426ce4d8be100569a4347107 +size 738304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8c5d44a75821f66c4d1e70779371aeaa7a70f1f0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efe5736caa991f079da38009260efd6e5c31311de7842556bd0d0445219dbaa +size 746143 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/model.neff index acb3c198a477b12cd675d865bb92ad6a5dd85727..cf1cbf30b6a89edd651cd279901e0545ae430391 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5625dfa5e600d8aef4ed293645867163112521fe828fc0e5856335ec28d4a4d +oid sha256:58a936ed98bc686ae01cd1153b9753578c84db9043f3fa8a00427c3dfdac1541 size 8827904 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/wrapped_neff.hlo index de540af2871da302b9f59ee3ca8a3057a26f294d..adec45a4b132092023d227244717d74368594cca 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:129017e8e9887bc15ef819527db3044b0bac79a328316a8764f69925a2ed1a46 +oid sha256:47691a6ce998b1727a0ba56f08b8984b9ec94b2eb6b2124dea4846a9bc9348b4 size 8965270 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4eb6512c8c76ac8737c6519403b66c20b91c275e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faad67f60f451d7be3dc1de8b30fb20285e1c8e09349dccee964d63daca6655c +size 1582 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9295dd9e73171cc011f67c61dff5b54b2ad0fe18 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_514904626320229970+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..77de4f4bd225419e1cceb41761e42d784794d0a4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604974ae0ba20745debcda04b0dbfb0cb379733289d5afcdddd9e5c7e110388b +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c263d6ec49aa16f7a202c70b5c0fc03bba902754 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5226137176673983632+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e8d3af0707b6329fc79d8c4ec9acf0691b2de059 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_05fa1c72-7802-464d-9e26-0892c34a0f54/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ab28b5000cf8134123b05e9a1074757177b51459 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_552d1a3105a0e273b50a+388b7e4b/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0bc51e5f7b01eb5992ce05e13f8fbe0e68baf913 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df936becd64dda0177d3f57cdc28f9f36c9b4ec17b364b1eeec0f2869710a0eb +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ff114ab0f0f3375e2d43aa154ffc40eeadbd798 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5533629969901078552+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec3e04f0ffa8bab5b7331e2d4b877f2031e844da --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d001874c1cfff56a7406c62ab384aea6aaa95dfde55a4fdfe5b23b9a21f01ed1 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01c3cd4401bba2b65bd48ad39c9c439981549124 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5562114832786762356+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..052b6e009a7e46218f341bc8c2c1e7ddcee4f23b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3882a8d688fef84e1e5f2e7a376a851fafb60860f697b26f47b109236267833a +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b2b633ef0188839994b6d60615c4a6ad94ce9cdc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5895476730067979013+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..806c1b44528c71e535b60fadd883a19d07953e23 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f04fd44d5c3c60b41d274465832a19736ef191f8bb0e11d61ffd319fc90dc5 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d219390357a114c67e8de70ea80b71355fad35f2 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6041836500170073779+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab2e7fd722ba30909b86ff27822e087e8bb1f7b2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7692c4fb4e0ccf0a62179640aa269c73fe18a81e8e6d2ce7259d3e2b0c120c +size 82265 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..583c5cfcd168decf63b00fd93e5f75cacc67b7ff --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e782164696f8783b28888fa812d4bfadf04ac5d14c7a08f16babaff3eb2014 +size 257024 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ea874d2cd8675252fca892feadb1b174efa66d78 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fe72a4dd31d497f1f07423ff84564d47c98c777313908d0a99609100ed0054 +size 264831 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f4d37cfd39c92551ae3db0b6c2384dafa78de904 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7669149cd154008a58452d032c92b6a5e39fb8724af1c68d7bb14043a0f69a +size 79837 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b9b13e1482d848ea91e8609a8d82054fe4376cbd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615385c54261c4e93035957bb1c78114fb679805c1a1e1ecb17155623f420af2 +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..505b50a9be39a77d4e3ec188c42a88c7b9b18f9f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4c64fc408ef56adc28ff4532af1f34d6161d90c0e7b040e133a46e5216155d +size 223802 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0f549bd30b89ae1398077ad99891f27c14c15c6e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352ab657e2f088fc7fb97e52d94260befdeb0c4e7827b7e4e2e661696a05f9f1 +size 104207293 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9294319ed90b5da452f4c0ec7c4d4e762a93b949 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63edb5697fe7d15cfecfd4fbc02a257c8c5b7778969bd81dd498e64221b1e039 +size 38892544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/model.neff index aef624e96780b8128a84328a86e64754bdf2ecff..cfab72abbbd4f636c20ec95531306f17cc606b5c 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2ee99ed969f1b132ca9cc586347645f2327b3c508bc60094ebbb5ecb4a229e9 +oid sha256:ad19c98c3e197f08503cf5b7f699cbc34d49fe85c2eb037db0d5910839f04be4 size 1434624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/wrapped_neff.hlo index d319aa149fafad6edb33d2db73c3ecbdbcf4b797..f6db22ced5efb7460d0894cc68803e016d0ec01b 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:539ad2a559b69dc9d11d0151d61aec1e60be16b36761018c710b2d6ac0eb002c +oid sha256:49c69b1f275e71c1fdb46255940ed202beb451f5843b221202dc57b2d924a214 size 1521594 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df3436874f0777f61809b4ddc3749d9eb2c007c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44d693804e155c576eec13b548f496b60d2bc58cb22de1c1e80a125686057da +size 86197 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..53bc4acf694c0ea250e560622958e26a6ab82abc --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ed2eef0c7edb1c76fe30285c3c9e8ae6a3e4213abc6417231c6836412277bc +size 349184 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbda7847f15c722522e0633fb35886058b796503 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20b03b5f808b1728a8ccbe18b35db71159daf9aab44dc54a201ddec4b5ce3e4 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..665e94ecd2eedcdc2f9ab264ecf860fa2e5801e3 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6808350649031706313+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..341f984b86cd0d9198450dfd971c77e4acd21fc4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7deabae600784856e767b65d057075165713745a080fc73088104d5e622ae7e +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..880e9f58ce9375b2691f9d2456b875ae5d265b13 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6819259119391355174+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6085784586e5c531d521096883b6df01c2dd7e88 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4941f7a4c2773e1e362c0926b970972364cc6a11fba521df1bd85f6be5c35886 +size 103130005 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b82921b88315425b06a9876e1dd89d67227a9b3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9909a5e54b3523d9adaa4e07bb3ca58d752bf840ab774ae32b9f61dfecc922b2 +size 7721984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..79845144dc34aadbe2d2b59cb649a6b052f684f1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96ddf0267cd407e73f8ac27a10f628bd6861ae452af4627fa616a624cbb99c3 +size 7996904 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d3cc7f3147159ac31c60890cc5feb13635e7f6ac --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f580b7a4fa38040a204b081a8509dbbdf2111fc26fc68dc3114d763744a711 +size 685704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2daef98d2d8bd53d1e67c89b59112ce8b3cded1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6630e7b79950067ee403a77b657665ae30401840c4372bf77ae9754c26438b7 +size 20849664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a078c3548bc0f03ae31ae83196df65bd7e392080 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8bfff8477709368ce31a7d33f42990e6a2b02331bea021ebb35636de81cc5e +size 588374 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0962cf8cadb8e758368adddb3d5f9eb1b3656cc8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bfb1795a540ec325918d8be5f6353b1ed9b72acec14786fbf4a5bd3e497694 +size 1444864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1a77c064d99dacdfbe7f11b3fd3bb0e9b1a83213 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbebe9538a508e045da52637fd9411c30a6712987f6551864b5977d9c860c5f0 +size 1589775 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d4c69632a75fc4f250328eb60993734b08f4c80 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df760532a487465927f6e3e04bc6493c7457718f0dffb3e3896e3c092c4050b +size 2861 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..711c5b85019914d620eb4d160a407382b1498c15 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_746058432585752165+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6da10c7d19e56bcbe9a2c06c2bcbff2fe71f666a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18454f3fd66c63f196e97f6d51c70a03d817c5adb05a90e131b763d7a39f3e6e +size 705694 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..311bb1b8511719351929ad02720e2c1681e43afb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7cbdc249ada77157539793ec5d2fbbdbef8f31417dd8503f1ea022f7b250ef8 +size 19631104 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07a875780732ce90dcfcccf4eec00bb91d737f62 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a0912a6557ce89d664bd248e9616169077869e62b88b1fc1bd091849d285be +size 1291 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e5a1e06fe9e208606fb927b234c84ce375accfc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7565122625112339535+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6593ae993bcc2575c0027d32a1a98ac92420b50 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17621d1c0828baa67b83082d43ae4fd71c5549e4b933e3543de0d35429366849 +size 82265 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d1e240bf9e3cb5cb2c0c595958dd9dc5a00e8d7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242ebec44f8cf33e2c44ffbafd1451402c81cdc182bc6f20a4a176ad98a3af7d +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b02f23e675eb52ec8a39e7ba2e256bd589dfd07f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0343be401c64cb2827fe18b6324bca0d81ad99f8b09e3c491f9bf7e97ef7d3 +size 254591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..262c1aa8943e6a2a3f9296735a32dbd09d1131f4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a2f12f6bccf30cc2abd7dbbc5b47b3b17004020bd177e6b4e76cbc51f1b577 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e856ef66ed0ee2567689bf77ff60d459e38ef49 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7619465509971901533+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df16b7896b8aa6a1d4656a8eb41451e57c927750 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7c111f2d45ec406435ee09555a8a6ffef6f268b057a8b803504caf1c9d3712 +size 60798 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..11896a872f7a6d87ac29fea80f99959a84b8ec1b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deef48730e90852a4f32f51f4cd98860bf6ec515c182de0c0d4a6c05aed62d47 +size 277504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b8e59ebcdeab97a0a0bb974a740fe679aee9302 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4826eb7ecb62de311b6673fc7eb91ab6a0e787590e7562a704a4b85a57cfd8 +size 286418 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2ecfa4f37eb969dc7b4c147656db597768d23e4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998dba5e3120e80b6651d5327fe758a558ed33d91945dcdb1d846d70cfec0915 +size 80969 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..046e01ca7a9b76578d21b0c3b778e6bd8abafe47 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655713ff2edb98a57641f5f0f6f2b33f1f50cfd6ee5e52d099cf7893f34a1c38 +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c4169c7a9a63acaafee70d76e3d484e282f53713 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa596c0a9f77d8680ca3227ae5069e643d3eb7d8cd8e6233c2c3646a33eb26d6 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67dbc35183a1cf223c19be7ad41c10066cd93052 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7731931779321626430+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b55bf771168625cafc7c7fd42e95db99e727884 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e21d06bff81aafa11991b58f80064dc5a04db71ebd537651d4d03d6a7b862b9 +size 88413 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e7a825ab6fc3b259cadd8bbb6b55cc859221ec5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cece69317980d8decf5a3c4ed9965738503f20f8988596601098e37fb3fffd +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..389dff17ec683e7b9d67f9d8b95fe745efd61b30 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946470c3c77ebf305123fcc64a1e2fd9f38b72d315246eb5275e317012edb555 +size 338613 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca050fd6083b747947f4fab389bfa606f05ee955 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34dd119851bcc69f8bd87d0c1020c78f802d74ce811982ccc0396969b924b7f7 +size 23512064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..306076a9bd36c3ea5fc0e515e72b3e5ac5234475 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ea22281fc2add77e3c3c6711d133bc46cf99b3ee28352feb135c440d5c9170 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e81989c12d6348bc2e5d21a385af226620842e69 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8121416822490248166+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc23fd3402c8a943a80f911e7a32db50aeb22f14 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e849d859bec3a78845d8d8edfb39d11296a9cdfa2632cfc46fceb0d4c17f2eed +size 107879818 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0d325c26a67423043949b2a3b82afe3979f37052 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9271d2c6a64d31824da4da9ab85b1f57769e801aa053e05b8b63109c5da7ab5f +size 42476544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db71d4e3106206a25053761dca771eac1b1131e4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19706837fa8f629355d388fc369e1998698d2c0f03aa9a93aebce4592bfd2715 +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1abf05d24ed9d94e0c13f80903b1bb189c0b29a1 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8410360898827170920+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.hlo_module.pb index 7891f4c9a2c75d1b782d9143d6b23b30bbec9dcc..a1a266c9d0f31ecd5a9e5f08e8ddb43834bfa43f 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ffeb8e9ece16ac41c4cf88706d49bf0b121145138f40c6107f5f96f3cb6a275 +oid sha256:97a37fa929a4e10a1de1ddba776b80db22cb13d5ac4565706dc0e73a706f4c97 size 847776 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.neff index 21921bff2561bdb6bbf49a3af3b97b0e63dbeac1..35fd902f5a055890c60156dc5729f1ca431d22e9 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:134d14dba6aae5338b4dbbc3b448710798a58d8b15010a09d424dfe2f7a911b5 +oid sha256:d805b88f127c5709846c5f7ebdb89770d1623f5a5e6016c3033a40eb3aaf934e size 27546624 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfb3f9c17cd0199cbef0b85ad4ad19190fdf01fb --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4672984557a6f98db9bc6b045a165de6201d8bdc3a5f28d5f23a500275add866 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fab50bc52b817727587654f34914b7ae09ceee41 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8730286359330870368+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a7e25eeb37121b478d999b750fbcd88e0bf27d24 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ead2842be9fad710a8dd2987593d6340a0f6bf739f2bb6bf829d1a65712b6 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0311dbc94d6d0f14e3d314508fe0f55d0624df98 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8882312230130555847+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1ae3fdfff2e5d4e617bfc90376a07f658b87060b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97438df91d4087b7d810d80603732d393604b9705b49b4adcceaeffcd4fb5800 +size 719452 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c427c5a34b89c4b57044daf5565c467196d42600 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1477221ab961818fa4293d5ebc1a25376bb914cd17d01b5c5979c4575aaaf7f4 +size 19835904 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5ddeed939f2f1ae57441a94835c1c3e372b31b0b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ee5278b859b36037969c475b41338dfd901468e1065d8fc4579aaf4c13e158 +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b9fd29fc8a8c1f310c8b8d9e6b8fb19293d19635 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_895360613944277627+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b7c399964b3ec27bcc58726b07bc431a2dd3a471 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d38232b18f13a6843f2572e1d5ebf9f675b587693f6af9893523e2a2c0e3e63 +size 55986 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bdc93df5a26530d93722510506e94cde951b3836 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04f368ee98a74d289c5725ef9e76b3b8204efb107e50754aa4c57e0b9f8ff0d +size 297984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0b7a92060e44418d0fdf405bbc746a3f993d27f6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75baec4f7e9174eda684e3943cad7e2176f45d2c9dfa3532a1c21fb68e24b48 +size 306898 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b17e8b0031fd8af682c7bfada4b3c1c4d8def6fe --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61843a6cac41be6a991c88265006855d123fe541334c7dcc87b2c28e127addda +size 81649 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..86908f3f32b8de45b8d425d61af82836717e7408 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cdbe3ed90c5ff1392117606b148e2eb5df617c19f79d3b8ea6f655734024a71 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d6b7d69716b60b6158b40193960e810f5241db30 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02b2b247292777264ae0659d1943a3d0f43a1eb646fe6775277833e08f10440 +size 254591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61087b466dbcbedcbcaf8690982037fe18c29eba --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7897935bc5758d372827d0e0c19b8df2763b52adeb53f436b100581b7f13249d +size 54371 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ffecf071a5253d8545a4880b1b13d20661cb453 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b220925e5a8ad257990da53a5da69fccccd356c282300ffa11384c37579b32e +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b89820c1796a4d732bb86fb3234c3fa7680fd3f4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c6665c365ab0da459979eae7bfa5988a54c62c5ef028861aba356060730803 +size 81649 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c07cbfe6bf6c3d829748b7df41918aec4c8792a6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b314a3f778679299f4635beae8dc5d1e98e1c03645cb4a0ffbdf873cdfcc44 +size 246784 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..860f67f0aa82849c21d5a911e63adde1258eb0f2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc38a0aedf3d54cf3f40cc6804246dcd6ecc6692a93226faabc0fbb1b238cc7f +size 254591 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf64c5fc9152ea734412ce437abf44113b64ff35 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a16435815f55018f92dc9158264d64baf957ff4ccf27672582a0afbda8eddb +size 581174 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..400852c162f883c71f01b21cfd320f55894c7a43 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ae6d3db9f395d9e5cd74a6fd5a96bc23c20049e829399934444e7d447a3d2a +size 1117184 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0fb4f5a0968de7f41688feb865adf3fd1c65cfa8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95ac4cb2f31241edb0bbc24e895d46e4b98131b4e55e5380122c86ce200b2f0 +size 1146 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98cd7787c5be4b7245df47d7c5dab3d9ec6e9f65 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9386326336670335099+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..76397ae93b0d532ddf1f928878800cf8d619e522 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0074a02de24121e5fe1490fdd84a2d1b473988d39bd4cc6d3b26f473af3657 +size 296661 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01e3f7eea93ae32057b1dc2e023ff8a9f1b80cc2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7b092bf39a335c2408a6eff7f3b0a3f7446f37fbc4b09fab06ba0d3acccc5b +size 24392704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..934b21876914fcdf9aab30fd43c5d2d458dab872 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3caca14697a55e9d583316a1e5009411087bc4c2c21ca1ba38bd1d90857ccbb4 +size 1585 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1fe7bade1fb53e8f7918dc8cd3b870ff8029528a Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_958184176017870682+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5a9a496595b5357604e6b6a2acc95580f996756e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada6b6503422ec5090ed0ed680e54be4e4f63bd6e02f8406896e0b3978536633 +size 665847 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7036a39c20bde38ea3787bd2c5df264ba7771066 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951c556e5bcbd4830b7d098213dee36b17d08bb83852f566c21134da5f20f4bd +size 21648384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e4e764c67b2b733f69ba66834bc26e694ab9473 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd42f55cc5c81b6313f96e55873674d2133e9043a79afb6e612b5556ccd1348 +size 90039 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aadf5a9f750f56ca0779fbfdd84f77a2b2da48e9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:599762ae4b0636a1734145c644de6d9b73a0b0fbe8ac978b2bb8b60e78a53d8e +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2306779787c7af0ffa2ca58209574c1b9f66270 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71e39c17a3411e18568c55b52651b53ab17e1d4e8b529b9ac921e9cee82761e +size 247185 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..faee3663d458c31d081f7e5b80571d489690737d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f6fbf505ffbb78c6468a746a6a6e231ec29ab87746a41d3578dabf2c74d649 +size 523884 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5a6fbd60e08c4c97138829942a240bdb6dbc8e5b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c12be1cc0eac518f0ba9ce2f3fa89541527a5c43800b051bde5e7da0c2c7a3 +size 17798144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..53106159b41032be7c47dbdc5a3c64a8376b0ba1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bfdc0760a79380ea1098fa4aed18a291ebcb6762914301413e21bf0d01ae0bf +size 81089 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96fc08cdd8288b538319c02e4ce83c17b7deb53e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea708693657b4495fe8c265dbfe08ff697b61c8937de5b2b0abd16b63663a0d +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6fd01a462989bdb0f9262cf9df66ee190b67e888 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db9875bfb45337adbb610dd67d5895463189bd817618cec70e55fa3a40c26a6 +size 1584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3c890d884c056a40b01de1310cc3b253959a3976 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9817560555747346786+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..640a81f6e36a80e7e418419a201f1be054ddc5ac --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_37cc541f-5422-47c2-bfcd-2c2315456b7d/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ea58ed387a4a9be184523e080dadc16aca6e43d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2921a248a0a6d5ac64fe039f2ab144f42edd9de9726386085ae64fa04a4abebb +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a23a7e653e986e39df5dc10e6e17f49231de607a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd3bb8d33675f04c83039615c88efb0b98876a94b4057c272d3ddf2fbde3eb98 +size 198230 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8fff47b5086ee5079a230a53eb41a92acac61499 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4068082d0b537a4ab9ca1b9bbb4463fd32fdcde75aab900c7ef7212ed55d98 +size 625621 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96025dc2c721db659d1380875a741ecbd6e43eb2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83363cfd17c5c2e282dcc95246a712b4efd20bdb2dff7820bad9cbf508e9a6d3 +size 1168384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..878eece4f8d62eaa3e6c0c74ae4428602b39db5a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec0bf25dd121c1fae30f00e47234de4dff684e44867a34c29fcea84ae6704e2 +size 1313410 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/model.neff index 5629b4356295d92253421403b140dc57b4e15466..58fddf08f26149feb57540bfb4b90f99030133a8 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ad71d8b6338ff2c14e47e9b22d8cfb8d91979355c7cf8427e6452fd95f370a2 +oid sha256:245bada10f4582fed1ceb499a5080110993224a0103d5ecae5a07f6863258d33 size 2233344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/wrapped_neff.hlo index 2559fe3b1db79fb84eae71fd231ba345d2f05441..157566be9c1b1509d132a3c49247b4807c32707f 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ef2a9d2c8684d1a2494eaf6e20339f67a87ce9717662b111f9434525ad99ef4 +oid sha256:d4c95c102a8729ed58b78094ae57d047361ccc53a7fc3b96336843930bd4a291 size 2303088 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6236c1914a83c71749472448f39484204a05f824 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224ae4658301bf1fe984f513469223007447321a4298e11617c7306a275f0221 +size 80455 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b03044eebabfb169be570f39486dadd377a2f945 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aba88a2f73d240b22d60db4385f4a56847faa84b25b258dbea541a3920f2e35 +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..289ffff77e7a6a026a5c2fa23c02001fd8b3572c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e84e9e9052add0c2e6ec077c69738ac0a387e69bdfe82c96ac1458386dac0c +size 234042 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c4524b604d502dbe676c2ccb3852a8945aa0756 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4225a0b484c5969d90e132b177d1a3c18ca6e81fa4f0f4e3009da5845a8a9278 +size 89589 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e575c98c7de3c4fb62242863ac0f7fafccc9101 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a7c903e2c000edc930e5c166c4c079a146d852bc044fb0ffddaa49b3a85402 +size 236544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..07acf91a252b4060bd7befeb050110864d6e584e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4c2279119f709d38ff739df0c2d9bfba73dd25c6f3511305180565fbc3e4b8 +size 247185 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b5b48885de03cb92ef95ca0e2d8b065a979ca57f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49db54c9f59a8ed22017758ba4c3885791e87ae22f6aea1a65af2d2fdef1e90 +size 581894 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..254004655177d848463d007923eb35b595989f72 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0207bc53613864486455ff22206b0a4f31d43ccbbc342dc4f56e2d9bb9154c +size 2724864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/model.neff index 62c08c0dcf65c3a657bc6761c5570bb4d2599e37..77112ea337b384246bc1cc5e465864ac7b3ac42c 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3943a35d4a79c83f1ae10ba16c0785b81d73494ad9f043f5c51ae1e6ccb160ea +oid sha256:b303572e10366024665d0c7b8a998379ecedfcf0daa4c490590e33e3858238e8 size 4998144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/wrapped_neff.hlo index 7d3ff5c29bcdcf392e80bd46f9e323f37b752710..50e0ff2876dd4a82dc79687f5f3483c3a528e5e3 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f06dfa5266a83fea6bc8becedc14e5393fd3765894bb33dcbb1381dc870cc22 +oid sha256:5af64a54fc92984175f1f6d3cf1aa627df4ec8abe31d4d3c033c61a8c9ab55a8 size 5171427 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d4e34e71c6046ef9853c1ec2d1795b858bccc7bd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a03cd0fd441692bbb65d7b13b869cf6feb1a37804c77d29866b9b458a2342a +size 213344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..05065ad9640205a0a739834ee8d9d5731ddead7f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135d137b79ddd84f519811620a99ae786a8c81b939fb614626c9d018761f1cea +size 420864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..80148f1d687144285b6b69f925994b0242431520 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2332baeb0d308febdb97fcb05ef1ad4e1192e9902603bb488b0381d1bf94380d +size 63776 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e4715ebbf1325b1f2a278f25831fdb122cac2f33 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215b39aafe9a55e761356d8ea81883bed7b6f85983535834c3564e379dfd694d +size 318464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c375a5c297a340fbefc2b7b38d8cec257920d29f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b41832549a6bc6b6ac34d56c008f42bee1769ebb42280684d6a18d4b14ab0d +size 529164 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..0100770fb1fccf7145e9dbb78987180c4de59875 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae633976c414e74f7634+ed72d204/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_ae633976c414e74f7634+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_ae633976c414e74f7634+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-09-04T15:05:14Z Non-signal exit. Backend exited with code 1 and stderr: [GCA022] DRAM usage for Internal DRAM tensor exceeds 16GB of device space limit, cannot fit into device, model requires too much HBM memory ! - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..570583495d9603016c378cf431b27e1c6374bdc9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb7af878e983eeaf53fe336828b65513b338117f32e6a785c58b4c35483c2c7 +size 86197 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb365009c059307b342165df8d7fef76f4fef468 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2fdb90306fdf97e0e55dc88c46263d346f49162a24c62d49d4d686a94de959 +size 287744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..705784f85650c65ff8d1c44a8e66b14b56a345c2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f3beaed47689b723e87507fc62eff03bc6babf99ebff995ce214d8f73eac33 +size 636354 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3a571f1bd26cba4919495fa47f35036477c9071 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26ef1c1e63da491f0e2d1480b95e0e2666d55ea047709aa20d737ad3132927d +size 431104 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c722cf9d1e081d2709c9cf81223a66083e828fb5 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586be70deca88721229e2ed7fe145b1a6f9d21b9ee7a9b02a7fe62f8a938adc3 +size 449015 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b75a1633e11cc79ab173+677eeb9d/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b75a1633e11cc79ab173+677eeb9d/model.neff index aca14539804f206fbd228a8b100b7570a87f900e..29bcacf6bf3f8f8e631e5aa30ea9b6766f0693c0 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b75a1633e11cc79ab173+677eeb9d/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_b75a1633e11cc79ab173+677eeb9d/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1f7416533727b1fa0d82acbb80380a8156a7a8e53fe897f87fb3a7d309410fe +oid sha256:0dea24cc079d5f17838a99d47df885b1511b36498593e63aa19a6510d978c264 size 2694144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..811cd75b95e5b25aae6238a1448cce087fb938b8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877e346ca065f48a69443e372919272a5b414ed86b8016778ba1db29779d5f18 +size 708377 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1e49d35e816c500ad0253954a8334746b7595a15 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178136b3ef4721e7580834b9a3ab31bc39cd644ef6e5aa6344aaedeafd5319d6 +size 7650304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..102fa38447e0b776961c4094f72e5d44fae026fe --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d5d1ad74b01a2e5e37ac67466bc1dd919fa7ff8a1eb7cc69f4e5b9e0e8041c +size 827469 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..47cbd2779233eeb8f30d90b913330481c0f5d12a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84eca67d2edc0ca75360a1494b7fc99f75ffb6a6b8d7347a9bf7f6b43fc63dd8 +size 10978304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..90ce6bb2170a67f757a040fbe0e9090a6cf5553f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0340ea9c9f3757f6701e2467620e00001e3bac281a5067da7491f5debf45c9a2 +size 514401 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..032339e3b493b2100dfcd178b734cd47449dde91 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dc15944d3e940ac9ba6a8536562a42eb50b3233c85ebd9bc81764817c53c2e +size 646144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d438eb86bb9838a96cdce4932b23e43b84f7ac6c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b38f7cb10576814869756d20f8e829cd01d2410c19c9926989559a9ba13f15 +size 780434 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e39362994c1679aa5165c7d5a6df2d8dec1b4c80 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15362c6d52e74b24ac0eb1eec6b9301b4c06a679a07d8f7d9c8c08ccde70eb94 +size 80739 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7c14608b0f8ccd6253325aa5ed50905410f6f63a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e68a76f10320ff5d42f050fe1c47771ba6cbacd17ac69f38f98df3726a3331a +size 216064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..16b339ba3687a755a0b13cdd5691f2dfba9fd212 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd62275e4f4cda20ab569c4cf4400e0cf00f54a14dcb048cee54fd347acfa501 +size 223802 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..34b9a2635cf4bce024582d7a44d1de8752be8896 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b9e8632d8c4814ca31e16553a0d0420a50add19d49466131c423a3605733f5 +size 381519 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4236e5f7e57a77c02223a7a866e519392aec43d1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7681e715dacfe94e1fc50eb60c4642052dc4563245fe1fb12c57efea0ee85d +size 4496384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cb6ef7b1205d1d19bf46+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cb6ef7b1205d1d19bf46+ed72d204/model.neff index 9c3abcd4b1116e32c1e8b91daaab7543c1bee18f..9db050ed48195749898ae0cad8685be0fc268329 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cb6ef7b1205d1d19bf46+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_cb6ef7b1205d1d19bf46+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4492649f3ff048845468a54a5c5784fef7a4dc5f56df87f811e1db00990653ba +oid sha256:a19b13f8bbad710539910031ea87d11b9923f862a5d17bc149fc0657894a76b9 size 31878144 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a879a5f4d029584a640f3b0b9e01472d5b3207f7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca069c4b82b5ec5bcda4470d4248f42760f3265cc8a8cf28d9e750e4bef5bc6 +size 582901 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5eb82857eb13c0020bf5dc9e4439865b3095cf8 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f42d47d44d96481748c731c4e3f74b21cc4c775d32b0cfcfe9e805134f247b +size 1147904 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..af07c852ecc0c77fadd10d9c316150d16a88dc57 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f61e4840df55936d5ba716f891116f24852d16aa6e85bd285e7b4b99a9a9037 +size 1292815 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5361c7217145b0b6f266d92c4775925ad7a77055 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40dee86f8546d2afe31775a13de5f73b8c014b99a04ee6708f21e668715008b +size 582901 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a0501dfbcddb1f182e54a20a14d1a2fd1266327 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed06cae0ac580b3e5cf99225769ab97a2b47611d3b1eca03dde45d3ff1f89f7 +size 1209344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..81f4a8730f2b6ee2efa4cb919fcc301b79369e94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63adabb8457539ec6b44286e7dbddc2c339f70531eec4653ed09612ed3e55ad +size 1354255 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..55f45c3a9863883a7fe6038b38e200d534136a49 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_21aaf1ef-8433-4296-aec1-8c76011195d2/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..58be1555d80c8c6cd79dd9505d6260bb1772e2c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c76bdb8b377803e1cd75e7006f8021eadc4675562936c50cbb4be6c7648f4cc +size 1096704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0919735f41c436aac37829b035450689e61a2c1e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986c0b70d2bf415feba62be729d104a13794be67c4cb653c733fc426dca5f95f +size 1099577 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b79fa720a111db9f3b7ba393bd90655d7ce41e20 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb9fd1d8c10870af816eeeb55b3cbd46ef931c5f8dda186d41c5c286c9f762d +size 574638 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c284eb28fb9f036fcac256ed27ab18e0f8859554 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbcd798f37abc6a8268a2a1f14eecaf3d19e56eb675b0598332b6732449a6cb8 +size 2274304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..34ea007f076e20228d4c35dbcaf81d9e789cd4d3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a344c50dd82aa23a12f89e8d87445a5b501098df08358b753193ede7b3ad9880 +size 2419673 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0d2ee3ee46bf15b601812f4712bf5a60de58923c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c077b146c093d1094b99be7d6233d2a6d03d77450a33172f6a69d844f35002 +size 665566 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..09a3c46ccb7a631ba4b7ccd46a1976a29a273b89 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a435429abb26d37524d78bf65172fd6ab2172cbe033df03f49aa8d073542eb +size 492544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..91eb5b470d3095d426b6ea9548fecd0d9bb4c161 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20c40a64710bd37c5dafcaa17195416b33bf361c0926c511a6c157106f56e657 +size 578515 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8eec58394bb37cbd1a4c3c196fc2b459ebab7ec9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6376ca6fb4233ef52b9ba16a63525458a28a39113643a6428e024b3e198d01fb +size 1444864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c86a717be1bb0ed8c7f4a43e5d7fc659e39ef102 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae053162739c06d034e10b5b8c093fe5361328f9817cc130d884d84301f50c4 +size 1589890 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..437c307e7f1d7194897bd0ea577862fac5f47750 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5a18d9d9-d482-48bc-afe3-652891d43a54/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b18d78a211a997c4ecc2401ccb0b3901feb12e2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5f5e1064fdb76a26a16986136d6a624bc088a8246ece8d51cb1029ea58ec10e +size 113664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a088eb7f8a8bb13d1bf2b2a7f8de917e0f97806e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7149d47a6240f4a48cef14bd8b964a41a5fff71250018d71e660358ab25b3299 +size 115643 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.hlo_module.pb index b8a9679d389721fe5e0b7e823b10ef0e15f4dcd5..2e8321f27f38e54cc06df334847b4ec87495997b 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faf230a29d7c5746f640a9b189af232ed164e806cb1accaefe12c42979cc1673 +oid sha256:eede4373b2454c24f7d226a16a1efa44391995aa3e7e7cbc8b4af8aec85bf412 size 900724 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.neff index f8c74c7c17b62eacfd5b43a3732e6406835bf44d..fde37c42837586b74e95787e6ddd852c98b72c7d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8988d792d050311cca8b32114e026df9b6bc8cfdb6c0bfe28ab8d2572da1744 +oid sha256:e8cbe181602df06148ab18da3141ae57a33c8fc4c36717b3f59637b742783ba2 size 5817344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0b7341c55e1cd01f8ccddb215e72aeb7f57d5c49 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309ecb7452fd67e7d5433e06b119fefb4ee796db045d68f2af040b4fd42bb2de +size 78947 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..581b20cd5e5cfb03df60afeb4ec53b06d8a5f1c1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e2047821839bdeda4aee878b88d856461ac175c50572229ae33a9e29800298 +size 513024 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b3c9589c0abe241d40f836ffebf2a3109a41296 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7744f2ef00b6491200a3a5c2d547b91933aecd109db7f32d64173c4a97b1168f +size 84746 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6fa5fb899229880f621acd3720e1a4440172214 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b1600273651746901f3e77aeecc3fd6b858e011b9ecb8cda8a0663bbb10940 +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c245e7208d2108a71e5bb6ca41b292deea77c933 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d80bb8d9f2fd92756c59f63ac64013cdc4907df7fe79274b2721a07f8dbda8 +size 203386 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83075f716a63635ea1e2996401e5dcd936e8c420 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f00d9c754221630c0756817a73d25093082e96eabcec1123942f4dd55ed5135 +size 85319 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..582b3488d883e1327d1b8e027d02b8138af25aef --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55af098df4713b79462e382fdd0a062bac1dbfa92b2d34f284717a8c752891e5 +size 656384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82a000aeb4b140eab5d71473b6b2bc88d42f0794 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c7c2fe70801778bea0059d8be2d176f6b09d9aa4d1f397501aa040c4aa784a +size 55086 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87811d5e9f627c8150a89f47f8f53e81d33633b7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e4a783e394c23eff12cdfb0ab2528d1dc5a5316372ab6855be9ff69aac6718 +size 297984 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f95449bb517172ab424c31a43efb67d32269bc46 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7b37dc069928dc2acf901981dc71daf9b77f17e73a157694ea7a9de6d5ebf7 +size 306898 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3f13dd478fc2937baa592ebd737cc3f0cbc463da --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b5b81c43e54e9946e228d28a61313aaa344d2e435c91369f39cfd724a935cc +size 74597 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dc2c07998e5594b32b5bf56faac25e72a2f04140 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5538a174ad41242dcc309f0cffb05f962d86f14575c30387abbb1209e4fe43 +size 308224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a84ef60cf10899aa251166dd7eb1d72c9c9ed23 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c94bbc7e5a7df25410746ca641181b447c82e7e6de4291a835d87afa5f865e +size 223215 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..21ba2343aa1dd4f33ad12442f15b3ba17b7031a1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008087529aa5bda5a1930323fe5d4e8fabc273cf06e89eb67c0a86662ad00913 +size 1260544 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fd50cd3da7de1eb5e77056dc4c1acab7b7a6a90d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a2a3faca-a868-42aa-9426-1c8d236b46dc/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ecb3d6aa6430d0a29ec49760036a192027e67486 Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2dcf8fe239901e50b1d1c294186ebc26334195bc Binary files /dev/null and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee6d087df2e1cce2446e+62cf4c7d/wrapped_neff.hlo differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6f141c7ac97a6571c2462df267e029fec19b1ac1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800cfe3d381295acb830bc4083b5769ecdb3c043145538ee5ddd03d245399621 +size 665127 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de01f91f79e072f9d68add77681f9555fd6c671e --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f3b87fe9c638da44b110ad50d28a8f4b2fc8088a227d62624392ad545e4d68 +size 21648384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07e548a46ceed8bc38b348b3b86fca6bf630a010 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f864a5f78da4217ef4473095af4e74c2b102f99f76eaac6eeac946980e7d5081 +size 84132 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c91f3858eaed7d93bd0822e9c6f81586d8c8582 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e7b4fb835f650ed3c0c391802124236278c0a48c38ca890d0d306421412297 +size 195584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..162aaf9b2ce3ddf13a8f21673456b24998270e2a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fb0c41b6d33e8847f0a04d94c222a606175a978c597cc07cfb8a2d6c5259c7 +size 203386 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d94ce1eb0d46dccf1a4a030a61cc6277b6c70ce4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad3a0a73d8c5a5e97b58c00f43a0e4cf8f51574498072c78d85d11b968dc65a +size 81847 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c4a9ad43bee6247a6aabfeafc998720f244162b --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955f16aeeb09758db2e7324e69bf73044bb67c40824f7cb04c5635a97d069418 +size 277504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8aa63e74d19e10951a30189d4eb26a9459b3e65 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaca6db5b2aaec16623396ec57a3d587e61e1c2ec39288b62860818841185db +size 84464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d8e57ae12a5c86c85836343073946935140ac875 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2edcc8193e72d555485823bcc1bf7d8ef5f25f6d860adf55807ff3c7c28a29 +size 226304 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.hlo_module.pb index 9f21b06d3e3a6a9b5ec4b90cc3e83077053e68ab..b2ab452d9ddbe7e941b15dea1c5143d6f2f8b1c2 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f348d7b362916cd5c9ae844f1512726d3a96dbc7698bcd652a48f7cd317ac87 +oid sha256:30865aac066f620329c25ae3bcc4ab61140ae94beefd253a370139c970315bc2 size 1036280 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.neff index 30cc1bdc64a51cf2d1ee41e78ebd1f35c6d89f77..73c854522789ef93794cd120cf94f0b88ae729a1 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_f9ed5e3ab30730f18597+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d92b43193ac64c59582f565c4aec7d1ed451e489a00f042d015bbd0e8c67cef +oid sha256:7ade42c669c083ac77bd9e757120cf37c07e06032242b4ea18ba5431311c8549 size 10681344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/model.neff index c60c7ed4c59b48d77794f82525223ffcfaa98f1f..a383382713e6222f7f183ad91c13c67fa021d7bc 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/model.neff +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:965e436898344a1794a7485a6d22c9270a361cc0d1708fc79a009622a4f1024f +oid sha256:ad861842425dc2da9f200172d770c01d1923969a81a8fdedfc5c6d8b33e33d5e size 2612224 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/wrapped_neff.hlo index 881790075f7406b5325cde84193ff6c94f84d907..1f3df519f62ae22342cf1831077f84c377555e70 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89b05680539419dadecda1725b6115918e24721e06b15bd7a0cef2d8306d1d09 +oid sha256:d26aedd259a8a6b35eadcfff9de2d3eecc696164f2238500c3d74b7c8cebf535 size 2742657 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3d6ef694608ef9e66aa33f476a2fe1b1a666273a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a35631f789d0639ee704f5b00fbb1c39d2bd32c4ab5f5f596df76e21e56c27 +size 102398391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..60240658d973b1e6418699c3eff08aa5d5cf33fd --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33caeaadbc10c0e8acc791ab1ed93f764257d306e8dcae087a51ae90b0167e21 +size 4752384 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5a96412e117fb9a415941de9ed37587dd0437727 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34ec63b3c6bcc2baba525bf2e36008c165a77b92ceddac688246667d1c5f5b1 +size 5069544