diff --git a/.gitattributes b/.gitattributes index c7ba786f12e54e2f038a698210d06f9675f81b49..8c057fa376324743e08f3027f186b375139cf483 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11920,3 +11920,50 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b32493175a70d9e16b9+747527b0/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b32493175a70d9e16b9+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb50383f9a879a8375b9+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_eb50383f9a879a8375b9+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/model.neuron filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/model.neuron filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/model.neuron filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/model.neuron filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/daf3f26f1dd6b6a7b205.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/daf3f26f1dd6b6a7b205.json new file mode 100644 index 0000000000000000000000000000000000000000..8e677cb8b57e29c3ff3a2d0e4dba187badd5c518 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/diffusion-transformer/PixArt-alpha/PixArt-XL-2-512x512/daf3f26f1dd6b6a7b205.json @@ -0,0 +1,102 @@ +{ + "_entry_class": "MultiModelCacheEntry", + "_model_id": "PixArt-alpha/PixArt-XL-2-512x512", + "_task": null, + "text_encoder": { + "architectures": [ + "T5EncoderModel" + ], + "classifier_dropout": 0.0, + "d_ff": 10240, + "d_kv": 64, + "d_model": 4096, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "export_model_type": "t5-encoder", + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": false, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_sequence_length": 120, + "task": "feature-extraction", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 24, + "num_heads": 64, + "num_layers": 24, + "output_past": true, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "use_cache": false, + "vocab_size": 32128 + }, + "transformer": { + "_class_name": "PixArtTransformer2DModel", + "activation_fn": "gelu-approximate", + "attention_bias": true, + "attention_head_dim": 72, + "attention_type": "default", + "caption_channels": 4096, + "cross_attention_dim": 1152, + "double_self_attention": false, + "dropout": 0.0, + "in_channels": 4, + "interpolation_scale": null, + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "bf16", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_encoder_hidden_size": 4096, + "static_height": 64, + "static_num_channels": 4, + "static_sequence_length": 120, + "static_vae_scale_factor": 8, + "static_width": 64, + "task": "semantic-segmentation", + "tensor_parallel_size": 1 + }, + "norm_elementwise_affine": false, + "norm_eps": 1e-06, + "norm_num_groups": 32, + "norm_type": "ada_norm_single", + "num_attention_heads": 16, + "num_embeds_ada_norm": 1000, + "num_layers": 28, + "num_vector_embeds": null, + "only_cross_attention": false, + "out_channels": 8, + "output_attentions": false, + "patch_size": 2, + "upcast_attention": false, + "use_additional_conditions": null, + "use_linear_projection": false + } +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2592b69b1afedac6c249.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2592b69b1afedac6c249.json new file mode 100644 index 0000000000000000000000000000000000000000..decb5a553da86a79ab84b32a10193291f368d878 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2592b69b1afedac6c249.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/f9479b3f424f0ae4542c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/f9479b3f424f0ae4542c.json new file mode 100644 index 0000000000000000000000000000000000000000..ccfb6dee16204ccbd1acf35e1ec713029c4f18f6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/llamafactory/tiny-random-Llama-3/f9479b3f424f0ae4542c.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/43cfc61763bcaf55bb9e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/43cfc61763bcaf55bb9e.json new file mode 100644 index 0000000000000000000000000000000000000000..887bfabf9ebfda5d0ed99d4e652ceb94b611b8c7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/43cfc61763bcaf55bb9e.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/a26633219d7308901cb9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/a26633219d7308901cb9.json new file mode 100644 index 0000000000000000000000000000000000000000..2d8338038a52b7596158c223a5373eebd0210051 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/a26633219d7308901cb9.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ee0b934f9d86b0c0ec63.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ee0b934f9d86b0c0ec63.json new file mode 100644 index 0000000000000000000000000000000000000000..03b10083c6d8ecf2037c1ebd43029d8d990a46f4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ee0b934f9d86b0c0ec63.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/eeb8b7f2495863c099b1.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/eeb8b7f2495863c099b1.json new file mode 100644 index 0000000000000000000000000000000000000000..dfef6c38ff1ea9df94ed94f64cabc2663acceeb6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama/unsloth/Llama-3.2-1B-Instruct/eeb8b7f2495863c099b1.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/1ac5416e0e8a986447af.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/1ac5416e0e8a986447af.json new file mode 100644 index 0000000000000000000000000000000000000000..6279cc9ab0d85baffd6359576bb961287f02decc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/llama4_text/tiny-random/llama-4/1ac5416e0e8a986447af.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/68e8a9aa766528672764.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/68e8a9aa766528672764.json new file mode 100644 index 0000000000000000000000000000000000000000..5d59fe83aa0b14d0aecb28dc736e784b5623085b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/mixtral/dacorvo/Mixtral-tiny/68e8a9aa766528672764.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/8d92c29cef19fddb825c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/8d92c29cef19fddb825c.json new file mode 100644 index 0000000000000000000000000000000000000000..be317702631c0b7cea923a22c4fff260becbc98a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/8d92c29cef19fddb825c.json @@ -0,0 +1,62 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/1d198d134b6736b8c253.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/1d198d134b6736b8c253.json new file mode 100644 index 0000000000000000000000000000000000000000..dba5689c0788fccd18091311fe5419109e18ac6b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/phi3/yujiepan/phi-4-tiny-random/1d198d134b6736b8c253.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/06d3a1f0282762f9f49a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/06d3a1f0282762f9f49a.json new file mode 100644 index 0000000000000000000000000000000000000000..ef8dc51960b32aa9643ee8faa336d79ab4c824fb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/06d3a1f0282762f9f49a.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/17b13a58612cc061dc5d.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/17b13a58612cc061dc5d.json new file mode 100644 index 0000000000000000000000000000000000000000..c676e5781ec5301361ee723005c2c6fee76229da --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/17b13a58612cc061dc5d.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/239b5e7c2206baa8917a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/239b5e7c2206baa8917a.json new file mode 100644 index 0000000000000000000000000000000000000000..d26ed9b862e3a137f3ea8387d305f06414c29c15 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/239b5e7c2206baa8917a.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/a35f43930569fd7bd14c.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/a35f43930569fd7bd14c.json new file mode 100644 index 0000000000000000000000000000000000000000..46ea06352c3443997b9e88a01c52fd6b2cfe15ce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/a35f43930569fd7bd14c.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/71aae2feb728a4bc1bea.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/71aae2feb728a4bc1bea.json new file mode 100644 index 0000000000000000000000000000000000000000..7abd62b901e0ad167c05735432ed0f29e7145f92 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/yujiepan/qwen2.5-128k-tiny-random/71aae2feb728a4bc1bea.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bda840f20094d5c6f760.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bda840f20094d5c6f760.json new file mode 100644 index 0000000000000000000000000000000000000000..f0364de250a3d475d0a45bdd66fb0eb35cdf26a3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/bda840f20094d5c6f760.json @@ -0,0 +1,87 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-Embedding-0.6B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B", + "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 1 + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151669 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/322124df0d79b4f7f8bb.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/322124df0d79b4f7f8bb.json new file mode 100644 index 0000000000000000000000000000000000000000..cc54b46eb76baf3c5f0a82eff33901afae709a7f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/322124df0d79b4f7f8bb.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.1.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..633dbed2eaa37dabdf9ee36dec02aa45f2e9d0ed --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a97bb411dcd64871e1b1784620de52aab98fa041ae87f39fee46a8518b9f118 +size 78518 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7a8003b7f1ea1d7edf638b3722bfa968cc911f36 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2282ddf8f23c97222ece601e4f0757f78fe73c6f641b3ffd4d3fe5c24e50b7 +size 287744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4c86200906230322b4d380546c80912e2f75bf82 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_01efbef13f746aece3be+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bc663eeb8487a29b91741cdc21020abf78254b490d02397bdaaaaab3f2b119 +size 296094 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..86dbb720979c71489ac235e59ca0f77a86bc0680 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..380e691f108e93e00dd9e9bf0b9bc3a6e14db163 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61586b49e1e903cab9fff0482909f040c5e97621b59f6233899973b7cd27184d +size 84807 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c1e306ee3a3e27778fd522f49edef71ad85925d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dc4bd0a16b1a4ab1733+c2248236/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33b221d4039eaaa99cb40b84d626d05299fe802098783ac5418f3b32574abe8 +size 646144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ecd5d720009890f91588c6b36962aee90216592 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714ebfd9ca52e3b66ca85e6f7d1f920791921aa127dc18c9273251b90f276a77 +size 593552 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..39142cdb69528943404b098dc825e6b272e88a0d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e70ec9049ca26a7f0f6+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad65458553f2e14df14309086b054374027fcf02dfa5d0c7e21b1977817f1ae +size 3769344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be8215e4db02a98e7c7d9a9b026367f5b42a91ea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ed0858162f8fc2c58bc4c23d0925dbeda5897dae0889f87758206676394f4e +size 81843 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4b73f8be39b0687e66c93f803551ed1a14d785fe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d9f3177230710833e1a22c473ceb0215149d4996fb169ded5f87e0ebd9b6e7 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6947b23b1665a11411eff49c5a64caa7c8eb0fda --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13aa568fe38435861b30+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11296a74882475c7e044dddae990ac1e06a377d292f8bbbabccbf9f52a68de87 +size 255104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71cc5af4454da4b12e5b11d5a5cd0c9f1768fe1d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010699d8fe7fd543b752b93c8bc2cd72e5309050ab149427dfd589c97e7268c5 +size 81516 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1f401e6c73ae59741f109c92ca047d40bbced4d2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_198eb3e5a6d933feebd0+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36bf12c475e30b9ae5c3e55dc492a5b36e2b84326264d9707f19e411d4862c1a +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb9a088a70cc3ed35cd9c1c2443ce8f328cf694b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c4af19c1f6118b30a135c4553925fc304a74428ae95971dcad784cdef2729b +size 97794 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0006b75ec7688b8ab09218e772eaaef6ed0176d2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_19f0ddffb404ac2b8204+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3706159398be1c857cb9410baa841093252d4bd2db4a6ec6e5fcfcb5866e59d +size 410624 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4e048268a2ebda42a903d9c30e684be97aec650d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d1111cccd29ce4edd640c6b5880fa7b920b5c1211396da06d3b58670ec7a12 +size 90382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fde2255bdb0c89a5a5638df492a98fed01c7aaef --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1e1261ac026ac7064c12+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07848927e2ef873e5477430034ef70cac7c7af77f9e216a81255e421762ed85 +size 359424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cad0fbfc3f3d7194c0b8de6b469433928d73a232 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3b6a0ea5977bf9124bbef9091daf3ab3acffd04afd968383c5df8268501518 +size 79552 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96702f9d7b40e4e2ca9d10badfa20f552498cc41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_243316f018b041915a7e+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e48ef0d3d4fd0d15bdd689e6cdf5d1898680cc3e8a41b478d5c210cf083bd6b +size 338944 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f8a17cd08c6a504ae7d0fd64b6823290f002a0ba --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d27d563a4e3028e4ea49838d9c3b5f1557c20ff9dd0f09d8fc5f9c669102fbe +size 658192 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ad21d7119da25bd9da999474f45924b612934dbe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6bd5080e30cb75bdd6b2157daeb16e6c52ad35e65ccdaa0a523408517ec0ea +size 1915904 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..278bd42d9daa3a479d8f18300698bd079bf2eff9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3360dfdbc9cf20ffe3ea+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a5ae8a75a3a4e0656c3937dc2bec0710df3820ba3db609ff551cf806a50c75 +size 2053551 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..432703ca06587028e081b3337bf8e5c0b1e77a10 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56077a01ff146a4392ab8f44a679b7a61a93a98b0689f1f004df657cc89d6d9 +size 418223 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..579c0a2e6f0fab9f785de002ce968d449b07e131 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07eb3095ef4419384219a66155a12e688b10ba35214414470103c6a030c459e +size 3093504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2654287d3d521218a8d7aead08184ae0c5f2fde4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4be33b117e68e2de3326+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75292c2c71464cc78c3ef24bceffe9c98027a5ca0d3844e42bcc4478d714fb9 +size 3167748 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7ad085fa649cf1da3b4eace6cdf4e70b954f0806 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_bdc1e7bb-6313-4016-a46d-a1410f4efde5/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ac327671534bf4882fdac21ca81b2875bcb38c3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592e80e1bffbd437bacb4001b4b80f6e54f1fd34a64af2d516dcdcaa57f6cdce +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d5c42b828df129788e611a10c4c1dcfd478e31dd --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4c5098190e907f2a54fb+8c16a911/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b534f5733c6240dde4a8aab2b87c743eb5ecb9c2f13835d0623982c7bb41e5 +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..19e4d5ae4b3cd857444eaf64a6ba7bd6f498acad --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3312876ce41e0fcc32e5f19ddc83cfa44fe317d39b1f1aa97a6b21f49acbb23 +size 586051 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1cc29860a288ab10650f92cc8572f76fe0b29daa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e93fdf5154aebbd1ec7+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81219ec13b4f22f03d84b7a75fa54cf308d9910802fcaff32c3be93c0486790 +size 7097344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0e0d09444ee244cb7c7eb47368ff8b4c36f641 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2117dab3e3ed8caad749b8256ab430c591d3b72a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec57d9d2f9d9be197ad88a58ece6b14b0ccab97c121b8d20cc9512558ddf6562 +size 70276 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..389986f6420c8d5b0f98999feb0b5b86942d2cc5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c575672250d5f23841d008ced6c33f1df62b6d57923b8be0b7797455ae58cf2 +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..617ded72a276d7638fe3fa89d291e1dc6af446a3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5949b466ee5d32903b5e+ca355898/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05228c5d86d72717d585b6556529ede68ef62b4a306d5d7bfaf5f07ef7d9d96 +size 289571 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8aa1efec651d6c6dfe05237c7181891c361c80af --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_38e9154d-8e64-419a-965f-d7efa146aa17/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..049797fb9a080a28cb4ded7aa83b76ab6942f7f1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245d5e7af27461459b0791912e0f89c33a9493848539058559a74a829b073d7c +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..12aa5f66ae08bbb821701429ca89274a264cabc2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6aac25f548c1812d8b53+088d889e/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8a5b414833177ae5ffb36be31595fb5032512541b3ebd43555eadc356280c2 +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d1f1bbcf75feb5ea0f64e3347bb1d71771e3336d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9c56f973-ecb5-4e85-aeac-6fad60b308e6/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..153a42da959cbb02ed93e703ccfea31303f5b640 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12120ae8f7a60e70b9b15515259b5090bb5827c34359dc4b47b59ba4856098aa +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2941e039db1b0c747ffdabc71efbc32be023e544 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_72ee895d32151757c529+d36b334e/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01aeb7504192d925aa66afd6766e1d163fd38409c0e1fcd88b72f63c92731062 +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dca379a5c1988d5cc22a2ef46b05ee50fbc98a3d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:168729ace178aab53bb364861f32f0a113d7a3603411bf752a9c654700469411 +size 509252 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..27ed751bd6d1ed09b01961f141162aa732b68595 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73b50e8e61409e1f8fe1+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec47f01b746ca3c89b4411a0d737ee6a7205568789b4b9e20cb3cfae0e80d2b4 +size 41585664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..449b5720c5911938c8c44b129fe894a3eb514a18 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_bf39c7e6-4d0c-436a-8429-901687e99683/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d157ad6c554dea593844b5c49ecd17a87ad75db3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f508862404b94b0c5f1c77e0e44f08299b9fb64c7224bfcb31c40d79b71b9d +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3b4e287ba6f30f93512d0d1e818937df690331b8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9bc951663ff96735f71e+aad90931/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e5a3ad1f132015036421817714ed868e99cf4b92c1430b8e5151e705cf4e9c +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b6098b16ad76b21c5ec1e1867138a64bbab5367 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf131c43642f157e6bbfa112e521c7db709a716383a86c176ec34c321a62160 +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eaa3e1ca72f9d4cc10ffe20d240f796af53a3ba4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac615b643bca430c3eaad9c1816876d5f545c777ecdc57a9f20a9ffd774454b2 +size 1577984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b57448d6af9192256358178466f2a087b1f59629 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a2547d4e8c084e5503d9+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabf3069adebc7029a978ce74ba8c242c6c62647724dca03bd1208ab1e2c1132 +size 1734318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9aac85f572373187ed5932ede47f59a7a91323f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0902b9467e36affd529236be99c466bb326eb59e56e6b6414b2bde17d7913ac8 +size 676474 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..82abb82374c55b8fe514a43f6c684dcf66aa796e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6dba7c62aa41379f9b1362ec08c4a98c7c5a74f1e99be09a8ea6d1a512a7372 +size 543744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d83acc0dd5b1f73207e5c25238f699191c322b90 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a6ac71f9922f4f01abf5+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8516abb72de6f5c8d2b9ad74508eb524c404b4dbf23acc26daac265ba6925161 +size 563380 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b28dfdd4b213537b2dea5c6bcc967e9c9762156 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d61ac2165fde7772e8d3035b0f89b50beec5e0d305548cbacc624eefa9452d3 +size 82753 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..757acf24be3a9d8ec5df52a73e520031a1b7069d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aac12629bb09a3f29593e822adc9258c99737b4f857e6fc1d78284fea075b5d +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d932a4dc0113ea2e873d1acae381acf9064e3b61 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b26694c7ca7ec7ec6e04+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db0772da52a0bb2b152a5ed85c72eb32f7e865604c07c0d0d40db2980016c27c +size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71a45b2e71496cd9a4651b67356cd1c41ba41ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7ebbb9e2aa4d3c3d4667f098e18faa9ed0a231c9831e73c8118bfad21264ad +size 91147 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..56156ccaa5698a6741ceb3f082a2a99df68b33b5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b61279642ab3f4895869ac9220451ba0253a65848a0ea4cc5f727f78970ff3f +size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1766967e3a09d034e0adc5cb4feda25531ea6de0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c0a2583211556c96a6a3+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717b264eed41092c6a186e7596770239039aa71701a44a551fae1b51b3a3fc3a +size 289031 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0acda2091419ffefac552b387c604242c216e220 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0209401219006cca96e01930250e97052c586f1267ed95762a3fd065de8cbe9 +size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c749246a95d9b4924f615b0fc826a2ffcb3045be --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_c6450080bc47619254df+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3be943131798156033b5df1ddf7aeaa12171fb23e4115c6048429dd50c9ce02 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/model_index.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/model_index.json new file mode 100644 index 0000000000000000000000000000000000000000..6e86c661515a0aca4b2f60c5a4738d063ee68cec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/model_index.json @@ -0,0 +1,25 @@ +{ + "_class_name": "PixArtAlphaPipeline", + "_diffusers_version": "0.35.1", + "_name_or_path": "PixArt-alpha/PixArt-XL-2-512x512", + "scheduler": [ + "diffusers", + "DPMSolverMultistepScheduler" + ], + "text_encoder": [ + "transformers", + "T5EncoderModel" + ], + "tokenizer": [ + "transformers", + "T5Tokenizer" + ], + "transformer": [ + "diffusers", + "PixArtTransformer2DModel" + ], + "vae": [ + "diffusers", + "AutoencoderKL" + ] +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/scheduler/scheduler_config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/scheduler/scheduler_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dc10f8d63268216248ed744f57d2ff279ee94581 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/scheduler/scheduler_config.json @@ -0,0 +1,32 @@ +{ + "_class_name": "DPMSolverMultistepScheduler", + "_diffusers_version": "0.35.1", + "algorithm_type": "dpmsolver++", + "beta_end": 0.02, + "beta_schedule": "linear", + "beta_start": 0.0001, + "dynamic_thresholding_ratio": 0.995, + "euler_at_final": false, + "final_sigmas_type": "zero", + "flow_shift": 1.0, + "lambda_min_clipped": -Infinity, + "lower_order_final": true, + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "rescale_betas_zero_snr": false, + "sample_max_value": 1.0, + "solver_order": 2, + "solver_type": "midpoint", + "steps_offset": 0, + "thresholding": false, + "time_shift_type": "exponential", + "timestep_spacing": "linspace", + "trained_betas": null, + "use_beta_sigmas": false, + "use_dynamic_shifting": false, + "use_exponential_sigmas": false, + "use_flow_sigmas": false, + "use_karras_sigmas": false, + "use_lu_lambdas": false, + "variance_type": null +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc662b677d536c321eaf2a2b70168cfa739b224e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "T5EncoderModel" + ], + "classifier_dropout": 0.0, + "d_ff": 10240, + "d_kv": 64, + "d_model": 4096, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "export_model_type": "t5-encoder", + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": false, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "model_type": "t5-encoder", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_sequence_length": 120, + "task": "feature-extraction", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 24, + "num_heads": 64, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "torchscript": true, + "transformers_version": "4.55.4", + "use_cache": false, + "vocab_size": 32128 +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/model.neuron b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/model.neuron new file mode 100644 index 0000000000000000000000000000000000000000..c8fd314604e833d8c36747ef56c5affeaeba5216 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/text_encoder/model.neuron @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1463a4af17b58d636a417d36b951f4449d0f9489efc4a4da4f4f6f82006f20 +size 9685732433 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/added_tokens.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/special_tokens_map.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/spiece.model b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/tokenizer_config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddb6837b61620d0c17e8742b2a06f83ee4063ae1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/tokenizer/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32002": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32003": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32004": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32005": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32006": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32007": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32008": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32009": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32010": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32011": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32012": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32013": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32014": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32015": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32016": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32017": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32018": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32019": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32020": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32021": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32022": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32023": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32024": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32025": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32026": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32027": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32028": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32029": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32030": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32031": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32032": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32033": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32034": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32035": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32036": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32037": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32038": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32039": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32040": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32041": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32042": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32043": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32044": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32045": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32046": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32047": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32048": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32049": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32050": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32051": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32052": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32053": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32054": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32055": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32056": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32057": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32058": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32059": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32060": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32061": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32062": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32063": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32064": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32065": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32066": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32067": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32068": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32069": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32070": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32071": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32072": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32073": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32074": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32075": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32076": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32077": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32078": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32079": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32080": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32081": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32082": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32083": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32084": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32085": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32086": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32087": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32088": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32089": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32090": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32091": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32092": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32093": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32094": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32095": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32096": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32097": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32098": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + }, + "32099": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": true, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": true, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8530a0631f3d2621c596056be7201a0edc1deab4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/config.json @@ -0,0 +1,59 @@ +{ + "_class_name": "PixArtTransformer2DModel", + "_diffusers_version": "0.35.1", + "_use_default_values": [ + "use_additional_conditions", + "interpolation_scale" + ], + "activation_fn": "gelu-approximate", + "attention_bias": true, + "attention_head_dim": 72, + "attention_type": "default", + "caption_channels": 4096, + "cross_attention_dim": 1152, + "double_self_attention": false, + "dropout": 0.0, + "in_channels": 4, + "interpolation_scale": null, + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "bf16", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "model_type": "pixart-transformer-2d", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_encoder_hidden_size": 4096, + "static_height": 64, + "static_num_channels": 4, + "static_sequence_length": 120, + "static_vae_scale_factor": 8, + "static_width": 64, + "task": "semantic-segmentation", + "tensor_parallel_size": 1 + }, + "norm_elementwise_affine": false, + "norm_eps": 1e-06, + "norm_num_groups": 32, + "norm_type": "ada_norm_single", + "num_attention_heads": 16, + "num_embeds_ada_norm": 1000, + "num_layers": 28, + "num_vector_embeds": null, + "only_cross_attention": false, + "out_channels": 8, + "output_attentions": false, + "patch_size": 2, + "sample_size": 64, + "transformers_version": null, + "upcast_attention": false, + "use_additional_conditions": null, + "use_linear_projection": false +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/model.neuron b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/model.neuron new file mode 100644 index 0000000000000000000000000000000000000000..fd85b1a6de9feaef3c6c0b3c4161a2ecafe44ed5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/transformer/model.neuron @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3bf4379ff5403163183cf484f67c72eb27a89f860846dd82e44245f51c296f +size 1297774620 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c976d41ea9255b0eecbfdf06c59314828e3ca94c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/config.json @@ -0,0 +1,68 @@ +{ + "_class_name": "AutoencoderKL", + "_diffusers_version": "0.35.1", + "_use_default_values": [ + "shift_factor", + "latents_std", + "use_quant_conv", + "latents_mean", + "use_post_quant_conv", + "mid_block_add_attention" + ], + "act_fn": "silu", + "block_out_channels": [ + 128, + 256, + 512, + 512 + ], + "down_block_types": [ + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D" + ], + "force_upcast": true, + "in_channels": 3, + "latent_channels": 4, + "latents_mean": null, + "latents_std": null, + "layers_per_block": 2, + "mid_block_add_attention": true, + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "bf16", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "model_type": "vae-decoder", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_height": 64, + "static_num_channels": 4, + "static_width": 64, + "task": "semantic-segmentation", + "tensor_parallel_size": 1 + }, + "norm_num_groups": 32, + "out_channels": 3, + "output_attentions": false, + "sample_size": 256, + "scaling_factor": 0.18215, + "shift_factor": null, + "transformers_version": null, + "up_block_types": [ + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D" + ], + "use_post_quant_conv": true, + "use_quant_conv": true +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/model.neuron b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/model.neuron new file mode 100644 index 0000000000000000000000000000000000000000..aa6feca097ae0ea6bfb85e9eb91828fa9683905c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_decoder/model.neuron @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae62b726941ad809753a43a51b0bab0d6c523ccfde0d56092c2430ec10dbab10 +size 311513621 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/config.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/config.json new file mode 100644 index 0000000000000000000000000000000000000000..51fd393ba928943250e8fefa8eac24b9392de42f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/config.json @@ -0,0 +1,68 @@ +{ + "_class_name": "AutoencoderKL", + "_diffusers_version": "0.35.1", + "_use_default_values": [ + "shift_factor", + "latents_std", + "use_quant_conv", + "latents_mean", + "use_post_quant_conv", + "mid_block_add_attention" + ], + "act_fn": "silu", + "block_out_channels": [ + 128, + 256, + 512, + 512 + ], + "down_block_types": [ + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D" + ], + "force_upcast": true, + "in_channels": 3, + "latent_channels": 4, + "latents_mean": null, + "latents_std": null, + "layers_per_block": 2, + "mid_block_add_attention": true, + "neuron": { + "auto_cast": null, + "auto_cast_type": null, + "compiler_type": "neuronx-cc", + "compiler_version": "2.21.18209.0+043b1bf7", + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": false, + "instance_type": "trn1", + "int_dtype": "int64", + "model_type": "vae-encoder", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_height": 512, + "static_num_channels": 3, + "static_width": 512, + "task": "semantic-segmentation", + "tensor_parallel_size": 1 + }, + "norm_num_groups": 32, + "out_channels": 3, + "output_attentions": false, + "sample_size": 256, + "scaling_factor": 0.18215, + "shift_factor": null, + "transformers_version": null, + "up_block_types": [ + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D", + "UpDecoderBlock2D" + ], + "use_post_quant_conv": true, + "use_quant_conv": true +} diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/model.neuron b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/model.neuron new file mode 100644 index 0000000000000000000000000000000000000000..ebf9500788ece9c38bcec61e07ab004e94e01c68 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_daf3f26f1dd6b6a7b205/vae_encoder/model.neuron @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ccc46f93c9349c254a7c35010b23d19adb912a59c026faa5c9a7fb4e88baad +size 189176531 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..64028cc78541c1833466a343e251d2e19d025203 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70386c0d4c7acaf312dfe4908f119fe46d817ab3d82394d549c5ed84707d551 +size 80382 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..63bf496eb607784c0e8506e2b735ee208526d3f0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce5e959b8b423ae1f3d90425e5bf6e8b650943e80f178dcae24c9822386cc2a +size 216064 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f22e21d74c5cd9512e07747f147570fc77878600 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e7bf36a5a3a95f1e548a+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a95fe8dafa968bb5e4852ffddd4f721b765275db7a6a3d53c2ffadb6c27e04 +size 224412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..be0ba40f70c99b047e474501f9331c01e99cbc52 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_79c7e93e-2793-444d-8250-493cda4dd626/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d726c4c0af4ec5507e85a27f05c6d56f6d9ac32a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92410f3650ee33754e762b2fc5f854f1ade676aec848b3fe74e9eb021f1d9c64 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b2e86c9e3846254608e45c389f2f2786d015ab7a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ee146796c3abc2c48a55+9e46248e/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10462aeebc3ef8701906788c0d868bbdaad214aa964e5c575c5eed6c8c7c4f3d +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..727fe2caac58854e2f659732c11b6b5473ac4505 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6596d3769509c9146ae4a3975d1c70c2eb236f1527bf8cab5cf9306c4300289f +size 89555 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ae865be9b374885c2fc48ad4210998bfa5b31822 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f541888df5d30c3d67a56163a1068c3692ee9ada874e678ab7254fb51af0cfeb +size 369664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..97aa5f33bb794c16f3aa9b236b2190d234e0990d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4ada83ccf930dab43a6+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c7e5a9a1208751581b37d797ce8d5b5e52f19f045eb31cd2ca6f5149d7c038 +size 379362 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a291eef6371ee0cd154490c84b4b4979190709b3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5b9b5d719971d47dec5dcb4d4b5bf90ec5f188bf96403d9ef0053c49d97724 +size 82772 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c67c004fd985288462c0ef18268b717def94acea --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9572352e721af4c0c3a2c56e76a4b5ab2ae9605d4829aea3fe819b4329163be +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7ac504dad1ca324a321baa4d7ed0d98d9e0079b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed0bc9e6a1063940352f77cf3c69fd69dfe037f37ff904cce52c6b300a4a81a +size 694128 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bccd5738420a9038361bde5e17daf9b65b46e23a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99aac18d392a7fa78c603ee2eaa429959a523455a432a6bbb186be4711f1396 +size 625664