aws-neuron
/

optimum-neuron-cache

dacorvo HF Staff committed on Jan 29

Commit

4d1e615

verified ·

1 Parent(s): a03f58f

Add DeepSeek distilled model

Files changed (1) hide show

inference-cache-config/llama3-70b.json CHANGED Viewed

@@ -1,5 +1,19 @@
 {
-  "meta-llama/Meta-Llama-3-70B": [
     {
       "batch_size": 1,
       "sequence_length": 4096,

 {
+  "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": [
+    {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "bf16"
+    }
+  ],
+  "meta-llama/Llama-3.3-70B-Instruct": [
     {
       "batch_size": 1,
       "sequence_length": 4096,