Add DeepSeek distilled model
Browse files
    	
        inference-cache-config/llama3-70b.json
    CHANGED
    
    | @@ -1,5 +1,19 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              " | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 3 | 
             
                {
         | 
| 4 | 
             
                  "batch_size": 1,
         | 
| 5 | 
             
                  "sequence_length": 4096,
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
              "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "batch_size": 1,
         | 
| 5 | 
            +
                  "sequence_length": 4096,
         | 
| 6 | 
            +
                  "num_cores": 24,
         | 
| 7 | 
            +
                  "auto_cast_type": "bf16"
         | 
| 8 | 
            +
                },
         | 
| 9 | 
            +
                {
         | 
| 10 | 
            +
                  "batch_size": 4,
         | 
| 11 | 
            +
                  "sequence_length": 4096,
         | 
| 12 | 
            +
                  "num_cores": 24,
         | 
| 13 | 
            +
                  "auto_cast_type": "bf16"
         | 
| 14 | 
            +
                }
         | 
| 15 | 
            +
              ],
         | 
| 16 | 
            +
              "meta-llama/Llama-3.3-70B-Instruct": [
         | 
| 17 | 
             
                {
         | 
| 18 | 
             
                  "batch_size": 1,
         | 
| 19 | 
             
                  "sequence_length": 4096,
         | 

