Update README.md

## Model Details

This model is an int8 model with group_size 128 and symmetric quantization of deepseek-ai/DeepSeek-V3.1-Terminus, generated by intel/auto-round via RTN (no algorithm tuning). Please refer to the "Generate the Model" section below for details, and please follow the license of the original model.
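
For orientation, the following is a minimal, unofficial sketch of what an RTN run with these settings looks like in auto-round's Python API; the exact recipe is the one in the "Generate the Model" section, and the output directory here is illustrative:

```python
# pip install auto-round==0.8.0
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "deepseek-ai/DeepSeek-V3.1-Terminus"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# iters=0 selects plain round-to-nearest (RTN), i.e. no algorithm tuning,
# matching the int8 / group_size 128 / symmetric settings described above.
autoround = AutoRound(model, tokenizer, bits=8, group_size=128, sym=True, iters=0)
autoround.quantize_and_save("./DeepSeek-V3.1-Terminus-int8-AutoRound")  # illustrative path
```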

## Model Version(s)

The model is quantized with auto-round v0.8.0.

## How To Use

### INT8 Inference

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

quantized_model_dir = "Intel/DeepSeek-V3.1-Terminus-int8-AutoRound"

model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)

prompts = [
    "9.11和9.8哪个数字大",  # "Which number is larger, 9.11 or 9.8?"
    "strawberry中有几个r?",  # "How many r's are in 'strawberry'?"
    "There is a girl who likes adventure,",
    "Please give a brief introduction of DeepSeek company.",
]

# Apply the chat template to each prompt before batching.
texts = []
for prompt in prompts:
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    texts.append(text)
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

outputs = model.generate(
    input_ids=inputs["input_ids"].to(model.device),
    attention_mask=inputs["attention_mask"].to(model.device),
    max_length=200,  # change this to align with the official usage
    num_return_sequences=1,
    do_sample=False,  # change this to align with the official usage
)
# Strip the prompt tokens so only the generated continuation is decoded.
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs["input_ids"], outputs)
]
decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

for i, prompt in enumerate(prompts):
    print(f"Prompt: {prompt}")
    print(f"Generated: {decoded_outputs[i]}")
    print("-" * 50)
```
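
As the inline comments note, `do_sample=False` with `max_length=200` gives short, deterministic outputs that are convenient for a quick smoke test; for results comparable to the original model, adjust these generation settings to match the official usage of deepseek-ai/DeepSeek-V3.1-Terminus.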

## Generate the Model