Improve code snippet (#3)
Browse files
- Improve code snippet (c4f91327b0b9f51cee01f41f9da8ed195e030378)
- Update README.md (c15b81007348c72c96b834df15b62bc57dc62965)
- Update README.md (d9b36fe104aaadadd78b092dc6f6b6e0263c3df4)
README.md
CHANGED
|
@@ -102,18 +102,15 @@ prompt = [
 102  ]
 103
 104  tokenizer = AutoTokenizer.from_pretrained(model_id)
 105 -
 106 - inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt").cuda()
 107 -
 108  model = AutoAWQForCausalLM.from_pretrained(
 109      model_id,
 110      torch_dtype=torch.float16,
 111      low_cpu_mem_usage=True,
 112      device_map="auto",
 113 -    fuse_layers=True,
 114  )
 115
 116 -
 117  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 118  ```
 119
|
 102  ]
 103
 104  tokenizer = AutoTokenizer.from_pretrained(model_id)
 105  model = AutoAWQForCausalLM.from_pretrained(
 106      model_id,
 107      torch_dtype=torch.float16,
 108      low_cpu_mem_usage=True,
 109      device_map="auto",
 110  )
 111
 112 + inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to('cuda')
 113 + outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 114  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 115  ```
 116