Update README.md
Browse files
README.md
CHANGED
|
@@ -346,13 +346,13 @@ Run the benchmarks under `vllm` root folder:
|
|
| 346 |
### baseline
|
| 347 |
```Shell
|
| 348 |
export MODEL=microsoft/Phi-4-mini-instruct
|
| 349 |
-
|
| 350 |
```
|
| 351 |
|
| 352 |
### AWQ-INT4
|
| 353 |
```Shell
|
| 354 |
export MODEL=pytorch/Phi-4-mini-instruct-AWQ-INT4
|
| 355 |
-
VLLM_DISABLE_COMPILE_CACHE=1
|
| 356 |
```
|
| 357 |
</details>
|
| 358 |
|
|
|
|
| 346 |
### baseline
|
| 347 |
```Shell
|
| 348 |
export MODEL=microsoft/Phi-4-mini-instruct
|
| 349 |
+
vllm bench latency --input-len 256 --output-len 256 --model $MODEL --batch-size 1
|
| 350 |
```
|
| 351 |
|
| 352 |
### AWQ-INT4
|
| 353 |
```Shell
|
| 354 |
export MODEL=pytorch/Phi-4-mini-instruct-AWQ-INT4
|
| 355 |
+
VLLM_DISABLE_COMPILE_CACHE=1 vllm bench latency --input-len 256 --output-len 256 --model $MODEL --batch-size 1
|
| 356 |
```
|
| 357 |
</details>
|
| 358 |
|