Spaces:
Sleeping
Sleeping
# Start a vLLM server (`vllm serve`) for the InternVL2.5-8B model.
#
# Usage: run with bash; requires the `vllm` CLI on PATH.
# Edit MODEL / PORT below to serve a different model or listen elsewhere.

# Fail fast on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Model identifier passed as the positional argument to `vllm serve`.
MODEL="OpenGVLab/InternVL2_5-8B"

# Optional CUDA / NCCL / PyTorch allocator environment overrides.
# They are intentionally left disabled; uncomment what the target host needs.
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export NCCL_P2P_DISABLE=1
# export CUDA_VISIBLE_DEVICES="0"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Port handed to the server via --port.
PORT=8000

# Flag summary (see the vLLM CLI reference for authoritative semantics):
#   --trust-remote-code           allow the model repo's custom code to run
#   --limit-mm-per-prompt image=4 cap multimodal inputs at 4 images per prompt
#   --max-model-len 8192          context length limit
#   --gpu-memory-utilization 0.97 fraction of GPU memory vLLM may claim
#   --disable-log-requests        do not log individual requests
# NOTE(review): --limit-mm-per-prompt and --disable-log-requests syntax may
# differ across vLLM releases — confirm against the installed version.
vllm serve "$MODEL" \
  --port "$PORT" \
  --trust-remote-code \
  --limit-mm-per-prompt image=4 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.97 \
  --disable-log-requests