Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	# Run `vllm serve` for MODEL, listening on PORT.
MODEL="OpenGVLab/InternVL2_5-8B"

# Optional environment settings, left disabled; uncomment as needed.
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export NCCL_P2P_DISABLE=1
# export CUDA_VISIBLE_DEVICES="0"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

PORT=8000

# Expansions are quoted so each value reaches vllm as exactly one argument,
# with no word-splitting or globbing. Every continuation backslash must be
# the last character on its line.
vllm serve "$MODEL" \
  --port "$PORT" \
  --trust-remote-code \
  --limit-mm-per-prompt image=4 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.97 \
  --disable-log-requests