Spaces:

peterpeter8585
/

vllm

Build error

App Files Files Community

vllm / Dockerfile

peterpeter8585

Upload 6 files

a88a542 verified about 2 months ago

raw

history blame contribute delete

1.66 kB

	# Base image: NVIDIA CUDA 12.1.0, "base" flavour, on Ubuntu 22.04.
	FROM nvidia/cuda:12.1.0-base-ubuntu22.04

	# Install pip for the system Python 3. The apt package lists are refreshed
	# and removed inside the same layer so they never persist in the image.
	# NOTE(review): --no-install-recommends is not added here because
	# python3-pip's recommended packages may be relied on by later pip installs
	# or at runtime — confirm before slimming this step further.
	RUN apt-get update -y \
	    && apt-get install -y python3-pip \
	    && rm -rf /var/lib/apt/lists/*

	# Refresh the dynamic linker cache, including the CUDA 12.1 compat directory.
	# NOTE(review): presumably this makes the CUDA compatibility libraries
	# resolvable at runtime — confirm the directory exists in this base image.
	RUN ldconfig /usr/local/cuda-12.1/compat/

	# Python requirements: upgrade pip itself first, then install/upgrade
	# everything listed in the builder's requirements file. pip's download cache
	# lives in a BuildKit cache mount, so it is reused across builds without
	# being written into the image layer.
	COPY builder/requirements.txt /requirements.txt
	RUN --mount=type=cache,target=/root/.cache/pip \
	    python3 -m pip install --upgrade pip \
	    && python3 -m pip install --upgrade --requirement /requirements.txt

	# Install vLLM and FlashInfer from prebuilt wheels via pip (the earlier need
	# to build a fork is gone). The pip download cache is kept in the same
	# BuildKit cache mount used for the requirements step, so the large wheels
	# are reused between builds and are not stored in the image layer.
	# NOTE(review): the FlashInfer index below targets cu121/torch2.3 — confirm
	# that this matches the torch version pulled in by vllm==0.10.0.
	RUN --mount=type=cache,target=/root/.cache/pip \
	    python3 -m pip install vllm==0.10.0 \
	    && python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3

	# Setup for Option 2: building the image with the model included.
	# Every build argument is optional. An empty MODEL_NAME (the default) means
	# the model download step later in this file is skipped.

	# Root directory under which the Hugging Face caches are placed.
	ARG BASE_PATH="/runpod-volume"

	# Model and tokenizer selection.
	ARG MODEL_NAME=""
	ARG MODEL_REVISION=""
	ARG TOKENIZER_NAME=""
	ARG TOKENIZER_REVISION=""

	# Quantization setting (empty by default).
	ARG QUANTIZATION=""

	# Persist the build arguments into the image environment so they are visible
	# both to later build steps and to the running container. The Hugging Face
	# cache locations are derived from BASE_PATH; HUGGINGFACE_HUB_CACHE and
	# HF_HOME point at the same hub directory, and hf_transfer is disabled.
	ENV BASE_PATH=${BASE_PATH} \
	    MODEL_NAME=${MODEL_NAME} \
	    MODEL_REVISION=${MODEL_REVISION} \
	    TOKENIZER_NAME=${TOKENIZER_NAME} \
	    TOKENIZER_REVISION=${TOKENIZER_REVISION} \
	    QUANTIZATION=${QUANTIZATION} \
	    HF_HOME="${BASE_PATH}/huggingface-cache/hub" \
	    HUGGINGFACE_HUB_CACHE="${BASE_PATH}/huggingface-cache/hub" \
	    HF_DATASETS_CACHE="${BASE_PATH}/huggingface-cache/datasets" \
	    HF_HUB_ENABLE_HF_TRANSFER=0

	# Module search path for Python: the filesystem root and /vllm-workspace.
	ENV PYTHONPATH="/:/vllm-workspace"


	# Copy the worker sources into the image, then optionally pre-download the
	# model at build time.
	#  - The HF_TOKEN build secret is optional (required=false). When it is
	#    mounted, its contents are exported as HF_TOKEN for this RUN only; the
	#    secret is not written to any image layer.
	#  - The command substitution is quoted so the token is passed through
	#    verbatim, with no word splitting by /bin/sh.
	#  - The download script runs only when MODEL_NAME is non-empty.
	COPY src /src
	RUN --mount=type=secret,id=HF_TOKEN,required=false \
	    if [ -f /run/secrets/HF_TOKEN ]; then \
	        export HF_TOKEN="$(cat /run/secrets/HF_TOKEN)"; \
	    fi && \
	    if [ -n "$MODEL_NAME" ]; then \
	        python3 /src/download_model.py; \
	    fi

	# Start the handler. The exec (JSON-array) form runs python3 directly, with
	# no intermediate /bin/sh wrapper.
	CMD ["python3", "/src/handler.py"]