Spaces:
Build error
Build error
# Image for the vLLM serverless worker.
# Base: NVIDIA CUDA 12.1.0 "base" image on Ubuntu 22.04.
# NOTE: the `RUN --mount=...` instructions below require BuildKit.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04

# Install pip. The apt package index is removed in the same layer so it is not
# baked into the image.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Add the CUDA 12.1 compat directory to the dynamic linker cache.
RUN ldconfig /usr/local/cuda-12.1/compat/

# Install Python dependencies.
# The cache mount keeps pip's download cache out of the image layers while
# still letting rebuilds reuse it.
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt

# Install vLLM and FlashInfer from prebuilt wheels. (The issues that once
# required building a fork are fixed, and image size matters less now that pip
# downloads are cached.) This layer uses the same pip cache mount as the
# requirements layer so the large wheels are not re-downloaded on every rebuild.
# NOTE(review): `-i` replaces PyPI with the FlashInfer wheel index for the
# second command, and that index targets cu121/torch2.3 -- confirm it matches
# the torch version pulled in by vllm==0.10.0.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install vllm==0.10.0 && \
    python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3

# Setup for Option 2: building the image with the model included.
# All of these can be overridden with --build-arg; an empty MODEL_NAME skips
# the model download step further down.
ARG MODEL_NAME=""
ARG TOKENIZER_NAME=""
ARG BASE_PATH="/runpod-volume"
ARG QUANTIZATION=""
ARG MODEL_REVISION=""
ARG TOKENIZER_REVISION=""

# Persist the build args as runtime environment variables and point the
# Hugging Face caches under BASE_PATH.
# Substitutions inside a single ENV instruction use the values from *before*
# the instruction, so ${BASE_PATH} below resolves from the ARG declared above.
ENV MODEL_NAME=$MODEL_NAME \
    MODEL_REVISION=$MODEL_REVISION \
    TOKENIZER_NAME=$TOKENIZER_NAME \
    TOKENIZER_REVISION=$TOKENIZER_REVISION \
    BASE_PATH=$BASE_PATH \
    QUANTIZATION=$QUANTIZATION \
    HF_DATASETS_CACHE="${BASE_PATH}/huggingface-cache/datasets" \
    HUGGINGFACE_HUB_CACHE="${BASE_PATH}/huggingface-cache/hub" \
    HF_HOME="${BASE_PATH}/huggingface-cache/hub" \
    HF_HUB_ENABLE_HF_TRANSFER=0

ENV PYTHONPATH="/:/vllm-workspace"

COPY src /src

# Optionally download the model at build time.
# The HF_TOKEN secret is optional (required=false). When it is present it is
# exported only for this RUN step, so it is not stored in the image.
# The assignment is quoted and kept separate from `export` so that a failing
# `cat` is not masked by export's own exit status.
RUN --mount=type=secret,id=HF_TOKEN,required=false \
    if [ -f /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)" && export HF_TOKEN; \
    fi && \
    if [ -n "$MODEL_NAME" ]; then \
        python3 /src/download_model.py; \
    fi

# Start the handler.
CMD ["python3", "/src/handler.py"]