# syntax=docker/dockerfile:1
# participatory-planner / Dockerfile
# Author: thadillo
# Commit 18bfe26 — Fix HuggingFace model cache permission error
# Hugging Face Spaces Dockerfile
# Debian slim base keeps the image small; 3.11 is pinned at the minor level.
# NOTE(review): for fully reproducible builds, consider pinning by digest
# (python:3.11-slim@sha256:…) — TODO confirm acceptable for this deployment.
FROM python:3.11-slim
# Set working directory (created automatically if missing; all later
# relative COPY/RUN paths resolve against /app)
WORKDIR /app
# Install system dependencies including GDAL for contextily/maps.
# --no-install-recommends avoids pulling optional packages that bloat the
# image (hadolint DL3015); ca-certificates is listed explicitly so curl can
# make TLS requests (it is only a Recommends of libcurl and would otherwise
# be skipped). The apt list cache is removed in the same layer so it never
# persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    gdal-bin \
    libgdal-dev \
    python3-gdal \
    && rm -rf /var/lib/apt/lists/*
# Set GDAL environment variables so Python GDAL bindings find the native
# library headers at pip-install time
ENV GDAL_CONFIG=/usr/bin/gdal-config \
    CPLUS_INCLUDE_PATH=/usr/include/gdal \
    C_INCLUDE_PATH=/usr/include/gdal
# Copy requirements first, separately from the app source, so the pip layer
# below is cached until requirements.txt itself changes
COPY requirements.txt .
# Install Python dependencies; --no-cache-dir keeps pip's download cache
# out of the image layer (hadolint DL3042)
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code (done after dependency install to preserve caching).
# NOTE(review): ensure a .dockerignore excludes .git, local caches, and any
# .env files so they are not baked into this layer — TODO confirm one exists.
COPY . .
# Create all writable directories in a single layer (the original used five
# separate RUN instructions for one logical step, producing extra layers).
#   /data                   - HF Spaces persistent storage mount point
#   /data/.cache/huggingface- persistent HF cache location
#   /data/models/finetuned  - fine-tuned model storage
#   /app/.cache             - in-image model cache (saves /data quota)
#   /tmp/matplotlib         - matplotlib config dir (prevents permission errors)
# chmod 777 is deliberate: HF Spaces runs the container as an arbitrary
# non-root UID, so these paths must be world-writable. chmod -R on /data
# covers the .cache and models subtrees created above.
RUN mkdir -p \
    /data/.cache/huggingface \
    /data/models/finetuned \
    /app/.cache \
    /tmp/matplotlib \
    && chmod -R 777 /data /app/.cache /tmp/matplotlib
# Point matplotlib at the writable config dir created above
ENV MPLCONFIGDIR=/tmp/matplotlib
# Pre-download models into the container image (under /app/.cache) so first
# use is fast and the limited /data persistent storage is not consumed.
# HF_HOME must be set BEFORE the download RUN below so transformers caches
# into /app/.cache/huggingface; it is re-asserted later for runtime.
ENV HF_HOME=/app/.cache/huggingface
# Download zero-shot models (for immediate analysis capability).
# These are loaded on first analysis, pre-downloading saves time and /data space.
# NOTE(review): three zero-shot checkpoints add multiple GB to the image —
# intentional per the comments here, but worth confirming all three are used.
# device=-1 forces CPU-only pipeline construction (no GPU at build time).
# The trailing chmod makes the freshly downloaded cache world-writable so
# the arbitrary runtime UID used by HF Spaces can update/lock cache files.
RUN python -c "from transformers import pipeline; \
print('Downloading BART-large-MNLI...'); \
pipeline('zero-shot-classification', model='facebook/bart-large-mnli', device=-1); \
print('Downloading DeBERTa-v3-base-MNLI...'); \
pipeline('zero-shot-classification', model='MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', device=-1); \
print('Downloading DistilBART-MNLI...'); \
pipeline('zero-shot-classification', model='valhalla/distilbart-mnli-12-3', device=-1); \
print('All models downloaded successfully')" \
&& chmod -R 777 /app/.cache
# Hugging Face Spaces routes traffic to port 7860 (EXPOSE is documentation
# only; HF Spaces handles the actual publishing)
EXPOSE 7860
# Runtime environment, grouped into a single ENV instruction (the original
# used six separate ENV lines for one logical configuration step).
# HF_HOME / HUGGINGFACE_HUB_CACHE keep the model cache in the image
# (populated at build time above) so only the database and fine-tuned
# models live in /data persistent storage.
ENV FLASK_ENV=production \
    PYTHONUNBUFFERED=1 \
    PORT=7860 \
    DATABASE_PATH=/data/app.db \
    HF_HOME=/app/.cache/huggingface \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface
# Health check: probe the login page; -f fails on HTTP errors, -sS keeps
# output quiet but still surfaces curl errors in the health log.
# start-period=40s allows for model/library import time on cold start.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -fsS http://localhost:7860/login || exit 1
# Run the application (exec form: python is PID 1 and receives SIGTERM)
CMD ["python", "app_hf.py"]