#!/usr/bin/env bash
# Startup script: initialise the database (with retries), then launch
# gunicorn in the background and record its PID in GUNICORN_PID.
#
# Environment overrides (defaults in parentheses):
#   WORKERS (1), TIMEOUT (120), PORT (10000),
#   DB_INIT_RETRIES (10), DB_INIT_SLEEP_BASE (5)
set -euo pipefail

# Default to 1 worker to prevent OOM on low-memory hosts
WORKERS_VALUE="${WORKERS:-1}"
TIMEOUT_VALUE="${TIMEOUT:-120}"
PORT_VALUE="${PORT:-10000}"

# Initialize the database with retries (handle DB cold start).
# The delay before each retry grows linearly: SLEEP_BASE * attempt seconds.
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"
attempt=1
until python scripts/init_pgvector.py; do
  if [[ "$attempt" -ge "$MAX_RETRIES" ]]; then
    echo "Database initialization failed after $attempt attempts; exiting." >&2
    exit 1
  fi
  delay=$((SLEEP_BASE * attempt))
  echo "init_pgvector.py failed (attempt $attempt/$MAX_RETRIES). Sleeping ${delay}s before retry..."
  sleep "$delay"
  attempt=$((attempt + 1))
done

echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

# Build the gunicorn command line as an array so every argument stays a
# single word and the optional --config flag needs no unquoted expansion.
gunicorn_args=(
  --bind "0.0.0.0:${PORT_VALUE}"
  --workers "${WORKERS_VALUE}"
  --timeout "${TIMEOUT_VALUE}"
  --log-level info
  --access-logfile -
  --error-logfile -
  --capture-output
)

# Determine gunicorn config usage
if [[ -f gunicorn.conf.py ]]; then
  gunicorn_args+=(--config gunicorn.conf.py)
else
  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
fi

# Start gunicorn in background so we can trap signals and collect diagnostics
gunicorn "${gunicorn_args[@]}" app:app &
GUNICORN_PID=$!
#######################################
# Signal handler: log memory diagnostics, forward SIGTERM to gunicorn, wait
# for it to exit, then exit this script with status 0.
# Globals:   GUNICORN_PID (read)
# Arguments: $1 - name of the received signal without the SIG prefix
#                 (optional; defaults to TERM)
# Outputs:   diagnostics on stdout
#######################################
handle_term() {
  local sig="${1:-TERM}"
  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  echo "--- Top processes by RSS ---"
  # Best-effort diagnostics: a failing ps/cat must not abort the shutdown path.
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo (if available) ---"
  cat /proc/meminfo || true
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  # gunicorn may already have exited; that is not an error here.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit
  wait "${GUNICORN_PID}" || true
  echo "Gunicorn exited; wrapper exiting"
  exit 0
}

# Trap TERM and INT. The signal name is passed along so the log line reflects
# the signal actually received; SIGTERM is what gets forwarded in both cases.
trap 'handle_term TERM' SIGTERM
trap 'handle_term INT' SIGINT

# Readiness probe loop: poll /health until it answers, gunicorn dies, or
# READY_TIMEOUT seconds have been spent sleeping between checks.
echo "Waiting for application readiness (health endpoint)..."
READY_TIMEOUT="${READY_TIMEOUT:-60}"   # total seconds to wait
READY_INTERVAL="${READY_INTERVAL:-3}"  # seconds between checks
ELAPSED=0
READY=0
while [[ "$ELAPSED" -lt "$READY_TIMEOUT" ]]; do
  # kill -0 sends no signal; it only tests that the process still exists.
  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
    echo "Gunicorn process exited prematurely during startup; aborting." >&2
    exit 1
  fi
  if curl -fsS "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep "$READY_INTERVAL"
  ELAPSED=$((ELAPSED + READY_INTERVAL))
done

# A failed readiness check is only logged; startup continues regardless.
if [[ "$READY" -ne 1 ]]; then
  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
fi

# Pre-warm via /chat. This runs whether or not the health check succeeded,
# and a failed request is logged but does not stop startup.
echo "Pre-warming application via /chat endpoint..."
curl -sS -X POST "http://localhost:${PORT_VALUE}/chat" \
  -H "Content-Type: application/json" \
  -d '{"message":"pre-warm"}' \
  --max-time 30 --fail >/dev/null 2>&1 \
  || echo "Pre-warm request failed but continuing..."

# Explicit embedding warm-up to surface ONNX model issues early.
echo "Running embedding warm-up..."
# Run a single embedding through EmbeddingService in a separate Python
# process. The heredoc delimiter is quoted, so the shell performs no
# expansion inside the Python source. A non-zero exit from Python aborts
# startup after stopping gunicorn.
if python - <<'PY'
import time
from src.embedding.embedding_service import EmbeddingService

start = time.time()
try:
    svc = EmbeddingService()
    emb = svc.embed_text("warmup")
    dur = (time.time() - start) * 1000
    print(f"Embedding warm-up successful; dim={len(emb)}; duration_ms={dur:.1f}")
except Exception as e:
    dur = (time.time() - start) * 1000
    print(f"Embedding warm-up FAILED after {dur:.1f}ms: {e}")
    raise SystemExit(1)
PY
then
  echo "Embedding warm-up succeeded."
else
  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
  # Stop the already-running gunicorn before exiting; it may be gone already.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  wait "${GUNICORN_PID}" || true
  exit 1
fi

echo "Server is running (PID ${GUNICORN_PID})."

# Wait for gunicorn to exit and forward its exit code.
# The status is captured with `||` because, with errexit (`set -e`) in
# effect, a bare `wait` returning non-zero would end the script before the
# exit code could be recorded and logged.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"