#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Runtime knobs, each overridable through the environment.
# WORKERS defaults to a single worker to prevent OOM on low-memory hosts.
WORKERS_VALUE=${WORKERS:-1}
TIMEOUT_VALUE=${TIMEOUT:-120}
PORT_VALUE=${PORT:-10000}

# Database initialization with retries (the database may still be cold-starting).
#
# Runs scripts/init_pgvector.py until it succeeds. After each failure the
# script sleeps SLEEP_BASE * <attempt number> seconds, so the delay grows
# linearly. Once the attempt counter reaches MAX_RETRIES the script exits 1.
#
# Environment:
#   DB_INIT_RETRIES     maximum number of attempts (default 10)
#   DB_INIT_SLEEP_BASE  base delay in seconds (default 5)
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"

attempt=1
while ! python scripts/init_pgvector.py; do
  if [ "$attempt" -ge "$MAX_RETRIES" ]; then
    echo "Database initialization failed after $attempt attempts; exiting."
    exit 1
  fi
  # Linear backoff: the delay scales with the number of failed attempts.
  backoff=$((SLEEP_BASE * attempt))
  echo "init_pgvector.py failed (attempt $attempt/$MAX_RETRIES). Sleeping ${backoff}s before retry..."
  sleep "$backoff"
  attempt=$((attempt + 1))
done

echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
# Prepend /app to PYTHONPATH, keeping any value that was already set.
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

# Build the gunicorn command line as an array so every argument stays a single
# word regardless of its content; optional flags are appended instead of
# relying on word-splitting of an unquoted string.
gunicorn_args=(
  --bind "0.0.0.0:${PORT_VALUE}"
  --workers "${WORKERS_VALUE}"
  --timeout "${TIMEOUT_VALUE}"
  --log-level info
  --access-logfile -
  --error-logfile -
  --capture-output
)

# Use gunicorn.conf.py from the current directory when it exists.
if [ -f gunicorn.conf.py ]; then
  gunicorn_args+=(--config gunicorn.conf.py)
else
  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
fi

# Start gunicorn in background so we can trap signals and collect diagnostics.
gunicorn "${gunicorn_args[@]}" app:app &

# PID of the background gunicorn process; used later for signalling and waiting.
GUNICORN_PID=$!

#######################################
# Shutdown handler for the wrapper script.
#
# Logs which signal arrived, dumps memory diagnostics (top processes by RSS
# and /proc/meminfo), sends SIGTERM to gunicorn, waits for it, then exits 0.
# SIGTERM is sent to gunicorn whichever signal the wrapper received.
#
# Globals:   GUNICORN_PID (read)
# Arguments: $1 - name of the received signal without the "SIG" prefix;
#                 used only in the log line (default: TERM)
# Outputs:   diagnostics on stdout
# Returns:   does not return; exits the script with status 0
#######################################
handle_term() {
  local received="${1:-TERM}"
  echo "===== SIG${received} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  echo "--- Top processes by RSS ---"
  # Diagnostics are best-effort: never let them block the shutdown path.
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo (if available) ---"
  cat /proc/meminfo || true
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  # gunicorn may already be gone; a failed kill is not an error here.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit; its exit status is deliberately ignored.
  wait "${GUNICORN_PID}" || true
  echo "Gunicorn exited; wrapper exiting"
  exit 0
}
# One trap per signal so the handler can report which one actually arrived.
trap 'handle_term TERM' SIGTERM
trap 'handle_term INT' SIGINT

# Readiness probe loop.
#
# Polls http://localhost:${PORT_VALUE}/health until it answers successfully,
# gunicorn dies, or READY_TIMEOUT seconds of wall-clock time have passed.
# On timeout the script only warns and carries on.
#
# Environment:
#   READY_TIMEOUT        total seconds to wait (default 60)
#   READY_INTERVAL       seconds to sleep between checks (default 3)
#   READY_PROBE_TIMEOUT  upper bound in seconds for one probe (default 10)
# Sets READY to 1 when the health endpoint answered, 0 otherwise.
echo "Waiting for application readiness (health endpoint)..."
READY_TIMEOUT="${READY_TIMEOUT:-60}" # total seconds to wait
READY_INTERVAL="${READY_INTERVAL:-3}" # seconds between checks
READY_PROBE_TIMEOUT="${READY_PROBE_TIMEOUT:-10}" # max seconds per probe
# Deadline based on bash's SECONDS counter so that time spent inside curl
# counts towards the budget, not only the sleeps between probes.
READY_DEADLINE=$((SECONDS + READY_TIMEOUT))
READY=0
while [ "$SECONDS" -lt "$READY_DEADLINE" ]; do
  # kill -0 sends no signal; it only checks that the process still exists.
  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
    echo "Gunicorn process exited prematurely during startup; aborting." >&2
    exit 1
  fi
  # --max-time stops a connection that is accepted but never answered from
  # stalling the loop beyond the overall timeout.
  if curl -fsS --max-time "$READY_PROBE_TIMEOUT" \
    "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep "$READY_INTERVAL"
done
if [ "$READY" -ne 1 ]; then
  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
fi

# Best-effort pre-warm: POST a small message to the /chat endpoint.
# The request is capped at 30 seconds and its response is discarded; a failure
# only produces a warning and startup continues.
# NOTE(review): this runs whether or not the health probe succeeded; nothing
# here gates on the readiness result. Confirm that is the intended behaviour.
echo "Pre-warming application via /chat endpoint..."
if ! curl -sS -X POST "http://localhost:${PORT_VALUE}/chat" \
  -H "Content-Type: application/json" \
  -d '{"message":"pre-warm"}' \
  --max-time 30 --fail >/dev/null 2>&1; then
  # Warnings go to stderr, like the other startup warnings in this script.
  echo "Pre-warm request failed but continuing..." >&2
fi

# Explicit embedding warm-up, intended to surface ONNX model issues early.
#
# A separate Python process (started by this script, not by gunicorn) builds
# an EmbeddingService and embeds one short string, printing the vector length
# and elapsed milliseconds. If that process exits non-zero, gunicorn is sent
# SIGTERM, the script waits for it, and startup is aborted with status 1.
echo "Running embedding warm-up..."
if ! python - <<'PY'
import logging
import time

from src.embedding.embedding_service import EmbeddingService

t0 = time.time()
try:
    service = EmbeddingService()
    vector = service.embed_text("warmup")
    elapsed_ms = (time.time() - t0) * 1000
    print(f"Embedding warm-up successful; dim={len(vector)}; duration_ms={elapsed_ms:.1f}")
except Exception as exc:
    # Report how long the failed attempt took, then signal failure to the shell.
    elapsed_ms = (time.time() - t0) * 1000
    print(f"Embedding warm-up FAILED after {elapsed_ms:.1f}ms: {exc}")
    raise SystemExit(1)
PY
then
  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
  # gunicorn may already have exited; ignore errors while stopping it.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  wait "${GUNICORN_PID}" || true
  exit 1
fi
echo "Embedding warm-up succeeded."

echo "Server is running (PID ${GUNICORN_PID})."

# Block until gunicorn exits, then log and propagate its exit status.
# The status is captured through `||` because this script runs with `set -e`:
# a bare `wait` returning non-zero would abort the script on that line, so the
# assignment and the log message below would never run for a failing gunicorn.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"