Seth McKnight
Enhance run.sh with improved error handling (#87)
d3fd68c
raw
history blame
1.87 kB
#!/usr/bin/env bash
# Strict mode: abort on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Runtime settings. Each one is taken from the environment and falls back to
# the built-in default when the variable is unset or empty.
PORT_VALUE=${PORT:-10000}
TIMEOUT_VALUE=${TIMEOUT:-120}
# A single worker is the default so that low-memory hosts are not pushed into
# an out-of-memory condition; set WORKERS to override.
WORKERS_VALUE=${WORKERS:-1}
# Initialize the database, retrying with a linearly growing delay so that a
# cold-starting database has time to come up.
#   DB_INIT_RETRIES    - maximum number of attempts (default 10)
#   DB_INIT_SLEEP_BASE - seconds; multiplied by the attempt number to get the
#                        delay before the next attempt (default 5)
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"

# Reject non-numeric settings up front. A non-integer MAX_RETRIES makes the
# `[ ... -ge ... ]` test below fail with an error, which `if` treats as false,
# so the loop would never give up; a non-integer SLEEP_BASE would be evaluated
# as an arbitrary arithmetic expression.
if ! [[ "${MAX_RETRIES}" =~ ^[0-9]+$ && "${SLEEP_BASE}" =~ ^[0-9]+$ ]]; then
  echo "DB_INIT_RETRIES and DB_INIT_SLEEP_BASE must be non-negative integers (got '${MAX_RETRIES}' and '${SLEEP_BASE}'); exiting." >&2
  exit 1
fi
# Force base 10 so values with leading zeros (e.g. "08") are not parsed as
# octal by shell arithmetic.
MAX_RETRIES=$((10#${MAX_RETRIES}))
SLEEP_BASE=$((10#${SLEEP_BASE}))

i=1
until python scripts/init_pgvector.py; do
  if [ "$i" -ge "$MAX_RETRIES" ]; then
    echo "Database initialization failed after $i attempts; exiting."
    exit 1
  fi
  # Linear backoff: attempt N is followed by a pause of SLEEP_BASE * N seconds.
  retry_delay=$((SLEEP_BASE * i))
  echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping ${retry_delay}s before retry..."
  sleep "${retry_delay}"
  i=$((i + 1))
done
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
# Put /app at the front of the Python module search path, keeping any
# PYTHONPATH that was already set.
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
# Start gunicorn in the background so this shell stays free to trap signals
# and collect diagnostics; its PID is recorded in GUNICORN_PID for the signal
# handler and the final wait.
# The bind address is quoted so the port value is never word-split or globbed.
# "-" as the access/error logfile asks gunicorn to log to the console streams
# rather than to files.
gunicorn \
  --bind "0.0.0.0:${PORT_VALUE}" \
  --workers "${WORKERS_VALUE}" \
  --timeout "${TIMEOUT_VALUE}" \
  --log-level debug \
  --access-logfile - \
  --error-logfile - \
  --capture-output \
  app:app &
GUNICORN_PID=$!
#######################################
# Signal handler for the wrapper: print memory diagnostics, forward SIGTERM to
# the background gunicorn process, wait for it to finish, then exit.
# Globals:   GUNICORN_PID (read)
# Arguments: none
# Outputs:   diagnostics and progress messages on stdout
# Returns:   does not return; always exits the shell with status 0,
#            regardless of the status gunicorn finished with
#######################################
handle_term() {
  printf '===== SIGTERM received at %s =====\n' "$(date -u +'%Y-%m-%dT%H:%M:%SZ')"

  # Diagnostics are best-effort: a failing ps/head/cat must not stop the
  # shutdown sequence.
  printf '%s\n' "--- Top processes by RSS ---"
  if ! ps aux --sort=-rss | head -n 20; then
    :
  fi
  printf '%s\n' "--- /proc/meminfo ---"
  cat /proc/meminfo || :

  printf '%s\n' "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  # The process may already be gone; ignore the resulting kill error.
  kill -s TERM "${GUNICORN_PID}" 2>/dev/null || :

  # Block until gunicorn has exited; its exit status is intentionally ignored.
  wait "${GUNICORN_PID}" || :
  printf '%s\n' "Gunicorn exited; wrapper exiting"
  exit 0
}
# Run handle_term when the wrapper receives SIGTERM or SIGINT.
trap 'handle_term' SIGTERM SIGINT
# Wait for gunicorn to exit on its own and pass its exit status through.
# The status is captured with `||` because, under `set -e`, a bare `wait` that
# returns non-zero would abort the script right there, before the status
# could be logged.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"