Spaces:
Sleeping
Sleeping
Seth McKnight
committed on
Commit
·
d3fd68c
1
Parent(s):
6338213
Enhance run.sh with improved error handling (#87)
Browse files
Updated error handling and logging for gunicorn process.
run.sh
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#!/usr/bin/env bash
|
| 2 |
-
set -
|
| 3 |
|
| 4 |
# Default to 1 worker to prevent OOM on low-memory hosts
|
| 5 |
WORKERS_VALUE="${WORKERS:-1}"
|
|
@@ -17,16 +17,16 @@ until python scripts/init_pgvector.py; do
|
|
| 17 |
echo "Database initialization failed after $i attempts; exiting."
|
| 18 |
exit 1
|
| 19 |
fi
|
| 20 |
-
echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping $((SLEEP_BASE
|
| 21 |
-
sleep $((SLEEP_BASE
|
| 22 |
i=$((i + 1))
|
| 23 |
done
|
| 24 |
|
| 25 |
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
|
| 26 |
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
--bind 0.0.0.0:${PORT_VALUE} \
|
| 31 |
--workers "${WORKERS_VALUE}" \
|
| 32 |
--timeout "${TIMEOUT_VALUE}" \
|
|
@@ -34,4 +34,28 @@ exec gunicorn \
|
|
| 34 |
--access-logfile - \
|
| 35 |
--error-logfile - \
|
| 36 |
--capture-output \
|
| 37 |
-
app:app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#!/usr/bin/env bash
|
| 2 |
+
set -euo pipefail
|
| 3 |
|
| 4 |
# Default to 1 worker to prevent OOM on low-memory hosts
|
| 5 |
WORKERS_VALUE="${WORKERS:-1}"
|
|
|
|
| 17 |
echo "Database initialization failed after $i attempts; exiting."
|
| 18 |
exit 1
|
| 19 |
fi
|
| 20 |
+
echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping $((SLEEP_BASE * i))s before retry..."
|
| 21 |
+
sleep $((SLEEP_BASE * i))
|
| 22 |
i=$((i + 1))
|
| 23 |
done
|
| 24 |
|
| 25 |
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
|
| 26 |
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
|
| 27 |
|
| 28 |
+
# Start gunicorn in background so we can trap signals and collect diagnostics
|
| 29 |
+
gunicorn \
|
| 30 |
--bind 0.0.0.0:${PORT_VALUE} \
|
| 31 |
--workers "${WORKERS_VALUE}" \
|
| 32 |
--timeout "${TIMEOUT_VALUE}" \
|
|
|
|
| 34 |
--access-logfile - \
|
| 35 |
--error-logfile - \
|
| 36 |
--capture-output \
|
| 37 |
+
app:app &
|
| 38 |
+
|
| 39 |
+
GUNICORN_PID=$!
|
| 40 |
+
|
| 41 |
+
# Trap TERM and INT: log memory diagnostics, forward SIGTERM to gunicorn,
# wait for it to stop, and exit with gunicorn's own status.

#######################################
# Shutdown handler for the wrapper script.
# Globals:   GUNICORN_PID (read) - PID of the backgrounded gunicorn process
# Arguments: $1 - name of the received signal without the "SIG" prefix
#                 (optional; defaults to TERM)
# Outputs:   diagnostics (top processes by RSS, /proc/meminfo) to stdout
# Returns:   does not return; exits with the status `wait` reports for
#            gunicorn, so a failed or killed server is not reported as 0
#######################################
handle_term() {
  local sig="${1:-TERM}"
  local rc=0

  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  echo "--- Top processes by RSS ---"
  # Best-effort diagnostics: `head` closing the pipe early (or a `ps` that
  # lacks --sort) must not abort the handler under `set -e -o pipefail`.
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo ---"
  cat /proc/meminfo || true
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  # gunicorn may already be gone; a failed kill is not an error here.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit and remember its status instead of discarding it.
  wait "${GUNICORN_PID}" || rc=$?
  echo "Gunicorn exited; wrapper exiting"
  exit "${rc}"
}
# Register one trap per signal so the log names the signal actually received.
trap 'handle_term TERM' SIGTERM
trap 'handle_term INT' SIGINT
|
| 56 |
+
|
| 57 |
+
# Wait for gunicorn to exit on its own and propagate its status.
# The script runs under `set -e`, so a bare `wait` returning non-zero would
# abort here before the status could be recorded or logged; capture it via
# `||` so the failure path still reaches the log line and the explicit exit.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"
|