Seth McKnight committed on
Commit
d3fd68c
·
1 Parent(s): 6338213

Enhance run.sh with improved error handling (#87)

Browse files

Updated error handling and logging for gunicorn process.

Files changed (1) hide show
  1. run.sh +30 -6
run.sh CHANGED
@@ -1,5 +1,5 @@
1
  #!/usr/bin/env bash
2
- set -e
3
 
4
  # Default to 1 worker to prevent OOM on low-memory hosts
5
  WORKERS_VALUE="${WORKERS:-1}"
@@ -17,16 +17,16 @@ until python scripts/init_pgvector.py; do
17
  echo "Database initialization failed after $i attempts; exiting."
18
  exit 1
19
  fi
20
- echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping $((SLEEP_BASE ** i))s before retry..."
21
- sleep $((SLEEP_BASE ** i))
22
  i=$((i + 1))
23
  done
24
 
25
  echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
26
  export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
27
 
28
- # Use explicit logging options so any worker crash or import error is visible
29
- exec gunicorn \
30
  --bind 0.0.0.0:${PORT_VALUE} \
31
  --workers "${WORKERS_VALUE}" \
32
  --timeout "${TIMEOUT_VALUE}" \
@@ -34,4 +34,28 @@ exec gunicorn \
34
  --access-logfile - \
35
  --error-logfile - \
36
  --capture-output \
37
- app:app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env bash
2
+ set -euo pipefail
3
 
4
  # Default to 1 worker to prevent OOM on low-memory hosts
5
  WORKERS_VALUE="${WORKERS:-1}"
 
17
  echo "Database initialization failed after $i attempts; exiting."
18
  exit 1
19
  fi
20
+ echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping $((SLEEP_BASE * i))s before retry..."
21
+ sleep $((SLEEP_BASE * i))
22
  i=$((i + 1))
23
  done
24
 
25
  echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
26
  export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
27
 
28
+ # Start gunicorn in background so we can trap signals and collect diagnostics
29
+ gunicorn \
30
  --bind 0.0.0.0:${PORT_VALUE} \
31
  --workers "${WORKERS_VALUE}" \
32
  --timeout "${TIMEOUT_VALUE}" \
 
34
  --access-logfile - \
35
  --error-logfile - \
36
  --capture-output \
37
+ app:app &
38
+
39
+ GUNICORN_PID=$!
40
+
41
+ # Trap TERM and INT, log diagnostics, forward the signal to gunicorn, and wait
42
+ handle_term() {
43
+ echo "===== SIGTERM received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
44
+ echo "--- Top processes by RSS ---"
45
+ ps aux --sort=-rss | head -n 20 || true
46
+ echo "--- /proc/meminfo ---"
47
+ cat /proc/meminfo || true
48
+ echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
49
+ kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
50
+ # Wait for gunicorn to exit
51
+ wait "${GUNICORN_PID}" || true
52
+ echo "Gunicorn exited; wrapper exiting"
53
+ exit 0
54
+ }
55
+ trap 'handle_term' SIGTERM SIGINT
56
+
57
+ # Wait for gunicorn to exit normally
58
+ wait "${GUNICORN_PID}"
59
+ EXIT_CODE=$?
60
+ echo "Gunicorn stopped with exit code ${EXIT_CODE}"
61
+ exit "${EXIT_CODE}"