fix(federation): healthcheck + restart policy for federated-test stacks (#492)
This commit was merged in pull request #492.
This commit is contained in:
@@ -36,6 +36,12 @@
|
||||
# tested locally — gateway boots, imports resolve, tier-detector runs.
|
||||
# Update digest here when promoting a new build.
|
||||
#
|
||||
# HEALTHCHECK NOTE (2026-04-21)
|
||||
# Switched from busybox wget to node http.get on 127.0.0.1 (not localhost) to
|
||||
# avoid IPv6 resolution issues on Alpine. Retries increased to 5 and
|
||||
# start_period to 60s to cover the NestJS/GC cold-start window (~40-50s).
|
||||
# restart_policy condition is now `any`, so a clean exit (e.g. on SIGTERM) also restarts.
|
||||
#
|
||||
# NOTE: This is a TEST template — production deployments use a separate
|
||||
# parameterised template with stricter resource limits and secrets.
|
||||
|
||||
@@ -76,7 +82,7 @@ services:
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
condition: any
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
labels:
|
||||
@@ -88,11 +94,15 @@ services:
|
||||
- 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
|
||||
- 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
|
||||
healthcheck:
|
||||
test: ['CMD', 'wget', '-qO-', 'http://localhost:3000/health']
|
||||
test:
|
||||
- 'CMD'
|
||||
- 'node'
|
||||
- '-e'
|
||||
- "require('http').get('http://127.0.0.1:3000/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 20s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
depends_on:
|
||||
- postgres
|
||||
- valkey
|
||||
|
||||
Reference in New Issue
Block a user