fix(federation): healthcheck + restart policy for federated-test stacks (#492)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful

This commit was merged in pull request #492.
This commit is contained in:
2026-04-22 02:56:40 +00:00
parent f2cda52e1a
commit bb24292cf7

View File

@@ -36,6 +36,12 @@
# tested locally — gateway boots, imports resolve, tier-detector runs.
# Update digest here when promoting a new build.
#
# HEALTHCHECK NOTE (2026-04-21)
# The probe was switched from busybox wget to Node's http.get against
# 127.0.0.1 (not localhost) to avoid IPv6 resolution issues on Alpine.
# Retries were raised from 3 to 5 and start_period from 20s to 60s to cover
# the NestJS/GC cold-start window (~40-50s).
# restart_policy.condition was changed from `on-failure` to `any` so that a
# clean exit (e.g. after SIGTERM) also triggers a restart.
#
# NOTE: This is a TEST template — production deployments use a separate
# parameterised template with stricter resource limits and secrets.
@@ -76,7 +82,7 @@ services:
deploy: deploy:
replicas: 1 replicas: 1
restart_policy: restart_policy:
condition: on-failure condition: any
delay: 5s delay: 5s
max_attempts: 3 max_attempts: 3
labels: labels:
@@ -88,11 +94,15 @@ services:
- 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt' - 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
- 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000' - 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
healthcheck: healthcheck:
test: ['CMD', 'wget', '-qO-', 'http://localhost:3000/health'] test:
- 'CMD'
- 'node'
- '-e'
- "require('http').get('http://127.0.0.1:3000/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"
interval: 30s interval: 30s
timeout: 5s timeout: 5s
retries: 3 retries: 5
start_period: 20s start_period: 60s
depends_on: depends_on:
- postgres - postgres
- valkey - valkey