# deploy/portainer/federated-test.stack.yml
#
# Portainer / Docker Swarm stack template — federated-tier test instance
#
# PURPOSE
#   Deploys a single federated-tier Mosaic gateway with co-located Postgres
#   (pgvector) and Valkey for end-to-end federation testing. Intended for
#   mos-test-1.woltje.com and mos-test-2.woltje.com (DEPLOY-03/04).
#
# REQUIRED ENV VARS (set per-stack in Portainer → Stacks → Environment variables)
#   STACK_NAME          Unique name for Traefik router/service labels.
#                       Examples: mos-test-1, mos-test-2
#   HOST_FQDN           Fully-qualified domain name served by this stack.
#                       Examples: mos-test-1.woltje.com, mos-test-2.woltje.com
#   POSTGRES_PASSWORD   Database password — set per stack; do NOT commit a default.
#   BETTER_AUTH_SECRET  Random secret for BetterAuth session signing
#                       (32 random bytes, base64-encoded).
#                       Generate: openssl rand -base64 32
#   BETTER_AUTH_URL     Public gateway base URL, e.g. https://mos-test-1.woltje.com
#                       NOTE(review): the gateway service sets BETTER_AUTH_URL to
#                       https://${HOST_FQDN} directly, so a value set in Portainer
#                       is not interpolated — confirm whether this is still needed.
#
# OPTIONAL ENV VARS (uncomment and set in Portainer to enable features)
#   ANTHROPIC_API_KEY            sk-ant-...
#   OPENAI_API_KEY               sk-...
#   OTEL_EXPORTER_OTLP_ENDPOINT  http://<collector-host>:4318
#   OTEL_SERVICE_NAME            Not set directly: the stack passes STACK_NAME
#                                (fallback: mosaic-gateway).
#
# REQUIRED EXTERNAL RESOURCES
#   traefik-public  Docker overlay network — must exist before deploying.
#                   Create: docker network create --driver overlay --attachable traefik-public
#   letsencrypt     Traefik cert resolver configured on the Swarm manager.
#   DNS A record    ${HOST_FQDN} → Swarm ingress IP (or Cloudflare proxy).
#
# IMAGE
#   Pinned to sha-9f1a081 (main HEAD post-#488 Dockerfile fix). The previous
#   pin (fed-v0.1.0-m1, sha256:9b72e2...) had a broken pnpm copy and could
#   not resolve @mosaicstack/storage at runtime. The new digest was smoke-
#   tested locally — gateway boots, imports resolve, tier-detector runs.
#   Update digest here when promoting a new build.
#
# HEALTHCHECK NOTE (2026-04-21)
#   Switched from busybox wget to node http.get on 127.0.0.1 (not localhost) to
#   avoid IPv6 resolution issues on Alpine.
#   Retries increased to 5 and start_period to 60s to cover the NestJS/GC
#   cold-start window (~40-50s).
#   restart_policy set to `any` so SIGTERM/clean-exit also triggers restart.
#
# NOTE: This is a TEST template — production deployments use a separate
# parameterised template with stricter resource limits and secrets.

version: '3.9'

services:
  # ── Mosaic gateway (federated tier), published through Traefik ────────────
  gateway:
    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
    # Tag for human reference: sha-9f1a081 (post-#488 Dockerfile fix; smoke-tested locally)
    environment:
      # ── Tier ───────────────────────────────────────────────────────────────
      MOSAIC_TIER: federated
      # ── Database ───────────────────────────────────────────────────────────
      # POSTGRES_PASSWORD is interpolated verbatim into the URL, so it must be
      # URL-safe (no unescaped `@`, `/`, `:` or `#`).
      DATABASE_URL: postgres://gateway:${POSTGRES_PASSWORD}@postgres:5432/mosaic
      # ── Queue ──────────────────────────────────────────────────────────────
      VALKEY_URL: redis://valkey:6379
      # ── Gateway ────────────────────────────────────────────────────────────
      # Port must stay in sync with the Traefik loadbalancer label and the
      # healthcheck URL below.
      GATEWAY_PORT: '3000'
      GATEWAY_CORS_ORIGIN: https://${HOST_FQDN}
      # ── Auth ───────────────────────────────────────────────────────────────
      BETTER_AUTH_SECRET: ${BETTER_AUTH_SECRET}
      BETTER_AUTH_URL: https://${HOST_FQDN}
      # ── Observability ──────────────────────────────────────────────────────
      # Service name follows STACK_NAME; falls back to mosaic-gateway if unset.
      OTEL_SERVICE_NAME: ${STACK_NAME:-mosaic-gateway}
      # OTEL_EXPORTER_OTLP_ENDPOINT: http://<collector-host>:4318
      # ── AI Providers (uncomment to enable) ─────────────────────────────────
      # ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      # OPENAI_API_KEY: ${OPENAI_API_KEY}
    networks:
      - federated-test
      - traefik-public
    deploy:
      replicas: 1
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        # Without a window, Swarm counts max_attempts over the task's whole
        # lifetime and stops restarting for good after three failures (e.g.
        # if the gateway exits while Postgres is still initialising). With a
        # window, only restarts inside the last 120s count.
        window: 120s
      labels:
        - 'traefik.enable=true'
        - 'traefik.docker.network=traefik-public'
        - 'traefik.http.routers.${STACK_NAME}.rule=Host(`${HOST_FQDN}`)'
        - 'traefik.http.routers.${STACK_NAME}.entrypoints=websecure'
        - 'traefik.http.routers.${STACK_NAME}.tls=true'
        - 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
        - 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
    healthcheck:
      # Exit 0 only on HTTP 200 from /health; any other status or a
      # connection error exits 1.
      test:
        - 'CMD'
        - 'node'
        - '-e'
        - "require('http').get('http://127.0.0.1:3000/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"
      interval: 30s
      timeout: 5s
      retries: 5
      start_period: 60s
    # NOTE: `docker stack deploy` ignores depends_on; it is kept for
    # documentation and for local `docker compose` runs. Start-up ordering in
    # Swarm relies on the restart policy above.
    depends_on:
      - postgres
      - valkey

  # ── Postgres with pgvector, reachable only on the internal overlay ────────
  postgres:
    image: pgvector/pgvector:pg17
    environment:
      POSTGRES_USER: gateway
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: mosaic
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - federated-test
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        # Count attempts per window instead of over the task's lifetime.
        window: 120s
    healthcheck:
      # -d names the real database; without it pg_isready probes a database
      # named after the user ("gateway") and the server logs a failed
      # connection attempt on every check.
      test: ['CMD-SHELL', 'pg_isready -U gateway -d mosaic']
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

  # ── Valkey (Redis-compatible), reachable only on the internal overlay ─────
  valkey:
    image: valkey/valkey:8-alpine
    volumes:
      - valkey-data:/data
    networks:
      - federated-test
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        # Count attempts per window instead of over the task's lifetime.
        window: 120s
    healthcheck:
      test: ['CMD', 'valkey-cli', 'ping']
      interval: 10s
      timeout: 3s
      retries: 5
      start_period: 5s

volumes:
  postgres-data:
  valkey-data:

networks:
  # Stack-private overlay joining gateway, postgres and valkey.
  federated-test:
    driver: overlay
  # Pre-existing Traefik ingress network; must be created before deploying.
  traefik-public:
    external: true