feat(federation): two-gateway test harness scaffold (FED-M3-02)

Adds tools/federation-harness/ — the permanent test bed for M3+ federation
E2E tests. Boots two gateways (Server A + Server B) on a shared Docker bridge
network with per-gateway Postgres/pgvector + Valkey and a shared Step-CA.

- docker-compose.two-gateways.yml: gateway-a/b, postgres-a/b, valkey-a/b,
  step-ca; image digest-pinned to sha256:1069117740e... (sha-9f1a081, #491)
- seed.ts: provisions scope variants A/B/C via real admin REST API; walks
  full enrollment flow (peer keypair → grant → token → redeem → cert store)
- harness.ts: bootHarness/tearDownHarness/serverA/serverB/seed helpers for
  vitest; idempotent boot (reuses running stack when both gateways healthy)
- README.md: prereqs, topology, seed usage, vitest integration, port override,
  troubleshooting, image digest note

No production code modified. Quality gates: typecheck ✓ lint ✓ format ✓

Closes #462

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jarvis
2026-04-23 20:21:31 -05:00
parent ee3f2defd9
commit b445033c69
4 changed files with 1147 additions and 0 deletions

View File

@@ -0,0 +1,243 @@
# tools/federation-harness/docker-compose.two-gateways.yml
#
# Two-gateway federation test harness — local-only, no Portainer/Swarm needed.
#
# USAGE (manual):
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
#
# USAGE (from harness.ts):
# const handle = await bootHarness();
# ...
# await tearDownHarness(handle);
#
# TOPOLOGY:
#   gateway-a — "home" instance (Server A, the requesting side)
#     ├── postgres-a (pgvector/pg17, host port 15432)
#     └── valkey-a   (host port 16379)
#   gateway-b — "work" instance (Server B, the serving side)
#     ├── postgres-b (pgvector/pg17, host port 15433)
#     └── valkey-b   (host port 16380)
#   step-ca   — shared CA for both gateways (host port 19000)
#
# All services share the `fed-test-net` bridge network.
# Host port ranges (15432-15433, 16379-16380, 14001-14002, 19000) are chosen
# to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
#
# IMAGE:
# Pinned to the immutable digest sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
# (sha-9f1a081, post-#491 IMG-FIX, smoke-tested locally).
# Update this digest only after a new CI build is promoted to the registry.
#
# STEP-CA:
#   A single Step-CA instance is shared; both gateways connect to it.
#   The CA volume is removed by `docker compose down -v` and is regenerated on
#   the next `up`.
#   The harness seed script provisions cross-trust of the CA roots after first boot.
services:
# ─── Shared Certificate Authority ────────────────────────────────────────────
  # One Step-CA container used by both gateways (each points STEP_CA_URL at
  # https://step-ca:9000 on the shared network).
  step-ca:
    image: smallstep/step-ca:0.27.4
    container_name: fed-harness-step-ca
    restart: unless-stopped
    networks:
      - fed-test-net
    ports:
      # Host side is overridable via STEP_CA_HOST_PORT; container side is 9000.
      - '${STEP_CA_HOST_PORT:-19000}:9000'
    volumes:
      # Named volume mounted at /home/step.
      - step_ca_data:/home/step
      # Init script, templates and dev password are mounted read-only from
      # infra/step-ca.
      - ../../infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
      - ../../infra/step-ca/templates:/etc/step-ca-templates:ro
      - ../../infra/step-ca/dev-password:/run/secrets/ca_password:ro
    # Run the mounted init script instead of the image's default entrypoint.
    entrypoint:
      - '/bin/sh'
      - '/usr/local/bin/mosaic-step-ca-init.sh'
    healthcheck:
      # `step ca health` against the local listener, trusting the root cert
      # under /home/step/certs.
      test:
        - 'CMD'
        - 'step'
        - 'ca'
        - 'health'
        - '--ca-url'
        - 'https://localhost:9000'
        - '--root'
        - '/home/step/certs/root_ca.crt'
      start_period: 30s
      interval: 10s
      timeout: 5s
      retries: 5
# ─── Server A — Home / Requesting Gateway ────────────────────────────────────
  # Postgres (pgvector image, pg17 tag) backing gateway-a; gateway-a's
  # DATABASE_URL targets postgres-a:5432 with the credentials set below.
  postgres-a:
    image: pgvector/pgvector:pg17
    container_name: fed-harness-postgres-a
    restart: unless-stopped
    ports:
      # Host side is overridable via PG_A_HOST_PORT.
      - '${PG_A_HOST_PORT:-15432}:5432'
    environment:
      # Harness-only credentials.
      POSTGRES_USER: mosaic
      POSTGRES_PASSWORD: mosaic
      POSTGRES_DB: mosaic
    volumes:
      - pg_a_data:/var/lib/postgresql/data
      # Init scripts from infra/pg-init, mounted read-only.
      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
    networks:
      - fed-test-net
    healthcheck:
      # Probe over TCP (-h 127.0.0.1) rather than the default Unix socket.
      # The upstream postgres entrypoint runs the initdb.d scripts against a
      # temporary server that only listens on the socket, so a socket probe
      # can report healthy before the real server accepts the TCP connections
      # gateway-a will make. -d names the database explicitly.
      test: ['CMD-SHELL', 'pg_isready -h 127.0.0.1 -U mosaic -d mosaic']
      interval: 5s
      timeout: 3s
      retries: 5
# Valkey instance for Server A (gateway-a's VALKEY_URL targets valkey-a:6379).
  valkey-a:
    image: valkey/valkey:8-alpine
    container_name: fed-harness-valkey-a
    restart: unless-stopped
    networks:
      - fed-test-net
    ports:
      # Host side is overridable via VALKEY_A_HOST_PORT.
      - '${VALKEY_A_HOST_PORT:-16379}:6379'
    volumes:
      - valkey_a_data:/data
    healthcheck:
      # Exec form: run `valkey-cli ping` directly, no shell.
      test:
        - 'CMD'
        - 'valkey-cli'
        - 'ping'
      interval: 5s
      timeout: 3s
      retries: 5
# Gateway for Server A, published on the host at GATEWAY_A_HOST_PORT (default 14001).
  gateway-a:
    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
    container_name: fed-harness-gateway-a
    restart: unless-stopped
    ports:
      - '${GATEWAY_A_HOST_PORT:-14001}:3000'
    environment:
      MOSAIC_TIER: federated
      DATABASE_URL: postgres://mosaic:mosaic@postgres-a:5432/mosaic
      VALKEY_URL: redis://valkey-a:6379
      GATEWAY_PORT: '3000'
      # Follows the published host port so that overriding GATEWAY_A_HOST_PORT
      # does not leave the allowed origin pointing at the default 14001.
      GATEWAY_CORS_ORIGIN: 'http://localhost:${GATEWAY_A_HOST_PORT:-14001}'
      BETTER_AUTH_SECRET: harness-secret-server-a-do-not-use-in-prod
      BETTER_AUTH_URL: 'http://gateway-a:3000'
      STEP_CA_URL: 'https://step-ca:9000'
      FEDERATION_PEER_HOSTNAME: gateway-a
      # Admin key — fixed for harness use only; never use in production
      ADMIN_API_KEY: harness-admin-key-a
    # Start only after the database, cache and CA all report healthy.
    depends_on:
      postgres-a:
        condition: service_healthy
      valkey-a:
        condition: service_healthy
      step-ca:
        condition: service_healthy
    networks:
      - fed-test-net
    healthcheck:
      # Healthy when GET /api/health on the in-container port returns 200;
      # any other status or a connection error exits non-zero.
      test:
        - 'CMD'
        - 'node'
        - '-e'
        - "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 60s
# ─── Server B — Work / Serving Gateway ──────────────────────────────────────
  # Postgres (pgvector image, pg17 tag) backing gateway-b; gateway-b's
  # DATABASE_URL targets postgres-b:5432 with the credentials set below.
  postgres-b:
    image: pgvector/pgvector:pg17
    container_name: fed-harness-postgres-b
    restart: unless-stopped
    ports:
      # Host side is overridable via PG_B_HOST_PORT.
      - '${PG_B_HOST_PORT:-15433}:5432'
    environment:
      # Harness-only credentials.
      POSTGRES_USER: mosaic
      POSTGRES_PASSWORD: mosaic
      POSTGRES_DB: mosaic
    volumes:
      - pg_b_data:/var/lib/postgresql/data
      # Init scripts from infra/pg-init, mounted read-only.
      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
    networks:
      - fed-test-net
    healthcheck:
      # Probe over TCP (-h 127.0.0.1) rather than the default Unix socket.
      # The upstream postgres entrypoint runs the initdb.d scripts against a
      # temporary server that only listens on the socket, so a socket probe
      # can report healthy before the real server accepts the TCP connections
      # gateway-b will make. -d names the database explicitly.
      test: ['CMD-SHELL', 'pg_isready -h 127.0.0.1 -U mosaic -d mosaic']
      interval: 5s
      timeout: 3s
      retries: 5
# Valkey instance for Server B (gateway-b's VALKEY_URL targets valkey-b:6379).
  valkey-b:
    image: valkey/valkey:8-alpine
    container_name: fed-harness-valkey-b
    restart: unless-stopped
    networks:
      - fed-test-net
    ports:
      # Host side is overridable via VALKEY_B_HOST_PORT.
      - '${VALKEY_B_HOST_PORT:-16380}:6379'
    volumes:
      - valkey_b_data:/data
    healthcheck:
      # Exec form: run `valkey-cli ping` directly, no shell.
      test:
        - 'CMD'
        - 'valkey-cli'
        - 'ping'
      interval: 5s
      timeout: 3s
      retries: 5
# Gateway for Server B, published on the host at GATEWAY_B_HOST_PORT (default 14002).
  gateway-b:
    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
    container_name: fed-harness-gateway-b
    restart: unless-stopped
    ports:
      - '${GATEWAY_B_HOST_PORT:-14002}:3000'
    environment:
      MOSAIC_TIER: federated
      DATABASE_URL: postgres://mosaic:mosaic@postgres-b:5432/mosaic
      VALKEY_URL: redis://valkey-b:6379
      GATEWAY_PORT: '3000'
      # Follows the published host port so that overriding GATEWAY_B_HOST_PORT
      # does not leave the allowed origin pointing at the default 14002.
      GATEWAY_CORS_ORIGIN: 'http://localhost:${GATEWAY_B_HOST_PORT:-14002}'
      BETTER_AUTH_SECRET: harness-secret-server-b-do-not-use-in-prod
      BETTER_AUTH_URL: 'http://gateway-b:3000'
      STEP_CA_URL: 'https://step-ca:9000'
      FEDERATION_PEER_HOSTNAME: gateway-b
      # Admin key — fixed for harness use only; never use in production
      ADMIN_API_KEY: harness-admin-key-b
    # Start only after the database, cache and CA all report healthy.
    depends_on:
      postgres-b:
        condition: service_healthy
      valkey-b:
        condition: service_healthy
      step-ca:
        condition: service_healthy
    networks:
      - fed-test-net
    healthcheck:
      # Healthy when GET /api/health on the in-container port returns 200;
      # any other status or a connection error exits non-zero.
      test:
        - 'CMD'
        - 'node'
        - '-e'
        - "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 60s
networks:
  # The one bridge network every harness service attaches to. The explicit
  # `name` fixes the network name instead of deriving it from the project name.
  fed-test-net:
    driver: bridge
    name: fed-test-net
volumes:
  # Named volumes with fixed `fed-harness-*` names, one per stateful service.

  # Server A
  pg_a_data:
    name: fed-harness-pg-a
  valkey_a_data:
    name: fed-harness-valkey-a

  # Server B
  pg_b_data:
    name: fed-harness-pg-b
  valkey_b_data:
    name: fed-harness-valkey-b

  # Shared Step-CA state (mounted at /home/step in the step-ca service)
  step_ca_data:
    name: fed-harness-step-ca