From c56dda74aa3804f5c428ec0f7b990381e902c66e Mon Sep 17 00:00:00 2001 From: "jason.woltje" Date: Wed, 22 Apr 2026 02:21:49 +0000 Subject: [PATCH] feat(federation): Step-CA sidecar in federated compose [FED-M2-02] (#490) --- .gitignore | 3 ++ docker-compose.federated.yml | 60 ++++++++++++++++++++++++++ docs/federation/TASKS.md | 30 ++++++------- infra/step-ca/dev-password.example | 1 + infra/step-ca/init.sh | 60 ++++++++++++++++++++++++++ infra/step-ca/templates/federation.tpl | 48 +++++++++++++++++++++ 6 files changed, 187 insertions(+), 15 deletions(-) create mode 100644 infra/step-ca/dev-password.example create mode 100755 infra/step-ca/init.sh create mode 100644 infra/step-ca/templates/federation.tpl diff --git a/.gitignore b/.gitignore index 0d0970d..7728aae 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ coverage *.tsbuildinfo .pnpm-store docs/reports/ + +# Step-CA dev password — real file is gitignored; commit only the .example +infra/step-ca/dev-password diff --git a/docker-compose.federated.yml b/docker-compose.federated.yml index 60fa88b..5687e31 100644 --- a/docker-compose.federated.yml +++ b/docker-compose.federated.yml @@ -27,6 +27,7 @@ services: postgres-federated: image: pgvector/pgvector:pg17 profiles: [federated] + restart: unless-stopped ports: - '${PG_FEDERATED_HOST_PORT:-5433}:5432' environment: @@ -45,6 +46,7 @@ services: valkey-federated: image: valkey/valkey:8-alpine profiles: [federated] + restart: unless-stopped ports: - '${VALKEY_FEDERATED_HOST_PORT:-6380}:6379' volumes: @@ -55,6 +57,64 @@ services: timeout: 3s retries: 5 + # --------------------------------------------------------------------------- + # Step-CA — Mosaic Federation internal certificate authority + # + # Image: pinned to 0.27.4 (latest stable as of late 2025). + # `latest` is forbidden per Mosaic image policy (immutable tag required for + # reproducible deployments and digest-first promotion in CI). 
+ # + # Profile: `federated` — this service must not start in non-federated dev. + # + # Password: + # Dev: bind-mount ./infra/step-ca/dev-password (gitignored; copy from + # ./infra/step-ca/dev-password.example and customise locally). + # Prod: replace the bind-mount with a Docker secret: + # secrets: + # ca_password: + # external: true + # and reference it as `/run/secrets/ca_password` (same path the + # init script already uses). + # + # Provisioner: "mosaic-fed" (consumed by apps/gateway/src/federation/ca.service.ts) + # --------------------------------------------------------------------------- + step-ca: + image: smallstep/step-ca:0.27.4 + profiles: [federated] + restart: unless-stopped + ports: + - '${STEP_CA_HOST_PORT:-9000}:9000' + volumes: + - step_ca_data:/home/step + # init script — executed as the container entrypoint + - ./infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro + # X.509 template skeleton (wired in M2-04) + - ./infra/step-ca/templates:/etc/step-ca-templates:ro + # Dev password file — GITIGNORED; copy from dev-password.example + # In production, replace this with a Docker secret (see comment above). + - ./infra/step-ca/dev-password:/run/secrets/ca_password:ro + entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh'] + healthcheck: + # The healthcheck requires the root cert to exist, which is only true + # after init.sh has completed on first boot. start_period gives init + # time to finish before Docker starts counting retries. 
+ test: + [ + 'CMD', + 'step', + 'ca', + 'health', + '--ca-url', + 'https://localhost:9000', + '--root', + '/home/step/certs/root_ca.crt', + ] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + volumes: pg_federated_data: valkey_federated_data: + step_ca_data: diff --git a/docs/federation/TASKS.md b/docs/federation/TASKS.md index d8ae8ff..66cf9c7 100644 --- a/docs/federation/TASKS.md +++ b/docs/federation/TASKS.md @@ -63,21 +63,21 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3). -| id | status | description | issue | agent | branch | depends_on | estimate | notes | -| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FED-M2-01 | needs-qa | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | PR #486 open. First review NEEDS CHANGES (missing DESC indexes + reserved cols). Remediation subagent `a673dd9355dc26f82` in flight in worktree `agent-a4404ac1`. | -| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. 
| #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. | -| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. | -| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. | -| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl). | #461 | sonnet | feat/federation-m2-key-sealing | M2-01 | 5K | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only. | -| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones. | #461 | sonnet | feat/federation-m2-grants-service | M2-03, M2-05 | 6K | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6. | -| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens). 
| #461 | sonnet | feat/federation-m2-enrollment | M2-04, M2-06 | 6K | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock). | -| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option. | #461 | sonnet | feat/federation-m2-cli | M2-06, M2-07 | 7K | `peer add ` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert. | -| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`. | #461 | sonnet | feat/federation-m2-integration | M2-08 | 8K | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E). | -| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461 | sonnet | feat/federation-m2-e2e | M2-08, DEPLOY-04 | 6K | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths. | -| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths. | #461 | sonnet | feat/federation-m2-security-review | M2-10 | 8K | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages. | -| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred. 
| #461 | haiku | feat/federation-m2-docs | M2-11 | 4K | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example. | -| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row. | #461 | sonnet | feat/federation-m2-close | M2-12 | 3K | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs. | +| id | status | description | issue | agent | branch | depends_on | estimate | notes | +| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FED-M2-01 | needs-qa | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | PR #486 open. First review NEEDS CHANGES (missing DESC indexes + reserved cols). Remediation subagent `a673dd9355dc26f82` in flight in worktree `agent-a4404ac1`. | +| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. 
| #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. | +| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. | +| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. **Acceptance**: must (a) wire `federation.tpl` template into `mosaic-fed` provisioner config and (b) include a unit/integration test asserting issued certs contain BOTH OIDs — fails-loud guard against silent OID stripping (carry-forward from M2-02 review). | +| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl). | #461 | sonnet | feat/federation-m2-key-sealing | M2-01 | 5K | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only. | +| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones. | #461 | sonnet | feat/federation-m2-grants-service | M2-03, M2-05 | 6K | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6. 
| +| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens). | #461 | sonnet | feat/federation-m2-enrollment | M2-04, M2-06 | 6K | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock). | +| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option. | #461 | sonnet | feat/federation-m2-cli | M2-06, M2-07 | 7K | `peer add ` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert. | +| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`. | #461 | sonnet | feat/federation-m2-integration | M2-08 | 8K | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E). | +| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461 | sonnet | feat/federation-m2-e2e | M2-08, DEPLOY-04 | 6K | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths. | +| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths. | #461 | sonnet | feat/federation-m2-security-review | M2-10 | 8K | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages. 
| +| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred. | #461 | haiku | feat/federation-m2-docs | M2-11 | 4K | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example. | +| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row. | #461 | sonnet | feat/federation-m2-close | M2-12 | 3K | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs. | **M2 code workstream estimate:** ~72K tokens (vs MILESTONES.md 30K — same over-budget pattern as M1, where per-task breakdown including tests/review/docs catches the real cost). diff --git a/infra/step-ca/dev-password.example b/infra/step-ca/dev-password.example new file mode 100644 index 0000000..be30142 --- /dev/null +++ b/infra/step-ca/dev-password.example @@ -0,0 +1 @@ +dev-only-step-ca-password-do-not-use-in-production diff --git a/infra/step-ca/init.sh b/infra/step-ca/init.sh new file mode 100755 index 0000000..c363ffc --- /dev/null +++ b/infra/step-ca/init.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# infra/step-ca/init.sh +# +# Idempotent first-boot initialiser for the Mosaic Federation CA. +# +# On the first run (no /home/step/config/ca.json present) this script: +# 1. Initialises Step-CA with a JWK provisioner named "mosaic-fed". +# 2. Writes the CA configuration to the persistent volume at /home/step. +# +# On subsequent runs (config already exists) this script skips init and +# starts the CA directly. +# +# The provisioner name "mosaic-fed" is consumed by: +# apps/gateway/src/federation/ca.service.ts (added in M2-04) +# +# Password source: +# Dev: mounted from ./infra/step-ca/dev-password via bind mount. 
+# Prod: mounted from a Docker secret at /run/secrets/ca_password. +# +# OID template: +# infra/step-ca/templates/federation.tpl is copied to /home/step/templates +# on first boot so the JWK provisioner can reference it. The template +# skeleton is wired in M2-04 when the CA service lands the SAN-bearing +# CSR work. + +set -e + +CA_CONFIG="/home/step/config/ca.json" +PASSWORD_FILE="/run/secrets/ca_password" + +if [ ! -f "${CA_CONFIG}" ]; then + echo "[step-ca init] First boot detected — initialising Mosaic Federation CA..." + + step ca init \ + --name "Mosaic Federation CA" \ + --dns "localhost" \ + --dns "step-ca" \ + --address ":9000" \ + --provisioner "mosaic-fed" \ + --password-file "${PASSWORD_FILE}" \ + --provisioner-password-file "${PASSWORD_FILE}" \ + --no-db + + echo "[step-ca init] CA initialised." + + # Copy the X.509 template to /home/step/templates (persistent volume) so the + # provisioner can reference it in M2-04. + if [ -f "/etc/step-ca-templates/federation.tpl" ]; then + mkdir -p /home/step/templates + cp /etc/step-ca-templates/federation.tpl /home/step/templates/federation.tpl + echo "[step-ca init] Federation X.509 template copied to /home/step/templates/." + fi + + echo "[step-ca init] Startup complete." +else + echo "[step-ca init] Config already exists — skipping init." +fi + +echo "[step-ca init] Starting Step-CA on :9000..." +exec step-ca /home/step/config/ca.json --password-file "${PASSWORD_FILE}" diff --git a/infra/step-ca/templates/federation.tpl b/infra/step-ca/templates/federation.tpl new file mode 100644 index 0000000..0e2f132 --- /dev/null +++ b/infra/step-ca/templates/federation.tpl @@ -0,0 +1,48 @@ +{ + "subject": {{ toJson .Subject }}, + "sans": {{ toJson .SANs }}, + + {{- /* + Mosaic Federation X.509 Certificate Template + ============================================ + This template is used by the "mosaic-fed" JWK provisioner to sign + federation client certificates. 
+ + Custom OID extensions (per PRD §6): + 1.3.6.1.4.1.99999.1 — mosaic.federation.grantId (UUID string) + 1.3.6.1.4.1.99999.2 — mosaic.federation.subjectUserId (UUID string) + + TODO (M2-04): Wire actual OID extensions below once the CA service + (apps/gateway/src/federation/ca.service.ts) lands the SAN-bearing CSR + work and the template can be exercised end-to-end. + + Step-CA template reference: + https://smallstep.com/docs/step-ca/templates + + Expected final shape of the extensions block (placeholder — not yet + activated): + + "extensions": [ + { + "id": "1.3.6.1.4.1.99999.1", + "critical": false, + "value": {{ toJson (first .Token.mosaic_grant_id) }} + }, + { + "id": "1.3.6.1.4.1.99999.2", + "critical": false, + "value": {{ toJson (first .Token.mosaic_subject_user_id) }} + } + ], + + The CSR submitter must pass these values in the JWK provisioner token payload + (token claims `mosaic_grant_id` and `mosaic_subject_user_id`) when + submitting the CSR. M2-04 owns that work. + */ -}} + + "keyUsage": ["digitalSignature"], + "extKeyUsage": ["clientAuth"], + "basicConstraints": { + "isCA": false + } +}