Compare commits

...

5 Commits

Author SHA1 Message Date
Jarvis
d1925149c6 feat(federation): add Step-CA sidecar to federated compose stack [FED-M2-02]
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
Adds a profile-gated `step-ca` service to `docker-compose.federated.yml`
so the federated tier has its own internal CA. No gateway code consumes
the CA yet — that lands in M2-04 (ca.service.ts).

- docker-compose.federated.yml: new `step-ca` service using image
  `smallstep/step-ca:0.27.4` (pinned stable; `latest` forbidden by
  Mosaic image policy), named volume `step_ca_data`, port 9000,
  `[federated]` profile gate, healthcheck with 30s start_period
- infra/step-ca/init.sh: idempotent first-boot init; runs `step ca init`
  with JWK provisioner `mosaic-fed` if /home/step/config/ca.json absent;
  otherwise starts CA directly
- infra/step-ca/dev-password.example: sample dev password (real file
  is gitignored)
- infra/step-ca/templates/federation.tpl: X.509 template skeleton for
  custom OID SAN extensions (grantId 1.3.6.1.4.1.99999.1,
  subjectUserId 1.3.6.1.4.1.99999.2); TODO comment links M2-04 as the
  landing point
- .gitignore: ignores infra/step-ca/dev-password (real password)

Refs #461

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 21:00:05 -05:00
Jarvis
508f57118d feat(federation): scope JSON schema and validator [FED-M2-03]
Pure Zod-based validator for federation grant scope objects per PRD §8.1.
Independent of CA, DB, and NestJS wiring — those land in M2-06.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:58:40 -05:00
4dbd429203 feat(deploy): portainer stack template for federation test instances [DEPLOY-02] (#485)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
2026-04-22 01:34:44 +00:00
b985d7bfe2 docs(federation): M2 mission planning — TASKS decomposition + manifest update (#483)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline failed
2026-04-22 01:24:00 +00:00
45e8f02c91 feat(mosaic-portainer): PORTAINER_INSECURE flag for self-signed TLS (#484)
Some checks failed
ci/woodpecker/push/publish Pipeline failed
ci/woodpecker/push/ci Pipeline failed
2026-04-22 01:21:54 +00:00
19 changed files with 841 additions and 16 deletions

3
.gitignore vendored
View File

@@ -9,3 +9,6 @@ coverage
*.tsbuildinfo
.pnpm-store
docs/reports/
# Step-CA dev password — real file is gitignored; commit only the .example
infra/step-ca/dev-password

View File

@@ -0,0 +1,187 @@
/**
* Unit tests for FederationScopeSchema and parseFederationScope.
*
* Coverage:
* - Valid: minimal scope
* - Valid: full PRD §8.1 example
* - Valid: resources + excluded_resources (no overlap)
* - Invalid: empty resources
* - Invalid: unknown resource value
* - Invalid: resources / excluded_resources intersection
* - Invalid: filter key not in resources
* - Invalid: max_rows_per_query = 0
* - Invalid: max_rows_per_query = 10001
* - Invalid: not an object / null
* - Defaults: include_personal defaults to true; excluded_resources defaults to []
* - Sentinel: console.warn fires for sensitive resources
*/
import { describe, it, expect, vi, afterEach } from 'vitest';
import {
parseFederationScope,
FederationScopeError,
FederationScopeSchema,
} from './scope-schema.js';
afterEach(() => {
vi.restoreAllMocks();
});
describe('parseFederationScope — valid inputs', () => {
it('accepts a minimal scope (resources + max_rows_per_query only)', () => {
const scope = parseFederationScope({
resources: ['tasks'],
max_rows_per_query: 100,
});
expect(scope.resources).toEqual(['tasks']);
expect(scope.max_rows_per_query).toBe(100);
expect(scope.excluded_resources).toEqual([]);
expect(scope.filters).toBeUndefined();
});
it('accepts the full PRD §8.1 example', () => {
const scope = parseFederationScope({
resources: ['tasks', 'notes', 'memory'],
filters: {
tasks: { include_teams: ['team_uuid_1', 'team_uuid_2'], include_personal: true },
notes: { include_personal: true, include_teams: [] },
memory: { include_personal: true },
},
excluded_resources: ['credentials', 'api_keys'],
max_rows_per_query: 500,
});
expect(scope.resources).toEqual(['tasks', 'notes', 'memory']);
expect(scope.excluded_resources).toEqual(['credentials', 'api_keys']);
expect(scope.filters?.tasks?.include_teams).toEqual(['team_uuid_1', 'team_uuid_2']);
expect(scope.max_rows_per_query).toBe(500);
});
it('accepts a scope with excluded_resources and no filter overlap', () => {
const scope = parseFederationScope({
resources: ['tasks', 'notes'],
excluded_resources: ['memory'],
max_rows_per_query: 250,
});
expect(scope.resources).toEqual(['tasks', 'notes']);
expect(scope.excluded_resources).toEqual(['memory']);
});
});
describe('parseFederationScope — defaults', () => {
it('defaults excluded_resources to []', () => {
const scope = parseFederationScope({ resources: ['tasks'], max_rows_per_query: 1 });
expect(scope.excluded_resources).toEqual([]);
});
it('defaults include_personal to true when filter is provided without it', () => {
const scope = parseFederationScope({
resources: ['tasks'],
filters: { tasks: { include_teams: ['t1'] } },
max_rows_per_query: 10,
});
expect(scope.filters?.tasks?.include_personal).toBe(true);
});
});
describe('parseFederationScope — invalid inputs', () => {
it('throws FederationScopeError for empty resources array', () => {
expect(() => parseFederationScope({ resources: [], max_rows_per_query: 100 })).toThrow(
FederationScopeError,
);
});
it('throws for unknown resource value in resources', () => {
expect(() =>
parseFederationScope({ resources: ['unknown_resource'], max_rows_per_query: 100 }),
).toThrow(FederationScopeError);
});
it('throws when resources and excluded_resources intersect', () => {
expect(() =>
parseFederationScope({
resources: ['tasks', 'memory'],
excluded_resources: ['memory'],
max_rows_per_query: 100,
}),
).toThrow(FederationScopeError);
});
it('throws when filters references a resource not in resources', () => {
expect(() =>
parseFederationScope({
resources: ['tasks'],
filters: { notes: { include_personal: true } },
max_rows_per_query: 100,
}),
).toThrow(FederationScopeError);
});
it('throws for max_rows_per_query = 0', () => {
expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 0 })).toThrow(
FederationScopeError,
);
});
it('throws for max_rows_per_query = 10001', () => {
expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 10001 })).toThrow(
FederationScopeError,
);
});
it('throws for null input', () => {
expect(() => parseFederationScope(null)).toThrow(FederationScopeError);
});
it('throws for non-object input (string)', () => {
expect(() => parseFederationScope('not-an-object')).toThrow(FederationScopeError);
});
});
describe('parseFederationScope — sentinel warning', () => {
it('emits console.warn when resources includes "credentials"', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({
resources: ['tasks', 'credentials'],
max_rows_per_query: 100,
});
expect(warnSpy).toHaveBeenCalledWith(
expect.stringContaining(
'[FederationScope] WARNING: scope grants sensitive resource "credentials"',
),
);
});
it('emits console.warn when resources includes "api_keys"', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({
resources: ['tasks', 'api_keys'],
max_rows_per_query: 100,
});
expect(warnSpy).toHaveBeenCalledWith(
expect.stringContaining(
'[FederationScope] WARNING: scope grants sensitive resource "api_keys"',
),
);
});
it('does NOT emit console.warn for non-sensitive resources', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({ resources: ['tasks', 'notes', 'memory'], max_rows_per_query: 100 });
expect(warnSpy).not.toHaveBeenCalled();
});
});
describe('FederationScopeSchema — boundary values', () => {
it('accepts max_rows_per_query = 1 (lower bound)', () => {
const result = FederationScopeSchema.safeParse({ resources: ['tasks'], max_rows_per_query: 1 });
expect(result.success).toBe(true);
});
it('accepts max_rows_per_query = 10000 (upper bound)', () => {
const result = FederationScopeSchema.safeParse({
resources: ['tasks'],
max_rows_per_query: 10000,
});
expect(result.success).toBe(true);
});
});

View File

@@ -0,0 +1,147 @@
/**
* Federation grant scope schema and validator.
*
* Source of truth: docs/federation/PRD.md §8.1
*
* This module is intentionally pure — no DB, no NestJS, no CA wiring.
* It is reusable from grant CRUD (M2-06) and scope enforcement (M3+).
*/
import { z } from 'zod';
// ---------------------------------------------------------------------------
// Allowlist of federation resources (canonical — M3+ will extend this list)
// ---------------------------------------------------------------------------
export const FEDERATION_RESOURCE_VALUES = [
'tasks',
'notes',
'memory',
'credentials',
'api_keys',
] as const;
export type FederationResource = (typeof FEDERATION_RESOURCE_VALUES)[number];
/**
* Sensitive resources require explicit admin approval (PRD §8.4).
* The parser warns when these appear in `resources`; M2-06 grant CRUD
* will add a hard gate on top of this warning.
*/
const SENSITIVE_RESOURCES: ReadonlySet<FederationResource> = new Set(['credentials', 'api_keys']);
// ---------------------------------------------------------------------------
// Sub-schemas
// ---------------------------------------------------------------------------
const ResourceArraySchema = z
.array(z.enum(FEDERATION_RESOURCE_VALUES))
.nonempty({ message: 'resources must contain at least one value' })
.refine((arr) => new Set(arr).size === arr.length, {
message: 'resources must not contain duplicate values',
});
const ResourceFilterSchema = z.object({
include_teams: z.array(z.string()).optional(),
include_personal: z.boolean().default(true),
});
// ---------------------------------------------------------------------------
// Top-level schema
// ---------------------------------------------------------------------------
export const FederationScopeSchema = z
.object({
resources: ResourceArraySchema,
excluded_resources: z
.array(z.enum(FEDERATION_RESOURCE_VALUES))
.default([])
.refine((arr) => new Set(arr).size === arr.length, {
message: 'excluded_resources must not contain duplicate values',
}),
filters: z.record(z.string(), ResourceFilterSchema).optional(),
max_rows_per_query: z
.number()
.int({ message: 'max_rows_per_query must be an integer' })
.min(1, { message: 'max_rows_per_query must be at least 1' })
.max(10000, { message: 'max_rows_per_query must be at most 10000' }),
})
.superRefine((data, ctx) => {
const resourceSet = new Set(data.resources);
// Intersection guard: a resource cannot be both granted and excluded
for (const r of data.excluded_resources) {
if (resourceSet.has(r)) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: `Resource "${r}" appears in both resources and excluded_resources`,
path: ['excluded_resources'],
});
}
}
// Filter keys must be a subset of resources
if (data.filters) {
for (const key of Object.keys(data.filters)) {
if (!resourceSet.has(key as FederationResource)) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: `filters key "${key}" references a resource not present in resources`,
path: ['filters', key],
});
}
}
}
});
export type FederationScope = z.infer<typeof FederationScopeSchema>;
// ---------------------------------------------------------------------------
// Error class
// ---------------------------------------------------------------------------
export class FederationScopeError extends Error {
constructor(message: string) {
super(message);
this.name = 'FederationScopeError';
}
}
// ---------------------------------------------------------------------------
// Typed parser
// ---------------------------------------------------------------------------
/**
* Parse and validate an unknown value as a FederationScope.
*
* Throws `FederationScopeError` with aggregated Zod issues on failure.
*
* Emits `console.warn` when sensitive resources (`credentials`, `api_keys`)
* are present in `resources` — per PRD §8.4, these require explicit admin
* approval. M2-06 grant CRUD will add a hard gate on top of this warning.
*/
export function parseFederationScope(input: unknown): FederationScope {
const result = FederationScopeSchema.safeParse(input);
if (!result.success) {
const issues = result.error.issues
.map((e) => ` - [${e.path.join('.') || 'root'}] ${e.message}`)
.join('\n');
throw new FederationScopeError(`Invalid federation scope:\n${issues}`);
}
const scope = result.data;
// Sentinel warning for sensitive resources (PRD §8.4)
for (const resource of scope.resources) {
if (SENSITIVE_RESOURCES.has(resource)) {
console.warn(
`[FederationScope] WARNING: scope grants sensitive resource "${resource}". Per PRD §8.4 this requires explicit admin approval and is logged.`,
);
}
}
return scope;
}

View File

@@ -0,0 +1,70 @@
# deploy/portainer/
Portainer stack templates for Mosaic Stack deployments.
## Files
| File | Purpose |
| -------------------------- | -------------------------------------------------------------------------------------------------------------- |
| `federated-test.stack.yml` | Docker Swarm stack for federation end-to-end test instances (`mos-test-1.woltje.com`, `mos-test-2.woltje.com`) |
---
## federated-test.stack.yml
A self-contained Swarm stack that boots a federated-tier Mosaic gateway with co-located Postgres 17 (pgvector) and Valkey 8. This is a **test template** — production deployments will use a separate template with stricter resource limits and Docker secrets.
### Deploy via Portainer UI
1. Log into Portainer.
2. Navigate to **Stacks → Add stack**.
3. Set a stack name matching `STACK_NAME` below (e.g. `mos-test-1`).
4. Choose **Web editor** and paste the contents of `federated-test.stack.yml`.
5. Scroll to **Environment variables** and add each variable listed below.
6. Click **Deploy the stack**.
### Required environment variables
| Variable | Example | Notes |
| -------------------- | --------------------------------------- | -------------------------------------------------------- |
| `STACK_NAME` | `mos-test-1` | Unique per stack — used in Traefik router/service names. |
| `HOST_FQDN` | `mos-test-1.woltje.com` | Fully-qualified hostname served by this stack. |
| `POSTGRES_PASSWORD` | _(generate randomly)_ | Database password. Do **not** reuse between stacks. |
| `BETTER_AUTH_SECRET` | _(generate: `openssl rand -base64 32`)_ | BetterAuth session signing key. |
| `BETTER_AUTH_URL` | `https://mos-test-1.woltje.com` | Public base URL of the gateway. |
Optional variables (uncomment in the YAML or set in Portainer):
| Variable | Notes |
| ----------------------------- | ---------------------------------------------------------- |
| `ANTHROPIC_API_KEY` | Enable Claude models. |
| `OPENAI_API_KEY` | Enable OpenAI models. |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | Forward traces to a collector (e.g. `http://jaeger:4318`). |
### Required external resources
Before deploying, ensure the following exist on the Swarm:
1. **`traefik-public` overlay network** — shared network Traefik uses to route traffic to stacks.
```bash
docker network create --driver overlay --attachable traefik-public
```
2. **`letsencrypt` cert resolver** — configured in the Traefik Swarm stack. The stack template references `tls.certresolver=letsencrypt`; the name must match your Traefik config.
3. **DNS A record** — `${HOST_FQDN}` must resolve to the Swarm ingress IP (or a Cloudflare-proxied address pointing there).
### Deployed instances
| Stack name | HOST_FQDN | Purpose |
| ------------ | ----------------------- | ---------------------------------- |
| `mos-test-1` | `mos-test-1.woltje.com` | DEPLOY-03 — first federation peer |
| `mos-test-2` | `mos-test-2.woltje.com` | DEPLOY-04 — second federation peer |
### Image
The gateway image is pinned by digest to `fed-v0.1.0-m1` (verified in DEPLOY-01). Update the digest in the YAML when promoting a new build — never use `:latest` or a mutable tag in Swarm.
### Notes
- This template boots a **vanilla M1-baseline gateway** in federated tier. Federation grants (Step-CA, mTLS) are M2+ scope and not included here.
- Each stack gets its own Postgres volume (`postgres-data`) and Valkey volume (`valkey-data`) scoped to the stack name by Swarm.
- `depends_on` is honoured by Compose but ignored by Swarm — healthchecks on Postgres and Valkey ensure the gateway retries until they are ready.

View File

@@ -0,0 +1,147 @@
# deploy/portainer/federated-test.stack.yml
#
# Portainer / Docker Swarm stack template — federated-tier test instance
#
# PURPOSE
# Deploys a single federated-tier Mosaic gateway with co-located Postgres
# (pgvector) and Valkey for end-to-end federation testing. Intended for
# mos-test-1.woltje.com and mos-test-2.woltje.com (DEPLOY-03/04).
#
# REQUIRED ENV VARS (set per-stack in Portainer → Stacks → Environment variables)
# STACK_NAME Unique name for Traefik router/service labels.
# Examples: mos-test-1, mos-test-2
# HOST_FQDN Fully-qualified domain name served by this stack.
# Examples: mos-test-1.woltje.com, mos-test-2.woltje.com
# POSTGRES_PASSWORD Database password — set per stack; do NOT commit a default.
# BETTER_AUTH_SECRET Random 32-char string for BetterAuth session signing.
# Generate: openssl rand -base64 32
# BETTER_AUTH_URL Public gateway base URL, e.g. https://mos-test-1.woltje.com
#
# OPTIONAL ENV VARS (uncomment and set in Portainer to enable features)
# ANTHROPIC_API_KEY sk-ant-...
# OPENAI_API_KEY sk-...
# OTEL_EXPORTER_OTLP_ENDPOINT http://<collector>:4318
# OTEL_SERVICE_NAME (default: mosaic-gateway)
#
# REQUIRED EXTERNAL RESOURCES
# traefik-public Docker overlay network — must exist before deploying.
# Create: docker network create --driver overlay --attachable traefik-public
# letsencrypt Traefik cert resolver configured on the Swarm manager.
# DNS A record ${HOST_FQDN} → Swarm ingress IP (or Cloudflare proxy).
#
# IMAGE
# Pinned to digest fed-v0.1.0-m1 (DEPLOY-01 verified).
# Update digest here when promoting a new build.
#
# NOTE: This is a TEST template — production deployments use a separate
# parameterised template with stricter resource limits and secrets.
version: '3.9'
services:
gateway:
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec
# Tag for human reference: fed-v0.1.0-m1
environment:
# ── Tier ───────────────────────────────────────────────────────────────
MOSAIC_TIER: federated
# ── Database ───────────────────────────────────────────────────────────
DATABASE_URL: postgres://gateway:${POSTGRES_PASSWORD}@postgres:5432/mosaic
# ── Queue ──────────────────────────────────────────────────────────────
VALKEY_URL: redis://valkey:6379
# ── Gateway ────────────────────────────────────────────────────────────
GATEWAY_PORT: '3000'
GATEWAY_CORS_ORIGIN: https://${HOST_FQDN}
# ── Auth ───────────────────────────────────────────────────────────────
BETTER_AUTH_SECRET: ${BETTER_AUTH_SECRET}
BETTER_AUTH_URL: https://${HOST_FQDN}
# ── Observability ──────────────────────────────────────────────────────
OTEL_SERVICE_NAME: ${STACK_NAME:-mosaic-gateway}
# OTEL_EXPORTER_OTLP_ENDPOINT: http://<collector>:4318
# ── AI Providers (uncomment to enable) ─────────────────────────────────
# ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
# OPENAI_API_KEY: ${OPENAI_API_KEY}
networks:
- federated-test
- traefik-public
deploy:
replicas: 1
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
labels:
- 'traefik.enable=true'
- 'traefik.docker.network=traefik-public'
- 'traefik.http.routers.${STACK_NAME}.rule=Host(`${HOST_FQDN}`)'
- 'traefik.http.routers.${STACK_NAME}.entrypoints=websecure'
- 'traefik.http.routers.${STACK_NAME}.tls=true'
- 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
- 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
healthcheck:
test: ['CMD', 'wget', '-qO-', 'http://localhost:3000/health']
interval: 30s
timeout: 5s
retries: 3
start_period: 20s
depends_on:
- postgres
- valkey
postgres:
image: pgvector/pgvector:pg17
environment:
POSTGRES_USER: gateway
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: mosaic
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
- federated-test
deploy:
replicas: 1
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U gateway']
interval: 10s
timeout: 5s
retries: 5
start_period: 10s
valkey:
image: valkey/valkey:8-alpine
volumes:
- valkey-data:/data
networks:
- federated-test
deploy:
replicas: 1
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
healthcheck:
test: ['CMD', 'valkey-cli', 'ping']
interval: 10s
timeout: 3s
retries: 5
start_period: 5s
volumes:
postgres-data:
valkey-data:
networks:
federated-test:
driver: overlay
traefik-public:
external: true

View File

@@ -55,6 +55,63 @@ services:
timeout: 3s
retries: 5
# ---------------------------------------------------------------------------
# Step-CA — Mosaic Federation internal certificate authority
#
# Image: pinned to 0.27.4 (latest stable as of late 2025).
# `latest` is forbidden per Mosaic image policy (immutable tag required for
# reproducible deployments and digest-first promotion in CI).
#
# Profile: `federated` — this service must not start in non-federated dev.
#
# Password:
# Dev: bind-mount ./infra/step-ca/dev-password (gitignored; copy from
# ./infra/step-ca/dev-password.example and customise locally).
# Prod: replace the bind-mount with a Docker secret:
# secrets:
# ca_password:
# external: true
# and reference it as `/run/secrets/ca_password` (same path the
# init script already uses).
#
# Provisioner: "mosaic-fed" (consumed by apps/gateway/src/federation/ca.service.ts)
# ---------------------------------------------------------------------------
step-ca:
image: smallstep/step-ca:0.27.4
profiles: [federated]
ports:
- '${STEP_CA_HOST_PORT:-9000}:9000'
volumes:
- step_ca_data:/home/step
# init script — executed as the container entrypoint
- ./infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
# X.509 template skeleton (wired in M2-04)
- ./infra/step-ca/templates:/etc/step-ca-templates:ro
# Dev password file — GITIGNORED; copy from dev-password.example
# In production, replace this with a Docker secret (see comment above).
- ./infra/step-ca/dev-password:/run/secrets/ca_password:ro
entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
healthcheck:
# The healthcheck requires the root cert to exist, which is only true
# after init.sh has completed on first boot. start_period gives init
# time to finish before Docker starts counting retries.
test:
[
'CMD',
'step',
'ca',
'health',
'--ca-url',
'https://localhost:9000',
'--root',
'/home/step/certs/root_ca.crt',
]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
volumes:
pg_federated_data:
valkey_federated_data:
step_ca_data:

View File

@@ -7,13 +7,22 @@
**ID:** federation-v1-20260419
**Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
**Phase:** M1 complete — federated tier infrastructure ready for testing
**Current Milestone:** FED-M2 (next; deferred to mission planning)
**Phase:** M2 active — Step-CA + grant schema + admin CLI; parallel test-deploy workstream stood up
**Current Milestone:** FED-M2
**Progress:** 1 / 7 milestones
**Status:** active
**Last Updated:** 2026-04-19 (M1 complete; tag `fed-v0.1.0-m1`)
**Last Updated:** 2026-04-21 (M2 decomposed; mos-test-1/-2 designated as federation E2E test hosts)
**Parent Mission:** None — new mission
## Test Infrastructure
| Host | Role | Image | Tier |
| ----------------------- | ----------------------------------- | ------------------------------------- | --------- |
| `mos-test-1.woltje.com` | Federation Server A (querying side) | `gateway:fed-v0.1.0-m1` (M1 baseline) | federated |
| `mos-test-2.woltje.com` | Federation Server B (serving side) | `gateway:fed-v0.1.0-m1` (M1 baseline) | federated |
These are TEST hosts for federation E2E (M3+). Distinct from PRD AC-12 production targets (`woltje.com``uscllc.com`). Deployment workstream tracked in `docs/federation/TASKS.md` under FED-M2-DEPLOY-\*.
## Context
Federation is the solution to what originally drove OpenBrain. The prior attempt coupled every agent session to a remote service, introduced cache/latency/opacity pain, and created a hard dependency that punished offline use. This redesign:
@@ -54,7 +63,7 @@ Key design references:
| # | ID | Name | Status | Branch | Issue | Started | Completed |
| --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
| 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | | — |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | in-progress | (decomposition) | #461 | 2026-04-21 | — |
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
@@ -83,6 +92,10 @@ Key design references:
## Next Step
FED-M1 complete (12 PRs #470-#481, tag `fed-v0.1.0-m1`). Federated tier infrastructure is testable end-to-end: see `docs/federation/SETUP.md` and `docs/guides/migrate-tier.md`.
FED-M2 active. Decomposition landed in `docs/federation/TASKS.md` (M2-01..M2-13 code workstream + DEPLOY-01..DEPLOY-05 parallel test-deploy workstream, ~88K total). Tracking issue #482.
Begin FED-M2 (Step-CA + grant schema + admin CLI) when planning is greenlit. Issue #461 tracks scope; orchestrator decomposes M2 into per-task rows in `docs/federation/TASKS.md` at the start of M2.
Parallel execution plan:
- **CODE workstream**: M2-01 (DB migration) starts immediately — sonnet subagent on `feat/federation-m2-schema`. Then M2-02 → M2-09 sequentially with M2-04/M2-05/M2-06/M2-07 having interleaved CA/storage/grant dependencies.
- **DEPLOY workstream**: DEPLOY-01 (image verify) → DEPLOY-02 (stack template) → DEPLOY-03/04 (mos-test-1/-2 deploy) → DEPLOY-05 (TEST-INFRA.md). Gated on Portainer wrapper PR (`PORTAINER_INSECURE` flag) merging first.
- **Re-converge** at M2-10 (E2E test) once both workstreams ready.

View File

@@ -36,9 +36,51 @@ Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No
---
## Pre-M2 — Test deployment infrastructure (FED-M2-DEPLOY)
Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.com` and `mos-test-2.woltje.com` running the M1 release (`gateway:fed-v0.1.0-m1`). This is the test bed for M2 enrollment work and the M3 federation E2E harness. No federation logic exercised yet — pure infrastructure validation.
> **Why now:** M2 enrollment requires a real second gateway to test peer-add flows; standing the test hosts up before M2 code lands gives both code and deployment streams a fast feedback loop.
> **Parallelizable:** This workstream runs in parallel with the M2 code workstream (M2-01 → M2-13). They re-converge at M2-10 (E2E test).
> **Tracking issue:** #482.
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| ---------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-DEPLOY-01 | not-started | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact. | #482 | sonnet | feat/federation-deploy-image-verify | — | 2K | publish.yml registers `gateway:$CI_COMMIT_TAG` destination; should already exist at `git.mosaicstack.dev/mosaicstack/stack/gateway:fed-v0.1.0-m1`. |
| FED-M2-DEPLOY-02 | not-started | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`. | #482 | sonnet | feat/federation-deploy-stack-template | DEPLOY-01 | 5K | Stack must be parameterizable via env (`STACK_DOMAIN`, `BETTERAUTH_SECRET`, etc.) so one template serves both hosts. |
| FED-M2-DEPLOY-03 | not-started | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482 | sonnet | feat/federation-deploy-test-1 | DEPLOY-02 | 3K | Requires `PORTAINER_URL` + `PORTAINER_API_KEY` env (vault-loaded). DNS for mos-test-1 must resolve before deploy. |
| FED-M2-DEPLOY-04 | not-started | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03. | #482 | sonnet | feat/federation-deploy-test-2 | DEPLOY-02 | 3K | Independent of DEPLOY-03 (parallelizable). Same secret material with distinct domain + secrets per host. |
| FED-M2-DEPLOY-05 | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status. | #482 | haiku | feat/federation-deploy-docs | DEPLOY-03,04 | 3K | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`. |
**Deploy workstream estimate:** ~16K tokens
---
## Milestone 2 — Step-CA + grant schema + admin CLI (FED-M2)
_Deferred to mission planning when M1 is complete. Issue #461 tracks scope._
Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3).
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-01 | not-started | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | `federation_audit_log` is created but not yet written to (audit logic is M4). Reserve `query_hash`, `outcome`, `bytes_out` columns. |
| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. | #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. |
| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. |
| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. |
| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl). | #461 | sonnet | feat/federation-m2-key-sealing | M2-01 | 5K | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only. |
| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending``active``revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones. | #461 | sonnet | feat/federation-m2-grants-service | M2-03, M2-05 | 6K | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6. |
| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending``active`; emits enrollment audit (table-only write, M4 tightens). | #461 | sonnet | feat/federation-m2-enrollment | M2-04, M2-06 | 6K | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock). |
| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option. | #461 | sonnet | feat/federation-m2-cli | M2-06, M2-07 | 7K | `peer add <enrollment-url>` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert. |
| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`. | #461 | sonnet | feat/federation-m2-integration | M2-08 | 8K | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E). |
| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461 | sonnet | feat/federation-m2-e2e | M2-08, DEPLOY-04 | 6K | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths. |
| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths. | #461 | sonnet | feat/federation-m2-security-review | M2-10 | 8K | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages. |
| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred. | #461 | haiku | feat/federation-m2-docs | M2-11 | 4K | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example. |
| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row. | #461 | sonnet | feat/federation-m2-close | M2-12 | 3K | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs. |
**M2 code workstream estimate:** ~72K tokens (vs MILESTONES.md 30K — same over-budget pattern as M1, where per-task breakdown including tests/review/docs catches the real cost).
**Deploy + code combined:** ~88K tokens.
## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)

View File

@@ -0,0 +1 @@
dev-only-step-ca-password-do-not-use-in-production

60
infra/step-ca/init.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/sh
# infra/step-ca/init.sh
#
# Idempotent first-boot initialiser for the Mosaic Federation CA.
#
# On the first run (no /home/step/config/ca.json present) this script:
# 1. Initialises Step-CA with a JWK provisioner named "mosaic-fed".
# 2. Writes the CA configuration to the persistent volume at /home/step.
#
# On subsequent runs (config already exists) this script skips init and
# starts the CA directly.
#
# The provisioner name "mosaic-fed" is consumed by:
# apps/gateway/src/federation/ca.service.ts (added in M2-04)
#
# Password source:
# Dev: mounted from ./infra/step-ca/dev-password via bind mount.
# Prod: mounted from a Docker secret at /run/secrets/ca_password.
#
# OID template:
# infra/step-ca/templates/federation.tpl is copied into the CA config
# directory so the JWK provisioner can reference it. The template
# skeleton is wired in M2-04 when the CA service lands the SAN-bearing
# CSR work.
set -e
CA_CONFIG="/home/step/config/ca.json"
PASSWORD_FILE="/run/secrets/ca_password"
if [ ! -f "${CA_CONFIG}" ]; then
echo "[step-ca init] First boot detected — initialising Mosaic Federation CA..."
step ca init \
--name "Mosaic Federation CA" \
--dns "localhost" \
--dns "step-ca" \
--address ":9000" \
--provisioner "mosaic-fed" \
--password-file "${PASSWORD_FILE}" \
--provisioner-password-file "${PASSWORD_FILE}" \
--no-db
echo "[step-ca init] CA initialised."
# Copy the X.509 template into the Step-CA config directory so the
# provisioner can reference it in M2-04.
if [ -f "/etc/step-ca-templates/federation.tpl" ]; then
mkdir -p /home/step/templates
cp /etc/step-ca-templates/federation.tpl /home/step/templates/federation.tpl
echo "[step-ca init] Federation X.509 template copied to /home/step/templates/."
fi
echo "[step-ca init] Startup complete."
else
echo "[step-ca init] Config already exists — skipping init."
fi
echo "[step-ca init] Starting Step-CA on :9000..."
exec step-ca /home/step/config/ca.json --password-file "${PASSWORD_FILE}"

View File

@@ -0,0 +1,48 @@
{
"subject": {{ toJson .Subject }},
"sans": {{ toJson .SANs }},
{{- /*
Mosaic Federation X.509 Certificate Template
============================================
This template is used by the "mosaic-fed" JWK provisioner to sign
federation client certificates.
Custom OID extensions (per PRD §6):
1.3.6.1.4.1.99999.1 mosaic.federation.grantId (UUID string)
1.3.6.1.4.1.99999.2 mosaic.federation.subjectUserId (UUID string)
TODO (M2-04): Wire actual OID extensions below once the CA service
(apps/gateway/src/federation/ca.service.ts) lands the SAN-bearing CSR
work and the template can be exercised end-to-end.
Step-CA template reference:
https://smallstep.com/docs/step-ca/templates
Expected final shape of the extensions block (placeholder not yet
activated):
"extensions": [
{
"id": "1.3.6.1.4.1.99999.1",
"critical": false,
"value": {{ toJson (first .Token.mosaic_grant_id) }}
},
{
"id": "1.3.6.1.4.1.99999.2",
"critical": false,
"value": {{ toJson (first .Token.mosaic_subject_user_id) }}
}
],
The provisioner must pass these values in the ACME/JWK token payload
(token claims `mosaic_grant_id` and `mosaic_subject_user_id`) when
submitting the CSR. M2-04 owns that work.
*/ -}}
"keyUsage": ["digitalSignature"],
"extKeyUsage": ["clientAuth"],
"basicConstraints": {
"isCA": false
}
}

View File

@@ -13,6 +13,14 @@ export PORTAINER_URL="https://portainer.example.com:9443"
export PORTAINER_API_KEY="your-api-key-here"
```
If your Portainer instance uses a self-signed TLS certificate (e.g. internal LAN), set:
```bash
export PORTAINER_INSECURE=1
```
This passes `-k` to all curl calls, bypassing certificate verification. Do not set this against public/production instances.
You can add these to your shell profile (`~/.bashrc`, `~/.zshrc`) or use a `.env` file.
### Creating an API Key

View File

@@ -46,8 +46,14 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Fetch endpoints
response=$(curl -s -w "\n%{http_code}" \
response=$(curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints")

View File

@@ -52,8 +52,14 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Fetch stacks
response=$(curl -s -w "\n%{http_code}" \
response=$(curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/stacks")

View File

@@ -64,12 +64,18 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests
api_request() {
local method="$1"
local endpoint="$2"
curl -s -w "\n%{http_code}" -X "$method" \
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}"
}
@@ -165,7 +171,7 @@ fi
# Note: Docker API returns raw log stream, not JSON
if [[ "$FOLLOW" == "true" ]]; then
# Stream logs
curl -s -N \
curl -s "${CURL_OPTS[@]}" -N \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \
# Docker log format has 8-byte header per line, strip it
@@ -175,7 +181,7 @@ if [[ "$FOLLOW" == "true" ]]; then
done
else
# Get logs (non-streaming)
curl -s \
curl -s "${CURL_OPTS[@]}" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \
# Docker log format has 8-byte header per line, attempt to strip it

View File

@@ -63,13 +63,19 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests
api_request() {
local method="$1"
local endpoint="$2"
local data="${3:-}"
local args=(-s -w "\n%{http_code}" -X "$method" -H "X-API-Key: ${PORTAINER_API_KEY}")
local args=(-s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" -H "X-API-Key: ${PORTAINER_API_KEY}")
if [[ -n "$data" ]]; then
args+=(-H "Content-Type: application/json" -d "$data")

View File

@@ -54,12 +54,18 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests
api_request() {
local method="$1"
local endpoint="$2"
curl -s -w "\n%{http_code}" -X "$method" \
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}"
}

View File

@@ -57,12 +57,18 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests
api_request() {
local method="$1"
local endpoint="$2"
curl -s -w "\n%{http_code}" -X "$method" \
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}"
}

View File

@@ -54,12 +54,18 @@ fi
# Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests
api_request() {
local method="$1"
local endpoint="$2"
curl -s -w "\n%{http_code}" -X "$method" \
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}"
}