Compare commits

..

1 Commits

Author SHA1 Message Date
Jarvis
4cbb5eff8e docs(federation): S21 tracking — DEPLOY-01/02 done, IMG-FIX in flight, M2-01 in remediation
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
- TASKS.md: mark DEPLOY-01 (image verify) + DEPLOY-02 (stack template, PR #485) done
- TASKS.md: add DEPLOY-IMG-FIX row (gateway Dockerfile pnpm-deploy fix in flight)
- TASKS.md: DEPLOY-03/04 → blocked on IMG-FIX; M2-01 → needs-qa (PR #486 in remediation)
- scratchpads/mvp-20260312.md: Session 20 entry (M2 kickoff, workstream split, Portainer access)
- scratchpads/mvp-20260312.md: Session 21 entry (DEPLOY-02 merged, gateway image runtime bug
  discovered via stripped-container test, M2-01 review verdict + remediation in flight,
  process notes on branch races + worktree isolation rule)
2026-04-21 20:52:20 -05:00
9 changed files with 112 additions and 525 deletions

3
.gitignore vendored
View File

@@ -9,6 +9,3 @@ coverage
*.tsbuildinfo *.tsbuildinfo
.pnpm-store .pnpm-store
docs/reports/ docs/reports/
# Step-CA dev password — real file is gitignored; commit only the .example
infra/step-ca/dev-password

View File

@@ -1,187 +0,0 @@
/**
* Unit tests for FederationScopeSchema and parseFederationScope.
*
* Coverage:
* - Valid: minimal scope
* - Valid: full PRD §8.1 example
* - Valid: resources + excluded_resources (no overlap)
* - Invalid: empty resources
* - Invalid: unknown resource value
* - Invalid: resources / excluded_resources intersection
* - Invalid: filter key not in resources
* - Invalid: max_rows_per_query = 0
* - Invalid: max_rows_per_query = 10001
* - Invalid: not an object / null
* - Defaults: include_personal defaults to true; excluded_resources defaults to []
* - Sentinel: console.warn fires for sensitive resources
*/
import { describe, it, expect, vi, afterEach } from 'vitest';
import {
parseFederationScope,
FederationScopeError,
FederationScopeSchema,
} from './scope-schema.js';
// Restore every mock/spy created via vi.spyOn after each test so the
// console.warn stubs installed by the sentinel tests never leak into later tests.
afterEach(() => {
vi.restoreAllMocks();
});
// Happy-path cases: every input below must parse without throwing.
describe('parseFederationScope — valid inputs', () => {
  it('accepts a minimal scope (resources + max_rows_per_query only)', () => {
    const minimalInput = { resources: ['tasks'], max_rows_per_query: 100 };

    const parsed = parseFederationScope(minimalInput);

    expect(parsed.resources).toEqual(['tasks']);
    expect(parsed.max_rows_per_query).toBe(100);
    // Optional fields: excluded_resources falls back to [], filters stays absent.
    expect(parsed.excluded_resources).toEqual([]);
    expect(parsed.filters).toBeUndefined();
  });

  it('accepts the full PRD §8.1 example', () => {
    const teamIds = ['team_uuid_1', 'team_uuid_2'];
    const prdExample = {
      resources: ['tasks', 'notes', 'memory'],
      filters: {
        tasks: { include_teams: teamIds, include_personal: true },
        notes: { include_personal: true, include_teams: [] },
        memory: { include_personal: true },
      },
      excluded_resources: ['credentials', 'api_keys'],
      max_rows_per_query: 500,
    };

    const parsed = parseFederationScope(prdExample);

    expect(parsed.resources).toEqual(['tasks', 'notes', 'memory']);
    expect(parsed.excluded_resources).toEqual(['credentials', 'api_keys']);
    expect(parsed.filters?.tasks?.include_teams).toEqual(['team_uuid_1', 'team_uuid_2']);
    expect(parsed.max_rows_per_query).toBe(500);
  });

  it('accepts a scope with excluded_resources and no filter overlap', () => {
    // Granted and excluded lists are disjoint, so the intersection guard stays quiet.
    const { resources, excluded_resources: excluded } = parseFederationScope({
      resources: ['tasks', 'notes'],
      excluded_resources: ['memory'],
      max_rows_per_query: 250,
    });

    expect(resources).toEqual(['tasks', 'notes']);
    expect(excluded).toEqual(['memory']);
  });
});
// Values the schema fills in when the caller omits them.
describe('parseFederationScope — defaults', () => {
  it('defaults excluded_resources to []', () => {
    const { excluded_resources: excluded } = parseFederationScope({
      resources: ['tasks'],
      max_rows_per_query: 1,
    });

    expect(excluded).toEqual([]);
  });

  it('defaults include_personal to true when filter is provided without it', () => {
    // The tasks filter deliberately omits include_personal.
    const inputWithPartialFilter = {
      resources: ['tasks'],
      filters: { tasks: { include_teams: ['t1'] } },
      max_rows_per_query: 10,
    };

    const parsed = parseFederationScope(inputWithPartialFilter);

    expect(parsed.filters?.tasks?.include_personal).toBe(true);
  });
});
// Rejection cases: each input must make the parser throw FederationScopeError.
describe('parseFederationScope — invalid inputs', () => {
  /** Asserts that parsing `input` throws a FederationScopeError. */
  const expectRejected = (input: unknown): void => {
    expect(() => parseFederationScope(input)).toThrow(FederationScopeError);
  };

  it('throws FederationScopeError for empty resources array', () => {
    expectRejected({ resources: [], max_rows_per_query: 100 });
  });

  it('throws for unknown resource value in resources', () => {
    expectRejected({ resources: ['unknown_resource'], max_rows_per_query: 100 });
  });

  it('throws when resources and excluded_resources intersect', () => {
    // "memory" is both granted and excluded.
    expectRejected({
      resources: ['tasks', 'memory'],
      excluded_resources: ['memory'],
      max_rows_per_query: 100,
    });
  });

  it('throws when filters references a resource not in resources', () => {
    // The filter targets "notes", which is not granted.
    expectRejected({
      resources: ['tasks'],
      filters: { notes: { include_personal: true } },
      max_rows_per_query: 100,
    });
  });

  it('throws for max_rows_per_query = 0', () => {
    expectRejected({ resources: ['tasks'], max_rows_per_query: 0 });
  });

  it('throws for max_rows_per_query = 10001', () => {
    expectRejected({ resources: ['tasks'], max_rows_per_query: 10001 });
  });

  it('throws for null input', () => {
    expectRejected(null);
  });

  it('throws for non-object input (string)', () => {
    expectRejected('not-an-object');
  });
});
// The parser must log a console.warn for each sensitive resource it grants.
describe('parseFederationScope — sentinel warning', () => {
  /** Replaces console.warn with a silent spy and returns the spy. */
  const stubConsoleWarn = () => vi.spyOn(console, 'warn').mockImplementation(() => {});

  /** Prefix of the warning expected for the given sensitive resource. */
  const expectedWarningFor = (resource: string): string =>
    `[FederationScope] WARNING: scope grants sensitive resource "${resource}"`;

  it('emits console.warn when resources includes "credentials"', () => {
    const warnSpy = stubConsoleWarn();

    parseFederationScope({ resources: ['tasks', 'credentials'], max_rows_per_query: 100 });

    expect(warnSpy).toHaveBeenCalledWith(
      expect.stringContaining(expectedWarningFor('credentials')),
    );
  });

  it('emits console.warn when resources includes "api_keys"', () => {
    const warnSpy = stubConsoleWarn();

    parseFederationScope({ resources: ['tasks', 'api_keys'], max_rows_per_query: 100 });

    expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining(expectedWarningFor('api_keys')));
  });

  it('does NOT emit console.warn for non-sensitive resources', () => {
    const warnSpy = stubConsoleWarn();

    parseFederationScope({ resources: ['tasks', 'notes', 'memory'], max_rows_per_query: 100 });

    expect(warnSpy).not.toHaveBeenCalled();
  });
});
// Inclusive limits of max_rows_per_query, checked against the raw schema.
describe('FederationScopeSchema — boundary values', () => {
  it('accepts max_rows_per_query = 1 (lower bound)', () => {
    const { success } = FederationScopeSchema.safeParse({
      resources: ['tasks'],
      max_rows_per_query: 1,
    });

    expect(success).toBe(true);
  });

  it('accepts max_rows_per_query = 10000 (upper bound)', () => {
    const { success } = FederationScopeSchema.safeParse({
      resources: ['tasks'],
      max_rows_per_query: 10000,
    });

    expect(success).toBe(true);
  });
});

View File

@@ -1,147 +0,0 @@
/**
* Federation grant scope schema and validator.
*
* Source of truth: docs/federation/PRD.md §8.1
*
* This module is intentionally pure — no DB, no NestJS, no CA wiring.
* It is reusable from grant CRUD (M2-06) and scope enforcement (M3+).
*/
import { z } from 'zod';
// ---------------------------------------------------------------------------
// Allowlist of federation resources (canonical — M3+ will extend this list)
// ---------------------------------------------------------------------------
/** Every resource name a federation grant may reference, in canonical order. */
export const FEDERATION_RESOURCE_VALUES = [
  'tasks',
  'notes',
  'memory',
  'credentials',
  'api_keys',
] as const;

/** Union of the literal resource names listed in FEDERATION_RESOURCE_VALUES. */
export type FederationResource = (typeof FEDERATION_RESOURCE_VALUES)[number];

/**
 * Resources that need explicit admin approval before being granted (PRD §8.4).
 * Today the parser only warns when one of these shows up in `resources`;
 * the hard gate is planned for grant CRUD (M2-06).
 */
const SENSITIVE_RESOURCES: ReadonlySet<FederationResource> = new Set<FederationResource>([
  'credentials',
  'api_keys',
]);
// ---------------------------------------------------------------------------
// Sub-schemas
// ---------------------------------------------------------------------------
/** True when no value occurs more than once in `values`. */
const hasUniqueEntries = (values: readonly string[]): boolean =>
  values.length === new Set(values).size;

/** Non-empty, duplicate-free list of allowlisted resource names. */
const ResourceArraySchema = z
  .array(z.enum(FEDERATION_RESOURCE_VALUES))
  .nonempty({ message: 'resources must contain at least one value' })
  .refine(hasUniqueEntries, { message: 'resources must not contain duplicate values' });

/**
 * Per-resource filter: optional list of team ids plus the personal-data flag,
 * which is true unless the caller sets it.
 */
const ResourceFilterSchema = z.object({
  include_teams: z.array(z.string()).optional(),
  include_personal: z.boolean().default(true),
});
// ---------------------------------------------------------------------------
// Top-level schema
// ---------------------------------------------------------------------------
/** Optional exclusion list: allowlisted names, no duplicates, `[]` when omitted. */
const ExcludedResourcesSchema = z
  .array(z.enum(FEDERATION_RESOURCE_VALUES))
  .default([])
  .refine((names) => names.length === new Set(names).size, {
    message: 'excluded_resources must not contain duplicate values',
  });

/** Row cap per query: an integer in the inclusive range 1..10000. */
const MaxRowsSchema = z
  .number()
  .int({ message: 'max_rows_per_query must be an integer' })
  .min(1, { message: 'max_rows_per_query must be at least 1' })
  .max(10000, { message: 'max_rows_per_query must be at most 10000' });

/**
 * Full federation grant scope.
 *
 * On top of the per-field rules, two cross-field checks are applied:
 *  - a resource may not be listed in both `resources` and `excluded_resources`;
 *  - every key of `filters` must name a resource present in `resources`.
 */
export const FederationScopeSchema = z
  .object({
    resources: ResourceArraySchema,
    excluded_resources: ExcludedResourcesSchema,
    filters: z.record(z.string(), ResourceFilterSchema).optional(),
    max_rows_per_query: MaxRowsSchema,
  })
  .superRefine((scope, ctx) => {
    const granted = new Set<string>(scope.resources);

    // Granted-and-excluded conflicts are reported first, one issue per offender.
    scope.excluded_resources
      .filter((name) => granted.has(name))
      .forEach((name) => {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message: `Resource "${name}" appears in both resources and excluded_resources`,
          path: ['excluded_resources'],
        });
      });

    // Then any filter that targets a resource the scope does not grant.
    Object.keys(scope.filters ?? {})
      .filter((key) => !granted.has(key))
      .forEach((key) => {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message: `filters key "${key}" references a resource not present in resources`,
          path: ['filters', key],
        });
      });
  });

/** Parsed (output) shape of a valid federation scope. */
export type FederationScope = z.infer<typeof FederationScopeSchema>;
// ---------------------------------------------------------------------------
// Error class
// ---------------------------------------------------------------------------
/**
 * Error type thrown when a value fails federation-scope validation.
 *
 * `name` is set to 'FederationScopeError' so the error can be told apart
 * from a plain Error in logs and stack traces.
 */
export class FederationScopeError extends Error {
  /** @param message Human-readable description of the validation failure. */
  constructor(message: string) {
    super(message);
    this.name = 'FederationScopeError';
  }
}
// ---------------------------------------------------------------------------
// Typed parser
// ---------------------------------------------------------------------------
/**
 * Validate an arbitrary value against FederationScopeSchema and return the
 * typed scope.
 *
 * Side effect: for every sensitive resource (`credentials`, `api_keys`) found
 * in `resources`, a `console.warn` line is emitted. Per PRD §8.4 these need
 * explicit admin approval; M2-06 grant CRUD will add a hard gate on top of
 * this warning.
 *
 * @param input Untrusted value to validate.
 * @returns The parsed scope, with schema defaults applied.
 * @throws FederationScopeError when validation fails; the message lists each
 *   Zod issue on its own line, prefixed with its dotted path (or `root`).
 */
export function parseFederationScope(input: unknown): FederationScope {
  const parsed = FederationScopeSchema.safeParse(input);

  if (parsed.success) {
    const scope = parsed.data;

    // Sentinel warning for sensitive resources (PRD §8.4), in declaration order.
    scope.resources
      .filter((name) => SENSITIVE_RESOURCES.has(name))
      .forEach((name) => {
        console.warn(
          `[FederationScope] WARNING: scope grants sensitive resource "${name}". Per PRD §8.4 this requires explicit admin approval and is logged.`,
        );
      });

    return scope;
  }

  // Flatten all Zod issues into one multi-line message.
  const issueLines = parsed.error.issues.map((issue) => {
    const location = issue.path.join('.') || 'root';
    return `  - [${location}] ${issue.message}`;
  });
  throw new FederationScopeError(`Invalid federation scope:\n${issueLines.join('\n')}`);
}

View File

@@ -55,63 +55,6 @@ services:
timeout: 3s timeout: 3s
retries: 5 retries: 5
# ---------------------------------------------------------------------------
# Step-CA — Mosaic Federation internal certificate authority
#
# Image: pinned to 0.27.4 (latest stable as of late 2025).
# `latest` is forbidden per Mosaic image policy (immutable tag required for
# reproducible deployments and digest-first promotion in CI).
#
# Profile: `federated` — this service must not start in non-federated dev.
#
# Password:
# Dev: bind-mount ./infra/step-ca/dev-password (gitignored; copy from
# ./infra/step-ca/dev-password.example and customise locally).
# Prod: replace the bind-mount with a Docker secret:
# secrets:
# ca_password:
# external: true
# and reference it as `/run/secrets/ca_password` (same path the
# init script already uses).
#
# Provisioner: "mosaic-fed" (consumed by apps/gateway/src/federation/ca.service.ts)
# ---------------------------------------------------------------------------
step-ca:
image: smallstep/step-ca:0.27.4
profiles: [federated]
ports:
- '${STEP_CA_HOST_PORT:-9000}:9000'
volumes:
- step_ca_data:/home/step
# init script — executed as the container entrypoint
- ./infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
# X.509 template skeleton (wired in M2-04)
- ./infra/step-ca/templates:/etc/step-ca-templates:ro
# Dev password file — GITIGNORED; copy from dev-password.example
# In production, replace this with a Docker secret (see comment above).
- ./infra/step-ca/dev-password:/run/secrets/ca_password:ro
entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
healthcheck:
# The healthcheck requires the root cert to exist, which is only true
# after init.sh has completed on first boot. start_period gives init
# time to finish before Docker starts counting retries.
test:
[
'CMD',
'step',
'ca',
'health',
'--ca-url',
'https://localhost:9000',
'--root',
'/home/step/certs/root_ca.crt',
]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
volumes: volumes:
pg_federated_data: pg_federated_data:
valkey_federated_data: valkey_federated_data:
step_ca_data:

View File

@@ -47,11 +47,12 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co
> **Tracking issue:** #482. > **Tracking issue:** #482.
| id | status | description | issue | agent | branch | depends_on | estimate | notes | | id | status | description | issue | agent | branch | depends_on | estimate | notes |
| ---------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-DEPLOY-01 | not-started | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact. | #482 | sonnet | feat/federation-deploy-image-verify | — | 2K | publish.yml registers `gateway:$CI_COMMIT_TAG` destination; should already exist at `git.mosaicstack.dev/mosaicstack/stack/gateway:fed-v0.1.0-m1`. | | FED-M2-DEPLOY-01 | done | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact. | #482 | sonnet | (verified inline, no PR) | — | 2K | Tag exists; digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec` captured for digest-pinned deploys. |
| FED-M2-DEPLOY-02 | not-started | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`. | #482 | sonnet | feat/federation-deploy-stack-template | DEPLOY-01 | 5K | Stack must be parameterizable via env (`STACK_DOMAIN`, `BETTERAUTH_SECRET`, etc.) so one template serves both hosts. | | FED-M2-DEPLOY-02 | done | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`. | #482 | sonnet | feat/federation-deploy-stack-template | DEPLOY-01 | 5K | Shipped in PR #485. Digest-pinned. Env: STACK_NAME, HOST_FQDN, POSTGRES_PASSWORD, BETTER_AUTH_SECRET, BETTER_AUTH_URL. |
| FED-M2-DEPLOY-03 | not-started | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482 | sonnet | feat/federation-deploy-test-1 | DEPLOY-02 | 3K | Requires `PORTAINER_URL` + `PORTAINER_API_KEY` env (vault-loaded). DNS for mos-test-1 must resolve before deploy. | | FED-M2-DEPLOY-IMG-FIX | in-progress | Gateway image runtime broken (ERR_MODULE_NOT_FOUND for `dotenv`); Dockerfile copies `.pnpm/` store but not `apps/gateway/node_modules` symlinks. Switch to `pnpm deploy` for self-contained runtime. | #482 | sonnet | (subagent in flight) | DEPLOY-02 | 4K | Subagent `a78a9ab0ddae91fbc` in flight. Triggers Kaniko rebuild on merge; capture new digest; bump stack template in follow-up PR before redeploy. |
| FED-M2-DEPLOY-04 | not-started | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03. | #482 | sonnet | feat/federation-deploy-test-2 | DEPLOY-02 | 3K | Independent of DEPLOY-03 (parallelizable). Same secret material with distinct domain + secrets per host. | | FED-M2-DEPLOY-03 | blocked | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482 | sonnet | feat/federation-deploy-test-1 | IMG-FIX | 3K | Stack created on Portainer endpoint 3 (Swarm `local`), but blocked on image fix. Container fails on boot until IMG-FIX merges + redeploy. |
| FED-M2-DEPLOY-04 | blocked | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03. | #482 | sonnet | feat/federation-deploy-test-2 | IMG-FIX | 3K | Same status as DEPLOY-03. Stack created; blocked on image fix. |
| FED-M2-DEPLOY-05 | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status. | #482 | haiku | feat/federation-deploy-docs | DEPLOY-03,04 | 3K | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`. | | FED-M2-DEPLOY-05 | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status. | #482 | haiku | feat/federation-deploy-docs | DEPLOY-03,04 | 3K | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`. |
**Deploy workstream estimate:** ~16K tokens **Deploy workstream estimate:** ~16K tokens
@@ -63,8 +64,8 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co
Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3). Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3).
| id | status | description | issue | agent | branch | depends_on | estimate | notes | | id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------- | | --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-01 | not-started | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | `federation_audit_log` is created but not yet written to (audit logic is M4). Reserve `query_hash`, `outcome`, `bytes_out` columns. | | FED-M2-01 | needs-qa | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | PR #486 open. First review NEEDS CHANGES (missing DESC indexes + reserved cols). Remediation subagent `a673dd9355dc26f82` in flight in worktree `agent-a4404ac1`. |
| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. | #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. | | FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. | #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. |
| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. | | FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. |
| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. | | FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. |

View File

@@ -523,3 +523,92 @@ Independent security review surfaced three high-impact and four medium findings;
- #8: confirm `packages/config/dist` not git-tracked - #8: confirm `packages/config/dist` not git-tracked
**Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope. **Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope.
## Session 20 — 2026-04-21 — FED-M2 kickoff
### Decisions
- **Workstream split**: parallel CODE (M2-01..M2-13, ~72K) + DEPLOY (DEPLOY-01..DEPLOY-05, ~16K) tracks; re-converge at M2-10 E2E.
- **Test hosts**: `mos-test-1.woltje.com` (querying side / Server A), `mos-test-2.woltje.com` (serving side / Server B). Wildcard `*.woltje.com` A→174.137.97.162 already exists; Traefik wildcard cert covers both subdomains. No DNS or cert work needed pre-deploy.
- **Portainer access**: requires `PORTAINER_INSECURE=1` flag added to mosaic wrappers (self-signed cert at `https://10.1.1.43:9443`). PR pending on `feat/mosaic-portainer-tls-flag`.
- **Image policy**: deploy by digest (immutable) per Mosaic policy. `gateway:fed-v0.1.0-m1` digest = `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`.
### DEPLOY-01 — image manifest verified
- Tag `fed-v0.1.0-m1` exists at `git.mosaicstack.dev/mosaicstack/stack/gateway`
- Digest: `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
- 9 layers, ~530MB total
- Use this digest in DEPLOY-02 stack template (do NOT reference `:fed-v0.1.0-m1` tag in stack — pin to digest)
### Registry auth note
- Gitea container registry uses Bearer token flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`)
- Username: `jarvis` (NOT `mosaicstack`); password: `gitea.mosaicstack.token` from credentials.json
- Direct `Authorization: Bearer <pat>` does NOT work — must exchange PAT for registry token first
### Active PRs
- #483 — docs: M2 mission planning (TASKS decomposition + manifest update) — CI running
- (pending) `feat/mosaic-portainer-tls-flag` — wrapper PORTAINER_INSECURE flag (sonnet subagent in progress)
- (pending) `feat/federation-m2-schema` — FED-M2-01 DB schema migration (sonnet subagent in progress)
### MISSION-MANIFEST layout fix
- Initial M2 commit had Test Infrastructure block inserted by lint-staged prettier between "Last Updated" and "Parent Mission" — split mission frontmatter
- Fixed in 3d001fdb: moved Parent Mission back to frontmatter, kept Test Infrastructure as standalone H2 between Mission and Context
## Session 21 — 2026-04-21/22 — DEPLOY-02 merged, gateway image bug discovered, M2-01 in remediation
### PRs merged
- **#483** — docs(federation): M2 mission planning (TASKS decomposition + manifest update)
- **#484** — feat(mosaic-portainer): PORTAINER_INSECURE flag for self-signed TLS (wrapper sync to `~/.config/mosaic/tools/portainer/` done manually due to broken `mosaic upgrade` `set -o pipefail` on dash)
- **#485** — feat(deploy): portainer stack template `deploy/portainer/federated-test.stack.yml` for federation test instances [DEPLOY-02]
### Stack deployed (mos-test-1, mos-test-2)
- Both stacks created on Portainer endpoint 3 (`local` Swarm @ 10.1.1.43, the only endpoint with traefik-public + woltje.com wildcard cert)
- Swarm ID `l7z67tfpd4bvj4979ufpkyi50`
- Image pinned to digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
- Traefik labels target `${HOST_FQDN}` per env
### CRITICAL FINDING — gateway image runtime-broken
- `docker run` against `gateway:fed-v0.1.0-m1` fails immediately:
`Error [ERR_MODULE_NOT_FOUND]: Cannot find package 'dotenv' imported from /app/dist/main.js`
- Root cause: `docker/gateway.Dockerfile` copies `/app/node_modules` from builder — but pnpm puts deps in the content-addressed `.pnpm/` store with symlinks at `apps/gateway/node_modules/*`. The runner stage misses the symlinks → Node can't resolve workspace deps.
- M1 release was never runtime-tested as a stripped container; CI passed because tests run in dev tree where pnpm symlinks are intact.
- **Fix in flight** (subagent `a78a9ab0ddae91fbc`): switch builder to `pnpm --filter @mosaic/gateway --prod deploy /deploy`, then runner copies `/deploy/node_modules` + `/deploy/dist` + `/deploy/package.json`.
### M2-01 schema review verdict — NEEDS CHANGES
- PR #486 (`feat/federation-m2-schema`) — independent reviewer (sonnet) found 2 real issues:
1. `federation_audit_log` time-range indexes missing `.desc()` on `created_at` (3 places)
2. Reserved columns missing per TASKS.md M2-01 spec: `query_hash`, `outcome`, `bytes_out` (M4 will write; spec said reserve now)
- Also notes (advisory): subject_user_id correctly `text` (matches BetterAuth users.id; spec defect, not code defect); peer→grant cascade test not present (would be trivial to add)
- **Remediation in flight** (subagent `a673dd9355dc26f82` in worktree `agent-a4404ac1`): apply DESC + reserved cols, regenerate migration in place (preferred) or stack 0009 (fallback), force-push, post PR comment.
### Process notes
- Branch race incident: schema subagent + wrapper subagent both ran in main checkout → schema files appeared on wrapper branch. Recovered by TaskStop, `git checkout --` to clean, respawned schema subagent with `isolation: "worktree"`. **Rule going forward:** any subagent doing code edits gets `isolation: "worktree"` unless work is single-file and the orchestrator confirms no other branch will touch overlapping files.
- `pr-create.sh` shell-quotes backticks badly → use `tea pr create --repo mosaicstack/stack` directly (matches CLI-skill behavior). Follow-up: harden `pr-create.sh`'s quoting.
- Gitea registry auth: bearer-token exchange flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`) — direct `Authorization: Bearer <pat>` returns 401.
- Portainer Swarm stack create endpoint: `POST /api/stacks/create/swarm/string?endpointId=<id>` (NOT `/api/stacks?type=1` — deprecated and rejected with 400).
### In-flight at compaction boundary
- Subagent `a78a9ab0ddae91fbc` — Dockerfile pnpm-deploy fix → PR (not yet opened at handoff)
- Subagent `a673dd9355dc26f82` — M2-01 schema remediation (DESC + reserved cols) → force-push to PR #486
- Both will trigger CI; orchestrator must independently re-review fixes (especially the security-adjacent schema work) per "always verify subagent claims" rule.
### Next after subagents return
1. Independent re-review of schema remediation (different subagent, fresh context)
2. Merge #486 if green
3. Merge Dockerfile fix PR if green → triggers Kaniko CI rebuild → capture new digest
4. Update `deploy/portainer/federated-test.stack.yml` to new digest in a small PR
5. Redeploy mos-test-1 + mos-test-2 (Portainer stack update via API)
6. Verify HTTPS reachability + `/health` endpoint at both hosts
7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
9. M2-02 (Step-CA sidecar) kicks off after image health is green

View File

@@ -1 +0,0 @@
dev-only-step-ca-password-do-not-use-in-production

View File

@@ -1,60 +0,0 @@
#!/bin/sh
# infra/step-ca/init.sh
#
# Idempotent first-boot initialiser for the Mosaic Federation CA.
#
# On the first run (no /home/step/config/ca.json present) this script:
# 1. Initialises Step-CA with a JWK provisioner named "mosaic-fed".
# 2. Writes the CA configuration to the persistent volume at /home/step.
#
# On subsequent runs (config already exists) this script skips init and
# starts the CA directly.
#
# The provisioner name "mosaic-fed" is consumed by:
# apps/gateway/src/federation/ca.service.ts (added in M2-04)
#
# Password source:
# Dev: mounted from ./infra/step-ca/dev-password via bind mount.
# Prod: mounted from a Docker secret at /run/secrets/ca_password.
#
# OID template:
# infra/step-ca/templates/federation.tpl is copied into the CA config
# directory so the JWK provisioner can reference it. The template
# skeleton is wired in M2-04 when the CA service lands the SAN-bearing
# CSR work.
# Abort on any failing command (-e) and on use of an unset variable (-u).
set -eu

CA_CONFIG="/home/step/config/ca.json"
PASSWORD_FILE="/run/secrets/ca_password"

# Fail fast with an actionable message when the password is not a readable
# regular file. With a bind mount, a missing host file makes Docker create a
# directory at the mount target, which would otherwise surface later as an
# obscure error from `step ca init` / `step-ca`.
if [ ! -f "${PASSWORD_FILE}" ] || [ ! -r "${PASSWORD_FILE}" ]; then
  echo "[step-ca init] ERROR: CA password file ${PASSWORD_FILE} is missing, unreadable, or not a regular file." >&2
  echo "[step-ca init] Dev: copy infra/step-ca/dev-password.example to infra/step-ca/dev-password. Prod: mount the ca_password secret." >&2
  exit 1
fi

# First boot is detected by the absence of the CA config on the persistent volume.
if [ ! -f "${CA_CONFIG}" ]; then
  echo "[step-ca init] First boot detected — initialising Mosaic Federation CA..."

  # The same password file protects both the CA keys and the provisioner key.
  step ca init \
    --name "Mosaic Federation CA" \
    --dns "localhost" \
    --dns "step-ca" \
    --address ":9000" \
    --provisioner "mosaic-fed" \
    --password-file "${PASSWORD_FILE}" \
    --provisioner-password-file "${PASSWORD_FILE}" \
    --no-db

  echo "[step-ca init] CA initialised."

  # Copy the X.509 template into the Step-CA config directory so the
  # provisioner can reference it in M2-04. This runs on first boot only.
  if [ -f "/etc/step-ca-templates/federation.tpl" ]; then
    mkdir -p /home/step/templates
    cp /etc/step-ca-templates/federation.tpl /home/step/templates/federation.tpl
    echo "[step-ca init] Federation X.509 template copied to /home/step/templates/."
  fi

  echo "[step-ca init] Startup complete."
else
  echo "[step-ca init] Config already exists — skipping init."
fi

echo "[step-ca init] Starting Step-CA on :9000..."
# exec replaces this shell so step-ca receives container signals directly.
exec step-ca /home/step/config/ca.json --password-file "${PASSWORD_FILE}"

View File

@@ -1,48 +0,0 @@
{
"subject": {{ toJson .Subject }},
"sans": {{ toJson .SANs }},
{{- /*
Mosaic Federation X.509 Certificate Template
============================================
This template is used by the "mosaic-fed" JWK provisioner to sign
federation client certificates. As written it emits only subject, SANs,
digitalSignature key usage, clientAuth extended key usage and isCA=false;
no custom extension is active yet.
Custom OID extensions (per PRD §6):
1.3.6.1.4.1.99999.1 mosaic.federation.grantId (UUID string)
1.3.6.1.4.1.99999.2 mosaic.federation.subjectUserId (UUID string)
TODO (M2-04): Wire actual OID extensions below once the CA service
(apps/gateway/src/federation/ca.service.ts) lands the SAN-bearing CSR
work and the template can be exercised end-to-end.
Step-CA template reference:
https://smallstep.com/docs/step-ca/templates
Expected final shape of the extensions block (placeholder not yet
activated):
"extensions": [
{
"id": "1.3.6.1.4.1.99999.1",
"critical": false,
"value": {{ toJson (first .Token.mosaic_grant_id) }}
},
{
"id": "1.3.6.1.4.1.99999.2",
"critical": false,
"value": {{ toJson (first .Token.mosaic_subject_user_id) }}
}
],
NOTE(review): the placeholder above is a sketch, not verified syntax.
Before activating it in M2-04, confirm against the Step-CA template docs:
- whether an extension "value" must be the base64-encoded DER encoding of
the value rather than a raw JSON string (raw strings are presumably
rejected or mis-encoded);
- whether `first` is appropriate: it is a list helper, so it presumably
only works if the token claims are arrays, not plain strings.
The values must be supplied as claims `mosaic_grant_id` and
`mosaic_subject_user_id` in the JWK provisioner one-time token payload
presented when the CSR is submitted. M2-04 owns that work.
*/ -}}
"keyUsage": ["digitalSignature"],
"extKeyUsage": ["clientAuth"],
"basicConstraints": {
"isCA": false
}
}