docs(federation): record M3-07/09 merges + fix M3-11 dependency DAG

- FED-M3-09 (#673) and FED-M3-07 (#674) merged → mark done
- FED-M3-05 dispatched → in-progress
- Fix FED-M3-11 depends_on: M3-02,M3-09 → M3-02,M3-04,M3-05,M3-06,M3-09

The original M3-11 edge set omitted the server verbs + scope service even
though its E2E acceptance cases (#1-5, #8-10) exercise list/get over mTLS.
The under-specified DAG caused a premature M3-11 dispatch this session.

Also records the M3 read-path invariant scope (no-persist + enrollment
audit only; read audit-log writes deferred to M4).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 16:44:26 -05:00
12 changed files with 39 additions and 1549 deletions
--- a/apps/gateway/src/federation/federation.module.ts
+++ b/apps/gateway/src/federation/federation.module.ts
@@ -7,7 +7,7 @@ import { FederationController } from './federation.controller.js';
 import { CapabilitiesController } from './server/verbs/capabilities.controller.js';
 import { GrantsService } from './grants.service.js';
 import { FederationClientService, QuerySourceService } from './client/index.js';
-import { FederationAuthGuard, FederationScopeService } from './server/index.js';
+import { FederationAuthGuard } from './server/index.js';

@Module({
  controllers: [EnrollmentController, FederationController, CapabilitiesController],
@@ -19,7 +19,6 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
    FederationClientService,
    QuerySourceService,
    FederationAuthGuard,
-    FederationScopeService,
  ],
  exports: [
    CaService,
@@ -28,7 +27,6 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
    FederationClientService,
    QuerySourceService,
    FederationAuthGuard,
-    FederationScopeService,
  ],
 })
 export class FederationModule {}
--- a/apps/gateway/src/federation/server/tests/scope.service.spec.ts
+++ b/apps/gateway/src/federation/server/tests/scope.service.spec.ts
@@ -1,324 +0,0 @@
-/**
- * Unit tests for FederationScopeService (FED-M3-04).
- *
- * Coverage:
- *  - resource allowlist deny
- *  - excluded resource deny
- *  - invalid scope deny
- *  - invalid requested limit deny
- *  - native RBAC deny as subjectUserId
- *  - scope/native filter intersection for personal and team rows
- *  - native RBAC personal deny wins over scope include_personal allow/default
- *  - max_rows_per_query cap
- */
-
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-import { FederationScopeService, type FederationNativeRbacEvaluator } from '../scope.service.js';
-import type { FederationContext } from '../federation-context.js';
-
-const GRANT_ID = 'grant-1';
-const PEER_ID = 'peer-1';
-const SUBJECT_USER_ID = 'user-1';
-
-function makeContext(scope: Record<string, unknown>): FederationContext {
-  return {
-    grantId: GRANT_ID,
-    peerId: PEER_ID,
-    subjectUserId: SUBJECT_USER_ID,
-    scope,
-  };
-}
-
-function makeNativeRbac(
-  result: Awaited<ReturnType<FederationNativeRbacEvaluator['evaluateReadAccess']>>,
-): FederationNativeRbacEvaluator {
-  return {
-    evaluateReadAccess: vi.fn().mockResolvedValue(result),
-  };
-}
-
-describe('FederationScopeService', () => {
-  let service: FederationScopeService;
-
-  beforeEach(() => {
-    service = new FederationScopeService();
-  });
-
-  it('allows a granted resource and returns a capped query filter', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: ['team-1', 'team-2'] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({
-        resources: ['tasks'],
-        filters: { tasks: { include_teams: ['team-1', 'team-3'], include_personal: true } },
-        max_rows_per_query: 50,
-      }),
-      resource: 'tasks',
-      requestedLimit: 500,
-      nativeRbac,
-    });
-
-    expect(result).toEqual({
-      allowed: true,
-      filter: {
-        resource: 'tasks',
-        subjectUserId: SUBJECT_USER_ID,
-        includePersonal: true,
-        teamIds: ['team-1'],
-        limit: 50,
-        maxRowsPerQuery: 50,
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).toHaveBeenCalledWith({
-      grantId: GRANT_ID,
-      peerId: PEER_ID,
-      subjectUserId: SUBJECT_USER_ID,
-      resource: 'tasks',
-    });
-  });
-
-  it('defaults absent resource filters to native RBAC personal and team visibility', async () => {
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: ['notes'], max_rows_per_query: 100 }),
-      resource: 'notes',
-      nativeRbac: makeNativeRbac({
-        allowed: true,
-        access: { includePersonal: true, teamIds: ['team-1', 'team-2'] },
-      }),
-    });
-
-    expect(result).toMatchObject({
-      allowed: true,
-      filter: {
-        includePersonal: true,
-        teamIds: ['team-1', 'team-2'],
-        limit: 100,
-      },
-    });
-  });
-
-  it('honors include_personal false even when native RBAC allows personal rows', async () => {
-    const result = await service.evaluateAccess({
-      context: makeContext({
-        resources: ['memory'],
-        filters: { memory: { include_personal: false } },
-        max_rows_per_query: 25,
-      }),
-      resource: 'memory',
-      nativeRbac: makeNativeRbac({
-        allowed: true,
-        access: { includePersonal: true, teamIds: [] },
-      }),
-    });
-
-    expect(result).toMatchObject({
-      allowed: true,
-      filter: {
-        includePersonal: false,
-        teamIds: [],
-      },
-    });
-  });
-
-  it('does not leak personal rows when scope allows personal but native RBAC denies personal', async () => {
-    const result = await service.evaluateAccess({
-      context: makeContext({
-        resources: ['tasks'],
-        filters: { tasks: { include_personal: true } },
-        max_rows_per_query: 25,
-      }),
-      resource: 'tasks',
-      nativeRbac: makeNativeRbac({
-        allowed: true,
-        access: { includePersonal: false, teamIds: ['team-1'] },
-      }),
-    });
-
-    expect(result).toMatchObject({
-      allowed: true,
-      filter: {
-        includePersonal: false,
-        teamIds: ['team-1'],
-      },
-    });
-  });
-
-  it('does not widen native RBAC when scope includes teams the user cannot access', async () => {
-    const result = await service.evaluateAccess({
-      context: makeContext({
-        resources: ['tasks'],
-        filters: { tasks: { include_teams: ['team-2'], include_personal: false } },
-        max_rows_per_query: 25,
-      }),
-      resource: 'tasks',
-      nativeRbac: makeNativeRbac({
-        allowed: true,
-        access: { includePersonal: true, teamIds: ['team-1'] },
-      }),
-    });
-
-    expect(result).toMatchObject({
-      allowed: true,
-      filter: {
-        includePersonal: false,
-        teamIds: [],
-      },
-    });
-  });
-
-  it('denies invalid grant scope before RBAC evaluation', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: [] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: [], max_rows_per_query: 100 }),
-      resource: 'tasks',
-      nativeRbac,
-    });
-
-    expect(result).toMatchObject({
-      allowed: false,
-      deny: {
-        code: 'invalid_scope',
-        stage: 'scope_parse',
-        statusCode: 400,
-        grantId: GRANT_ID,
-        subjectUserId: SUBJECT_USER_ID,
-        resource: 'tasks',
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
-  });
-
-  it('denies unsupported resource names before RBAC evaluation', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: [] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: ['tasks'], max_rows_per_query: 100 }),
-      resource: 'unknown_resource',
-      nativeRbac,
-    });
-
-    expect(result).toMatchObject({
-      allowed: false,
-      deny: {
-        code: 'invalid_resource',
-        stage: 'resource_allowlist',
-        statusCode: 403,
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
-  });
-
-  it('denies resources explicitly present in excluded_resources before allowlist miss', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: [] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({
-        resources: ['tasks'],
-        excluded_resources: ['credentials'],
-        max_rows_per_query: 100,
-      }),
-      resource: 'credentials',
-      nativeRbac,
-    });
-
-    expect(result).toMatchObject({
-      allowed: false,
-      deny: {
-        code: 'resource_excluded',
-        stage: 'resource_exclusion',
-        statusCode: 403,
-        resource: 'credentials',
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
-  });
-
-  it('denies supported resources that are not granted by scope', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: [] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: ['tasks'], max_rows_per_query: 100 }),
-      resource: 'notes',
-      nativeRbac,
-    });
-
-    expect(result).toMatchObject({
-      allowed: false,
-      deny: {
-        code: 'resource_not_granted',
-        stage: 'resource_allowlist',
-        statusCode: 403,
-        resource: 'notes',
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
-  });
-
-  it('denies invalid requested row limits before RBAC evaluation', async () => {
-    const nativeRbac = makeNativeRbac({
-      allowed: true,
-      access: { includePersonal: true, teamIds: [] },
-    });
-
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: ['tasks'], max_rows_per_query: 100 }),
-      resource: 'tasks',
-      requestedLimit: 0,
-      nativeRbac,
-    });
-
-    expect(result).toMatchObject({
-      allowed: false,
-      deny: {
-        code: 'invalid_limit',
-        stage: 'row_cap',
-        statusCode: 400,
-        details: { requestedLimit: 0 },
-      },
-    });
-    expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
-  });
-
-  it('denies when native RBAC rejects subjectUserId access to the resource', async () => {
-    const result = await service.evaluateAccess({
-      context: makeContext({ resources: ['tasks'], max_rows_per_query: 100 }),
-      resource: 'tasks',
-      nativeRbac: makeNativeRbac({
-        allowed: false,
-        reason: 'read:tasks denied',
-        details: { permission: 'tasks:read' },
-      }),
-    });
-
-    expect(result).toEqual({
-      allowed: false,
-      deny: {
-        code: 'native_rbac_denied',
-        stage: 'native_rbac',
-        statusCode: 403,
-        message: 'read:tasks denied',
-        grantId: GRANT_ID,
-        peerId: PEER_ID,
-        subjectUserId: SUBJECT_USER_ID,
-        resource: 'tasks',
-        details: { permission: 'tasks:read' },
-      },
-    });
-  });
-});
--- a/apps/gateway/src/federation/server/index.ts
+++ b/apps/gateway/src/federation/server/index.ts
@@ -10,22 +10,4 @@
 */

 export { FederationAuthGuard } from './federation-auth.guard.js';
-export { FederationScopeService } from './scope.service.js';
 export type { FederationContext } from './federation-context.js';
-export type {
-  FederationNativeRbacAccess,
-  FederationNativeRbacAllowedResult,
-  FederationNativeRbacDeniedResult,
-  FederationNativeRbacEvaluator,
-  FederationNativeRbacRequest,
-  FederationNativeRbacResult,
-  FederationScopeAllowedResult,
-  FederationScopeDeniedResult,
-  FederationScopeDenyCode,
-  FederationScopeDenyDetails,
-  FederationScopeDenyReason,
-  FederationScopeDenyStage,
-  FederationScopeEvaluationInput,
-  FederationScopeEvaluationResult,
-  FederationScopeQueryFilter,
-} from './scope.service.js';
--- a/apps/gateway/src/federation/server/scope.service.ts
+++ b/apps/gateway/src/federation/server/scope.service.ts
@@ -1,272 +0,0 @@
-/**
- * FederationScopeService — M3 server-side scope enforcement pipeline.
- *
- * Pure trust-boundary service: it validates the grant scope, asks an injected
- * native RBAC evaluator what the subject user can read locally, intersects that
- * answer with the federation scope filters, and returns a query filter for the
- * verb controllers. The service performs no DB calls directly.
- */
-
-import { Injectable } from '@nestjs/common';
-import {
-  FEDERATION_RESOURCE_VALUES,
-  type FederationResource,
-  FederationScopeError,
-  parseFederationScope,
-} from '../scope-schema.js';
-import type { FederationContext } from './federation-context.js';
-
-const federationResourceSet: ReadonlySet<string> = new Set<string>(FEDERATION_RESOURCE_VALUES);
-
-export type FederationScopeDenyStage =
-  | 'scope_parse'
-  | 'resource_allowlist'
-  | 'resource_exclusion'
-  | 'native_rbac'
-  | 'row_cap';
-
-export type FederationScopeDenyCode =
-  | 'invalid_scope'
-  | 'invalid_resource'
-  | 'resource_not_granted'
-  | 'resource_excluded'
-  | 'native_rbac_denied'
-  | 'invalid_limit';
-
-export type FederationScopeDenyStatus = 400 | 403;
-
-export interface FederationScopeDenyDetails {
-  readonly [key: string]: string | number | boolean | readonly string[];
-}
-
-export interface FederationScopeDenyReason {
-  readonly code: FederationScopeDenyCode;
-  readonly stage: FederationScopeDenyStage;
-  readonly statusCode: FederationScopeDenyStatus;
-  readonly message: string;
-  readonly grantId: string;
-  readonly peerId: string;
-  readonly subjectUserId: string;
-  readonly resource: string;
-  readonly details?: FederationScopeDenyDetails;
-}
-
-export interface FederationNativeRbacRequest {
-  readonly grantId: string;
-  readonly peerId: string;
-  readonly subjectUserId: string;
-  readonly resource: FederationResource;
-}
-
-export interface FederationNativeRbacAccess {
-  /** Whether this user may read personal rows for this resource. */
-  readonly includePersonal: boolean;
-
-  /** Team IDs this user may read for this resource under native RBAC. */
-  readonly teamIds: readonly string[];
-}
-
-export interface FederationNativeRbacAllowedResult {
-  readonly allowed: true;
-  readonly access: FederationNativeRbacAccess;
-}
-
-export interface FederationNativeRbacDeniedResult {
-  readonly allowed: false;
-  readonly reason?: string;
-  readonly details?: FederationScopeDenyDetails;
-}
-
-export type FederationNativeRbacResult =
-  | FederationNativeRbacAllowedResult
-  | FederationNativeRbacDeniedResult;
-
-export interface FederationNativeRbacEvaluator {
-  evaluateReadAccess(request: FederationNativeRbacRequest): Promise<FederationNativeRbacResult>;
-}
-
-export interface FederationScopeEvaluationInput {
-  readonly context: FederationContext;
-  readonly resource: string;
-  readonly requestedLimit?: number;
-  readonly nativeRbac: FederationNativeRbacEvaluator;
-}
-
-export interface FederationScopeQueryFilter {
-  readonly resource: FederationResource;
-  readonly subjectUserId: string;
-  readonly includePersonal: boolean;
-  readonly teamIds: readonly string[];
-  readonly limit: number;
-  readonly maxRowsPerQuery: number;
-}
-
-export interface FederationScopeAllowedResult {
-  readonly allowed: true;
-  readonly filter: FederationScopeQueryFilter;
-}
-
-export interface FederationScopeDeniedResult {
-  readonly allowed: false;
-  readonly deny: FederationScopeDenyReason;
-}
-
-export type FederationScopeEvaluationResult =
-  | FederationScopeAllowedResult
-  | FederationScopeDeniedResult;
-
-function isFederationResource(resource: string): resource is FederationResource {
-  return federationResourceSet.has(resource);
-}
-
-function uniqueStrings(values: readonly string[]): readonly string[] {
-  return Array.from(new Set<string>(values));
-}
-
-function intersectTeamIds(
-  nativeTeamIds: readonly string[],
-  scopedTeamIds: readonly string[] | undefined,
-): readonly string[] {
-  const uniqueNativeTeamIds = uniqueStrings(nativeTeamIds);
-
-  if (scopedTeamIds === undefined) {
-    return uniqueNativeTeamIds;
-  }
-
-  const nativeSet = new Set<string>(uniqueNativeTeamIds);
-  return uniqueStrings(scopedTeamIds).filter((teamId: string): boolean => nativeSet.has(teamId));
-}
-
-function makeDenyReason(params: {
-  readonly code: FederationScopeDenyCode;
-  readonly stage: FederationScopeDenyStage;
-  readonly statusCode?: FederationScopeDenyStatus;
-  readonly message: string;
-  readonly context: FederationContext;
-  readonly resource: string;
-  readonly details?: FederationScopeDenyDetails;
-}): FederationScopeDeniedResult {
-  return {
-    allowed: false,
-    deny: {
-      code: params.code,
-      stage: params.stage,
-      statusCode: params.statusCode ?? 403,
-      message: params.message,
-      grantId: params.context.grantId,
-      peerId: params.context.peerId,
-      subjectUserId: params.context.subjectUserId,
-      resource: params.resource,
-      ...(params.details !== undefined ? { details: params.details } : {}),
-    },
-  };
-}
-
-@Injectable()
-export class FederationScopeService {
-  async evaluateAccess(
-    input: FederationScopeEvaluationInput,
-  ): Promise<FederationScopeEvaluationResult> {
-    const { context, resource, requestedLimit, nativeRbac } = input;
-
-    let scope: ReturnType<typeof parseFederationScope>;
-    try {
-      scope = parseFederationScope(context.scope);
-    } catch (error: unknown) {
-      const message =
-        error instanceof FederationScopeError
-          ? 'Federation grant scope is invalid'
-          : 'Federation grant scope could not be parsed';
-      const details = error instanceof Error ? { reason: error.message } : undefined;
-      return makeDenyReason({
-        code: 'invalid_scope',
-        stage: 'scope_parse',
-        statusCode: 400,
-        message,
-        context,
-        resource,
-        ...(details !== undefined ? { details } : {}),
-      });
-    }
-
-    if (!isFederationResource(resource)) {
-      return makeDenyReason({
-        code: 'invalid_resource',
-        stage: 'resource_allowlist',
-        message: 'Requested federation resource is not supported',
-        context,
-        resource,
-        details: { supportedResources: FEDERATION_RESOURCE_VALUES },
-      });
-    }
-
-    if (scope.excluded_resources.includes(resource)) {
-      return makeDenyReason({
-        code: 'resource_excluded',
-        stage: 'resource_exclusion',
-        message: 'Requested federation resource is explicitly excluded by grant scope',
-        context,
-        resource,
-      });
-    }
-
-    if (!scope.resources.includes(resource)) {
-      return makeDenyReason({
-        code: 'resource_not_granted',
-        stage: 'resource_allowlist',
-        message: 'Requested federation resource is not granted by scope',
-        context,
-        resource,
-        details: { grantedResources: scope.resources },
-      });
-    }
-
-    if (requestedLimit !== undefined && (!Number.isInteger(requestedLimit) || requestedLimit < 1)) {
-      return makeDenyReason({
-        code: 'invalid_limit',
-        stage: 'row_cap',
-        statusCode: 400,
-        message: 'Requested row limit must be a positive integer',
-        context,
-        resource,
-        details: { requestedLimit },
-      });
-    }
-
-    const nativeResult = await nativeRbac.evaluateReadAccess({
-      grantId: context.grantId,
-      peerId: context.peerId,
-      subjectUserId: context.subjectUserId,
-      resource,
-    });
-
-    if (!nativeResult.allowed) {
-      return makeDenyReason({
-        code: 'native_rbac_denied',
-        stage: 'native_rbac',
-        message: nativeResult.reason ?? 'Subject user is not allowed to read this resource',
-        context,
-        resource,
-        ...(nativeResult.details !== undefined ? { details: nativeResult.details } : {}),
-      });
-    }
-
-    const scopeFilter = scope.filters?.[resource];
-    const includePersonal =
-      Boolean(scopeFilter?.include_personal ?? true) && nativeResult.access.includePersonal;
-    const teamIds = intersectTeamIds(nativeResult.access.teamIds, scopeFilter?.include_teams);
-    const limit = Math.min(requestedLimit ?? scope.max_rows_per_query, scope.max_rows_per_query);
-
-    return {
-      allowed: true,
-      filter: {
-        resource,
-        subjectUserId: context.subjectUserId,
-        includePersonal,
-        teamIds,
-        limit,
-        maxRowsPerQuery: scope.max_rows_per_query,
-      },
-    };
-  }
-}
--- a/docs/federation/TASKS.md
+++ b/docs/federation/TASKS.md
@@ -91,22 +91,22 @@ Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass
 >
 > **Tracking issue:** #462.

-| id        | status      | description                                                                                                                                                                                                                                                                                            | issue | agent  | branch                               | depends_on                        | estimate | notes                                                                                                                                                    |
-| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| FED-M3-01 | done        | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462  | sonnet | feat/federation-m3-types             | —                                 | 4K       | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS.                                                                         |
-| FED-M3-02 | done        | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use.               | #462  | sonnet | feat/federation-m3-harness           | DEPLOY-04 (soft)                  | 8K       | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+.            |
-| FED-M3-03 | done        | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request.                  | #462  | sonnet | feat/federation-m3-auth-guard        | M3-01                             | 8K       | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant.  |
-| FED-M3-04 | in-progress | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected.                | #462  | sonnet | feat/federation-m3-scope-service     | M3-01                             | 10K      | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
-| FED-M3-05 | in-progress | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param.                                                                                           | #462  | sonnet | feat/federation-m3-verb-list         | M3-03, M3-04                      | 6K       | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4.                                                       |
-| FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way.                                                                                                                | #462  | sonnet | feat/federation-m3-verb-get          | M3-03, M3-04                      | 6K       | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns.                                                                           |
-| FED-M3-07 | done        | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval.                                            | #462  | sonnet | feat/federation-m3-verb-capabilities | M3-03                             | 4K       | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end.                                        |
-| FED-M3-08 | done        | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`.                  | #462  | sonnet | feat/federation-m3-client            | M3-01                             | 8K       | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic.                              |
-| FED-M3-09 | done        | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`.                                                 | #462  | sonnet | feat/federation-m3-query-source      | M3-08                             | 8K       | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top.           |
-| FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim).                                                                                                       | #462  | sonnet | feat/federation-m3-integration       | M3-05, M3-06                      | 8K       | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required.                                                            |
+| id        | status      | description                                                                                                                                                                                                                                                                                            | issue | agent  | branch                               | depends_on       | estimate | notes                                                                                                                                                    |
+| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| FED-M3-01 | done        | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462  | sonnet | feat/federation-m3-types             | —                | 4K       | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS.                                                                         |
+| FED-M3-02 | done        | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use.               | #462  | sonnet | feat/federation-m3-harness           | DEPLOY-04 (soft) | 8K       | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+.            |
+| FED-M3-03 | done        | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request.                  | #462  | sonnet | feat/federation-m3-auth-guard        | M3-01            | 8K       | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant.  |
+| FED-M3-04 | in-progress | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected.                | #462  | sonnet | feat/federation-m3-scope-service     | M3-01            | 10K      | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
+| FED-M3-05 | in-progress | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param.                                                                                           | #462  | sonnet | feat/federation-m3-verb-list         | M3-03, M3-04     | 6K       | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4.                                                       |
+| FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way.                                                                                                                | #462  | sonnet | feat/federation-m3-verb-get          | M3-03, M3-04     | 6K       | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns.                                                                           |
+| FED-M3-07 | done        | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval.                                            | #462  | sonnet | feat/federation-m3-verb-capabilities | M3-03            | 4K       | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end.                                        |
+| FED-M3-08 | done        | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`.                  | #462  | sonnet | feat/federation-m3-client            | M3-01            | 8K       | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic.                              |
+| FED-M3-09 | done        | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`.                                                 | #462  | sonnet | feat/federation-m3-query-source      | M3-08            | 8K       | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top.           |
+| FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim).                                                                                                       | #462  | sonnet | feat/federation-m3-integration       | M3-05, M3-06     | 8K       | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required.                                                            |
 | FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants.                                                                  | #462  | sonnet | feat/federation-m3-e2e               | M3-02, M3-04, M3-05, M3-06, M3-09 | 12K      | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution.                                                               |
-| FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage when scope deny.                                                          | #462  | sonnet | feat/federation-m3-security-review   | M3-11                             | 10K      | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage.                                                |
-| FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred.                                                                         | #462  | haiku  | feat/federation-m3-docs              | M3-12                             | 5K       | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths.                                          |
-| FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins.                                                                                                                                    | #462  | sonnet | chore/federation-m3-close            | M3-13                             | 3K       | Same close pattern as M1-12 / M2-13.                                                                                                                     |
+| FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage on scope denial.                                                          | #462  | sonnet | feat/federation-m3-security-review   | M3-11            | 10K      | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage.                                                |
+| FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred.                                                                         | #462  | haiku  | feat/federation-m3-docs              | M3-12            | 5K       | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths.                                          |
+| FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins.                                                                                                                                    | #462  | sonnet | chore/federation-m3-close            | M3-13            | 3K       | Same close pattern as M1-12 / M2-13.                                                                                                                     |

 **M3 estimate:** ~100K tokens (vs MILESTONES.md 40K — same per-task breakdown pattern as M1/M2: tests, review, and docs split out from implementation cost). Largest milestone in the federation mission.

--- a/docs/scratchpads/462-fed-m3-04-scope-service.md
+++ b/docs/scratchpads/462-fed-m3-04-scope-service.md
@@ -1,60 +0,0 @@
-# Scratchpad — FED-M3-04 Scope Service
-
-## Objective
-
-Implement `apps/gateway/src/federation/server/scope.service.ts` for the M3 inbound federation scope-enforcement pipeline.
-
-## Scope / Constraints
-
- Task: FED-M3-04, issue #462.
- Branch: `feat/federation-m3-scope-service` from `origin/main` @ 0.0.48.
- Pure service: no direct DB access; native RBAC/data access is injected per evaluation call.
- Reuse `parseFederationScope` from M2-03.
- Workers do not edit `docs/federation/TASKS.md` per repo AGENTS.md.
-
-## Acceptance Criteria
-
-1. Resource allowlist and `excluded_resources` enforced.
-2. Native RBAC evaluated as `subjectUserId` through an injected evaluator.
-3. Scope filter intersection supports `include_teams` and `include_personal` without widening native RBAC.
-4. `max_rows_per_query` caps requested limits.
-5. Service returns `{ allowed: true, filter }` or a structured deny reason usable by M4 audit.
-6. Unit tests cover every deny path.
-
-## Plan
-
-1. Inspect existing federation scope/schema/auth guard contracts.
-2. Add pure `FederationScopeService` plus typed result/filter/deny interfaces.
-3. Add focused unit tests for happy paths, filter intersection, row cap, and deny paths.
-4. Export/register service for future verb controllers.
-5. Run situational tests, baseline gates, code review, then PR.
-
-## Budget
-
- Provided model tier: sonnet.
- Estimate from task row: 10K tokens.
- Working cap assumption: keep implementation focused to FED-M3-04 surfaces only.
-
-## Progress
-
- Intake complete; dirty base worktree avoided by creating isolated worktree at `/home/jarvis/src/mosaic-mono-v1-fed-m3-04`.
- Project PRD and federation task spec reviewed.
- Added `FederationScopeService` with structured allow/deny result types and injected native RBAC evaluator contract.
- Added unit coverage for happy path, row cap, filter intersection, and every deny path.
- Exported/registered the service for upcoming M3 verb controllers.
-
-## Verification Evidence
-
- `pnpm --filter @mosaicstack/gateway test -- src/federation/server/__tests__/scope.service.spec.ts` — pass (10 tests before review update; 11 tests after adding include_personal no-leak coverage).
- `pnpm build` — pass (23 successful tasks).
- `pnpm typecheck` — pass (41 successful tasks; re-run after review update).
- `pnpm lint` — pass (23 successful tasks; re-run after review update).
- `pnpm format:check` — pass (re-run after review update).
- `pnpm test` — pass after starting local `postgres`/`valkey` and running `pnpm --filter @mosaicstack/db db:push` for the DB-backed cross-user isolation suite (41 successful tasks; gateway 477 passed / 11 skipped).
- Code review: `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — approve, 0 findings.
- Security review: `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — risk none, 0 findings.
-
-## Risks / Blockers
-
- Issue #462 is already closed in provider output; likely milestone tracking mismatch. Will still reference #462 in PR body unless orchestrator redirects.
- Local full-test setup required `docker compose up -d postgres valkey` + `db:push`; containers were stopped with `docker compose down` after verification.
--- a/docs/scratchpads/672-fleet-personas-timeout.md
+++ b/docs/scratchpads/672-fleet-personas-timeout.md
@@ -1,25 +0,0 @@
-# Scratchpad — fleet-personas spec timeout
-
-## Objective
-
-Raise the `@mosaicstack/mosaic` Vitest timeout to 30s at config level so filesystem-backed fleet drift-guard specs (`fleet-personas`, `fleet-profiles`, and siblings) stop false-reding under contended CI.
-
-## Plan
-
-1. Move timeout policy into `packages/mosaic/vitest.config.ts` with `testTimeout: 30_000`.
-2. Remove the narrower `fleet-personas.spec.ts` local override so PR #677 fixes the suite class, not one file.
-3. Run targeted fleet specs plus typecheck/lint/format gates.
-4. Commit, queue guard, push, PR update.
-
-## Evidence
-
- `pnpm --filter @mosaicstack/mosaic test -- src/commands/fleet-personas.spec.ts` — pass (8 tests; initial narrow fix).
- `pnpm typecheck` — pass (41 tasks; initial narrow fix).
- `pnpm lint` — pass (23 tasks; initial narrow fix).
- `pnpm format:check` — pass after formatting this scratchpad (initial narrow fix).
- Package-wide timeout follow-up:
-  - `pnpm --filter @mosaicstack/mosaic test -- src/commands/fleet-personas.spec.ts src/commands/fleet-profiles.spec.ts` — pass (24 tests).
-  - `pnpm --filter @mosaicstack/mosaic test` — pass (44 files / 618 tests).
-  - `pnpm typecheck` — pass (41 tasks).
-  - `pnpm lint` — pass (23 tasks).
-  - `pnpm format:check` — pass.
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -30,7 +30,6 @@ export default tseslint.config(
            'apps/gateway/vitest.config.ts',
            'packages/db/vitest.config.ts',
            'packages/storage/vitest.config.ts',
-            'packages/mosaic/vitest.config.ts',
            'packages/mosaic/__tests__/*.ts',
            'tools/federation-harness/*.ts',
          ],
--- a/packages/mosaic/src/commands/fleet.spec.ts
+++ b/packages/mosaic/src/commands/fleet.spec.ts
@@ -4,7 +4,6 @@ import { dirname, join, resolve } from 'node:path';
 import { Command } from 'commander';
 import { afterEach, describe, expect, it, vi } from 'vitest';
 import {
-  acquireRestartLock,
  addAgentToRoster,
  buildAgentSendCommand,
  buildAgentWatchAttachCommand,
@@ -46,8 +45,6 @@ import {
  removeAgentFromRoster,
  resolveFleetPaths,
  resolvePresetFilename,
-  restartLockPath,
-  RESTART_LOCK_STALE_MS,
  RUNTIME_ACCEPTABLE_COMMANDS,
  serializeRosterToYaml,
  VERIFY_DEFAULT_TIMEOUT_MS,
@@ -681,364 +678,6 @@ describe('fleet command construction', () => {
    }
  });

-  it('waits for an in-flight restart to clear before relaunching (re-entry guard)', async () => {
-    const home = await tempDir();
-    const rosterPath = join(home, 'fleet', 'roster.yaml');
-    await mkdir(join(home, 'fleet'), { recursive: true });
-    await writeFile(
-      rosterPath,
-      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
-        '\n',
-      ),
-    );
-
-    // Simulate another `mosaic fleet restart` process mid-teardown: a fresh lock
-    // (recent timestamp, so it is NOT treated as stale) already held.
-    const lockPath = restartLockPath(home);
-    await mkdir(dirname(lockPath), { recursive: true });
-    await writeFile(lockPath, `4242\n${Date.now()}\n`);
-
-    const events: string[] = [];
-    const runner: CommandRunner = async (command, args) => {
-      events.push(`run:${args[args.length - 1]}`);
-      return { stdout: '', stderr: '', exitCode: 0 };
-    };
-    // The injected sleep stands in for time passing while we wait; the in-flight
-    // restart "finishes" (releases its lock) after the first poll.
-    let sleeps = 0;
-    const sleepFn: SleepFn = async () => {
-      sleeps += 1;
-      events.push(`sleep:${sleeps}`);
-      await rm(lockPath, { force: true });
-    };
-
-    const program = new Command();
-    program.exitOverride();
-    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
-
-    try {
-      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
-
-      // It must have waited at least once before issuing any systemctl restart.
-      expect(sleeps).toBeGreaterThan(0);
-      const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
-      const firstRun = events.findIndex((e) => e.startsWith('run:'));
-      expect(firstSleep).toBeGreaterThanOrEqual(0);
-      expect(firstRun).toBeGreaterThan(firstSleep);
-
-      // And it still performs the full restart once the lock clears.
-      expect(events).toContain('run:mosaic-tmux-holder.service');
-      expect(events).toContain('run:mosaic-agent@coder0.service');
-
-      // The lock is released after the restart completes.
-      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
-  it('breaks a stale restart lock and proceeds without waiting', async () => {
-    const home = await tempDir();
-    const rosterPath = join(home, 'fleet', 'roster.yaml');
-    await mkdir(join(home, 'fleet'), { recursive: true });
-    await writeFile(
-      rosterPath,
-      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
-        '\n',
-      ),
-    );
-
-    // A lock left behind by a crashed owner: timestamp older than the stale window.
-    const lockPath = restartLockPath(home);
-    await mkdir(dirname(lockPath), { recursive: true });
-    await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n`);
-
-    const calls: string[][] = [];
-    const runner: CommandRunner = async (command, args) => {
-      calls.push([command, ...args]);
-      return { stdout: '', stderr: '', exitCode: 0 };
-    };
-    const sleepFn = vi.fn<SleepFn>(async () => {});
-
-    const program = new Command();
-    program.exitOverride();
-    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
-
-    try {
-      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
-
-      // Stale lock is broken immediately — no waiting.
-      expect(sleepFn).not.toHaveBeenCalled();
-      expect(calls).toEqual([
-        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
-        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
-      ]);
-      // The stale lock is gone once the restart completes.
-      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
-  it('releases the restart lock so a subsequent restart is not blocked', async () => {
-    const home = await tempDir();
-    const rosterPath = join(home, 'fleet', 'roster.yaml');
-    await mkdir(join(home, 'fleet'), { recursive: true });
-    await writeFile(
-      rosterPath,
-      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
-        '\n',
-      ),
-    );
-
-    const calls: string[][] = [];
-    const runner: CommandRunner = async (command, args) => {
-      calls.push([command, ...args]);
-      return { stdout: '', stderr: '', exitCode: 0 };
-    };
-    const sleepFn = vi.fn<SleepFn>(async () => {});
-
-    const program = new Command();
-    program.exitOverride();
-    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
-
-    try {
-      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
-      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
-
-      // Two sequential restarts both run fully and neither has to wait.
-      expect(sleepFn).not.toHaveBeenCalled();
-      expect(calls).toEqual([
-        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
-        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
-        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
-        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
-      ]);
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
-  it('guards the single-agent restart path behind the in-flight restart lock', async () => {
-    const home = await tempDir();
-    const rosterPath = join(home, 'fleet', 'roster.yaml');
-    await mkdir(join(home, 'fleet'), { recursive: true });
-    await writeFile(
-      rosterPath,
-      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
-        '\n',
-      ),
-    );
-
-    // A full restart is mid-flight (lock held); a single-agent restart re-enters.
-    const lockPath = restartLockPath(home);
-    await mkdir(dirname(lockPath), { recursive: true });
-    await writeFile(lockPath, `4242\n${Date.now()}\n`);
-
-    const events: string[] = [];
-    const runner: CommandRunner = async (command, args) => {
-      events.push(`run:${args[args.length - 1]}`);
-      return { stdout: '', stderr: '', exitCode: 0 };
-    };
-    let sleeps = 0;
-    const sleepFn: SleepFn = async () => {
-      sleeps += 1;
-      events.push(`sleep:${sleeps}`);
-      await rm(lockPath, { force: true });
-    };
-
-    const program = new Command();
-    program.exitOverride();
-    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
-
-    try {
-      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart', 'coder0']);
-
-      // The single-agent restart waits for the in-flight restart before acting.
-      expect(sleeps).toBeGreaterThan(0);
-      const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
-      const firstRun = events.findIndex((e) => e.startsWith('run:'));
-      expect(firstSleep).toBeGreaterThanOrEqual(0);
-      expect(firstRun).toBeGreaterThan(firstSleep);
-      // Only the named agent is restarted; the holder is untouched.
-      expect(events).toContain('run:mosaic-agent@coder0.service');
-      expect(events).not.toContain('run:mosaic-tmux-holder.service');
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
-  it('does not let a timed-out owner drop a lock another restart broke and re-owned', async () => {
-    const home = await tempDir();
-    const runDir = join(home, 'fleet', 'run');
-    await mkdir(runDir, { recursive: true });
-    const lockPath = restartLockPath(home);
-    const tokenOf = async (): Promise<string> => {
-      const raw = await readFile(lockPath, 'utf8');
-      return raw.split('\n')[2]?.trim() ?? '';
-    };
-    const sleepFn = vi.fn<SleepFn>(async () => {});
-
-    // R1 acquires the lock and begins a restart that then hangs.
-    const r1 = await acquireRestartLock(home, sleepFn);
-    const tokenR1 = await tokenOf();
-    expect(tokenR1).not.toBe('');
-
-    // The hung R1 leaves a stale lock: rewrite its timestamp into the past while
-    // preserving R1's token — exactly the on-disk state a stuck owner leaves.
-    await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n${tokenR1}\n`);
-
-    // R2 re-enters, sees the stale lock, and atomically takes ownership.
-    const r2 = await acquireRestartLock(home, sleepFn);
-    const tokenR2 = await tokenOf();
-    expect(tokenR2).not.toBe(tokenR1);
-    expect(sleepFn).not.toHaveBeenCalled();
-
-    // R1 finally finishes and releases. It must NOT delete R2's lock — otherwise
-    // a third restart (R3) could acquire and interleave with R2 still running.
-    await r1.release();
-    expect(await tokenOf()).toBe(tokenR2);
-
-    // R2 releases cleanly and the lock is gone.
-    await r2.release();
-    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
-
-    await rm(home, { recursive: true, force: true });
-  });
-
-  it('lets only one of several concurrent breakers proceed past a stale lock', async () => {
-    const home = await tempDir();
-    const lockPath = restartLockPath(home);
-    await mkdir(dirname(lockPath), { recursive: true });
-
-    // A stale lock left by a crashed owner: every concurrent re-entrant restart
-    // will judge it stale and try to break it at the same instant. Breaking must
-    // NOT grant ownership — only the atomic re-create may — so exactly one
-    // contender can ever hold the lock at a time. (The v2 fix wrote our own token
-    // during the break and read it back, so two breakers each saw their own token
-    // and BOTH proceeded; this guards that regression.)
-    await writeFile(
-      lockPath,
-      `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
-    );
-
-    // Yielding sleep so a waiting contender lets the current owner finish and
-    // release before it re-contends, instead of spinning the microtask queue.
-    const sleepFn: SleepFn = async () => {
-      await new Promise((res) => setTimeout(res, 0));
-    };
-
-    let active = 0;
-    let maxActive = 0;
-    const tokens: string[] = [];
-    const tokenOf = async (): Promise<string> => {
-      const raw = await readFile(lockPath, 'utf8');
-      return raw.split('\n')[2]?.trim() ?? '';
-    };
-
-    // One "restart" = acquire the lock, do work in the critical section, release.
-    const restartOnce = async (): Promise<void> => {
-      const guard = await acquireRestartLock(home, sleepFn);
-      active += 1;
-      maxActive = Math.max(maxActive, active);
-      // Record the token we own while we hold it, then yield to interleave with
-      // any other contender that might (wrongly) believe it owns the lock too.
-      tokens.push(await tokenOf());
-      await new Promise((res) => setTimeout(res, 0));
-      active -= 1;
-      await guard.release();
-    };
-
-    try {
-      // Three breakers race the single stale lock simultaneously.
-      await Promise.all([restartOnce(), restartOnce(), restartOnce()]);
-
-      // Mutual exclusion held: never two owners at once despite concurrent breaks.
-      expect(maxActive).toBe(1);
-      // Each acquire owned with its own distinct token — no two ever shared it.
-      expect(new Set(tokens).size).toBe(3);
-      // The lock is fully released at the end.
-      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
-  it('lets exactly one of two breakers take over a stale lock while the other waits', async () => {
-    const home = await tempDir();
-    const lockPath = restartLockPath(home);
-    await mkdir(dirname(lockPath), { recursive: true });
-
-    // A single stale lock both contenders will judge stale at the same instant.
-    // Every transition runs under the registry mutex, so only one may take the
-    // lock over; the other must observe a now-fresh owner and WAIT/re-evaluate
-    // rather than also taking over. (A content-blind clobber let both believe
-    // they owned it — this asserts the mutex-gated CAS takeover instead.)
-    await writeFile(
-      lockPath,
-      `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
-    );
-
-    // Barrier the winner holds against until the loser has observed the lock
-    // fresh and waited at least once — forcing the exact interleaving where one
-    // proceeds while the other waits, deterministically rather than by timing.
-    let resolveLoserWaited: () => void = () => {};
-    const loserWaited = new Promise<void>((res) => {
-      resolveLoserWaited = res;
-    });
-    let sleeps = 0;
-    const sleepFn: SleepFn = async () => {
-      sleeps += 1;
-      resolveLoserWaited();
-      await new Promise((res) => setTimeout(res, 0));
-    };
-
-    let active = 0;
-    let maxActive = 0;
-    const tokens: string[] = [];
-    const tokenOf = async (): Promise<string> => {
-      const raw = await readFile(lockPath, 'utf8');
-      return raw.split('\n')[2]?.trim() ?? '';
-    };
-
-    let firstOwner = true;
-    const restartOnce = async (): Promise<void> => {
-      const guard = await acquireRestartLock(home, sleepFn);
-      active += 1;
-      maxActive = Math.max(maxActive, active);
-      tokens.push(await tokenOf());
-      if (firstOwner) {
-        // Winner: keep holding the lock until the loser has waited once, so the
-        // loser is guaranteed to see a FRESH owner (not the stale one) and back
-        // off — proving it could not also take over.
-        firstOwner = false;
-        await loserWaited;
-      } else {
-        await new Promise((res) => setTimeout(res, 0));
-      }
-      active -= 1;
-      await guard.release();
-    };
-
-    try {
-      // Exactly two breakers race the single stale lock.
-      await Promise.all([restartOnce(), restartOnce()]);
-
-      // Mutual exclusion: never two owners at once (if both took over the stale
-      // lock, this would be 2).
-      expect(maxActive).toBe(1);
-      // Both eventually owned, each with its own distinct token.
-      expect(new Set(tokens).size).toBe(2);
-      // The loser observed the winner's fresh lock and waited — it did NOT also
-      // take over the stale lock.
-      expect(sleeps).toBeGreaterThanOrEqual(1);
-      // The lock is fully released at the end.
-      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
-    } finally {
-      await rm(home, { recursive: true, force: true });
-    }
-  });
-
  it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
--- a/packages/mosaic/src/commands/fleet.ts
+++ b/packages/mosaic/src/commands/fleet.ts
@@ -1,16 +1,5 @@
 import { constants } from 'node:fs';
-import {
-  access,
-  chmod,
-  copyFile,
-  mkdir,
-  open,
-  readFile,
-  stat,
-  unlink,
-  writeFile,
-} from 'node:fs/promises';
-import { randomUUID } from 'node:crypto';
+import { access, chmod, copyFile, mkdir, readFile, unlink, writeFile } from 'node:fs/promises';
 import { homedir, hostname, userInfo } from 'node:os';
 import { dirname, join, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -544,295 +533,6 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
  return ['systemctl', '--user', action, service];
 }

-/** Poll interval (ms) while waiting for an in-flight restart's lock to clear. */
-export const RESTART_LOCK_POLL_INTERVAL_MS = 250;
-/**
- * Maximum time (ms) a re-entrant restart waits for the in-flight restart to
- * finish before it breaks the lock and proceeds anyway. A bound is required so
- * a crashed holder of the lock can never deadlock the fleet permanently.
- */
-export const RESTART_LOCK_MAX_WAIT_MS = 30_000;
-/**
- * Age (ms) past which a restart lock is treated as stale (its owner died
- * without releasing it) and is broken immediately rather than waited on.
- */
-export const RESTART_LOCK_STALE_MS = 60_000;
-
-/**
- * Resolves the path of the cross-process restart lock for a given Mosaic home.
- * Kept strictly under `<mosaicHome>/fleet/run` (not the heartbeat env override)
- * so the lock is scoped to the same fleet the restart acts on.
- */
-export function restartLockPath(mosaicHome: string): string {
-  return join(mosaicHome, 'fleet', 'run', 'restart.lock');
-}
-
-/** A held restart lock; `release()` removes the lock file iff we still own it. */
-interface RestartGuard {
-  release(): Promise<void>;
-}
-
-/** Lock-file contents: pid (informational), timestamp, and a unique owner token. */
-function formatRestartLockContent(token: string): string {
-  return `${process.pid}\n${Date.now()}\n${token}\n`;
-}
-
-/**
- * Reads the owner token (line 3) from a lock file, or null if the file is
- * missing/unreadable/tokenless. The token is what makes release and break
- * ownership-safe: a process only ever acts on a lock whose token matches its own.
- */
-async function readRestartLockToken(lockPath: string): Promise<string | null> {
-  let raw: string;
-  try {
-    raw = await readFile(lockPath, 'utf8');
-  } catch {
-    return null;
-  }
-  const token = raw.split('\n')[2]?.trim();
-  return token ? token : null;
-}
-
-/**
- * Returns true when a lock's contents are stale: older than RESTART_LOCK_STALE_MS,
- * or unparseable (a corrupt or partially written lock left by a crashed owner).
- */
-function isRestartLockContentStale(raw: string, now: number): boolean {
-  const stampLine = raw.split('\n')[1] ?? '';
-  const stamp = Number.parseInt(stampLine.trim(), 10);
-  if (!Number.isFinite(stamp)) {
-    return true;
-  }
-  return now - stamp >= RESTART_LOCK_STALE_MS;
-}
-
-/**
- * Path of the short-lived registry mutex that guards EVERY transition of the
- * restart lock (acquire, release, takeover). Held only across a few filesystem
- * ops — never across the restart itself — so contention clears in microseconds.
- */
-function restartMutexPath(lockPath: string): string {
-  return `${lockPath}.mutex`;
-}
-
-/** Brief back-off between registry-mutex acquisition attempts (held microseconds). */
-const RESTART_MUTEX_RETRY_MS = 20;
-
-/**
- * Staleness for the internal mutex / reclaim locks, judged by the file's mtime
- * rather than its CONTENT. `open(path, 'wx')` creates the inode (with a fresh
- * mtime) before any token/timestamp is written into it, so a content-based check
- * would momentarily see that empty file as corrupt-and-stale and could reap a
- * lock another contender is still acquiring. mtime is set atomically at creation,
- * so a just-created lock always reads as live; only a lock whose holder died and
- * stopped touching it ages past the threshold. These locks are never held across
- * the restart itself (only a couple of filesystem ops), so any mtime this old can
- * belong only to a dead holder.
- */
-async function isRestartLockPathStale(path: string, now: number): Promise<boolean> {
-  try {
-    const info = await stat(path);
-    return now - info.mtimeMs >= RESTART_LOCK_STALE_MS;
-  } catch (err) {
-    if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
-      return false; // Gone, not stale — the caller will re-contend.
-    }
-    return false; // Can't stat — treat as live and back off rather than reap.
-  }
-}
-
-/** Path of the reclaim lock that serializes reaping of a crashed-holder mutex. */
-function restartReclaimPath(mutexPath: string): string {
-  return `${mutexPath}.reclaim`;
-}
-
-/**
- * Reap a registry mutex left behind by a process that CRASHED mid-transition —
- * one whose file has aged past RESTART_LOCK_STALE_MS. Because the mutex is held
- * only for a couple of filesystem ops (no sleeps, never across the restart), a
- * mutex this old can only belong to a dead holder.
- *
- * The reap removes the dead mutex but never CREATES/holds it — acquisition stays
- * the single `open('wx')` create in {@link acquireRestartMutex}, so exactly one
- * contender wins ownership no matter how the reap and acquires interleave. The
- * removal is made conditional by a dedicated reclaim lock: while it is held the
- * dead mutex is stable (its dead holder will never touch it, and no other
- * reclaimer can race), so re-reading it and removing it only if it is STILL stale
- * is a true compare — a live holder's fresh mutex is never removed. This closes
- * the reclaim race a content-blind rename-and-restore left open (a third
- * contender slipping into the gap while a fresh mutex was moved aside).
- */
-async function reclaimStaleRestartMutex(mutexPath: string): Promise<void> {
-  const reclaimPath = restartReclaimPath(mutexPath);
-  let handle: Awaited<ReturnType<typeof open>>;
-  try {
-    handle = await open(reclaimPath, 'wx');
-  } catch (err) {
-    if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
-      throw err;
-    }
-    // Someone is already reclaiming. If their reclaim lock is itself stale by
-    // mtime, its holder crashed mid-reap (the lock spans only a stat + unlink,
-    // microseconds) — clear it so a later pass can retry. Otherwise a live
-    // reclaimer has it; back off. Either way we do not reap the mutex this pass.
-    if (await isRestartLockPathStale(reclaimPath, Date.now())) {
-      await unlink(reclaimPath).catch(() => {});
-    }
-    return;
-  }
-  try {
-    // Re-check the mutex UNDER the reclaim lock and remove it only if it is STILL
-    // stale by mtime. A live holder's mutex is fresh and is left untouched; a dead
-    // holder's mutex is stable here (its holder is gone and no other reclaimer can
-    // race us), so this re-check is authoritative.
-    if (await isRestartLockPathStale(mutexPath, Date.now())) {
-      await unlink(mutexPath).catch(() => {});
-    }
-  } finally {
-    await handle.close();
-    await unlink(reclaimPath).catch(() => {});
-  }
-}
-
-/**
- * Acquire the registry mutex, BLOCKING (with brief back-offs) until held, and
- * return a token-gated release. This is the single point of mutual exclusion for
- * the restart lock: acquire, release, and stale/timeout takeover all run under it,
- * so "read the lock, then mutate it" is atomic — no acquirer, releaser, or breaker
- * can ever interleave with another. A mutex left by a crashed holder is reclaimed
- * once it ages past the stale threshold.
- */
-async function acquireRestartMutex(
-  mutexPath: string,
-  token: string,
-): Promise<RestartGuard['release']> {
-  for (;;) {
-    let handle: Awaited<ReturnType<typeof open>>;
-    try {
-      handle = await open(mutexPath, 'wx');
-    } catch (err) {
-      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
-        throw err;
-      }
-      // Staleness is judged by mtime, not content, so a mutex that exists but has
-      // not yet had its token written (the open-before-write window) reads as live
-      // and is never wrongly reaped.
-      if (!(await isRestartLockPathStale(mutexPath, Date.now()))) {
-        // A live holder has it — it will be gone in microseconds. Back off briefly.
-        await new Promise((resolve) => setTimeout(resolve, RESTART_MUTEX_RETRY_MS));
-        continue;
-      }
-      await reclaimStaleRestartMutex(mutexPath);
-      continue;
-    }
-    // We created the mutex. Populate it with our token; if writing fails, clean up
-    // our own file so we never leak an empty mutex that a peer would have to reap.
-    try {
-      await handle.writeFile(formatRestartLockContent(token));
-      await handle.close();
-    } catch (err) {
-      await handle.close().catch(() => {});
-      await unlink(mutexPath).catch(() => {});
-      throw err;
-    }
-    return async (): Promise<void> => {
-      if ((await readRestartLockToken(mutexPath)) !== token) return;
-      await unlink(mutexPath).catch(() => {});
-    };
-  }
-}
-
-/**
- * Acquire the fleet restart lock, serializing concurrent `mosaic fleet restart`
- * invocations across processes. Each restart tears the tmux holder (and the
- * agent sessions inside it) down and back up; without this guard a re-entrant
- * restart relaunches agents against a half-torn-down holder, which fails and
- * tight-loops. A re-entrant caller waits for the in-flight restart to release
- * the lock (clean shutdown settled) before proceeding, breaks a stale lock left
- * by a crashed owner, and after RESTART_LOCK_MAX_WAIT_MS breaks the lock to
- * avoid a permanent deadlock.
- *
- * Correctness rests on a single invariant: EVERY transition of the lock — taking
- * a free lock, taking over a stale/timed-out one, and releasing — happens under
- * the registry mutex. Because the check ("is the lock free / stale / fresh?") and
- * the mutation that follows it both run while the mutex is held, they are atomic:
- * no other acquirer, releaser, or breaker can slip in between. That is what makes
- * takeover a true compare-and-swap rather than a content-blind clobber — a normal
- * `open('wx')` acquirer cannot create a fresh lock in a gap, and the original
- * owner's `release()` (also mutex-gated and token-checked) cannot drop a lock a
- * breaker already took over. So no interleaving lets two restarts both own the
- * lock and run concurrently.
- */
-export async function acquireRestartLock(
-  mosaicHome: string,
-  sleepFn: SleepFn,
-): Promise<RestartGuard> {
-  const token = randomUUID();
-  const lockPath = restartLockPath(mosaicHome);
-  const mutexPath = restartMutexPath(lockPath);
-  await mkdir(dirname(lockPath), { recursive: true });
-  const release = async (): Promise<void> => {
-    // Mutex-gated and token-gated: only remove the lock if it is still ours. If
-    // another caller took it over (after a stale/timeout break) the token no
-    // longer matches and we leave their lock intact.
-    const releaseMutex = await acquireRestartMutex(mutexPath, token);
-    try {
-      if ((await readRestartLockToken(lockPath)) === token) {
-        await unlink(lockPath).catch(() => {});
-      }
-    } finally {
-      await releaseMutex();
-    }
-  };
-  const deadline = Date.now() + RESTART_LOCK_MAX_WAIT_MS;
-  for (;;) {
-    let owned = false;
-    const releaseMutex = await acquireRestartMutex(mutexPath, token);
-    try {
-      // Read and (if appropriate) mutate the lock atomically under the mutex.
-      let current: string | null = null;
-      let absent = false;
-      try {
-        current = await readFile(lockPath, 'utf8');
-      } catch (readErr) {
-        if ((readErr as NodeJS.ErrnoException).code === 'ENOENT') {
-          absent = true;
-        } else {
-          current = null; // Unreadable/corrupt: treat as stale.
-        }
-      }
-      const now = Date.now();
-      if (absent) {
-        // Lock is free — take it.
-        await writeFile(lockPath, formatRestartLockContent(token));
-        owned = true;
-      } else {
-        const stale = current === null || isRestartLockContentStale(current, now);
-        const timedOut = now >= deadline;
-        if (stale || timedOut) {
-          process.stderr.write(
-            stale
-              ? 'Breaking stale fleet restart lock.\n'
-              : `Timed out after ${RESTART_LOCK_MAX_WAIT_MS}ms waiting for the in-flight fleet ` +
-                  'restart; breaking the lock.\n',
-          );
-          // Takeover is just an overwrite — safe because we hold the mutex, so no
-          // acquirer or releaser can touch the lock between our read and this write.
-          await writeFile(lockPath, formatRestartLockContent(token));
-          owned = true;
-        }
-        // else: a fresh restart owns it — wait below and re-evaluate.
-      }
-    } finally {
-      await releaseMutex();
-    }
-    if (owned) {
-      return { release };
-    }
-    await sleepFn(RESTART_LOCK_POLL_INTERVAL_MS);
-  }
-}
-
 /**
 * Returns the systemctl --user enable command for a given unit.
 * Used by the install auto-enable step to persist units across reboots.
@@ -1472,7 +1172,6 @@ export function isSendAccepted(capturedOutput: string): SendVerifyResult {

 export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
  const runner = deps.runner ?? runCommand;
-  const sleepFn = deps.sleepFn ?? defaultSleep;
  const paths = resolveFleetPaths(deps.mosaicHome);
  const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();

@@ -1586,22 +1285,9 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
      .command(`${action} [agent]`)
      .description(`${action} the fleet holder or one agent`)
      .action(async (agent?: string) => {
-        const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>();
-        const activePaths = resolveFleetPaths(commandOpts.mosaicHome);
        const roster = await loadRosterForCommand(cmd);
        if (agent) {
          getRosterAgent(roster, agent);
-          // Single-agent restart is guarded too: it can race a full restart that
-          // is tearing the shared holder down.
-          if (action === 'restart') {
-            const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
-            try {
-              await runChecked(runner, buildFleetServiceCommand(action, agent));
-            } finally {
-              await guard.release();
-            }
-            return;
-          }
          await runChecked(runner, buildFleetServiceCommand(action, agent));
          return;
        }
@@ -1612,21 +1298,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
          );
          return;
        }
-        if (action === 'restart') {
-          // Serialize the holder+agents teardown/relaunch behind the restart lock
-          // so a re-entrant restart waits for clean shutdown before relaunching,
-          // instead of racing a half-torn-down holder into a tight loop.
-          const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
-          try {
-            await runChecked(runner, buildFleetServiceCommand(action));
-            for (const rosterAgent of roster.agents) {
-              await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
-            }
-          } finally {
-            await guard.release();
-          }
-          return;
-        }
        await runChecked(runner, buildFleetServiceCommand(action));
        for (const rosterAgent of roster.agents) {
          await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
--- a/packages/mosaic/vitest.config.ts
+++ b/packages/mosaic/vitest.config.ts
@@ -4,6 +4,5 @@ export default defineConfig({
  test: {
    globals: true,
    environment: 'node',
-    testTimeout: 30_000,
  },
 });
--- a/tools/install.sh
+++ b/tools/install.sh
@@ -16,10 +16,6 @@
 #   --framework       Install/upgrade framework only (skip npm CLI)
 #   --cli             Install/upgrade npm CLI only (skip framework)
 #   --ref <branch>    Git ref for framework archive (default: main)
-#   --dev             Build CLI + gateway FROM SOURCE at --ref instead of the
-#                     registry @latest. Zero registry writes — packs local
-#                     tarballs and installs them globally. Use to test a branch
-#                     end-to-end before cutting a release.
 #   --yes             Accept all defaults; headless/non-interactive install
 #   --no-auto-launch  Skip automatic mosaic wizard + gateway install on first install
 #   --uninstall       Reverse the install: remove framework dir, CLI package, and npmrc line
@@ -31,7 +27,6 @@
 #   MOSAIC_PREFIX       — npm global prefix          (default: ~/.npm-global)
 #   MOSAIC_NO_COLOR     — disable colour             (set to 1)
 #   MOSAIC_REF          — git ref for framework      (default: main)
-#   MOSAIC_DEV          — equivalent to --dev         (set to 1)
 #   MOSAIC_ASSUME_YES   — equivalent to --yes        (set to 1)
 # ──────────────────────────────────────────────────────────────────────────────
 #
@@ -48,7 +43,6 @@ FLAG_CLI=true
 FLAG_NO_AUTO_LAUNCH=false
 FLAG_YES=false
 FLAG_UNINSTALL=false
-FLAG_DEV=false
 GIT_REF="${MOSAIC_REF:-main}"

 # MOSAIC_ASSUME_YES env var acts the same as --yes
@@ -56,18 +50,12 @@ if [[ "${MOSAIC_ASSUME_YES:-0}" == "1" ]]; then
  FLAG_YES=true
 fi

-# MOSAIC_DEV env var acts the same as --dev
-if [[ "${MOSAIC_DEV:-0}" == "1" ]]; then
-  FLAG_DEV=true
-fi
-
 while [[ $# -gt 0 ]]; do
  case "$1" in
    --check)          FLAG_CHECK=true; shift ;;
    --framework)      FLAG_CLI=false; shift ;;
    --cli)            FLAG_FRAMEWORK=false; shift ;;
    --ref)            GIT_REF="${2:-main}"; shift 2 ;;
-    --dev)            FLAG_DEV=true; shift ;;
    --yes|-y)         FLAG_YES=true; shift ;;
    --no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
    --uninstall)      FLAG_UNINSTALL=true; shift ;;
@@ -84,17 +72,6 @@ CLI_PKG="${SCOPE}/mosaic"
 REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
 ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"

-# In dev (build-from-source) mode the gateway is installed globally from a
-# locally-built tarball. Tell the wizard / gateway-config stage NOT to overwrite
-# it with the registry @latest build (honored by gatewayConfigStage).
-if [[ "$FLAG_DEV" == "true" ]]; then
-  export MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1
-fi
-
-# Shared monorepo checkout (populated on demand by ensure_monorepo).
-WORK_DIR=""
-EXTRACTED_DIR=""
-
 # ─── uninstall path ───────────────────────────────────────────────────────────
 # Shell-level uninstall for when the CLI is broken or not available.
 # Handles: framework directory, npm CLI package, npmrc scope line.
@@ -262,99 +239,6 @@ framework_version() {
  fi
 }

-# Download + extract the monorepo archive at $GIT_REF exactly once per run.
-# Sets the script-level EXTRACTED_DIR to the repo root. Reused by both the
-# framework install (Part 1) and the dev build-from-source path (Part 2).
-ensure_monorepo() {
-  if [[ -n "$EXTRACTED_DIR" ]] && [[ -d "$EXTRACTED_DIR" ]]; then
-    return 0
-  fi
-
-  require_cmd tar
-
-  WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
-  # shellcheck disable=SC2317
-  cleanup_work() { [[ -n "$WORK_DIR" ]] && rm -rf "$WORK_DIR"; }
-  trap cleanup_work EXIT
-
-  info "Downloading source from ${GIT_REF}…"
-  if command -v curl &>/dev/null; then
-    curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
-  elif command -v wget &>/dev/null; then
-    wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
-  else
-    fail "curl or wget required to download source."
-    exit 1
-  fi
-
-  # Gitea archives extract to <repo-name>/ inside the work dir
-  EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
-  if [[ -z "$EXTRACTED_DIR" ]] || [[ ! -d "$EXTRACTED_DIR" ]]; then
-    fail "Could not locate extracted source in archive."
-    ls -la "$WORK_DIR" >&2
-    exit 1
-  fi
-}
-
-# Build @mosaicstack/mosaic + @mosaicstack/gateway from source and install both
-# globally from locally-packed tarballs. ZERO registry writes. Workspace deps
-# (brain/config/db/…) are pulled from the registry at the versions pinned in
-# each package.json — `pnpm pack` rewrites `workspace:*` to those versions.
-install_cli_from_source() {
-  local src="$EXTRACTED_DIR"
-  local out_dir="$WORK_DIR/dist-tarballs"
-  mkdir -p "$out_dir"
-
-  # pnpm via corepack (ships with Node >= 16.9; required by Node >= 20 preflight).
-  # Pin to the repo's packageManager version so the build matches CI. Surface
-  # corepack failures so the fresh-machine case gives an actionable error
-  # instead of a bare "command not found".
-  if ! command -v pnpm &>/dev/null; then
-    info "Activating pnpm via corepack…"
-    corepack enable 2>&1 | sed 's/^/  /' || warn "corepack enable failed — pnpm may need manual install."
-    corepack prepare pnpm@10.6.2 --activate 2>&1 | sed 's/^/  /' \
-      || warn "corepack prepare failed — pnpm may need manual install."
-  fi
-  if ! command -v pnpm &>/dev/null; then
-    fail "pnpm not available after corepack activation."
-    echo "  Install pnpm manually (https://pnpm.io/installation) and re-run with --dev."
-    exit 1
-  fi
-
-  info "Installing workspace dependencies (pnpm install)…"
-  ( cd "$src" && pnpm install ) 2>&1 | sed 's/^/  /'
-
-  info "Building CLI + gateway from source…"
-  ( cd "$src" && pnpm --filter "@mosaicstack/mosaic..." --filter "@mosaicstack/gateway..." run build ) 2>&1 | sed 's/^/  /'
-
-  info "Packing local tarballs…"
-  ( cd "$src/packages/mosaic" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'
-  ( cd "$src/apps/gateway"    && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'
-
-  local cli_tgz gw_tgz
-  cli_tgz="$(ls -1t "$out_dir"/mosaicstack-mosaic-*.tgz 2>/dev/null | head -1)"
-  gw_tgz="$(ls -1t "$out_dir"/mosaicstack-gateway-*.tgz 2>/dev/null | head -1)"
-
-  if [[ ! -f "$cli_tgz" ]]; then
-    fail "CLI tarball was not produced by pnpm pack."
-    exit 1
-  fi
-  if [[ ! -f "$gw_tgz" ]]; then
-    fail "Gateway tarball was not produced by pnpm pack."
-    exit 1
-  fi
-
-  # Gateway first so it is present globally before the CLI's wizard runs (which
-  # skips its own gateway install via MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1).
-  info "Installing gateway from source tarball (global)…"
-  npm install -g "$gw_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'
-
-  info "Installing CLI from source tarball (global)…"
-  npm install -g "$cli_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'
-
-  ok "Installed from source: CLI $(installed_cli_version)"
-}
-
 # ─── preflight ────────────────────────────────────────────────────────────────

 require_cmd node
@@ -398,8 +282,25 @@ if [[ "$FLAG_FRAMEWORK" == "true" ]]; then
      warn "Framework not installed."
    fi
  else
-    # Download repo archive and extract framework (shared with the dev build)
-    ensure_monorepo
+    # Download repo archive and extract framework
+    require_cmd tar
+
+    WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
+    cleanup_work() { rm -rf "$WORK_DIR"; }
+    trap cleanup_work EXIT
+
+    info "Downloading framework from ${GIT_REF}…"
+    if command -v curl &>/dev/null; then
+      curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
+    elif command -v wget &>/dev/null; then
+      wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
+    else
+      fail "curl or wget required to download framework."
+      exit 1
+    fi
+
+    # Gitea archives extract to <repo-name>/ inside the work dir
+    EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
    FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"

    if [[ ! -d "$FRAMEWORK_SRC" ]]; then
@@ -455,11 +356,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
  fi

  CURRENT="$(installed_cli_version)"
-  if [[ "$FLAG_DEV" == "true" ]]; then
-    LATEST=""
-  else
-    LATEST="$(latest_cli_version)"
-  fi
+  LATEST="$(latest_cli_version)"

  if [[ -n "$CURRENT" ]]; then
    dim "  Installed: ${CLI_PKG}@${CURRENT}"
@@ -467,9 +364,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
    dim "  Installed: (none)"
  fi

-  if [[ "$FLAG_DEV" == "true" ]]; then
-    dim "  Source:    ${REPO_BASE} (ref: ${GIT_REF}, build-from-source)"
-  elif [[ -n "$LATEST" ]]; then
+  if [[ -n "$LATEST" ]]; then
    dim "  Latest:    ${CLI_PKG}@${LATEST}"
  else
    dim "  Latest:    (registry unreachable)"
@@ -477,9 +372,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
  echo ""

  if [[ "$FLAG_CHECK" == "true" ]]; then
-    if [[ "$FLAG_DEV" == "true" ]]; then
-      info "Dev mode: installed version is ${CURRENT:-(none)} (no registry comparison)."
-    elif [[ -z "$LATEST" ]]; then
+    if [[ -z "$LATEST" ]]; then
      warn "Could not reach registry."
    elif [[ -z "$CURRENT" ]]; then
      warn "Not installed."
@@ -490,16 +383,6 @@ if [[ "$FLAG_CLI" == "true" ]]; then
    else
      ok "Up to date (or ahead of registry)."
    fi
-  elif [[ "$FLAG_DEV" == "true" ]]; then
-    info "Dev mode — building CLI + gateway from source at ref ${GIT_REF}…"
-    ensure_monorepo
-    install_cli_from_source
-
-    # PATH check for npm prefix
-    if [[ ":$PATH:" != *":$PREFIX/bin:"* ]]; then
-      warn "$PREFIX/bin is not on your PATH"
-      dim "  Add to your shell rc:  export PATH=\"$PREFIX/bin:\$PATH\""
-    fi
  else
    if [[ -z "$LATEST" ]]; then
      warn "Could not reach registry at $REGISTRY — skipping npm CLI."