feat(#462): add federation list verb

2026-06-24 18:24:57 -05:00
9 changed files with 875 additions and 831 deletions
--- a/apps/gateway/src/federation/federation.module.ts
+++ b/apps/gateway/src/federation/federation.module.ts
@@ -8,9 +8,11 @@ import { CapabilitiesController } from './server/verbs/capabilities.controller.j
 import { GrantsService } from './grants.service.js';
 import { FederationClientService, QuerySourceService } from './client/index.js';
 import { FederationAuthGuard, FederationScopeService } from './server/index.js';
 import { ListController } from './server/verbs/list.controller.js';
 import { FederationListQueryService } from './server/verbs/list-query.service.js';
@Module({
-  controllers: [EnrollmentController, FederationController, CapabilitiesController],
+  controllers: [EnrollmentController, FederationController, CapabilitiesController, ListController],
  providers: [
    AdminGuard,
    CaService,
@@ -20,6 +22,7 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
    QuerySourceService,
    FederationAuthGuard,
    FederationScopeService,
    FederationListQueryService,
  ],
  exports: [
    CaService,
@@ -29,6 +32,7 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
    QuerySourceService,
    FederationAuthGuard,
    FederationScopeService,
    FederationListQueryService,
  ],
 })
 export class FederationModule {}
--- a/apps/gateway/src/federation/server/verbs/tests/list-query.service.spec.ts
+++ b/apps/gateway/src/federation/server/verbs/tests/list-query.service.spec.ts
@@ -0,0 +1,118 @@
 import { describe, expect, it, vi } from 'vitest';
 import type { Db } from '@mosaicstack/db';
 import type { FederationScopeQueryFilter } from '../../scope.service.js';
 import { FederationListQueryService } from '../list-query.service.js';
 // Shared scope filter for these tests: personal-only access to `tasks` with a
 // page size (`limit`) of 2, matching the scope row cap (`maxRowsPerQuery`).
 const TASK_FILTER: FederationScopeQueryFilter = {
  resource: 'tasks',
  subjectUserId: 'user-1',
  includePersonal: true,
  teamIds: [],
  limit: 2,
  maxRowsPerQuery: 2,
 };
 /** Builds the service under test around an empty placeholder object standing in for the Db. */
 function makeService() {
  const placeholderDb = {} as Db;
  return new FederationListQueryService(placeholderDb);
 }
 /**
  * Replaces the service's private `listAllRows` with a mock that resolves the
  * given pages in order, one page per call.
  *
  * @param service - Service instance whose row loader is swapped out.
  * @param pages - Row arrays to resolve, in call order.
  * @returns The installed mock, so tests can assert on its calls.
  */
 function stubRows(
  service: FederationListQueryService,
  ...pages: Array<Array<Record<string, unknown>>>
 ) {
  type ListAllRows = (
    _filter: FederationScopeQueryFilter,
    _rowLimit: number,
    _cursor: unknown,
  ) => Promise<Array<Record<string, unknown>>>;
  const rowLoader = vi.fn();
  pages.forEach((page) => {
    rowLoader.mockResolvedValueOnce(page);
  });
  // The method is private, so widen the type just enough to assign the stub.
  const patchable = service as unknown as { listAllRows: ListAllRows };
  patchable.listAllRows = rowLoader;
  return rowLoader;
 }
 // Unit tests for FederationListQueryService. The service is built around an
 // empty Db placeholder and, where rows are needed, `listAllRows` is stubbed.
 describe('FederationListQueryService', () => {
  it('denies sensitive resources in native RBAC for M3 list reads', async () => {
    const service = makeService();
    await expect(
      service.evaluateReadAccess({
        grantId: 'grant-1',
        peerId: 'peer-1',
        subjectUserId: 'user-1',
        resource: 'credentials',
      }),
    ).resolves.toMatchObject({
      allowed: false,
      reason: 'credentials federation list access is not implemented in M3',
    });
  });
  it('allows personal memory reads without requiring team lookup', async () => {
    // The Db placeholder is an empty object, so this only passes if the
    // `memory` branch returns before any team-membership query is issued.
    const service = makeService();
    await expect(
      service.evaluateReadAccess({
        grantId: 'grant-1',
        peerId: 'peer-1',
        subjectUserId: 'user-1',
        resource: 'memory',
      }),
    ).resolves.toEqual({
      allowed: true,
      access: { includePersonal: true, teamIds: [] },
    });
  });
  it('applies the scope row cap and returns an opaque next cursor when truncated', async () => {
    const service = makeService();
    // First call resolves limit + 1 rows (3 for limit 2), which marks the page
    // as truncated; the second call resolves the single remaining row.
    const listAllRows = stubRows(
      service,
      [
        { id: '3', createdAt: new Date('2026-06-24T03:00:00.000Z') },
        { id: '2', createdAt: new Date('2026-06-24T02:00:00.000Z') },
        { id: '1', createdAt: new Date('2026-06-24T01:00:00.000Z') },
      ],
      [{ id: '1', createdAt: new Date('2026-06-24T01:00:00.000Z') }],
    );
    const firstPage = await service.list({ filter: TASK_FILTER });
    expect(firstPage).toEqual({
      items: [
        { id: '3', createdAt: new Date('2026-06-24T03:00:00.000Z') },
        { id: '2', createdAt: new Date('2026-06-24T02:00:00.000Z') },
      ],
      truncated: true,
      nextCursor: expect.any(String),
    });
    // Row limit passed down is limit + 1; no cursor on the first page.
    expect(listAllRows).toHaveBeenNthCalledWith(1, TASK_FILTER, 3, undefined);
    const secondPage = await service.list({ filter: TASK_FILTER, cursor: firstPage.nextCursor });
    expect(secondPage).toEqual({
      items: [{ id: '1', createdAt: new Date('2026-06-24T01:00:00.000Z') }],
      truncated: false,
    });
    // The decoded cursor must point at the last row returned on the first page.
    expect(listAllRows).toHaveBeenNthCalledWith(
      2,
      TASK_FILTER,
      3,
      expect.objectContaining({ id: '2' }),
    );
  });
  it('rejects invalid cursors instead of falling back to the first page', async () => {
    const service = makeService();
    stubRows(service, [{ id: '1' }]);
    await expect(service.list({ filter: TASK_FILTER, cursor: 'not-base64-json' })).rejects.toThrow(
      'Invalid federation list cursor',
    );
  });
 });
--- a/apps/gateway/src/federation/server/verbs/tests/list.controller.spec.ts
+++ b/apps/gateway/src/federation/server/verbs/tests/list.controller.spec.ts
@@ -0,0 +1,170 @@
 import 'reflect-metadata';
 import { RequestMethod } from '@nestjs/common';
 import type { FastifyRequest } from 'fastify';
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 import { FederationAuthGuard } from '../../federation-auth.guard.js';
 import type {
  FederationScopeEvaluationResult,
  FederationScopeQueryFilter,
 } from '../../scope.service.js';
 import { ListController } from '../list.controller.js';
 import type { FederationListQueryResult } from '../list-query.service.js';
 // Grant context attached to the fake request by `makeRequest`, standing in for
 // what the auth guard would provide.
 const FEDERATION_CONTEXT = {
  grantId: 'grant-1',
  peerId: 'peer-1',
  subjectUserId: 'user-1',
  scope: { resources: ['tasks'], max_rows_per_query: 25 },
 };
 // Query filter the mocked scope service returns by default when access is allowed.
 const TASK_FILTER: FederationScopeQueryFilter = {
  resource: 'tasks',
  subjectUserId: 'user-1',
  includePersonal: true,
  teamIds: ['team-1'],
  limit: 10,
  maxRowsPerQuery: 25,
 };
 /** Builds a minimal fake request carrying only the federation context. */
 function makeRequest(): FastifyRequest {
  const partialRequest = { federationContext: FEDERATION_CONTEXT };
  return partialRequest as unknown as FastifyRequest;
 }
 /** Wraps a query filter (default: TASK_FILTER) in an "allowed" scope evaluation result. */
 function allowedScope(
  filter: FederationScopeQueryFilter = TASK_FILTER,
 ): FederationScopeEvaluationResult {
  const granted: FederationScopeEvaluationResult = { allowed: true, filter };
  return granted;
 }
 /**
  * Builds a ListController wired to mocked scope and query collaborators.
  *
  * @param opts - Optional overrides: `scopeResult` is what the scope mock
  *   resolves to (default: allowed with TASK_FILTER); `queryResult` is what the
  *   query mock's `list` resolves to (default: one task row, not truncated).
  * @returns The controller together with both mocks for call assertions.
  */
 function makeController(opts?: {
  scopeResult?: FederationScopeEvaluationResult;
  queryResult?: FederationListQueryResult;
 }) {
  const defaultQueryResult: FederationListQueryResult = {
    items: [
      {
        id: 'task-1',
        title: 'Federated task',
        createdAt: new Date('2026-06-24T00:00:00.000Z'),
      },
    ],
    truncated: false,
  };
  const evaluateAccess = vi.fn().mockResolvedValue(opts?.scopeResult ?? allowedScope());
  const evaluateReadAccess = vi.fn();
  const list = vi.fn().mockResolvedValue(opts?.queryResult ?? defaultQueryResult);
  const scope = { evaluateAccess };
  const query = { evaluateReadAccess, list };
  const controller = new ListController(scope as never, query as never);
  return { controller, scope, query };
 }
 // Unit tests for ListController, calling `list` directly against mocked scope
 // and query collaborators.
 describe('ListController', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });
  it('declares POST /api/federation/v1/list/:resource protected only by FederationAuthGuard', () => {
    // Route, method and guard are asserted through the decorator metadata.
    expect(Reflect.getMetadata('path', ListController)).toBe('api/federation/v1/list');
    expect(Reflect.getMetadata('path', ListController.prototype.list)).toBe(':resource');
    expect(Reflect.getMetadata('method', ListController.prototype.list)).toBe(RequestMethod.POST);
    expect(Reflect.getMetadata('__guards__', ListController)).toEqual([FederationAuthGuard]);
  });
  it('runs AuthGuard context through ScopeService and returns local-source tagged rows', async () => {
    const { controller, scope, query } = makeController();
    const response = await controller.list('tasks', makeRequest(), { limit: 10 });
    // The query service itself is handed to the scope service as the native RBAC evaluator.
    expect(scope.evaluateAccess).toHaveBeenCalledWith({
      context: FEDERATION_CONTEXT,
      resource: 'tasks',
      requestedLimit: 10,
      nativeRbac: query,
    });
    expect(query.list).toHaveBeenCalledWith({ filter: TASK_FILTER, cursor: undefined });
    // Not truncated: the response carries neither `nextCursor` nor `_truncated`.
    expect(response).toEqual({
      items: [
        {
          id: 'task-1',
          title: 'Federated task',
          createdAt: new Date('2026-06-24T00:00:00.000Z'),
          _source: 'local',
        },
      ],
    });
  });
  it('preserves pagination metadata when row cap truncates the query layer result', async () => {
    const { controller } = makeController({
      queryResult: {
        items: [{ id: 'task-1' }],
        nextCursor: 'cursor-2',
        truncated: true,
      },
    });
    const response = await controller.list('tasks', makeRequest(), { cursor: 'cursor-1' });
    // The query layer's `truncated` flag surfaces as `_truncated` in the response.
    expect(response).toEqual({
      items: [{ id: 'task-1', _source: 'local' }],
      nextCursor: 'cursor-2',
      _truncated: true,
    });
  });
  it('returns a federation error envelope when scope evaluation denies access', async () => {
    const { controller, query } = makeController({
      scopeResult: {
        allowed: false,
        deny: {
          code: 'resource_excluded',
          stage: 'resource_exclusion',
          statusCode: 403,
          message: 'Requested federation resource is explicitly excluded by grant scope',
          grantId: 'grant-1',
          peerId: 'peer-1',
          subjectUserId: 'user-1',
          resource: 'credentials',
        },
      },
    });
    // A non-400 deny is expected to surface with the `scope_violation` envelope
    // code while keeping the deny's message and status code.
    await expect(controller.list('credentials', makeRequest(), {})).rejects.toMatchObject({
      response: {
        error: {
          code: 'scope_violation',
          message: 'Requested federation resource is explicitly excluded by grant scope',
        },
      },
      status: 403,
    });
    expect(query.list).not.toHaveBeenCalled();
  });
  it('rejects malformed request body fields before querying storage', async () => {
    const { controller, scope, query } = makeController();
    await expect(controller.list('tasks', makeRequest(), { cursor: 123 })).rejects.toMatchObject({
      response: { error: { code: 'invalid_request' } },
      status: 400,
    });
    await expect(controller.list('tasks', makeRequest(), { limit: false })).rejects.toMatchObject({
      response: { error: { code: 'invalid_request' } },
      status: 400,
    });
    await expect(controller.list('tasks', makeRequest(), { limit: 'abc' })).rejects.toMatchObject({
      response: { error: { code: 'invalid_request' } },
      status: 400,
    });
    // Body validation must fail before scope evaluation or any storage query runs.
    expect(scope.evaluateAccess).not.toHaveBeenCalled();
    expect(query.list).not.toHaveBeenCalled();
  });
 });
--- a/apps/gateway/src/federation/server/verbs/list-query.service.ts
+++ b/apps/gateway/src/federation/server/verbs/list-query.service.ts
@@ -0,0 +1,365 @@
 /**
 * Federation list query layer (FED-M3-05).
 *
 * Read-only DB adapter used by ListController after FederationAuthGuard and
 * FederationScopeService have established the subject user, allowed resource,
 * native-RBAC intersection, and row cap. Audit writes are intentionally
 * deferred to M4.
 */
 import { Inject, Injectable } from '@nestjs/common';
 import {
  and,
  desc,
  eq,
  inArray,
  insights,
  isNotNull,
  lt,
  missionTasks,
  missions,
  or,
  preferences,
  projects,
  tasks,
  teamMembers,
  type Db,
 } from '@mosaicstack/db';
 import type {
  FederationNativeRbacEvaluator,
  FederationNativeRbacRequest,
  FederationNativeRbacResult,
  FederationScopeQueryFilter,
 } from '../scope.service.js';
 import { DB } from '../../../database/database.module.js';
 /** Input to `FederationListQueryService.list`. */
 export interface FederationListQueryRequest {
  /** Scope-derived query filter; its `limit` is used as the page size. */
  readonly filter: FederationScopeQueryFilter;
  /** Opaque cursor taken from a previous page's `nextCursor`; omit for the first page. */
  readonly cursor?: string;
 }
 /** One page of list results. */
 export interface FederationListQueryResult<T extends object = Record<string, unknown>> {
  /** Rows on this page, at most `limit` of them. */
  readonly items: T[];
  /** Cursor for the following page; only set when `truncated` and the last row could be encoded. */
  readonly nextCursor?: string;
  /** True when more rows were fetched than fit on this page. */
  readonly truncated: boolean;
 }
 /** Loosely typed row shape shared by all list queries. */
 type RowObject = Record<string, unknown>;
 /** Decoded keyset position: rows are paged by `(createdAt, id)`. */
 interface KeysetCursor {
  readonly createdAt: Date;
  readonly id: string;
 }
 /**
  * Serializes a row's keyset position as an opaque base64url cursor.
  *
  * @param row - Row whose `createdAt` and `id` identify the position.
  * @returns The encoded cursor, or `undefined` when `createdAt` is not a Date
  *   or `id` is not a string.
  */
 function encodeCursor(row: RowObject): string | undefined {
  const timestamp = row['createdAt'];
  const rowId = row['id'];
  if (timestamp instanceof Date && typeof rowId === 'string') {
    const payload = JSON.stringify({ createdAt: timestamp.toISOString(), id: rowId });
    return Buffer.from(payload, 'utf8').toString('base64url');
  }
  return undefined;
 }
 /**
  * Parses an opaque base64url cursor back into a keyset position.
  *
  * @param cursor - Cursor string from the client, or `undefined` for the first page.
  * @returns The decoded position, or `undefined` when no cursor was supplied.
  * @throws Error with message `Invalid federation list cursor` when the cursor
  *   is not base64url-encoded JSON of an object with a parseable `createdAt`
  *   string and a non-empty `id` string.
  */
 function decodeCursor(cursor: string | undefined): KeysetCursor | undefined {
  if (cursor === undefined) {
    return undefined;
  }
  // Every failure mode surfaces as the same error so callers can match on one message.
  const invalidCursor = (): Error => new Error('Invalid federation list cursor');
  let payload: unknown;
  try {
    payload = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf8'));
  } catch {
    throw invalidCursor();
  }
  if (typeof payload !== 'object' || payload === null) {
    throw invalidCursor();
  }
  const candidate = payload as { createdAt?: unknown; id?: unknown };
  const rawCreatedAt = candidate.createdAt;
  const rawId = candidate.id;
  if (typeof rawCreatedAt !== 'string' || typeof rawId !== 'string' || rawId.length === 0) {
    throw invalidCursor();
  }
  const createdAt = new Date(rawCreatedAt);
  if (Number.isNaN(createdAt.getTime())) {
    throw invalidCursor();
  }
  return { createdAt, id: rawId };
 }
 /**
  * Cuts an over-fetched row list down to one page.
  *
  * @param rows - Rows in final order; expected to hold up to one row more than `limit`.
  * @param limit - Maximum number of rows on the page.
  * @returns The page, a `truncated` flag set when `rows` held more than `limit`
  *   rows, and a `nextCursor` built from the last page row when it can be encoded.
  */
 function paginate<T extends RowObject>(rows: T[], limit: number): FederationListQueryResult<T> {
  const items = rows.slice(0, limit);
  const truncated = rows.length > limit;
  if (!truncated) {
    return { items, truncated };
  }
  // An empty page (limit 0) or a row without a usable key yields no cursor.
  const cursor = encodeCursor(items[items.length - 1] ?? {});
  if (cursor === undefined) {
    return { items, truncated };
  }
  return { items, truncated, nextCursor: cursor };
 }
 /**
  * Returns a sorted copy of the rows, newest `createdAt` first and, within the
  * same timestamp, by `id` descending (string comparison via `localeCompare`).
  * Rows without a Date `createdAt` sort as timestamp 0; a missing `id` sorts as ''.
  * The input array is not modified.
  */
 function sortRows(rows: RowObject[]): RowObject[] {
  const epochOf = (row: RowObject): number => {
    const value = row['createdAt'];
    return value instanceof Date ? value.getTime() : 0;
  };
  const idOf = (row: RowObject): string => String(row['id'] ?? '');
  const sorted = rows.slice();
  sorted.sort((left, right) => {
    const delta = epochOf(right) - epochOf(left);
    return delta !== 0 ? delta : idOf(right).localeCompare(idOf(left));
  });
  return sorted;
 }
@Injectable()
 /**
  * Read-only query layer behind the federation list verb. It also acts as the
  * native-RBAC evaluator (`FederationNativeRbacEvaluator`) for list reads.
  */
 export class FederationListQueryService implements FederationNativeRbacEvaluator {
  constructor(@Inject(DB) private readonly db: Db) {}
  /**
   * Decides what the subject user may read for the requested resource.
   *
   * - `credentials` / `api_keys`: always denied, with a reason naming the resource.
   * - `memory`: allowed for personal rows only; no team lookup is performed.
   * - anything else: allowed for personal rows plus the teams the subject belongs to.
   *
   * @param request - Native RBAC request carrying the resource and subject user.
   * @returns The allow/deny decision with the access envelope when allowed.
   */
  async evaluateReadAccess(
    request: FederationNativeRbacRequest,
  ): Promise<FederationNativeRbacResult> {
    if (request.resource === 'credentials' || request.resource === 'api_keys') {
      return {
        allowed: false,
        reason: `${request.resource} federation list access is not implemented in M3`,
        details: { resource: request.resource },
      };
    }
    if (request.resource === 'memory') {
      return { allowed: true, access: { includePersonal: true, teamIds: [] } };
    }
    const teamIds = await this.listSubjectTeamIds(request.subjectUserId);
    return { allowed: true, access: { includePersonal: true, teamIds } };
  }
  /**
   * Returns one page of rows for the resource named in the filter.
   *
   * @param request - Scope filter (its `limit` is the page size) and optional cursor.
   * @returns The page, whether it was truncated, and a cursor for the next page.
   * @throws Error with message `Invalid federation list cursor` when the cursor
   *   cannot be decoded.
   */
  async list<T extends RowObject = RowObject>(
    request: FederationListQueryRequest,
  ): Promise<FederationListQueryResult<T>> {
    const cursor = decodeCursor(request.cursor);
    // Ask for one row beyond the page size so truncation can be detected.
    const rows = await this.listAllRows(request.filter, request.filter.limit + 1, cursor);
    return paginate(rows as T[], request.filter.limit);
  }
  /** Dispatches to the per-resource query; sensitive resources yield no rows. */
  private async listAllRows(
    filter: FederationScopeQueryFilter,
    rowLimit: number,
    cursor: KeysetCursor | undefined,
  ): Promise<RowObject[]> {
    switch (filter.resource) {
      case 'tasks':
        return this.listTasks(filter, rowLimit, cursor);
      case 'notes':
        return this.listNotes(filter, rowLimit, cursor);
      case 'memory':
        return this.listMemory(filter, rowLimit, cursor);
      case 'credentials':
      case 'api_keys':
        return [];
    }
  }
  /** Returns the ids of every team the subject user is a member of. */
  private async listSubjectTeamIds(subjectUserId: string): Promise<string[]> {
    const rows = await this.db
      .select({ teamId: teamMembers.teamId })
      .from(teamMembers)
      .where(eq(teamMembers.userId, subjectUserId));
    return rows.map((row) => row.teamId);
  }
  /**
   * Returns ids of projects owned by the subject user (when personal access is
   * included) or by any of the filter's teams. Empty when neither applies.
   */
  private async listAccessibleProjectIds(filter: FederationScopeQueryFilter): Promise<string[]> {
    const clauses = [];
    if (filter.includePersonal) {
      clauses.push(and(eq(projects.ownerType, 'user'), eq(projects.ownerId, filter.subjectUserId)));
    }
    if (filter.teamIds.length > 0) {
      clauses.push(
        and(eq(projects.ownerType, 'team'), inArray(projects.teamId, [...filter.teamIds])),
      );
    }
    if (clauses.length === 0) {
      return [];
    }
    const rows = await this.db
      .select({ id: projects.id })
      .from(projects)
      .where(clauses.length === 1 ? clauses[0] : or(...clauses));
    return rows.map((row) => row.id);
  }
  /** Returns ids of missions belonging to the given projects; no query for an empty list. */
  private async listMissionIds(projectIds: readonly string[]): Promise<string[]> {
    if (projectIds.length === 0) {
      return [];
    }
    const rows = await this.db
      .select({ id: missions.id })
      .from(missions)
      .where(inArray(missions.projectId, [...projectIds]));
    return rows.map((row) => row.id);
  }
  /**
   * Lists tasks attached to an accessible project or to a mission of one,
   * newest first, starting after the cursor position when one is given.
   *
   * @returns At most `rowLimit` rows; empty when no project is accessible.
   */
  private async listTasks(
    filter: FederationScopeQueryFilter,
    rowLimit: number,
    cursor: KeysetCursor | undefined,
  ): Promise<RowObject[]> {
    const projectIds = await this.listAccessibleProjectIds(filter);
    const missionIds = await this.listMissionIds(projectIds);
    const clauses = [];
    if (projectIds.length > 0) {
      clauses.push(inArray(tasks.projectId, projectIds));
    }
    if (missionIds.length > 0) {
      clauses.push(inArray(tasks.missionId, missionIds));
    }
    if (clauses.length === 0) {
      return [];
    }
    const scopeClause = clauses.length === 1 ? clauses[0] : or(...clauses);
    // Keyset predicate: strictly older rows, or same timestamp with a smaller id.
    const cursorClause = cursor
      ? or(
          lt(tasks.createdAt, cursor.createdAt),
          and(eq(tasks.createdAt, cursor.createdAt), lt(tasks.id, cursor.id)),
        )
      : undefined;
    const rows = await this.db
      .select({
        id: tasks.id,
        title: tasks.title,
        description: tasks.description,
        status: tasks.status,
        priority: tasks.priority,
        projectId: tasks.projectId,
        missionId: tasks.missionId,
        assignee: tasks.assignee,
        tags: tasks.tags,
        dueDate: tasks.dueDate,
        metadata: tasks.metadata,
        createdAt: tasks.createdAt,
        updatedAt: tasks.updatedAt,
      })
      .from(tasks)
      .where(and(scopeClause, cursorClause))
      .orderBy(desc(tasks.createdAt), desc(tasks.id))
      .limit(rowLimit);
    return sortRows(rows as RowObject[]);
  }
  /**
   * Lists non-empty `missionTasks.notes` rows (exposed as `content`) that the
   * subject owns or that belong to a mission of an accessible project, newest
   * first, starting after the cursor position when one is given.
   *
   * Empty-string notes can only be dropped after the SQL row limit has been
   * applied. Filtering a single batch would therefore shrink a full batch
   * below `rowLimit` and make the caller conclude there is no further page.
   * To avoid that, batches are read by keyset until `rowLimit` non-empty rows
   * are collected or the table is exhausted.
   *
   * @returns At most `rowLimit` rows; empty when nothing is in scope.
   */
  private async listNotes(
    filter: FederationScopeQueryFilter,
    rowLimit: number,
    cursor: KeysetCursor | undefined,
  ): Promise<RowObject[]> {
    const projectIds = await this.listAccessibleProjectIds(filter);
    const missionIds = await this.listMissionIds(projectIds);
    const clauses = [];
    if (filter.includePersonal) {
      clauses.push(eq(missionTasks.userId, filter.subjectUserId));
    }
    if (missionIds.length > 0) {
      clauses.push(inArray(missionTasks.missionId, missionIds));
    }
    if (clauses.length === 0) {
      return [];
    }
    const scopeClause = clauses.length === 1 ? clauses[0] : or(...clauses);
    const collected: RowObject[] = [];
    let position = cursor;
    for (;;) {
      const cursorClause = position
        ? or(
            lt(missionTasks.createdAt, position.createdAt),
            and(eq(missionTasks.createdAt, position.createdAt), lt(missionTasks.id, position.id)),
          )
        : undefined;
      const batch = await this.db
        .select({
          id: missionTasks.id,
          missionId: missionTasks.missionId,
          taskId: missionTasks.taskId,
          status: missionTasks.status,
          content: missionTasks.notes,
          createdAt: missionTasks.createdAt,
          updatedAt: missionTasks.updatedAt,
        })
        .from(missionTasks)
        .where(and(scopeClause, cursorClause, isNotNull(missionTasks.notes)))
        .orderBy(desc(missionTasks.createdAt), desc(missionTasks.id))
        .limit(rowLimit);
      for (const row of batch) {
        if (row.content !== '') {
          collected.push(row as RowObject);
        }
      }
      const last = batch[batch.length - 1];
      // Stop once the table is exhausted (short batch) or enough rows were kept.
      if (last === undefined || batch.length < rowLimit || collected.length >= rowLimit) {
        break;
      }
      // Continue after the last row read, including rows that were filtered out.
      const lastCreatedAt: unknown = last.createdAt;
      const lastId: unknown = last.id;
      if (!(lastCreatedAt instanceof Date) || typeof lastId !== 'string') {
        break;
      }
      position = { createdAt: lastCreatedAt, id: lastId };
    }
    return sortRows(collected.slice(0, rowLimit));
  }
  /**
   * Lists the subject user's `insights` and `preferences` rows as one merged,
   * newest-first list. Both tables are queried with the same cursor and row
   * limit, so the merged result may hold up to twice `rowLimit` rows.
   *
   * @returns The merged rows; empty when personal access is not included.
   */
  private async listMemory(
    filter: FederationScopeQueryFilter,
    rowLimit: number,
    cursor: KeysetCursor | undefined,
  ): Promise<RowObject[]> {
    if (!filter.includePersonal) {
      return [];
    }
    const insightCursorClause = cursor
      ? or(
          lt(insights.createdAt, cursor.createdAt),
          and(eq(insights.createdAt, cursor.createdAt), lt(insights.id, cursor.id)),
        )
      : undefined;
    const preferenceCursorClause = cursor
      ? or(
          lt(preferences.createdAt, cursor.createdAt),
          and(eq(preferences.createdAt, cursor.createdAt), lt(preferences.id, cursor.id)),
        )
      : undefined;
    // The two table reads are independent, so they run concurrently.
    const [insightRows, preferenceRows] = await Promise.all([
      this.db
        .select({
          id: insights.id,
          kind: insights.source,
          content: insights.content,
          category: insights.category,
          relevanceScore: insights.relevanceScore,
          metadata: insights.metadata,
          createdAt: insights.createdAt,
          updatedAt: insights.updatedAt,
        })
        .from(insights)
        .where(and(eq(insights.userId, filter.subjectUserId), insightCursorClause))
        .orderBy(desc(insights.createdAt), desc(insights.id))
        .limit(rowLimit),
      this.db
        .select({
          id: preferences.id,
          kind: preferences.category,
          key: preferences.key,
          value: preferences.value,
          source: preferences.source,
          mutable: preferences.mutable,
          createdAt: preferences.createdAt,
          updatedAt: preferences.updatedAt,
        })
        .from(preferences)
        .where(and(eq(preferences.userId, filter.subjectUserId), preferenceCursorClause))
        .orderBy(desc(preferences.createdAt), desc(preferences.id))
        .limit(rowLimit),
    ]);
    return sortRows([...(insightRows as RowObject[]), ...(preferenceRows as RowObject[])]);
  }
 }
--- a/apps/gateway/src/federation/server/verbs/list.controller.ts
+++ b/apps/gateway/src/federation/server/verbs/list.controller.ts
@@ -0,0 +1,143 @@
 /**
 * Federation list verb (FED-M3-05).
 *
 * POST /api/federation/v1/list/:resource
 *
 * Pipeline: FederationAuthGuard attaches the active grant context, then
 * FederationScopeService enforces grant scope + native RBAC intersection, then
 * the read-only query layer returns capped rows tagged with `_source`. Read
 * audit-log writes are deferred to M4; this controller does not persist request
 * or response bodies.
 */
 import {
  Body,
  Controller,
  HttpException,
  Inject,
  Param,
  Post,
  Req,
  UseGuards,
 } from '@nestjs/common';
 import type { FastifyRequest } from 'fastify';
 import {
  FederationInvalidRequestError,
  FederationScopeViolationError,
  SOURCE_LOCAL,
  tagWithSource,
  type FederationListResponse,
  type SourceTag,
 } from '@mosaicstack/types';
 import { FederationAuthGuard } from '../federation-auth.guard.js';
 import '../federation-context.js';
 import { FederationScopeService } from '../scope.service.js';
 import { FederationListQueryService } from './list-query.service.js';
 /**
  * Raw request body of the list verb. Both fields are `unknown` because the
  * body is untrusted; `parseLimit` and `parseCursor` validate them.
  */
 interface FederationListRequestBody {
  readonly limit?: unknown;
  readonly cursor?: unknown;
 }
 /** A returned row with its source tag attached. */
 type FederatedRow = Record<string, unknown> & SourceTag;
 /**
  * Validates the optional `limit` field of the request body.
  *
  * Accepts a number or a non-blank numeric string; the value must be a safe
  * integer of at least 1.
  *
  * @param body - Raw request body, possibly absent.
  * @returns The parsed limit, or `undefined` when no limit was supplied.
  * @throws HttpException (400) with an invalid-request envelope for any other value.
  */
 function parseLimit(body: FederationListRequestBody | undefined): number | undefined {
  const raw = body?.limit;
  if (raw === undefined) {
    return undefined;
  }
  let candidate = Number.NaN;
  if (typeof raw === 'number') {
    candidate = raw;
  } else if (typeof raw === 'string' && raw.trim().length > 0) {
    candidate = Number(raw);
  }
  if (Number.isSafeInteger(candidate) && candidate >= 1) {
    return candidate;
  }
  const failure = new FederationInvalidRequestError(
    'Federation list limit must be a positive integer',
  );
  throw new HttpException(failure.toEnvelope(), 400);
 }
 /**
  * Validates the optional `cursor` field of the request body.
  *
  * @param body - Raw request body, possibly absent.
  * @returns The cursor string as given, or `undefined` when none was supplied.
  * @throws HttpException (400) with an invalid-request envelope when the
  *   cursor is present but not a string.
  */
 function parseCursor(body: FederationListRequestBody | undefined): string | undefined {
  const raw = body?.cursor;
  if (raw === undefined) {
    return undefined;
  }
  if (typeof raw !== 'string') {
    const failure = new FederationInvalidRequestError('Federation list cursor must be a string');
    throw new HttpException(failure.toEnvelope(), 400);
  }
  return raw;
 }
@Controller('api/federation/v1/list')
@UseGuards(FederationAuthGuard)
 /**
  * HTTP entry point for the federation list verb. Validates the body, lets the
  * scope service decide access, then returns the query layer's page with every
  * row tagged as coming from the local source.
  */
 export class ListController {
  constructor(
    @Inject(FederationScopeService) private readonly scope: FederationScopeService,
    @Inject(FederationListQueryService) private readonly query: FederationListQueryService,
  ) {}
  /**
   * Handles `POST :resource`.
   *
   * @param resource - Resource name taken from the route.
   * @param request - Incoming request; must carry `federationContext`.
   * @param body - Optional body with `limit` and `cursor`.
   * @returns The tagged rows, plus `nextCursor` / `_truncated` when the page was cut.
   * @throws Error when the federation context is missing.
   * @throws HttpException with a federation error envelope for malformed body
   *   fields (400), a scope denial (the deny's status code), or an invalid cursor (400).
   */
  @Post(':resource')
  async list(
    @Param('resource') resource: string,
    @Req() request: FastifyRequest,
    @Body() body?: FederationListRequestBody,
  ): Promise<FederationListResponse<FederatedRow>> {
    const context = request.federationContext;
    if (!context) {
      throw new Error('Federation context missing after auth guard');
    }
    // Body validation runs before scope evaluation and any storage access.
    const requestedLimit = parseLimit(body);
    const cursor = parseCursor(body);
    const decision = await this.scope.evaluateAccess({
      context,
      resource,
      requestedLimit,
      nativeRbac: this.query,
    });
    if (!decision.allowed) {
      const { deny } = decision;
      // A 400 deny is reported as an invalid request; anything else as a scope violation.
      const envelope =
        deny.statusCode === 400
          ? new FederationInvalidRequestError(deny.message, deny).toEnvelope()
          : new FederationScopeViolationError(deny.message, deny).toEnvelope();
      throw new HttpException(envelope, deny.statusCode);
    }
    const page = await this.runQuery({ filter: decision.filter, cursor });
    const payload: FederationListResponse<FederatedRow> = {
      items: tagWithSource(page.items, SOURCE_LOCAL),
    };
    if (page.nextCursor !== undefined) {
      payload.nextCursor = page.nextCursor;
    }
    if (page.truncated) {
      payload._truncated = true;
    }
    return payload;
  }
  /** Runs the list query, translating the query layer's invalid-cursor error into a 400. */
  private async runQuery(
    listRequest: Parameters<FederationListQueryService['list']>[0],
  ): Promise<Awaited<ReturnType<FederationListQueryService['list']>>> {
    try {
      return await this.query.list(listRequest);
    } catch (error: unknown) {
      const isInvalidCursor =
        error instanceof Error && error.message === 'Invalid federation list cursor';
      if (!isInvalidCursor) {
        throw error;
      }
      throw new HttpException(
        new FederationInvalidRequestError('Federation list cursor is invalid').toEnvelope(),
        400,
      );
    }
  }
 }
--- a/docs/scratchpads/FED-M3-05-list-verb.md
+++ b/docs/scratchpads/FED-M3-05-list-verb.md
@@ -0,0 +1,51 @@
 # FED-M3-05 — Federation List Verb Scratchpad
 ## Objective
 Implement `POST /api/federation/v1/list/:resource`.
 ## Scope
 - Wire `FederationAuthGuard` → `FederationScopeService` → read-only list query layer.
 - Apply `max_rows_per_query` row cap and return pagination metadata when truncated.
 - Tag returned rows with `_source: "local"`.
 - Keep audit writes deferred to M4.
 - No request/response body persistence.
 ## Base / branch
 - Branch: `feat/federation-m3-verb-list`
 - Base: `feat/federation-m3-scope-service` (PR #672), per orchestrator, because M3-04 is not merged yet.
 - Rebase target after #672 merges: `main`.
 ## Implementation notes
 - Added `ListController` under `apps/gateway/src/federation/server/verbs/`.
 - Added `FederationListQueryService` as the read-only query layer and native RBAC evaluator.
 - Query resources supported in M3 list path:
  - `tasks`: project/mission scoped tasks visible through personal/team project access.
  - `notes`: non-empty `mission_tasks.notes` rows visible through personal/team mission access.
  - `memory`: user-owned `insights` and `preferences` rows.
  - `credentials` / `api_keys`: denied by native RBAC in M3 even if present in scope; sensitive-resource implementation is not part of FED-M3-05.
 - Cursor pagination uses an opaque base64url keyset cursor over `(createdAt, id)`; each underlying table query fetches at most `limit + 1` rows (the `memory` resource issues two such queries, one for `insights` and one for `preferences`).
 ## Tests
 - `pnpm --filter @mosaicstack/gateway test -- list.controller.spec.ts list-query.service.spec.ts` — PASS (9 tests).
 - `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
 - `pnpm --filter @mosaicstack/gateway lint` — PASS.
 - `pnpm format:check` — PASS.
 - `pnpm typecheck` — PASS (41/41 turbo tasks).
 - `pnpm lint` — PASS (23/23 turbo tasks).
 - `pnpm --filter @mosaicstack/gateway test` — FAIL in pre-existing/live-DB integration suite: `apps/gateway/src/__tests__/cross-user-isolation.test.ts` cleanup cannot connect to local PostgreSQL on `localhost:5433`. New list tests pass; failure is outside FED-M3-05.
 ## Review evidence
 - `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS after remediation; approve, no findings.
 - `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS after cursor remediation; risk level none, no findings.
 - Security-review note: read-path audit logging remains intentionally deferred to M4 per orchestrator clarification and FED-M3-05 scope.
 ## Risks / follow-up
 - This branch intentionally includes M3-04 diff until PR #672 lands; final PR must be rebased onto main after #672 merges.
 - Current branch base predates the M3-07 capabilities module registration; expect a small `FederationModule` rebase conflict once #672 and #674 are both on main.
--- a/packages/mosaic/src/commands/fleet.spec.ts
+++ b/packages/mosaic/src/commands/fleet.spec.ts
@@ -4,7 +4,6 @@ import { dirname, join, resolve } from 'node:path';
 import { Command } from 'commander';
 import { afterEach, describe, expect, it, vi } from 'vitest';
 import {
  acquireRestartLock,
  addAgentToRoster,
  buildAgentSendCommand,
  buildAgentWatchAttachCommand,
@@ -46,8 +45,6 @@ import {
  removeAgentFromRoster,
  resolveFleetPaths,
  resolvePresetFilename,
  restartLockPath,
  RESTART_LOCK_STALE_MS,
  RUNTIME_ACCEPTABLE_COMMANDS,
  serializeRosterToYaml,
  VERIFY_DEFAULT_TIMEOUT_MS,
@@ -681,364 +678,6 @@ describe('fleet command construction', () => {
    }
  });
  it('waits for an in-flight restart to clear before relaunching (re-entry guard)', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
      rosterPath,
      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
        '\n',
      ),
    );
    // Simulate another `mosaic fleet restart` process mid-teardown: a fresh lock
    // (recent timestamp, so it is NOT treated as stale) already held.
    const lockPath = restartLockPath(home);
    await mkdir(dirname(lockPath), { recursive: true });
    await writeFile(lockPath, `4242\n${Date.now()}\n`);
    const events: string[] = [];
    const runner: CommandRunner = async (command, args) => {
      events.push(`run:${args[args.length - 1]}`);
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    // The injected sleep stands in for time passing while we wait; the in-flight
    // restart "finishes" (releases its lock) after the first poll.
    let sleeps = 0;
    const sleepFn: SleepFn = async () => {
      sleeps += 1;
      events.push(`sleep:${sleeps}`);
      await rm(lockPath, { force: true });
    };
    const program = new Command();
    program.exitOverride();
    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
    try {
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
      // It must have waited at least once before issuing any systemctl restart.
      expect(sleeps).toBeGreaterThan(0);
      const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
      const firstRun = events.findIndex((e) => e.startsWith('run:'));
      expect(firstSleep).toBeGreaterThanOrEqual(0);
      expect(firstRun).toBeGreaterThan(firstSleep);
      // And it still performs the full restart once the lock clears.
      expect(events).toContain('run:mosaic-tmux-holder.service');
      expect(events).toContain('run:mosaic-agent@coder0.service');
      // The lock is released after the restart completes.
      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('breaks a stale restart lock and proceeds without waiting', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
      rosterPath,
      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
        '\n',
      ),
    );
    // A lock left behind by a crashed owner: timestamp older than the stale window.
    const lockPath = restartLockPath(home);
    await mkdir(dirname(lockPath), { recursive: true });
    await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n`);
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    const sleepFn = vi.fn<SleepFn>(async () => {});
    const program = new Command();
    program.exitOverride();
    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
    try {
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
      // Stale lock is broken immediately — no waiting.
      expect(sleepFn).not.toHaveBeenCalled();
      expect(calls).toEqual([
        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
      ]);
      // The stale lock is gone once the restart completes.
      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('releases the restart lock so a subsequent restart is not blocked', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
      rosterPath,
      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
        '\n',
      ),
    );
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    const sleepFn = vi.fn<SleepFn>(async () => {});
    const program = new Command();
    program.exitOverride();
    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
    try {
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
      // Two sequential restarts both run fully and neither has to wait.
      expect(sleepFn).not.toHaveBeenCalled();
      expect(calls).toEqual([
        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
        ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
        ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
      ]);
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('guards the single-agent restart path behind the in-flight restart lock', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
      rosterPath,
      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
        '\n',
      ),
    );
    // A full restart is mid-flight (lock held); a single-agent restart re-enters.
    const lockPath = restartLockPath(home);
    await mkdir(dirname(lockPath), { recursive: true });
    await writeFile(lockPath, `4242\n${Date.now()}\n`);
    const events: string[] = [];
    const runner: CommandRunner = async (command, args) => {
      events.push(`run:${args[args.length - 1]}`);
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    let sleeps = 0;
    const sleepFn: SleepFn = async () => {
      sleeps += 1;
      events.push(`sleep:${sleeps}`);
      await rm(lockPath, { force: true });
    };
    const program = new Command();
    program.exitOverride();
    registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
    try {
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart', 'coder0']);
      // The single-agent restart waits for the in-flight restart before acting.
      expect(sleeps).toBeGreaterThan(0);
      const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
      const firstRun = events.findIndex((e) => e.startsWith('run:'));
      expect(firstSleep).toBeGreaterThanOrEqual(0);
      expect(firstRun).toBeGreaterThan(firstSleep);
      // Only the named agent is restarted; the holder is untouched.
      expect(events).toContain('run:mosaic-agent@coder0.service');
      expect(events).not.toContain('run:mosaic-tmux-holder.service');
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('does not let a timed-out owner drop a lock another restart broke and re-owned', async () => {
    const home = await tempDir();
    // Everything after the temp home exists runs inside try/finally so the
    // directory is removed even when an assertion fails (same pattern as the
    // neighbouring restart-lock tests).
    try {
      const runDir = join(home, 'fleet', 'run');
      await mkdir(runDir, { recursive: true });
      const lockPath = restartLockPath(home);
      // Reads the owner token (third line) currently stored in the lock file.
      const tokenOf = async (): Promise<string> => {
        const raw = await readFile(lockPath, 'utf8');
        return raw.split('\n')[2]?.trim() ?? '';
      };
      const sleepFn = vi.fn<SleepFn>(async () => {});
      // R1 acquires the lock and begins a restart that then hangs.
      const r1 = await acquireRestartLock(home, sleepFn);
      const tokenR1 = await tokenOf();
      expect(tokenR1).not.toBe('');
      // The hung R1 leaves a stale lock: rewrite its timestamp into the past while
      // preserving R1's token — exactly the on-disk state a stuck owner leaves.
      await writeFile(
        lockPath,
        `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n${tokenR1}\n`,
      );
      // R2 re-enters, sees the stale lock, and atomically takes ownership.
      const r2 = await acquireRestartLock(home, sleepFn);
      const tokenR2 = await tokenOf();
      expect(tokenR2).not.toBe(tokenR1);
      expect(sleepFn).not.toHaveBeenCalled();
      // R1 finally finishes and releases. It must NOT delete R2's lock — otherwise
      // a third restart (R3) could acquire and interleave with R2 still running.
      await r1.release();
      expect(await tokenOf()).toBe(tokenR2);
      // R2 releases cleanly and the lock is gone.
      await r2.release();
      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('lets only one of several concurrent breakers proceed past a stale lock', async () => {
    const home = await tempDir();
    const lockPath = restartLockPath(home);
    await mkdir(dirname(lockPath), { recursive: true });
    // A stale lock left by a crashed owner: every concurrent re-entrant restart
    // will judge it stale and try to break it at the same instant. Breaking must
    // NOT grant ownership — only the atomic re-create may — so exactly one
    // contender can ever hold the lock at a time. (The v2 fix wrote our own token
    // during the break and read it back, so two breakers each saw their own token
    // and BOTH proceeded; this guards that regression.)
    await writeFile(
      lockPath,
      `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
    );
    // Yielding sleep so a waiting contender lets the current owner finish and
    // release before it re-contends, instead of spinning the microtask queue.
    const sleepFn: SleepFn = async () => {
      await new Promise((res) => setTimeout(res, 0));
    };
    let active = 0;
    let maxActive = 0;
    const tokens: string[] = [];
    const tokenOf = async (): Promise<string> => {
      const raw = await readFile(lockPath, 'utf8');
      return raw.split('\n')[2]?.trim() ?? '';
    };
    // One "restart" = acquire the lock, do work in the critical section, release.
    const restartOnce = async (): Promise<void> => {
      const guard = await acquireRestartLock(home, sleepFn);
      active += 1;
      maxActive = Math.max(maxActive, active);
      // Record the token we own while we hold it, then yield to interleave with
      // any other contender that might (wrongly) believe it owns the lock too.
      tokens.push(await tokenOf());
      await new Promise((res) => setTimeout(res, 0));
      active -= 1;
      await guard.release();
    };
    try {
      // Three breakers race the single stale lock simultaneously.
      await Promise.all([restartOnce(), restartOnce(), restartOnce()]);
      // Mutual exclusion held: never two owners at once despite concurrent breaks.
      expect(maxActive).toBe(1);
      // Each acquire owned with its own distinct token — no two ever shared it.
      expect(new Set(tokens).size).toBe(3);
      // The lock is fully released at the end.
      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('lets exactly one of two breakers take over a stale lock while the other waits', async () => {
    const home = await tempDir();
    const lockPath = restartLockPath(home);
    await mkdir(dirname(lockPath), { recursive: true });
    // A single stale lock both contenders will judge stale at the same instant.
    // Every transition runs under the registry mutex, so only one may take the
    // lock over; the other must observe a now-fresh owner and WAIT/re-evaluate
    // rather than also taking over. (A content-blind clobber let both believe
    // they owned it — this asserts the mutex-gated CAS takeover instead.)
    await writeFile(
      lockPath,
      `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
    );
    // Barrier the winner holds against until the loser has observed the lock
    // fresh and waited at least once — forcing the exact interleaving where one
    // proceeds while the other waits, deterministically rather than by timing.
    let resolveLoserWaited: () => void = () => {};
    const loserWaited = new Promise<void>((res) => {
      resolveLoserWaited = res;
    });
    let sleeps = 0;
    const sleepFn: SleepFn = async () => {
      sleeps += 1;
      resolveLoserWaited();
      await new Promise((res) => setTimeout(res, 0));
    };
    let active = 0;
    let maxActive = 0;
    const tokens: string[] = [];
    const tokenOf = async (): Promise<string> => {
      const raw = await readFile(lockPath, 'utf8');
      return raw.split('\n')[2]?.trim() ?? '';
    };
    let firstOwner = true;
    const restartOnce = async (): Promise<void> => {
      const guard = await acquireRestartLock(home, sleepFn);
      active += 1;
      maxActive = Math.max(maxActive, active);
      tokens.push(await tokenOf());
      if (firstOwner) {
        // Winner: keep holding the lock until the loser has waited once, so the
        // loser is guaranteed to see a FRESH owner (not the stale one) and back
        // off — proving it could not also take over.
        firstOwner = false;
        await loserWaited;
      } else {
        await new Promise((res) => setTimeout(res, 0));
      }
      active -= 1;
      await guard.release();
    };
    try {
      // Exactly two breakers race the single stale lock.
      await Promise.all([restartOnce(), restartOnce()]);
      // Mutual exclusion: never two owners at once (if both took over the stale
      // lock, this would be 2).
      expect(maxActive).toBe(1);
      // Both eventually owned, each with its own distinct token.
      expect(new Set(tokens).size).toBe(2);
      // The loser observed the winner's fresh lock and waited — it did NOT also
      // take over the stale lock.
      expect(sleeps).toBeGreaterThanOrEqual(1);
      // The lock is fully released at the end.
      await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
  it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
    const home = await tempDir();
    const rosterPath = join(home, 'fleet', 'roster.yaml');
--- a/packages/mosaic/src/commands/fleet.ts
+++ b/packages/mosaic/src/commands/fleet.ts
@@ -1,16 +1,5 @@
 import { constants } from 'node:fs';
-import {
+import { access, chmod, copyFile, mkdir, readFile, unlink, writeFile } from 'node:fs/promises';
  access,
  chmod,
  copyFile,
  mkdir,
  open,
  readFile,
  stat,
  unlink,
  writeFile,
 } from 'node:fs/promises';
 import { randomUUID } from 'node:crypto';
 import { homedir, hostname, userInfo } from 'node:os';
 import { dirname, join, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -544,295 +533,6 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
  return ['systemctl', '--user', action, service];
 }
 /** Poll interval (ms) while waiting for an in-flight restart's lock to clear. */
 export const RESTART_LOCK_POLL_INTERVAL_MS = 250;
 /**
 * Maximum time (ms) a re-entrant restart waits for the in-flight restart to
 * finish before it breaks the lock and proceeds anyway. A bound is required so
 * a crashed holder of the lock can never deadlock the fleet permanently.
 */
 export const RESTART_LOCK_MAX_WAIT_MS = 30_000;
 /**
 * Age (ms) past which a restart lock is treated as stale (its owner died
 * without releasing it) and is broken immediately rather than waited on.
 */
 export const RESTART_LOCK_STALE_MS = 60_000;
 /**
 * Location of the cross-process restart lock file for one Mosaic home.
 *
 * The lock always lives at `<mosaicHome>/fleet/run/restart.lock`; it is derived
 * from the home directory alone, so it is scoped to the fleet being restarted.
 *
 * @param mosaicHome - Root directory of the Mosaic installation.
 * @returns The lock file path.
 */
 export function restartLockPath(mosaicHome: string): string {
  const runDirectory = join(mosaicHome, 'fleet', 'run');
  return join(runDirectory, 'restart.lock');
 }
 /** A held restart lock; `release()` removes the lock file iff we still own it. */
 interface RestartGuard {
  /** Gives the lock up; the returned promise settles once the release attempt is done. */
  release(): Promise<void>;
 }
 /**
 * Builds the text stored in a lock file: three newline-terminated lines holding
 * the current process id (informational), the current epoch-millisecond time,
 * and the unique owner `token`.
 */
 function formatRestartLockContent(token: string): string {
  const fields = [process.pid, Date.now(), token];
  return fields.map((field) => `${field}\n`).join('');
 }
 /**
 * Extracts the owner token — the third line — from a lock file.
 *
 * @param lockPath - Lock file to inspect.
 * @returns The trimmed token, or `null` when the file cannot be read or its
 *   third line is missing or blank.
 */
 async function readRestartLockToken(lockPath: string): Promise<string | null> {
  try {
    const lines = (await readFile(lockPath, 'utf8')).split('\n');
    const owner = lines[2]?.trim() ?? '';
    return owner.length > 0 ? owner : null;
  } catch {
    // Any read failure (missing file, permissions, …) means "no token".
    return null;
  }
 }
 /**
 * Decides whether lock-file text describes a stale lock.
 *
 * The second line is parsed as a base-10 integer timestamp. The lock is stale
 * when that line does not parse to a finite number, or when the timestamp is
 * at least RESTART_LOCK_STALE_MS older than `now`.
 *
 * @param raw - Full text of the lock file.
 * @param now - Current time in epoch milliseconds.
 */
 function isRestartLockContentStale(raw: string, now: number): boolean {
  const [, stampLine = ''] = raw.split('\n');
  const stamp = Number.parseInt(stampLine.trim(), 10);
  return !Number.isFinite(stamp) || now - stamp >= RESTART_LOCK_STALE_MS;
 }
 /**
 * Path of the registry mutex file that accompanies a restart lock: the lock
 * path with a `.mutex` suffix, so the two files sit side by side.
 */
 function restartMutexPath(lockPath: string): string {
  return lockPath.concat('.mutex');
 }
 /** Brief back-off between registry-mutex acquisition attempts (held microseconds). */
 const RESTART_MUTEX_RETRY_MS = 20;
 /**
 * Judges staleness of a lock-like file from its modification time instead of
 * its content.
 *
 * @param path - File to inspect.
 * @param now - Current time in epoch milliseconds.
 * @returns `true` only when the file can be stat-ed and its mtime is at least
 *   RESTART_LOCK_STALE_MS older than `now`; `false` when the file is younger,
 *   missing, or cannot be stat-ed for any other reason.
 */
 async function isRestartLockPathStale(path: string, now: number): Promise<boolean> {
  let modifiedAt: number;
  try {
    modifiedAt = (await stat(path)).mtimeMs;
  } catch {
    // Missing (the caller will re-contend) or un-stat-able (treat as live and
    // back off rather than reap): either way the file is not reported stale.
    return false;
  }
  return now - modifiedAt >= RESTART_LOCK_STALE_MS;
 }
 /** Path of the reclaim lock for a mutex: the mutex path with a `.reclaim` suffix. */
 function restartReclaimPath(mutexPath: string): string {
  return mutexPath.concat('.reclaim');
 }
 /**
 * Reap a registry mutex left behind by a process that CRASHED mid-transition —
 * one whose file has aged past RESTART_LOCK_STALE_MS. Because the mutex is held
 * only for a couple of filesystem ops (no sleeps, never across the restart), a
 * mutex this old can only belong to a dead holder.
 *
 * The reap removes the dead mutex but never CREATES/holds it — acquisition stays
 * the single `open('wx')` create in {@link acquireRestartMutex}, so exactly one
 * contender wins ownership no matter how the reap and acquires interleave. The
 * removal is made conditional by a dedicated reclaim lock: while it is held the
 * dead mutex is stable (its dead holder will never touch it, and no other
 * reclaimer can race), so re-reading it and removing it only if it is STILL stale
 * is a true compare — a live holder's fresh mutex is never removed. This closes
 * the reclaim race a content-blind rename-and-restore left open (a third
 * contender slipping into the gap while a fresh mutex was moved aside).
 *
 * @param mutexPath - Path of the mutex file that may need reaping.
 * @returns Resolves once this pass is over, whether or not the mutex was removed;
 *   callers must re-check the mutex themselves.
 * @throws Any error other than `EEXIST` raised while creating the reclaim lock.
 */
 async function reclaimStaleRestartMutex(mutexPath: string): Promise<void> {
  const reclaimPath = restartReclaimPath(mutexPath);
  let handle: Awaited<ReturnType<typeof open>>;
  try {
    // Exclusive create: succeeds for exactly one reclaimer at a time.
    handle = await open(reclaimPath, 'wx');
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
      throw err;
    }
    // Someone is already reclaiming. If their reclaim lock is itself stale by
    // mtime, its holder crashed mid-reap (the lock spans only a stat + unlink,
    // microseconds) — clear it so a later pass can retry. Otherwise a live
    // reclaimer has it; back off. Either way we do not reap the mutex this pass.
    if (await isRestartLockPathStale(reclaimPath, Date.now())) {
      await unlink(reclaimPath).catch(() => {});
    }
    return;
  }
  try {
    // Re-check the mutex UNDER the reclaim lock and remove it only if it is STILL
    // stale by mtime. A live holder's mutex is fresh and is left untouched; a dead
    // holder's mutex is stable here (its holder is gone and no other reclaimer can
    // race us), so this re-check is authoritative.
    if (await isRestartLockPathStale(mutexPath, Date.now())) {
      await unlink(mutexPath).catch(() => {});
    }
  } finally {
    // NOTE(review): if close() rejects, the unlink below is skipped and the
    // reclaim lock stays on disk until a later pass sees it as stale by mtime.
    await handle.close();
    await unlink(reclaimPath).catch(() => {});
  }
 }
 /**
 * Acquire the registry mutex, BLOCKING (with brief back-offs) until held, and
 * return a token-gated release. This is the single point of mutual exclusion for
 * the restart lock: acquire, release, and stale/timeout takeover all run under it,
 * so "read the lock, then mutate it" is atomic — no acquirer, releaser, or breaker
 * can ever interleave with another. A mutex left by a crashed holder is reclaimed
 * once it ages past the stale threshold.
 *
 * Acquisition itself is a single exclusive create (`open(mutexPath, 'wx')`). On
 * every contended pass (the file already exists) the loop sleeps
 * RESTART_MUTEX_RETRY_MS before retrying, including after a reclaim attempt, so
 * a reap that cannot succeed never turns into a tight filesystem spin.
 *
 * @param mutexPath - Path of the mutex file to create.
 * @param token - Owner token written into the mutex; the returned release
 *   function removes the file only while it still carries this token.
 * @returns An async function that releases the mutex if it is still ours.
 * @throws Any error other than `EEXIST` from creating the mutex, and any error
 *   from writing the token into it (after the half-created file is removed).
 */
 async function acquireRestartMutex(
  mutexPath: string,
  token: string,
 ): Promise<RestartGuard['release']> {
  for (;;) {
    let handle: Awaited<ReturnType<typeof open>>;
    try {
      handle = await open(mutexPath, 'wx');
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
        throw err;
      }
      // Staleness is judged by mtime, not content, so a mutex that exists but has
      // not yet had its token written (the open-before-write window) reads as live
      // and is never wrongly reaped.
      if (await isRestartLockPathStale(mutexPath, Date.now())) {
        await reclaimStaleRestartMutex(mutexPath);
      }
      // Back off on EVERY contended pass. A live holder will be gone shortly, and
      // a reclaim attempt is not guaranteed to remove the mutex (another reclaimer
      // may hold the reclaim lock, or the unlink may fail and is swallowed);
      // retrying immediately in those cases would busy-loop on filesystem calls.
      await new Promise<void>((wake) => {
        setTimeout(wake, RESTART_MUTEX_RETRY_MS);
      });
      continue;
    }
    // We created the mutex. Populate it with our token; if writing fails, clean up
    // our own file so we never leak an empty mutex that a peer would have to reap.
    try {
      await handle.writeFile(formatRestartLockContent(token));
      await handle.close();
    } catch (err) {
      await handle.close().catch(() => {});
      await unlink(mutexPath).catch(() => {});
      throw err;
    }
    return async (): Promise<void> => {
      // Token-gated: if the mutex no longer carries our token, leave it alone.
      if ((await readRestartLockToken(mutexPath)) !== token) return;
      await unlink(mutexPath).catch(() => {});
    };
  }
 }
 /**
 * Acquire the fleet restart lock, serializing concurrent `mosaic fleet restart`
 * invocations across processes. Each restart tears the tmux holder (and the
 * agent sessions inside it) down and back up; without this guard a re-entrant
 * restart relaunches agents against a half-torn-down holder, which fails and
 * tight-loops. A re-entrant caller waits for the in-flight restart to release
 * the lock (clean shutdown settled) before proceeding, breaks a stale lock left
 * by a crashed owner, and after RESTART_LOCK_MAX_WAIT_MS breaks the lock to
 * avoid a permanent deadlock.
 *
 * Correctness rests on a single invariant: EVERY transition of the lock — taking
 * a free lock, taking over a stale/timed-out one, and releasing — happens under
 * the registry mutex. Because the check ("is the lock free / stale / fresh?") and
 * the mutation that follows it both run while the mutex is held, they are atomic:
 * no other acquirer, releaser, or breaker can slip in between. That is what makes
 * takeover a true compare-and-swap rather than a content-blind clobber — a normal
 * `open('wx')` acquirer cannot create a fresh lock in a gap, and the original
 * owner's `release()` (also mutex-gated and token-checked) cannot drop a lock a
 * breaker already took over. So no interleaving lets two restarts both own the
 * lock and run concurrently.
 *
 * @param mosaicHome - Mosaic home directory; the lock file lives under its
 *   `fleet/run` directory, which is created if missing.
 * @param sleepFn - Awaited with RESTART_LOCK_POLL_INTERVAL_MS after each pass
 *   that finds a fresh lock held by another owner.
 * @returns A guard whose `release()` removes the lock file only if it still
 *   carries the token generated by this call.
 */
 export async function acquireRestartLock(
  mosaicHome: string,
  sleepFn: SleepFn,
 ): Promise<RestartGuard> {
  // Unique per call: identifies this acquirer in both the lock and the mutex.
  const token = randomUUID();
  const lockPath = restartLockPath(mosaicHome);
  const mutexPath = restartMutexPath(lockPath);
  await mkdir(dirname(lockPath), { recursive: true });
  const release = async (): Promise<void> => {
    // Mutex-gated and token-gated: only remove the lock if it is still ours. If
    // another caller took it over (after a stale/timeout break) the token no
    // longer matches and we leave their lock intact.
    const releaseMutex = await acquireRestartMutex(mutexPath, token);
    try {
      if ((await readRestartLockToken(lockPath)) === token) {
        await unlink(lockPath).catch(() => {});
      }
    } finally {
      await releaseMutex();
    }
  };
  // Wall-clock cutoff: once reached, even a fresh lock is taken over.
  const deadline = Date.now() + RESTART_LOCK_MAX_WAIT_MS;
  for (;;) {
    let owned = false;
    const releaseMutex = await acquireRestartMutex(mutexPath, token);
    try {
      // Read and (if appropriate) mutate the lock atomically under the mutex.
      let current: string | null = null;
      let absent = false;
      try {
        current = await readFile(lockPath, 'utf8');
      } catch (readErr) {
        if ((readErr as NodeJS.ErrnoException).code === 'ENOENT') {
          absent = true;
        } else {
          current = null; // Unreadable/corrupt: treat as stale.
        }
      }
      const now = Date.now();
      if (absent) {
        // Lock is free — take it.
        await writeFile(lockPath, formatRestartLockContent(token));
        owned = true;
      } else {
        const stale = current === null || isRestartLockContentStale(current, now);
        const timedOut = now >= deadline;
        if (stale || timedOut) {
          // When both conditions hold, the stale message is the one reported.
          process.stderr.write(
            stale
              ? 'Breaking stale fleet restart lock.\n'
              : `Timed out after ${RESTART_LOCK_MAX_WAIT_MS}ms waiting for the in-flight fleet ` +
                  'restart; breaking the lock.\n',
          );
          // Takeover is just an overwrite — safe because we hold the mutex, so no
          // acquirer or releaser can touch the lock between our read and this write.
          await writeFile(lockPath, formatRestartLockContent(token));
          owned = true;
        }
        // else: a fresh restart owns it — wait below and re-evaluate.
      }
    } finally {
      // The mutex is dropped before sleeping or returning; it is never held
      // across the wait or across the caller's restart work.
      await releaseMutex();
    }
    if (owned) {
      return { release };
    }
    await sleepFn(RESTART_LOCK_POLL_INTERVAL_MS);
  }
 }
 /**
 * Returns the systemctl --user enable command for a given unit.
 * Used by the install auto-enable step to persist units across reboots.
@@ -1472,7 +1172,6 @@ export function isSendAccepted(capturedOutput: string): SendVerifyResult {
 export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
  const runner = deps.runner ?? runCommand;
  const sleepFn = deps.sleepFn ?? defaultSleep;
  const paths = resolveFleetPaths(deps.mosaicHome);
  const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
@@ -1586,22 +1285,9 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
      .command(`${action} [agent]`)
      .description(`${action} the fleet holder or one agent`)
      .action(async (agent?: string) => {
        const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>();
        const activePaths = resolveFleetPaths(commandOpts.mosaicHome);
        const roster = await loadRosterForCommand(cmd);
        if (agent) {
          getRosterAgent(roster, agent);
          // Single-agent restart is guarded too: it can race a full restart that
          // is tearing the shared holder down.
          if (action === 'restart') {
            const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
            try {
              await runChecked(runner, buildFleetServiceCommand(action, agent));
            } finally {
              await guard.release();
            }
            return;
          }
          await runChecked(runner, buildFleetServiceCommand(action, agent));
          return;
        }
@@ -1612,21 +1298,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
          );
          return;
        }
        if (action === 'restart') {
          // Serialize the holder+agents teardown/relaunch behind the restart lock
          // so a re-entrant restart waits for clean shutdown before relaunching,
          // instead of racing a half-torn-down holder into a tight loop.
          const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
          try {
            await runChecked(runner, buildFleetServiceCommand(action));
            for (const rosterAgent of roster.agents) {
              await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
            }
          } finally {
            await guard.release();
          }
          return;
        }
        await runChecked(runner, buildFleetServiceCommand(action));
        for (const rosterAgent of roster.agents) {
          await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
--- a/tools/install.sh
+++ b/tools/install.sh
@@ -16,10 +16,6 @@
 #   --framework       Install/upgrade framework only (skip npm CLI)
 #   --cli             Install/upgrade npm CLI only (skip framework)
 #   --ref <branch>    Git ref for framework archive (default: main)
 #   --dev             Build CLI + gateway FROM SOURCE at --ref instead of the
 #                     registry @latest. Zero registry writes — packs local
 #                     tarballs and installs them globally. Use to test a branch
 #                     end-to-end before cutting a release.
 #   --yes             Accept all defaults; headless/non-interactive install
 #   --no-auto-launch  Skip automatic mosaic wizard + gateway install on first install
 #   --uninstall       Reverse the install: remove framework dir, CLI package, and npmrc line
@@ -31,7 +27,6 @@
 #   MOSAIC_PREFIX       — npm global prefix          (default: ~/.npm-global)
 #   MOSAIC_NO_COLOR     — disable colour             (set to 1)
 #   MOSAIC_REF          — git ref for framework      (default: main)
 #   MOSAIC_DEV          — equivalent to --dev         (set to 1)
 #   MOSAIC_ASSUME_YES   — equivalent to --yes        (set to 1)
 # ──────────────────────────────────────────────────────────────────────────────
 #
@@ -48,7 +43,6 @@ FLAG_CLI=true
 FLAG_NO_AUTO_LAUNCH=false
 FLAG_YES=false
 FLAG_UNINSTALL=false
 FLAG_DEV=false
 GIT_REF="${MOSAIC_REF:-main}"
 # MOSAIC_ASSUME_YES env var acts the same as --yes
@@ -56,18 +50,12 @@ if [[ "${MOSAIC_ASSUME_YES:-0}" == "1" ]]; then
  FLAG_YES=true
 fi
 # MOSAIC_DEV env var acts the same as --dev
 if [[ "${MOSAIC_DEV:-0}" == "1" ]]; then
  FLAG_DEV=true
 fi
 while [[ $# -gt 0 ]]; do
  case "$1" in
    --check)          FLAG_CHECK=true; shift ;;
    --framework)      FLAG_CLI=false; shift ;;
    --cli)            FLAG_FRAMEWORK=false; shift ;;
    --ref)            GIT_REF="${2:-main}"; shift 2 ;;
    --dev)            FLAG_DEV=true; shift ;;
    --yes|-y)         FLAG_YES=true; shift ;;
    --no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
    --uninstall)      FLAG_UNINSTALL=true; shift ;;
@@ -84,17 +72,6 @@ CLI_PKG="${SCOPE}/mosaic"
 # Source repository and the tarball URL for the selected git ref. ARCHIVE_URL
 # is what ensure_monorepo downloads and pipes into tar.
 REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
 ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"
 # In dev (build-from-source) mode the gateway is installed globally from a
 # locally-built tarball. Tell the wizard / gateway-config stage NOT to overwrite
 # it with the registry @latest build (honored by gatewayConfigStage).
 # Exported so that child processes started by this script inherit it.
 if [[ "$FLAG_DEV" == "true" ]]; then
  export MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1
 fi
 # Shared monorepo checkout (populated on demand by ensure_monorepo).
 # Both start empty; ensure_monorepo treats an empty EXTRACTED_DIR as
 # "not downloaded yet".
 WORK_DIR=""
 EXTRACTED_DIR=""
 # ─── uninstall path ───────────────────────────────────────────────────────────
 # Shell-level uninstall for when the CLI is broken or not available.
 # Handles: framework directory, npm CLI package, npmrc scope line.
@@ -262,99 +239,6 @@ framework_version() {
  fi
 }
 # Download + extract the monorepo archive at $GIT_REF exactly once per run.
 # Sets the script-level EXTRACTED_DIR to the repo root. Reused by both the
 # framework install (Part 1) and the dev build-from-source path (Part 2).
 #
 # Globals read:    EXTRACTED_DIR, ARCHIVE_URL, GIT_REF, TMPDIR (falls back to /tmp)
 # Globals written: WORK_DIR      — fresh mktemp directory holding the extraction
 #                  EXTRACTED_DIR — first directory found directly under WORK_DIR
 # Side effects:    defines cleanup_work and registers it as the EXIT trap, so
 #                  WORK_DIR is removed when the script exits.
 # Returns 0 immediately when EXTRACTED_DIR already points at a directory.
 # Exits 1 when neither curl nor wget is available, or when no directory is
 # found under WORK_DIR after extraction.
 ensure_monorepo() {
  # Idempotence guard: a previous call already populated EXTRACTED_DIR.
  if [[ -n "$EXTRACTED_DIR" ]] && [[ -d "$EXTRACTED_DIR" ]]; then
    return 0
  fi
  # presumably aborts the script when tar is missing — confirm in require_cmd
  require_cmd tar
  WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
  # The -n guard keeps cleanup from running rm -rf with an empty path.
  # shellcheck disable=SC2317
  cleanup_work() { [[ -n "$WORK_DIR" ]] && rm -rf "$WORK_DIR"; }
  trap cleanup_work EXIT
  info "Downloading source from ${GIT_REF}…"
  # Stream the archive straight into tar; prefer curl, fall back to wget.
  # NOTE(review): the downloader's exit status is only observed if the script
  # enables pipefail; otherwise only tar's status counts — confirm shell options.
  if command -v curl &>/dev/null; then
    curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
  elif command -v wget &>/dev/null; then
    wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
  else
    fail "curl or wget required to download source."
    exit 1
  fi
  # Gitea archives extract to <repo-name>/ inside the work dir
  EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
  if [[ -z "$EXTRACTED_DIR" ]] || [[ ! -d "$EXTRACTED_DIR" ]]; then
    fail "Could not locate extracted source in archive."
    # List the work dir on stderr to help diagnose an unexpected archive layout.
    ls -la "$WORK_DIR" >&2
    exit 1
  fi
 }
 # Build @mosaicstack/mosaic + @mosaicstack/gateway from source and install both
 # globally from locally-packed tarballs. ZERO registry writes. Workspace deps
 # (brain/config/db/…) are pulled from the registry at the versions pinned in
 # each package.json — `pnpm pack` rewrites `workspace:*` to those versions.
 #
 # Globals read: EXTRACTED_DIR (repo root) and WORK_DIR (scratch dir), both set
 #               by ensure_monorepo, which the caller runs first; PREFIX (npm
 #               global prefix passed to `npm install -g`).
 # Exits 1 when pnpm cannot be found/activated or when either tarball is missing
 # after packing.
 # NOTE(review): every build/install step is piped through sed (to indent its
 # output by two spaces), so a failing step is only detected if the script
 # enables pipefail — confirm shell options at the top of the file.
 install_cli_from_source() {
  local src="$EXTRACTED_DIR"
  local out_dir="$WORK_DIR/dist-tarballs"
  mkdir -p "$out_dir"
  # pnpm via corepack (ships with Node >= 16.9; required by Node >= 20 preflight).
  # Pin to the repo's packageManager version so the build matches CI. Surface
  # corepack failures so the fresh-machine case gives an actionable error
  # instead of a bare "command not found".
  if ! command -v pnpm &>/dev/null; then
    info "Activating pnpm via corepack…"
    corepack enable 2>&1 | sed 's/^/  /' || warn "corepack enable failed — pnpm may need manual install."
    corepack prepare pnpm@10.6.2 --activate 2>&1 | sed 's/^/  /' \
      || warn "corepack prepare failed — pnpm may need manual install."
  fi
  # Re-check: corepack problems above only warn, so this is the hard stop.
  if ! command -v pnpm &>/dev/null; then
    fail "pnpm not available after corepack activation."
    echo "  Install pnpm manually (https://pnpm.io/installation) and re-run with --dev."
    exit 1
  fi
  # Each step runs in a subshell so the cd does not leak into the caller.
  info "Installing workspace dependencies (pnpm install)…"
  ( cd "$src" && pnpm install ) 2>&1 | sed 's/^/  /'
  info "Building CLI + gateway from source…"
  # The trailing "..." in each filter also selects that package's dependencies.
  ( cd "$src" && pnpm --filter "@mosaicstack/mosaic..." --filter "@mosaicstack/gateway..." run build ) 2>&1 | sed 's/^/  /'
  info "Packing local tarballs…"
  ( cd "$src/packages/mosaic" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'
  ( cd "$src/apps/gateway"    && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'
  # Pick the newest matching tarball (ls -t sorts by modification time); the
  # result is empty when nothing matched, which the -f checks below catch.
  local cli_tgz gw_tgz
  cli_tgz="$(ls -1t "$out_dir"/mosaicstack-mosaic-*.tgz 2>/dev/null | head -1)"
  gw_tgz="$(ls -1t "$out_dir"/mosaicstack-gateway-*.tgz 2>/dev/null | head -1)"
  if [[ ! -f "$cli_tgz" ]]; then
    fail "CLI tarball was not produced by pnpm pack."
    exit 1
  fi
  if [[ ! -f "$gw_tgz" ]]; then
    fail "Gateway tarball was not produced by pnpm pack."
    exit 1
  fi
  # Gateway first so it is present globally before the CLI's wizard runs (which
  # skips its own gateway install via MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1).
  info "Installing gateway from source tarball (global)…"
  npm install -g "$gw_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'
  info "Installing CLI from source tarball (global)…"
  npm install -g "$cli_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'
  ok "Installed from source: CLI $(installed_cli_version)"
 }
 # ─── preflight ────────────────────────────────────────────────────────────────
 require_cmd node
@@ -398,8 +282,25 @@ if [[ "$FLAG_FRAMEWORK" == "true" ]]; then
      warn "Framework not installed."
    fi
  else
-    # Download repo archive and extract framework (shared with the dev build)
+    # Download repo archive and extract framework
-    ensure_monorepo
+    require_cmd tar
    WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
    cleanup_work() { rm -rf "$WORK_DIR"; }
    trap cleanup_work EXIT
    info "Downloading framework from ${GIT_REF}…"
    if command -v curl &>/dev/null; then
      curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
    elif command -v wget &>/dev/null; then
      wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
    else
      fail "curl or wget required to download framework."
      exit 1
    fi
    # Gitea archives extract to <repo-name>/ inside the work dir
    EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
    FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"
    if [[ ! -d "$FRAMEWORK_SRC" ]]; then
@@ -455,11 +356,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
  fi
  CURRENT="$(installed_cli_version)"
-  if [[ "$FLAG_DEV" == "true" ]]; then
+  LATEST="$(latest_cli_version)"
    LATEST=""
  else
    LATEST="$(latest_cli_version)"
  fi
  if [[ -n "$CURRENT" ]]; then
    dim "  Installed: ${CLI_PKG}@${CURRENT}"
@@ -467,9 +364,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
    dim "  Installed: (none)"
  fi
-  if [[ "$FLAG_DEV" == "true" ]]; then
+  if [[ -n "$LATEST" ]]; then
    dim "  Source:    ${REPO_BASE} (ref: ${GIT_REF}, build-from-source)"
  elif [[ -n "$LATEST" ]]; then
    dim "  Latest:    ${CLI_PKG}@${LATEST}"
  else
    dim "  Latest:    (registry unreachable)"
@@ -477,9 +372,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
  echo ""
  if [[ "$FLAG_CHECK" == "true" ]]; then
-    if [[ "$FLAG_DEV" == "true" ]]; then
+    if [[ -z "$LATEST" ]]; then
      info "Dev mode: installed version is ${CURRENT:-(none)} (no registry comparison)."
    elif [[ -z "$LATEST" ]]; then
      warn "Could not reach registry."
    elif [[ -z "$CURRENT" ]]; then
      warn "Not installed."
@@ -490,16 +383,6 @@ if [[ "$FLAG_CLI" == "true" ]]; then
    else
      ok "Up to date (or ahead of registry)."
    fi
  elif [[ "$FLAG_DEV" == "true" ]]; then
    info "Dev mode — building CLI + gateway from source at ref ${GIT_REF}…"
    ensure_monorepo
    install_cli_from_source
    # PATH check for npm prefix
    if [[ ":$PATH:" != *":$PREFIX/bin:"* ]]; then
      warn "$PREFIX/bin is not on your PATH"
      dim "  Add to your shell rc:  export PATH=\"$PREFIX/bin:\$PATH\""
    fi
  else
    if [[ -z "$LATEST" ]]; then
      warn "Could not reach registry at $REGISTRY — skipping npm CLI."