Compare commits
2 Commits
fix/fleet-
...
feat/feder
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e483d976e4 | ||
| 67135d3822 |
@@ -8,9 +8,11 @@ import { CapabilitiesController } from './server/verbs/capabilities.controller.j
|
||||
import { GrantsService } from './grants.service.js';
|
||||
import { FederationClientService, QuerySourceService } from './client/index.js';
|
||||
import { FederationAuthGuard, FederationScopeService } from './server/index.js';
|
||||
import { ListController } from './server/verbs/list.controller.js';
|
||||
import { FederationListQueryService } from './server/verbs/list-query.service.js';
|
||||
|
||||
@Module({
|
||||
controllers: [EnrollmentController, FederationController, CapabilitiesController],
|
||||
controllers: [EnrollmentController, FederationController, CapabilitiesController, ListController],
|
||||
providers: [
|
||||
AdminGuard,
|
||||
CaService,
|
||||
@@ -20,6 +22,7 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
|
||||
QuerySourceService,
|
||||
FederationAuthGuard,
|
||||
FederationScopeService,
|
||||
FederationListQueryService,
|
||||
],
|
||||
exports: [
|
||||
CaService,
|
||||
@@ -29,6 +32,7 @@ import { FederationAuthGuard, FederationScopeService } from './server/index.js';
|
||||
QuerySourceService,
|
||||
FederationAuthGuard,
|
||||
FederationScopeService,
|
||||
FederationListQueryService,
|
||||
],
|
||||
})
|
||||
export class FederationModule {}
|
||||
|
||||
@@ -0,0 +1,428 @@
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createPgliteDb,
|
||||
insights,
|
||||
missionTasks,
|
||||
missions,
|
||||
preferences,
|
||||
projects,
|
||||
runPgliteMigrations,
|
||||
teams,
|
||||
users,
|
||||
type Db,
|
||||
type DbHandle,
|
||||
} from '@mosaicstack/db';
|
||||
import type { FederationScopeQueryFilter } from '../../scope.service.js';
|
||||
import { FederationListQueryService } from '../list-query.service.js';
|
||||
|
||||
// Handle to the in-memory database; assigned in beforeAll and cleared in afterAll.
let dbHandle: DbHandle | undefined;

// Filter used by the stubbed-pagination tests: a page size of two with no team scope.
const TASK_FILTER: FederationScopeQueryFilter = {
  resource: 'tasks',
  subjectUserId: 'user-1',
  includePersonal: true,
  teamIds: [],
  limit: 2,
  maxRowsPerQuery: 2,
};

// Users seeded into the database-backed fixture.
const SUBJECT_USER_ID = 'fed-m3-05-subject';
const OTHER_USER_ID = 'fed-m3-05-other';

// Teams (…00x): one passed as a granted team id, one never granted.
const TEAM_ID = '05000000-0000-4000-8000-000000000001';
const UNAUTHORIZED_TEAM_ID = '05000000-0000-4000-8000-000000000002';

// Projects (…10x).
const PERSONAL_PROJECT_ID = '05000000-0000-4000-8000-000000000101';
const TEAM_PROJECT_ID = '05000000-0000-4000-8000-000000000102';
const UNAUTHORIZED_PROJECT_ID = '05000000-0000-4000-8000-000000000103';

// Missions (…20x), one per project above.
const PERSONAL_MISSION_ID = '05000000-0000-4000-8000-000000000201';
const TEAM_MISSION_ID = '05000000-0000-4000-8000-000000000202';
const UNAUTHORIZED_MISSION_ID = '05000000-0000-4000-8000-000000000203';

// Mission-task notes (…30x).
const SUBJECT_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000301';
const OTHER_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000302';
const SUBJECT_PERSONAL_NOTE_ID = '05000000-0000-4000-8000-000000000303';
const SUBJECT_UNAUTHORIZED_NOTE_ID = '05000000-0000-4000-8000-000000000304';

// Memory rows: insights (…40x) and preferences (…50x).
const INSIGHT_ONE_ID = '05000000-0000-4000-8000-000000000401';
const INSIGHT_TWO_ID = '05000000-0000-4000-8000-000000000402';
const PREFERENCE_ONE_ID = '05000000-0000-4000-8000-000000000501';
const PREFERENCE_TWO_ID = '05000000-0000-4000-8000-000000000502';
|
||||
|
||||
/** Builds a service around a placeholder Db, for tests that never touch the database. */
function makeService() {
  const placeholderDb = {} as Db;
  return new FederationListQueryService(placeholderDb);
}
|
||||
|
||||
/**
 * Builds a service backed by the shared test database.
 *
 * @throws Error when called before the database handle has been assigned.
 */
function makeDbService() {
  const handle = dbHandle;
  if (handle === undefined) {
    throw new Error('test DB not initialized');
  }

  return new FederationListQueryService(handle.db);
}
|
||||
|
||||
/**
 * Populates the test database with the rows the database-backed tests read:
 * two users, two teams, three projects, three missions, four mission-task
 * notes, two insights and two preferences. Inserts run in dependency order
 * (users → teams → projects → missions → mission tasks → memory rows).
 *
 * @throws Error when called before the database handle has been assigned.
 */
async function seedNotesFixture() {
  if (dbHandle === undefined) {
    throw new Error('test DB not initialized');
  }
  const { db } = dbHandle;

  // Each mission-task row gets matching createdAt/updatedAt values.
  const stampedAt = (iso: string) => ({ createdAt: new Date(iso), updatedAt: new Date(iso) });

  await db.insert(users).values([
    {
      id: SUBJECT_USER_ID,
      name: 'Federation Subject',
      email: `${SUBJECT_USER_ID}@example.test`,
      emailVerified: false,
    },
    {
      id: OTHER_USER_ID,
      name: 'Federation Other',
      email: `${OTHER_USER_ID}@example.test`,
      emailVerified: false,
    },
  ]);

  await db.insert(teams).values([
    {
      id: TEAM_ID,
      name: 'FED-M3-05 Team',
      slug: 'fed-m3-05-team',
      ownerId: SUBJECT_USER_ID,
      managerId: SUBJECT_USER_ID,
    },
    {
      id: UNAUTHORIZED_TEAM_ID,
      name: 'FED-M3-05 Unauthorized Team',
      slug: 'fed-m3-05-unauthorized-team',
      ownerId: OTHER_USER_ID,
      managerId: OTHER_USER_ID,
    },
  ]);

  await db.insert(projects).values([
    {
      id: PERSONAL_PROJECT_ID,
      name: 'FED-M3-05 Personal Project',
      ownerId: SUBJECT_USER_ID,
      ownerType: 'user',
    },
    {
      id: TEAM_PROJECT_ID,
      name: 'FED-M3-05 Team Project',
      teamId: TEAM_ID,
      ownerType: 'team',
    },
    {
      id: UNAUTHORIZED_PROJECT_ID,
      name: 'FED-M3-05 Unauthorized Project',
      teamId: UNAUTHORIZED_TEAM_ID,
      ownerType: 'team',
    },
  ]);

  // Every mission is owned by the subject user; only the project differs.
  await db.insert(missions).values([
    {
      id: PERSONAL_MISSION_ID,
      name: 'FED-M3-05 Personal Mission',
      projectId: PERSONAL_PROJECT_ID,
      userId: SUBJECT_USER_ID,
    },
    {
      id: TEAM_MISSION_ID,
      name: 'FED-M3-05 Team Mission',
      projectId: TEAM_PROJECT_ID,
      userId: SUBJECT_USER_ID,
    },
    {
      id: UNAUTHORIZED_MISSION_ID,
      name: 'FED-M3-05 Unauthorized Mission',
      projectId: UNAUTHORIZED_PROJECT_ID,
      userId: SUBJECT_USER_ID,
    },
  ]);

  await db.insert(missionTasks).values([
    {
      id: SUBJECT_TEAM_NOTE_ID,
      missionId: TEAM_MISSION_ID,
      userId: SUBJECT_USER_ID,
      notes: 'subject note on team mission',
      ...stampedAt('2026-06-24T03:00:00.000Z'),
    },
    {
      id: OTHER_TEAM_NOTE_ID,
      missionId: TEAM_MISSION_ID,
      userId: OTHER_USER_ID,
      notes: 'other user note on team mission',
      ...stampedAt('2026-06-24T02:00:00.000Z'),
    },
    {
      id: SUBJECT_PERSONAL_NOTE_ID,
      missionId: PERSONAL_MISSION_ID,
      userId: SUBJECT_USER_ID,
      notes: 'subject note on personal mission',
      ...stampedAt('2026-06-24T01:00:00.000Z'),
    },
    {
      id: SUBJECT_UNAUTHORIZED_NOTE_ID,
      missionId: UNAUTHORIZED_MISSION_ID,
      userId: SUBJECT_USER_ID,
      notes: 'subject note outside grant-visible missions',
      ...stampedAt('2026-06-24T04:00:00.000Z'),
    },
  ]);

  // All memory rows share one timestamp, so their relative order is decided
  // purely by the id tie-break.
  const memoryCreatedAt = new Date('2026-06-24T05:00:00.000Z');
  const memoryStamp = { createdAt: memoryCreatedAt, updatedAt: memoryCreatedAt };

  await db.insert(insights).values([
    {
      id: INSIGHT_ONE_ID,
      userId: SUBJECT_USER_ID,
      content: 'first insight',
      source: 'agent',
      ...memoryStamp,
    },
    {
      id: INSIGHT_TWO_ID,
      userId: SUBJECT_USER_ID,
      content: 'second insight',
      source: 'agent',
      ...memoryStamp,
    },
  ]);

  await db.insert(preferences).values([
    {
      id: PREFERENCE_ONE_ID,
      userId: SUBJECT_USER_ID,
      key: 'fed-m3-05-pref-1',
      value: { enabled: true },
      ...memoryStamp,
    },
    {
      id: PREFERENCE_TWO_ID,
      userId: SUBJECT_USER_ID,
      key: 'fed-m3-05-pref-2',
      value: { enabled: false },
      ...memoryStamp,
    },
  ]);
}
|
||||
|
||||
/**
 * Replaces the service's private `listAllRows` with a mock that resolves to
 * each supplied page once, in call order.
 *
 * @param service Service instance whose row loader is overwritten.
 * @param pages Row arrays to hand back on successive calls.
 * @returns The installed mock, so callers can assert on its invocations.
 */
function stubRows(
  service: FederationListQueryService,
  ...pages: Array<Array<Record<string, unknown>>>
) {
  type ListAllRows = (
    _filter: FederationScopeQueryFilter,
    _rowLimit: number,
    _cursor: unknown,
  ) => Promise<Array<Record<string, unknown>>>;

  const mock = vi.fn();
  pages.forEach((page) => {
    mock.mockResolvedValueOnce(page);
  });

  // `listAllRows` is private, so the assignment goes through a structural cast.
  const target = service as unknown as { listAllRows: ListAllRows };
  target.listAllRows = mock;
  return mock;
}
|
||||
|
||||
describe('FederationListQueryService', () => {
  /** Notes filter granted TEAM_ID; tests only vary whether personal projects count. */
  const notesFilter = (includePersonal: boolean): FederationScopeQueryFilter => ({
    resource: 'notes',
    subjectUserId: SUBJECT_USER_ID,
    includePersonal,
    teamIds: [TEAM_ID],
    limit: 10,
    maxRowsPerQuery: 10,
  });

  /** Reduces a result page to its row ids, keeping page order. */
  const idsOf = (page: { readonly items: ReadonlyArray<Record<string, unknown>> }) =>
    page.items.map((row) => row['id']);

  beforeAll(async () => {
    // A timestamped name keeps each run on its own in-memory database.
    dbHandle = createPgliteDb(`memory://fed-m3-05-list-${Date.now()}`);
    await runPgliteMigrations(dbHandle);
    await seedNotesFixture();
  });

  afterAll(async () => {
    await dbHandle?.close();
    dbHandle = undefined;
  });

  it('denies sensitive resources in native RBAC for M3 list reads', async () => {
    const decision = makeService().evaluateReadAccess({
      grantId: 'grant-1',
      peerId: 'peer-1',
      subjectUserId: 'user-1',
      resource: 'credentials',
    });

    await expect(decision).resolves.toMatchObject({
      allowed: false,
      reason: 'credentials federation list access is not implemented in M3',
    });
  });

  it('allows personal memory reads without requiring team lookup', async () => {
    // makeService() wraps an empty object, so any team lookup would fail here.
    const decision = makeService().evaluateReadAccess({
      grantId: 'grant-1',
      peerId: 'peer-1',
      subjectUserId: 'user-1',
      resource: 'memory',
    });

    await expect(decision).resolves.toEqual({
      allowed: true,
      access: { includePersonal: true, teamIds: [] },
    });
  });

  it('applies the scope row cap and returns an opaque next cursor when truncated', async () => {
    const newest = { id: '3', createdAt: new Date('2026-06-24T03:00:00.000Z') };
    const middle = { id: '2', createdAt: new Date('2026-06-24T02:00:00.000Z') };
    const oldest = { id: '1', createdAt: new Date('2026-06-24T01:00:00.000Z') };

    const service = makeService();
    // First call over-fetches by one row; second call returns the remainder.
    const listAllRows = stubRows(service, [newest, middle, oldest], [oldest]);

    const firstPage = await service.list({ filter: TASK_FILTER });

    expect(firstPage).toEqual({
      items: [newest, middle],
      truncated: true,
      nextCursor: expect.any(String),
    });
    expect(listAllRows).toHaveBeenNthCalledWith(1, TASK_FILTER, 3, undefined);

    const secondPage = await service.list({ filter: TASK_FILTER, cursor: firstPage.nextCursor });

    expect(secondPage).toEqual({ items: [oldest], truncated: false });
    // The decoded cursor must point at the last row of the first page.
    expect(listAllRows).toHaveBeenNthCalledWith(
      2,
      TASK_FILTER,
      3,
      expect.objectContaining({ id: '2' }),
    );
  });

  it('rejects invalid cursors instead of falling back to the first page', async () => {
    const service = makeService();
    stubRows(service, [{ id: '1' }]);

    const attempt = service.list({ filter: TASK_FILTER, cursor: 'not-base64-json' });

    await expect(attempt).rejects.toThrow('Invalid federation list cursor');
  });

  it('throws when a truncated page cannot encode a resumable cursor', async () => {
    const service = makeService();
    // String timestamps cannot be turned into a keyset cursor.
    stubRows(service, [
      { id: '2', createdAt: 'not-a-date' },
      { id: '1', createdAt: 'not-a-date' },
    ]);

    const attempt = service.list({ filter: { ...TASK_FILTER, limit: 1 } });

    await expect(attempt).rejects.toThrow('Federation list cursor cannot be encoded');
  });

  it('throws on unsupported resources instead of crashing pagination', async () => {
    const unsupportedFilter = {
      ...TASK_FILTER,
      resource: 'unknown-resource' as FederationScopeQueryFilter['resource'],
    };

    const attempt = makeService().list({ filter: unsupportedFilter });

    await expect(attempt).rejects.toThrow('Unsupported federation list resource');
  });

  it('does not leak another user mission task notes through team-scoped note reads', async () => {
    const result = await makeDbService().list({ filter: notesFilter(false) });

    const ids = idsOf(result);
    expect(ids).toEqual([SUBJECT_TEAM_NOTE_ID]);
    expect(ids).not.toContain(OTHER_TEAM_NOTE_ID);
  });

  it('does not return subject personal mission task notes when includePersonal is false', async () => {
    const result = await makeDbService().list({ filter: notesFilter(false) });

    expect(idsOf(result)).not.toContain(SUBJECT_PERSONAL_NOTE_ID);
  });

  it('does not return subject notes from missions outside the grant-visible project set', async () => {
    const result = await makeDbService().list({ filter: notesFilter(true) });

    const ids = idsOf(result);
    expect(ids).toContain(SUBJECT_PERSONAL_NOTE_ID);
    expect(ids).toContain(SUBJECT_TEAM_NOTE_ID);
    expect(ids).not.toContain(SUBJECT_UNAUTHORIZED_NOTE_ID);
    expect(ids).not.toContain(OTHER_TEAM_NOTE_ID);
  });

  it('paginates memory deterministically across insights and preferences', async () => {
    const service = makeDbService();
    const filter: FederationScopeQueryFilter = {
      resource: 'memory',
      subjectUserId: SUBJECT_USER_ID,
      includePersonal: true,
      teamIds: [],
      limit: 2,
      maxRowsPerQuery: 2,
    };

    const firstPage = await service.list({ filter });
    const secondPage = await service.list({ filter, cursor: firstPage.nextCursor });

    const firstPageIds = idsOf(firstPage);
    const secondPageIds = idsOf(secondPage);
    const allIds = [...firstPageIds, ...secondPageIds];

    expect(firstPage).toMatchObject({ truncated: true, nextCursor: expect.any(String) });
    expect(firstPageIds).toEqual([INSIGHT_TWO_ID, INSIGHT_ONE_ID]);
    expect(secondPageIds).toEqual([PREFERENCE_TWO_ID, PREFERENCE_ONE_ID]);
    // No row may appear on both pages.
    expect(new Set(allIds).size).toBe(allIds.length);
  });
});
|
||||
@@ -0,0 +1,188 @@
|
||||
import 'reflect-metadata';
|
||||
import { RequestMethod } from '@nestjs/common';
|
||||
import type { FastifyRequest } from 'fastify';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { FederationAuthGuard } from '../../federation-auth.guard.js';
|
||||
import type {
|
||||
FederationScopeEvaluationResult,
|
||||
FederationScopeQueryFilter,
|
||||
} from '../../scope.service.js';
|
||||
import { ListController } from '../list.controller.js';
|
||||
import type { FederationListQueryResult } from '../list-query.service.js';
|
||||
|
||||
// Filter the mocked scope service hands back by default.
const TASK_FILTER: FederationScopeQueryFilter = {
  resource: 'tasks',
  subjectUserId: 'user-1',
  includePersonal: true,
  teamIds: ['team-1'],
  limit: 10,
  maxRowsPerQuery: 25,
};

// Context object the tests attach to the request as `federationContext`.
const FEDERATION_CONTEXT = {
  grantId: 'grant-1',
  peerId: 'peer-1',
  subjectUserId: 'user-1',
  scope: { resources: ['tasks'], max_rows_per_query: 25 },
};
|
||||
|
||||
/** Builds a minimal request stand-in that carries only the federation context. */
function makeRequest(): FastifyRequest {
  const partialRequest = { federationContext: FEDERATION_CONTEXT };
  return partialRequest as unknown as FastifyRequest;
}
|
||||
|
||||
/**
 * Builds an "allowed" scope evaluation result.
 *
 * @param filter Query filter to embed; defaults to TASK_FILTER.
 */
function allowedScope(
  filter: FederationScopeQueryFilter = TASK_FILTER,
): FederationScopeEvaluationResult {
  const granted: FederationScopeEvaluationResult = { allowed: true, filter };
  return granted;
}
|
||||
|
||||
/**
 * Wires a ListController to mocked scope and query collaborators.
 *
 * @param opts.scopeResult Value `scope.evaluateAccess` resolves to; defaults to `allowedScope()`.
 * @param opts.queryResult Value `query.list` resolves to; defaults to a single untruncated task row.
 * @returns The controller together with both mocks for assertions.
 */
function makeController(opts?: {
  scopeResult?: FederationScopeEvaluationResult;
  queryResult?: FederationListQueryResult;
}) {
  const scopeResult = opts?.scopeResult ?? allowedScope();
  const queryResult = opts?.queryResult ?? {
    items: [
      {
        id: 'task-1',
        title: 'Federated task',
        createdAt: new Date('2026-06-24T00:00:00.000Z'),
      },
    ],
    truncated: false,
  };

  const scope = { evaluateAccess: vi.fn().mockResolvedValue(scopeResult) };
  const query = {
    evaluateReadAccess: vi.fn(),
    list: vi.fn().mockResolvedValue(queryResult),
  };

  // The mocks only implement the members the tests exercise, hence the casts.
  const controller = new ListController(scope as never, query as never);
  return { controller, scope, query };
}
|
||||
|
||||
describe('ListController', () => {
  // Shape every malformed-body rejection is expected to match.
  const invalidRequestError = {
    response: { error: { code: 'invalid_request' } },
    status: 400,
  };

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('declares POST /api/federation/v1/list/:resource protected only by FederationAuthGuard', () => {
    const handler = ListController.prototype.list;

    expect(Reflect.getMetadata('path', ListController)).toBe('api/federation/v1/list');
    expect(Reflect.getMetadata('path', handler)).toBe(':resource');
    expect(Reflect.getMetadata('method', handler)).toBe(RequestMethod.POST);
    expect(Reflect.getMetadata('__guards__', ListController)).toEqual([FederationAuthGuard]);
  });

  it('runs AuthGuard context through ScopeService and returns local-source tagged rows', async () => {
    const { controller, scope, query } = makeController();

    const response = await controller.list('tasks', makeRequest(), { limit: 10 });

    // The query service doubles as the native-RBAC evaluator handed to the scope service.
    expect(scope.evaluateAccess).toHaveBeenCalledWith({
      context: FEDERATION_CONTEXT,
      resource: 'tasks',
      requestedLimit: 10,
      nativeRbac: query,
    });
    expect(query.list).toHaveBeenCalledWith({ filter: TASK_FILTER, cursor: undefined });
    expect(response).toEqual({
      items: [
        {
          id: 'task-1',
          title: 'Federated task',
          createdAt: new Date('2026-06-24T00:00:00.000Z'),
          _source: 'local',
        },
      ],
    });
  });

  it('preserves pagination metadata when row cap truncates the query layer result', async () => {
    const truncatedResult = {
      items: [{ id: 'task-1' }],
      nextCursor: 'cursor-2',
      truncated: true,
    };
    const { controller } = makeController({ queryResult: truncatedResult });

    const response = await controller.list('tasks', makeRequest(), { cursor: 'cursor-1' });

    expect(response).toEqual({
      items: [{ id: 'task-1', _source: 'local' }],
      nextCursor: 'cursor-2',
      _truncated: true,
    });
  });

  it('returns a federation error envelope when auth guard context is missing', async () => {
    const { controller, scope, query } = makeController();
    const requestWithoutContext = {} as unknown as FastifyRequest;

    const attempt = controller.list('tasks', requestWithoutContext, {});

    await expect(attempt).rejects.toMatchObject({
      response: {
        error: {
          code: 'unauthorized',
          message: 'Federation context missing',
        },
      },
      status: 401,
    });
    expect(scope.evaluateAccess).not.toHaveBeenCalled();
    expect(query.list).not.toHaveBeenCalled();
  });

  it('returns a federation error envelope when scope evaluation denies access', async () => {
    const denialMessage = 'Requested federation resource is explicitly excluded by grant scope';
    const { controller, query } = makeController({
      scopeResult: {
        allowed: false,
        deny: {
          code: 'resource_excluded',
          stage: 'resource_exclusion',
          statusCode: 403,
          message: denialMessage,
          grantId: 'grant-1',
          peerId: 'peer-1',
          subjectUserId: 'user-1',
          resource: 'credentials',
        },
      },
    });

    const attempt = controller.list('credentials', makeRequest(), {});

    // The deny code is surfaced as the generic `scope_violation` envelope code.
    await expect(attempt).rejects.toMatchObject({
      response: {
        error: {
          code: 'scope_violation',
          message: denialMessage,
        },
      },
      status: 403,
    });
    expect(query.list).not.toHaveBeenCalled();
  });

  it('rejects malformed request body fields before querying storage', async () => {
    const { controller, scope, query } = makeController();

    await expect(controller.list('tasks', makeRequest(), { cursor: 123 })).rejects.toMatchObject(
      invalidRequestError,
    );
    await expect(controller.list('tasks', makeRequest(), { limit: false })).rejects.toMatchObject(
      invalidRequestError,
    );
    await expect(controller.list('tasks', makeRequest(), { limit: 'abc' })).rejects.toMatchObject(
      invalidRequestError,
    );

    expect(scope.evaluateAccess).not.toHaveBeenCalled();
    expect(query.list).not.toHaveBeenCalled();
  });
});
|
||||
408
apps/gateway/src/federation/server/verbs/list-query.service.ts
Normal file
408
apps/gateway/src/federation/server/verbs/list-query.service.ts
Normal file
@@ -0,0 +1,408 @@
|
||||
/**
|
||||
* Federation list query layer (FED-M3-05).
|
||||
*
|
||||
* Read-only DB adapter used by ListController after FederationAuthGuard and
|
||||
* FederationScopeService have established the subject user, allowed resource,
|
||||
* native-RBAC intersection, and row cap. Audit writes are intentionally
|
||||
* deferred to M4.
|
||||
*/
|
||||
|
||||
import { Inject, Injectable } from '@nestjs/common';
|
||||
import {
|
||||
and,
|
||||
desc,
|
||||
eq,
|
||||
inArray,
|
||||
insights,
|
||||
isNotNull,
|
||||
lt,
|
||||
missionTasks,
|
||||
missions,
|
||||
or,
|
||||
preferences,
|
||||
projects,
|
||||
tasks,
|
||||
teamMembers,
|
||||
type Db,
|
||||
} from '@mosaicstack/db';
|
||||
import type {
|
||||
FederationNativeRbacEvaluator,
|
||||
FederationNativeRbacRequest,
|
||||
FederationNativeRbacResult,
|
||||
FederationScopeQueryFilter,
|
||||
} from '../scope.service.js';
|
||||
import { DB } from '../../../database/database.module.js';
|
||||
|
||||
/** Input to `FederationListQueryService.list`. */
export interface FederationListQueryRequest {
  /** Opaque cursor from a previous page's `nextCursor`; omit to start at the first page. */
  readonly cursor?: string;
  /** Scope filter the query runs under. */
  readonly filter: FederationScopeQueryFilter;
}

/** One page of rows returned by the list query layer. */
export interface FederationListQueryResult<T extends object = Record<string, unknown>> {
  /** True when more rows than the page limit were available. */
  readonly truncated: boolean;
  /** Rows on this page. */
  readonly items: T[];
  /** Cursor for the following page; present only when `truncated` is true. */
  readonly nextCursor?: string;
}
|
||||
|
||||
/** Physical table a memory row came from. */
type CursorSource = 'insights' | 'preferences';

/** Hidden, non-enumerable key under which a row carries its CursorSource. */
const CURSOR_SOURCE = Symbol('federationCursorSource');

/** A result row, optionally tagged with the table that produced it. */
type RowObject = Record<string, unknown> & { readonly [CURSOR_SOURCE]?: CursorSource };

/** Decoded keyset position: resume strictly after (createdAt, id). */
interface KeysetCursor {
  readonly createdAt: Date;
  readonly id: string;
  readonly source?: CursorSource;
}

/**
 * Serialises the keyset position of `row` into an opaque base64url token.
 *
 * The token is JSON of `{ createdAt, id }` plus `source` when the row is
 * tagged with CURSOR_SOURCE.
 *
 * @param row Row whose `createdAt` and `id` mark the page boundary.
 * @returns The base64url-encoded cursor.
 * @throws Error 'Federation list cursor cannot be encoded' when `createdAt`
 *   is not a valid Date or `id` is not a non-empty string.
 */
function encodeCursor(row: RowObject): string {
  const createdAt = row['createdAt'];
  const id = row['id'];

  // An invalid Date passes `instanceof` but makes toISOString() throw a
  // RangeError, so it is rejected here with the domain error instead.
  const hasValidTimestamp = createdAt instanceof Date && !Number.isNaN(createdAt.getTime());
  // decodeCursor refuses empty ids, so never emit a token it would reject.
  const hasValidId = typeof id === 'string' && id.length > 0;
  if (!hasValidTimestamp || !hasValidId) {
    throw new Error('Federation list cursor cannot be encoded');
  }

  const source = row[CURSOR_SOURCE];
  const payload = {
    createdAt: (createdAt as Date).toISOString(),
    id,
    ...(source ? { source } : {}),
  };
  return Buffer.from(JSON.stringify(payload), 'utf8').toString('base64url');
}
|
||||
|
||||
/**
 * Parses an opaque list cursor back into a keyset position.
 *
 * @param cursor base64url token, or undefined for "first page".
 * @returns The decoded position, or undefined when no cursor was supplied.
 * @throws Error 'Invalid federation list cursor' for any token that is not
 *   base64url JSON of an object with a string `createdAt` that parses to a
 *   valid date, a non-empty string `id`, and an optional `source` of
 *   'insights' or 'preferences'. The specific cause is not exposed.
 */
function decodeCursor(cursor: string | undefined): KeysetCursor | undefined {
  if (cursor === undefined) {
    return undefined;
  }

  const reject = (): never => {
    throw new Error('Invalid federation list cursor');
  };

  let payload: unknown;
  try {
    payload = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf8'));
  } catch {
    return reject();
  }
  if (payload === null || typeof payload !== 'object') {
    return reject();
  }

  const fields = payload as { createdAt?: unknown; id?: unknown; source?: unknown };
  const rawCreatedAt = fields.createdAt;
  const rawId = fields.id;
  const rawSource = fields.source;

  if (typeof rawCreatedAt !== 'string' || typeof rawId !== 'string' || rawId.length === 0) {
    return reject();
  }

  const sourceIsKnown = rawSource === 'insights' || rawSource === 'preferences';
  if (rawSource !== undefined && !sourceIsKnown) {
    return reject();
  }

  const createdAt = new Date(rawCreatedAt);
  if (Number.isNaN(createdAt.getTime())) {
    return reject();
  }

  return sourceIsKnown
    ? { createdAt, id: rawId, source: rawSource }
    : { createdAt, id: rawId };
}
|
||||
|
||||
/**
 * Cuts an over-fetched row list down to one page.
 *
 * @param rows Rows in final order; callers fetch `limit + 1` so that an extra
 *   row signals a further page.
 * @param limit Maximum number of rows to return.
 * @returns The first `limit` rows, `truncated` set when more rows were
 *   supplied than fit, and in that case a `nextCursor` built from the last
 *   returned row.
 * @throws Error propagated from encodeCursor when the boundary row cannot be
 *   turned into a cursor (including the case where the page itself is empty).
 */
function paginate<T extends RowObject>(rows: T[], limit: number): FederationListQueryResult<T> {
  const items = rows.slice(0, limit);
  const hasMore = rows.length > limit;

  if (!hasMore) {
    return { items, truncated: false };
  }

  // With no row on the page there is nothing to resume from; the empty
  // object makes encodeCursor raise its own error.
  const boundaryRow = items[items.length - 1] ?? {};
  const nextCursor = encodeCursor(boundaryRow);
  return { items, truncated: true, nextCursor };
}
|
||||
|
||||
/**
 * Tags `row` in place with the table it was read from.
 *
 * The tag is stored under the CURSOR_SOURCE symbol as a non-enumerable,
 * non-configurable property.
 *
 * @param row Row to tag; mutated and returned.
 * @param source Table that produced the row.
 * @returns The same `row` instance.
 */
function markCursorSource<T extends RowObject>(row: T, source: CursorSource): T {
  const hiddenTag: PropertyDescriptor = {
    value: source,
    enumerable: false,
    configurable: false,
  };
  return Object.defineProperty(row, CURSOR_SOURCE, hiddenTag);
}
|
||||
|
||||
/**
 * Returns a copy of `rows` ordered newest first, with ties broken by `id`
 * descending. The input array is left untouched.
 *
 * Rows whose `createdAt` is not a Date sort as if created at the epoch, and
 * a missing `id` is treated as the empty string.
 *
 * Ids are compared by UTF-16 code unit rather than `localeCompare`, so the
 * result does not depend on the host locale or ICU tailoring. This order
 * defines which row becomes the page-boundary cursor.
 * NOTE(review): assumes the database orders ids bytewise (e.g. uuid columns) — confirm.
 *
 * @param rows Rows to order.
 * @returns A new, sorted array.
 */
function sortRows(rows: RowObject[]): RowObject[] {
  const timeOf = (row: RowObject): number => {
    const createdAt = row['createdAt'];
    return createdAt instanceof Date ? createdAt.getTime() : 0;
  };
  const idOf = (row: RowObject): string => String(row['id'] ?? '');

  return [...rows].sort((a, b) => {
    const aTime = timeOf(a);
    const bTime = timeOf(b);
    if (aTime !== bTime) {
      return bTime - aTime;
    }

    const aId = idOf(a);
    const bId = idOf(b);
    if (aId === bId) {
      return 0;
    }
    // Descending: the greater id comes first.
    return aId > bId ? -1 : 1;
  });
}
|
||||
|
||||
@Injectable()
|
||||
export class FederationListQueryService implements FederationNativeRbacEvaluator {
|
||||
// Receives the Db handle registered under the DB injection token; the query methods of this service read through it.
constructor(@Inject(DB) private readonly db: Db) {}
|
||||
|
||||
/**
 * Native-RBAC decision for a federated list read.
 *
 * - `credentials` and `api_keys` are always denied.
 * - `memory` is allowed as personal-only, without a team lookup.
 * - Every other resource is allowed with personal access plus the teams the
 *   subject user is a member of.
 *
 * @param request Resource being read and the subject user it is read for.
 * @returns The allow/deny decision with the access set to intersect with the grant scope.
 */
async evaluateReadAccess(
  request: FederationNativeRbacRequest,
): Promise<FederationNativeRbacResult> {
  const { resource, subjectUserId } = request;

  switch (resource) {
    case 'credentials':
    case 'api_keys':
      return {
        allowed: false,
        reason: `${resource} federation list access is not implemented in M3`,
        details: { resource },
      };

    case 'memory':
      return { allowed: true, access: { includePersonal: true, teamIds: [] } };

    default: {
      const teamIds = await this.listSubjectTeamIds(subjectUserId);
      return { allowed: true, access: { includePersonal: true, teamIds } };
    }
  }
}
|
||||
|
||||
/**
 * Returns one page of rows for an already-authorised filter.
 *
 * One row more than `filter.limit` is requested so the extra row can signal
 * that a further page exists.
 *
 * @param request Scope filter plus an optional cursor from a previous page.
 * @returns Up to `filter.limit` rows, with `truncated`/`nextCursor` set when more remain.
 * @throws Error 'Invalid federation list cursor' when the cursor cannot be decoded.
 */
async list<T extends RowObject = RowObject>(
  request: FederationListQueryRequest,
): Promise<FederationListQueryResult<T>> {
  const { filter, cursor: encodedCursor } = request;
  const resumeAfter = decodeCursor(encodedCursor);
  const fetched = await this.listAllRows(filter, filter.limit + 1, resumeAfter);
  return paginate(fetched as T[], filter.limit);
}
|
||||
|
||||
/**
 * Dispatches to the per-resource loader.
 *
 * @param filter Scope filter; `filter.resource` selects the loader.
 * @param rowLimit Maximum number of rows to load.
 * @param cursor Keyset position to resume after, if any.
 * @returns Rows for `tasks`, `notes` or `memory`; always empty for `credentials` and `api_keys`.
 * @throws Error 'Unsupported federation list resource: …' for any other resource value.
 */
private async listAllRows(
  filter: FederationScopeQueryFilter,
  rowLimit: number,
  cursor: KeysetCursor | undefined,
): Promise<RowObject[]> {
  const { resource } = filter;

  if (resource === 'tasks') {
    return this.listTasks(filter, rowLimit, cursor);
  }
  if (resource === 'notes') {
    return this.listNotes(filter, rowLimit, cursor);
  }
  if (resource === 'memory') {
    return this.listMemory(filter, rowLimit, cursor);
  }
  if (resource === 'credentials' || resource === 'api_keys') {
    // Sensitive resources never yield rows from this layer.
    return [];
  }

  throw new Error(`Unsupported federation list resource: ${String(filter.resource)}`);
}
|
||||
|
||||
/**
 * Looks up the teams a user is a member of.
 *
 * @param subjectUserId User whose `teamMembers` rows are read.
 * @returns The team id of each matching membership row.
 */
private async listSubjectTeamIds(subjectUserId: string): Promise<string[]> {
  const memberships = await this.db
    .select({ teamId: teamMembers.teamId })
    .from(teamMembers)
    .where(eq(teamMembers.userId, subjectUserId));

  return memberships.map(({ teamId }) => teamId);
}
|
||||
|
||||
/**
 * Resolves the project ids visible under `filter`.
 *
 * A project is visible when it is user-owned by the subject user (only if
 * `filter.includePersonal`) or team-owned by one of `filter.teamIds`.
 *
 * @param filter Scope filter supplying the subject user, personal flag and team ids.
 * @returns Matching project ids; empty when neither personal nor team access applies.
 */
private async listAccessibleProjectIds(filter: FederationScopeQueryFilter): Promise<string[]> {
  const wantsPersonal = filter.includePersonal;
  const wantsTeams = filter.teamIds.length > 0;

  if (!wantsPersonal && !wantsTeams) {
    return [];
  }

  // Built lazily so the team clause is never constructed from an empty id list.
  const personalProjects = () =>
    and(eq(projects.ownerType, 'user'), eq(projects.ownerId, filter.subjectUserId));
  const teamProjects = () =>
    and(eq(projects.ownerType, 'team'), inArray(projects.teamId, [...filter.teamIds]));

  const visibility =
    wantsPersonal && wantsTeams
      ? or(personalProjects(), teamProjects())
      : wantsPersonal
        ? personalProjects()
        : teamProjects();

  const matches = await this.db.select({ id: projects.id }).from(projects).where(visibility);

  return matches.map(({ id }) => id);
}
|
||||
|
||||
/**
 * Resolves the missions that belong to the given projects.
 *
 * @param projectIds Project ids to match against `missions.projectId`.
 * @returns Matching mission ids; empty, without querying, when `projectIds` is empty.
 */
private async listMissionIds(projectIds: readonly string[]): Promise<string[]> {
  const hasProjects = projectIds.length > 0;
  if (!hasProjects) {
    return [];
  }

  const matches = await this.db
    .select({ id: missions.id })
    .from(missions)
    .where(inArray(missions.projectId, [...projectIds]));

  return matches.map(({ id }) => id);
}
|
||||
|
||||
/**
 * Loads tasks visible under `filter`, newest first.
 *
 * A task is in scope when its project is accessible or its mission belongs
 * to an accessible project.
 *
 * @param filter Scope filter used to resolve accessible projects.
 * @param rowLimit Maximum number of rows to load.
 * @param cursor When present, only rows strictly after this (createdAt, id) position are loaded.
 * @returns Task rows ordered by createdAt then id, both descending; empty when nothing is in scope.
 */
private async listTasks(
  filter: FederationScopeQueryFilter,
  rowLimit: number,
  cursor: KeysetCursor | undefined,
): Promise<RowObject[]> {
  const projectIds = await this.listAccessibleProjectIds(filter);
  const missionIds = await this.listMissionIds(projectIds);

  const hasProjects = projectIds.length > 0;
  const hasMissions = missionIds.length > 0;
  if (!hasProjects && !hasMissions) {
    return [];
  }

  // Built lazily so no clause is constructed from an empty id list.
  const byProject = () => inArray(tasks.projectId, projectIds);
  const byMission = () => inArray(tasks.missionId, missionIds);
  const scopeClause =
    hasProjects && hasMissions
      ? or(byProject(), byMission())
      : hasProjects
        ? byProject()
        : byMission();

  // Keyset predicate: older timestamp, or same timestamp with a smaller id.
  const cursorClause =
    cursor === undefined
      ? undefined
      : or(
          lt(tasks.createdAt, cursor.createdAt),
          and(eq(tasks.createdAt, cursor.createdAt), lt(tasks.id, cursor.id)),
        );

  const taskColumns = {
    id: tasks.id,
    title: tasks.title,
    description: tasks.description,
    status: tasks.status,
    priority: tasks.priority,
    projectId: tasks.projectId,
    missionId: tasks.missionId,
    assignee: tasks.assignee,
    tags: tasks.tags,
    dueDate: tasks.dueDate,
    metadata: tasks.metadata,
    createdAt: tasks.createdAt,
    updatedAt: tasks.updatedAt,
  };

  const found = await this.db
    .select(taskColumns)
    .from(tasks)
    .where(and(scopeClause, cursorClause))
    .orderBy(desc(tasks.createdAt), desc(tasks.id))
    .limit(rowLimit);

  return sortRows(found as RowObject[]);
}
|
||||
|
||||
/**
 * Loads the subject user's mission-task notes visible under `filter`,
 * newest first. Rows whose notes are NULL or the empty string are omitted.
 *
 * Empty-string notes are dropped after each query, so a single batch can
 * come back short even though more matching rows exist. Further keyset
 * batches are therefore fetched until `rowLimit` usable rows are collected
 * or the table is exhausted. Otherwise the caller would see a short page,
 * report `truncated: false`, and later notes would be unreachable.
 *
 * @param filter Scope filter used to resolve accessible missions.
 * @param rowLimit Maximum number of rows to return.
 * @param cursor When present, only rows strictly after this (createdAt, id) position are loaded.
 * @returns Note rows ordered by createdAt then id, both descending; at most `rowLimit` of them.
 */
private async listNotes(
  filter: FederationScopeQueryFilter,
  rowLimit: number,
  cursor: KeysetCursor | undefined,
): Promise<RowObject[]> {
  const projectIds = await this.listAccessibleProjectIds(filter);
  const missionIds = await this.listMissionIds(projectIds);

  if (missionIds.length === 0) {
    return [];
  }

  // mission_tasks rows are user-scoped even when the mission belongs to a team.
  // Team visibility can narrow the mission set, but it must never widen the
  // query to other users' mission task notes.
  const scopeClause = and(
    eq(missionTasks.userId, filter.subjectUserId),
    inArray(missionTasks.missionId, missionIds),
  );

  const collected: RowObject[] = [];
  let position: KeysetCursor | undefined = cursor;

  while (collected.length < rowLimit) {
    // Keyset predicate: older timestamp, or same timestamp with a smaller id.
    const cursorClause = position
      ? or(
          lt(missionTasks.createdAt, position.createdAt),
          and(eq(missionTasks.createdAt, position.createdAt), lt(missionTasks.id, position.id)),
        )
      : undefined;

    const batch = await this.db
      .select({
        id: missionTasks.id,
        missionId: missionTasks.missionId,
        taskId: missionTasks.taskId,
        status: missionTasks.status,
        content: missionTasks.notes,
        createdAt: missionTasks.createdAt,
        updatedAt: missionTasks.updatedAt,
      })
      .from(missionTasks)
      .where(and(scopeClause, cursorClause, isNotNull(missionTasks.notes)))
      .orderBy(desc(missionTasks.createdAt), desc(missionTasks.id))
      .limit(rowLimit);

    for (const row of batch) {
      if (row.content !== '') {
        collected.push(row as RowObject);
      }
    }

    // A short batch means the table is exhausted; otherwise resume after
    // the last row fetched, whether or not it was kept.
    const lastFetched = batch[batch.length - 1];
    if (lastFetched === undefined || batch.length < rowLimit) {
      break;
    }
    position = { createdAt: lastFetched.createdAt, id: lastFetched.id };
  }

  return sortRows(collected.slice(0, rowLimit));
}
|
||||
|
||||
/**
 * Lists the subject user's memory rows, drawn from the `insights` and
 * `preferences` tables.
 *
 * Memory spans two physical tables. To keep pagination deterministic and
 * resumable without a SQL UNION, rows are emitted in a fixed block order:
 * every insight first, then preferences. The cursor records which table
 * produced the boundary row so one table's keyset is never applied to the
 * other (which could duplicate or skip rows at equal timestamps).
 *
 * @param filter Scope filter; nothing is returned unless `includePersonal` is set.
 * @param rowLimit Maximum number of rows returned across both tables.
 * @param cursor Keyset boundary of the previous page, if any; must carry `source`.
 * @returns Insight rows followed by preference rows, each passed through
 *   `markCursorSource` with its table name.
 * @throws Error with message `Invalid federation list cursor` when a cursor is
 *   supplied without a `source`.
 */
private async listMemory(
  filter: FederationScopeQueryFilter,
  rowLimit: number,
  cursor: KeysetCursor | undefined,
): Promise<RowObject[]> {
  if (!filter.includePersonal) {
    return [];
  }
  if (cursor && cursor.source === undefined) {
    throw new Error('Invalid federation list cursor');
  }

  const collected: RowObject[] = [];

  // Skip the insights block entirely when the previous page already ended
  // inside the preferences block.
  const resumeInPreferences = cursor?.source === 'preferences';
  if (!resumeInPreferences) {
    const insightColumns = {
      id: insights.id,
      kind: insights.source,
      content: insights.content,
      category: insights.category,
      relevanceScore: insights.relevanceScore,
      metadata: insights.metadata,
      createdAt: insights.createdAt,
      updatedAt: insights.updatedAt,
    };
    const insightCursorClause = !cursor
      ? undefined
      : or(
          lt(insights.createdAt, cursor.createdAt),
          and(eq(insights.createdAt, cursor.createdAt), lt(insights.id, cursor.id)),
        );

    const insightRows = await this.db
      .select(insightColumns)
      .from(insights)
      .where(and(eq(insights.userId, filter.subjectUserId), insightCursorClause))
      .orderBy(desc(insights.createdAt), desc(insights.id))
      .limit(rowLimit);

    for (const row of insightRows as RowObject[]) {
      collected.push(markCursorSource(row, 'insights'));
    }
  }

  // Preferences only fill whatever room the insights block left over.
  const remaining = rowLimit - collected.length;
  if (remaining <= 0) {
    return collected;
  }

  // The keyset is applied to preferences only when the cursor itself came from
  // preferences; otherwise the preferences block starts from its first row.
  const preferenceCursorClause =
    cursor && cursor.source === 'preferences'
      ? or(
          lt(preferences.createdAt, cursor.createdAt),
          and(eq(preferences.createdAt, cursor.createdAt), lt(preferences.id, cursor.id)),
        )
      : undefined;

  const preferenceColumns = {
    id: preferences.id,
    kind: preferences.category,
    key: preferences.key,
    value: preferences.value,
    source: preferences.source,
    mutable: preferences.mutable,
    createdAt: preferences.createdAt,
    updatedAt: preferences.updatedAt,
  };

  const preferenceRows = await this.db
    .select(preferenceColumns)
    .from(preferences)
    .where(and(eq(preferences.userId, filter.subjectUserId), preferenceCursorClause))
    .orderBy(desc(preferences.createdAt), desc(preferences.id))
    .limit(remaining);

  for (const row of preferenceRows as RowObject[]) {
    collected.push(markCursorSource(row, 'preferences'));
  }
  return collected;
}
|
||||
}
|
||||
147
apps/gateway/src/federation/server/verbs/list.controller.ts
Normal file
147
apps/gateway/src/federation/server/verbs/list.controller.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
/**
|
||||
* Federation list verb (FED-M3-05).
|
||||
*
|
||||
* POST /api/federation/v1/list/:resource
|
||||
*
|
||||
* Pipeline: FederationAuthGuard attaches the active grant context, then
|
||||
* FederationScopeService enforces grant scope + native RBAC intersection, then
|
||||
* the read-only query layer returns capped rows tagged with `_source`. Read
|
||||
* audit-log writes are deferred to M4; this controller does not persist request
|
||||
* or response bodies.
|
||||
*/
|
||||
|
||||
import {
|
||||
Body,
|
||||
Controller,
|
||||
HttpException,
|
||||
Inject,
|
||||
Param,
|
||||
Post,
|
||||
Req,
|
||||
UseGuards,
|
||||
} from '@nestjs/common';
|
||||
import type { FastifyRequest } from 'fastify';
|
||||
import {
|
||||
FederationInvalidRequestError,
|
||||
FederationScopeViolationError,
|
||||
FederationUnauthorizedError,
|
||||
SOURCE_LOCAL,
|
||||
tagWithSource,
|
||||
type FederationListResponse,
|
||||
type SourceTag,
|
||||
} from '@mosaicstack/types';
|
||||
import { FederationAuthGuard } from '../federation-auth.guard.js';
|
||||
import '../federation-context.js';
|
||||
import { FederationScopeService } from '../scope.service.js';
|
||||
import { FederationListQueryService } from './list-query.service.js';
|
||||
|
||||
/**
 * Raw JSON body accepted by the list verb. Both fields are typed `unknown`
 * because they arrive unvalidated; `parseLimit` and `parseCursor` narrow them.
 */
interface FederationListRequestBody {
  /** Requested page size; validated by `parseLimit`. */
  readonly limit?: unknown;
  /** Pagination cursor from a previous response; validated by `parseCursor`. */
  readonly cursor?: unknown;
}
|
||||
|
||||
/** A returned row: arbitrary columns intersected with the `SourceTag` fields. */
type FederatedRow = SourceTag & Record<string, unknown>;
|
||||
|
||||
/**
 * Extracts the optional `limit` from a list request body.
 *
 * Numbers are used as-is; non-blank strings are converted with `Number()`
 * (so any form `Number()` understands is accepted). Every other type is
 * treated as invalid.
 *
 * @param body Parsed request body, if one was sent.
 * @returns The limit as a safe integer >= 1, or `undefined` when no limit was given.
 * @throws HttpException (400, federation invalid-request envelope) when the
 *   limit is present but is not a positive safe integer.
 */
function parseLimit(body: FederationListRequestBody | undefined): number | undefined {
  const raw = body?.limit;
  if (raw === undefined) {
    return undefined;
  }

  let candidate = Number.NaN;
  if (typeof raw === 'number') {
    candidate = raw;
  } else if (typeof raw === 'string' && raw.trim().length > 0) {
    candidate = Number(raw);
  }

  // NaN, fractions, and out-of-range magnitudes all fail isSafeInteger.
  if (Number.isSafeInteger(candidate) && candidate >= 1) {
    return candidate;
  }

  const envelope = new FederationInvalidRequestError(
    'Federation list limit must be a positive integer',
  ).toEnvelope();
  throw new HttpException(envelope, 400);
}
|
||||
|
||||
/**
 * Extracts the optional pagination `cursor` from a list request body.
 *
 * @param body Parsed request body, if one was sent.
 * @returns The cursor string unchanged, or `undefined` when none was given.
 * @throws HttpException (400, federation invalid-request envelope) when the
 *   cursor is present but is not a string.
 */
function parseCursor(body: FederationListRequestBody | undefined): string | undefined {
  const raw = body?.cursor;
  switch (typeof raw) {
    case 'undefined':
      return undefined;
    case 'string':
      return raw;
    default: {
      const envelope = new FederationInvalidRequestError(
        'Federation list cursor must be a string',
      ).toEnvelope();
      throw new HttpException(envelope, 400);
    }
  }
}
|
||||
|
||||
/**
 * HTTP entry point for `POST /api/federation/v1/list/:resource`.
 *
 * Every route is behind `FederationAuthGuard`; the handler then asks
 * `FederationScopeService` whether the request is allowed and, if so, runs the
 * list query and tags the returned rows with `SOURCE_LOCAL`.
 */
@Controller('api/federation/v1/list')
@UseGuards(FederationAuthGuard)
export class ListController {
  constructor(
    @Inject(FederationScopeService) private readonly scope: FederationScopeService,
    @Inject(FederationListQueryService) private readonly query: FederationListQueryService,
  ) {}

  /**
   * Lists rows of `resource` visible to the authenticated federation grant.
   *
   * @param resource Resource name taken from the route.
   * @param request Fastify request; must carry `federationContext`.
   * @param body Optional `{ limit, cursor }` payload.
   * @returns The tagged items, plus `nextCursor` when the query produced one
   *   and `_truncated: true` when the query reported truncation.
   * @throws HttpException 401 when the federation context is missing, 400 for
   *   an invalid limit/cursor, or the scope service's deny status code.
   */
  @Post(':resource')
  async list(
    @Param('resource') resource: string,
    @Req() request: FastifyRequest,
    @Body() body?: FederationListRequestBody,
  ): Promise<FederationListResponse<FederatedRow>> {
    const context = request.federationContext;
    if (!context) {
      const envelope = new FederationUnauthorizedError('Federation context missing').toEnvelope();
      throw new HttpException(envelope, 401);
    }

    const requestedLimit = parseLimit(body);
    const cursor = parseCursor(body);

    const scopeResult = await this.scope.evaluateAccess({
      context,
      resource,
      requestedLimit,
      nativeRbac: this.query,
    });

    if (!scopeResult.allowed) {
      const { deny } = scopeResult;
      // A 400 deny is reported as an invalid request; every other deny is a
      // scope violation. The HTTP status always mirrors the deny status code.
      const envelope =
        deny.statusCode === 400
          ? new FederationInvalidRequestError(deny.message, deny).toEnvelope()
          : new FederationScopeViolationError(deny.message, deny).toEnvelope();
      throw new HttpException(envelope, deny.statusCode);
    }

    let result: Awaited<ReturnType<FederationListQueryService['list']>>;
    try {
      result = await this.query.list({ filter: scopeResult.filter, cursor });
    } catch (error: unknown) {
      // Only the query layer's cursor error is translated to a 400; anything
      // else propagates untouched.
      const isCursorError =
        error instanceof Error && error.message === 'Invalid federation list cursor';
      if (!isCursorError) {
        throw error;
      }
      throw new HttpException(
        new FederationInvalidRequestError('Federation list cursor is invalid').toEnvelope(),
        400,
      );
    }

    // Optional fields are only present on the response when they carry
    // information, so absent keys are never serialized.
    return {
      items: tagWithSource(result.items, SOURCE_LOCAL),
      ...(result.nextCursor !== undefined ? { nextCursor: result.nextCursor } : {}),
      ...(result.truncated ? { _truncated: true } : {}),
    };
  }
}
|
||||
52
docs/scratchpads/FED-M3-05-list-verb.md
Normal file
52
docs/scratchpads/FED-M3-05-list-verb.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# FED-M3-05 — Federation List Verb Scratchpad
|
||||
|
||||
## Objective
|
||||
|
||||
Implement `POST /api/federation/v1/list/:resource`.
|
||||
|
||||
## Scope
|
||||
|
||||
- Wire `FederationAuthGuard` → `FederationScopeService` → read-only list query layer.
|
||||
- Apply `max_rows_per_query` row cap and return pagination metadata when truncated.
|
||||
- Tag returned rows with `_source: "local"`.
|
||||
- Keep audit writes deferred to M4.
|
||||
- No request/response body persistence.
|
||||
|
||||
## Base / branch
|
||||
|
||||
- Branch: `feat/federation-m3-verb-list`
|
||||
- Base: `main` after M3-04 scope service merged via PR #672 (`c739256a`).
|
||||
|
||||
## Implementation notes
|
||||
|
||||
- Added `ListController` under `apps/gateway/src/federation/server/verbs/`.
|
||||
- Added `FederationListQueryService` as the read-only query layer and native RBAC evaluator.
|
||||
- Query resources supported in M3 list path:
  - `tasks`: project/mission scoped tasks visible through personal/team project access.
  - `notes`: non-empty `mission_tasks.notes` rows visible through personal/team mission access.
  - `memory`: user-owned `insights` and `preferences` rows.
  - `credentials` / `api_keys`: denied by native RBAC in M3 even if present in scope; sensitive-resource implementation is not part of FED-M3-05.
|
||||
- Cursor pagination uses an opaque base64url keyset cursor over `(createdAt, id)`; DB reads fetch at most `limit + 1` rows per resource query.
|
||||
- Reviewer isolation fix: `mission_tasks.notes` rows are always constrained by `missionTasks.userId = subjectUserId` and accessible mission IDs; team scope narrows missions but never widens to other users' mission task notes.
|
||||
- Follow-up review fix: memory listing now uses deterministic table-block pagination (`insights` first, then `preferences`) with cursor source metadata, so one table's cursor is never applied to the other.
|
||||
- Follow-up hardening: missing auth-guard context returns a structured federation `unauthorized` envelope; unsupported resources and non-encodable truncated cursors throw instead of silently crashing/truncating.
|
||||
|
||||
## Tests
|
||||
|
||||
- `pnpm --filter @mosaicstack/gateway test -- list.controller.spec.ts list-query.service.spec.ts` — PASS (16 tests, including PGlite regression coverage for team-scoped notes isolation, unauthorized mission notes exclusion, `includePersonal: false`, deterministic memory pagination, missing context envelope, unsupported resource, and cursor encode failure).
|
||||
- `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
|
||||
- `pnpm --filter @mosaicstack/gateway lint` — PASS.
|
||||
- `pnpm format:check` — PASS.
|
||||
- `pnpm typecheck` — PASS (41/41 turbo tasks).
|
||||
- `pnpm lint` — PASS (23/23 turbo tasks).
|
||||
- `pnpm --filter @mosaicstack/gateway test` — FAIL in pre-existing/live-DB integration suite: `apps/gateway/src/__tests__/cross-user-isolation.test.ts` cleanup cannot connect to local PostgreSQL on `localhost:5433`. New list tests pass; failure is outside FED-M3-05.
|
||||
|
||||
## Review evidence
|
||||
|
||||
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS after follow-up remediation; approve, no findings.
|
||||
- `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS after follow-up remediation; risk level none, no findings.
|
||||
- Security-review note: read-path audit logging remains intentionally deferred to M4 per orchestrator clarification and FED-M3-05 scope.
|
||||
|
||||
## Risks / follow-up
|
||||
|
||||
- Read-path audit logging remains intentionally deferred to M4.
|
||||
@@ -4,6 +4,7 @@ import { dirname, join, resolve } from 'node:path';
|
||||
import { Command } from 'commander';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
acquireRestartLock,
|
||||
addAgentToRoster,
|
||||
buildAgentSendCommand,
|
||||
buildAgentWatchAttachCommand,
|
||||
@@ -45,6 +46,8 @@ import {
|
||||
removeAgentFromRoster,
|
||||
resolveFleetPaths,
|
||||
resolvePresetFilename,
|
||||
restartLockPath,
|
||||
RESTART_LOCK_STALE_MS,
|
||||
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||
serializeRosterToYaml,
|
||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||
@@ -678,6 +681,364 @@ describe('fleet command construction', () => {
|
||||
}
|
||||
});
|
||||
|
||||
it('waits for an in-flight restart to clear before relaunching (re-entry guard)', async () => {
|
||||
const home = await tempDir();
|
||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||
await writeFile(
|
||||
rosterPath,
|
||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||
'\n',
|
||||
),
|
||||
);
|
||||
|
||||
// Simulate another `mosaic fleet restart` process mid-teardown: a fresh lock
|
||||
// (recent timestamp, so it is NOT treated as stale) already held.
|
||||
const lockPath = restartLockPath(home);
|
||||
await mkdir(dirname(lockPath), { recursive: true });
|
||||
await writeFile(lockPath, `4242\n${Date.now()}\n`);
|
||||
|
||||
const events: string[] = [];
|
||||
const runner: CommandRunner = async (command, args) => {
|
||||
events.push(`run:${args[args.length - 1]}`);
|
||||
return { stdout: '', stderr: '', exitCode: 0 };
|
||||
};
|
||||
// The injected sleep stands in for time passing while we wait; the in-flight
|
||||
// restart "finishes" (releases its lock) after the first poll.
|
||||
let sleeps = 0;
|
||||
const sleepFn: SleepFn = async () => {
|
||||
sleeps += 1;
|
||||
events.push(`sleep:${sleeps}`);
|
||||
await rm(lockPath, { force: true });
|
||||
};
|
||||
|
||||
const program = new Command();
|
||||
program.exitOverride();
|
||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||
|
||||
try {
|
||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||
|
||||
// It must have waited at least once before issuing any systemctl restart.
|
||||
expect(sleeps).toBeGreaterThan(0);
|
||||
const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
|
||||
const firstRun = events.findIndex((e) => e.startsWith('run:'));
|
||||
expect(firstSleep).toBeGreaterThanOrEqual(0);
|
||||
expect(firstRun).toBeGreaterThan(firstSleep);
|
||||
|
||||
// And it still performs the full restart once the lock clears.
|
||||
expect(events).toContain('run:mosaic-tmux-holder.service');
|
||||
expect(events).toContain('run:mosaic-agent@coder0.service');
|
||||
|
||||
// The lock is released after the restart completes.
|
||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('breaks a stale restart lock and proceeds without waiting', async () => {
|
||||
const home = await tempDir();
|
||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||
await writeFile(
|
||||
rosterPath,
|
||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||
'\n',
|
||||
),
|
||||
);
|
||||
|
||||
// A lock left behind by a crashed owner: timestamp older than the stale window.
|
||||
const lockPath = restartLockPath(home);
|
||||
await mkdir(dirname(lockPath), { recursive: true });
|
||||
await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n`);
|
||||
|
||||
const calls: string[][] = [];
|
||||
const runner: CommandRunner = async (command, args) => {
|
||||
calls.push([command, ...args]);
|
||||
return { stdout: '', stderr: '', exitCode: 0 };
|
||||
};
|
||||
const sleepFn = vi.fn<SleepFn>(async () => {});
|
||||
|
||||
const program = new Command();
|
||||
program.exitOverride();
|
||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||
|
||||
try {
|
||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||
|
||||
// Stale lock is broken immediately — no waiting.
|
||||
expect(sleepFn).not.toHaveBeenCalled();
|
||||
expect(calls).toEqual([
|
||||
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||
]);
|
||||
// The stale lock is gone once the restart completes.
|
||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('releases the restart lock so a subsequent restart is not blocked', async () => {
|
||||
const home = await tempDir();
|
||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||
await writeFile(
|
||||
rosterPath,
|
||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||
'\n',
|
||||
),
|
||||
);
|
||||
|
||||
const calls: string[][] = [];
|
||||
const runner: CommandRunner = async (command, args) => {
|
||||
calls.push([command, ...args]);
|
||||
return { stdout: '', stderr: '', exitCode: 0 };
|
||||
};
|
||||
const sleepFn = vi.fn<SleepFn>(async () => {});
|
||||
|
||||
const program = new Command();
|
||||
program.exitOverride();
|
||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||
|
||||
try {
|
||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||
|
||||
// Two sequential restarts both run fully and neither has to wait.
|
||||
expect(sleepFn).not.toHaveBeenCalled();
|
||||
expect(calls).toEqual([
|
||||
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||
]);
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('guards the single-agent restart path behind the in-flight restart lock', async () => {
|
||||
const home = await tempDir();
|
||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||
await writeFile(
|
||||
rosterPath,
|
||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||
'\n',
|
||||
),
|
||||
);
|
||||
|
||||
// A full restart is mid-flight (lock held); a single-agent restart re-enters.
|
||||
const lockPath = restartLockPath(home);
|
||||
await mkdir(dirname(lockPath), { recursive: true });
|
||||
await writeFile(lockPath, `4242\n${Date.now()}\n`);
|
||||
|
||||
const events: string[] = [];
|
||||
const runner: CommandRunner = async (command, args) => {
|
||||
events.push(`run:${args[args.length - 1]}`);
|
||||
return { stdout: '', stderr: '', exitCode: 0 };
|
||||
};
|
||||
let sleeps = 0;
|
||||
const sleepFn: SleepFn = async () => {
|
||||
sleeps += 1;
|
||||
events.push(`sleep:${sleeps}`);
|
||||
await rm(lockPath, { force: true });
|
||||
};
|
||||
|
||||
const program = new Command();
|
||||
program.exitOverride();
|
||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||
|
||||
try {
|
||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart', 'coder0']);
|
||||
|
||||
// The single-agent restart waits for the in-flight restart before acting.
|
||||
expect(sleeps).toBeGreaterThan(0);
|
||||
const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
|
||||
const firstRun = events.findIndex((e) => e.startsWith('run:'));
|
||||
expect(firstSleep).toBeGreaterThanOrEqual(0);
|
||||
expect(firstRun).toBeGreaterThan(firstSleep);
|
||||
// Only the named agent is restarted; the holder is untouched.
|
||||
expect(events).toContain('run:mosaic-agent@coder0.service');
|
||||
expect(events).not.toContain('run:mosaic-tmux-holder.service');
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('does not let a timed-out owner drop a lock another restart broke and re-owned', async () => {
  const home = await tempDir();
  // Cleanup lives in `finally` (like the sibling tests) so a failed assertion
  // does not leave the temporary Mosaic home behind.
  try {
    const runDir = join(home, 'fleet', 'run');
    await mkdir(runDir, { recursive: true });
    const lockPath = restartLockPath(home);
    // The owner token is the third line of the lock file.
    const tokenOf = async (): Promise<string> => {
      const raw = await readFile(lockPath, 'utf8');
      return raw.split('\n')[2]?.trim() ?? '';
    };
    const sleepFn = vi.fn<SleepFn>(async () => {});

    // R1 acquires the lock and begins a restart that then hangs.
    const r1 = await acquireRestartLock(home, sleepFn);
    const tokenR1 = await tokenOf();
    expect(tokenR1).not.toBe('');

    // The hung R1 leaves a stale lock: rewrite its timestamp into the past while
    // preserving R1's token — exactly the on-disk state a stuck owner leaves.
    await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n${tokenR1}\n`);

    // R2 re-enters, sees the stale lock, and atomically takes ownership.
    const r2 = await acquireRestartLock(home, sleepFn);
    const tokenR2 = await tokenOf();
    expect(tokenR2).not.toBe(tokenR1);
    expect(sleepFn).not.toHaveBeenCalled();

    // R1 finally finishes and releases. It must NOT delete R2's lock — otherwise
    // a third restart (R3) could acquire and interleave with R2 still running.
    await r1.release();
    expect(await tokenOf()).toBe(tokenR2);

    // R2 releases cleanly and the lock is gone.
    await r2.release();
    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
||||
|
||||
it('lets only one of several concurrent breakers proceed past a stale lock', async () => {
|
||||
const home = await tempDir();
|
||||
const lockPath = restartLockPath(home);
|
||||
await mkdir(dirname(lockPath), { recursive: true });
|
||||
|
||||
// A stale lock left by a crashed owner: every concurrent re-entrant restart
|
||||
// will judge it stale and try to break it at the same instant. Breaking must
|
||||
// NOT grant ownership — only the atomic re-create may — so exactly one
|
||||
// contender can ever hold the lock at a time. (The v2 fix wrote our own token
|
||||
// during the break and read it back, so two breakers each saw their own token
|
||||
// and BOTH proceeded; this guards that regression.)
|
||||
await writeFile(
|
||||
lockPath,
|
||||
`4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
|
||||
);
|
||||
|
||||
// Yielding sleep so a waiting contender lets the current owner finish and
|
||||
// release before it re-contends, instead of spinning the microtask queue.
|
||||
const sleepFn: SleepFn = async () => {
|
||||
await new Promise((res) => setTimeout(res, 0));
|
||||
};
|
||||
|
||||
let active = 0;
|
||||
let maxActive = 0;
|
||||
const tokens: string[] = [];
|
||||
const tokenOf = async (): Promise<string> => {
|
||||
const raw = await readFile(lockPath, 'utf8');
|
||||
return raw.split('\n')[2]?.trim() ?? '';
|
||||
};
|
||||
|
||||
// One "restart" = acquire the lock, do work in the critical section, release.
|
||||
const restartOnce = async (): Promise<void> => {
|
||||
const guard = await acquireRestartLock(home, sleepFn);
|
||||
active += 1;
|
||||
maxActive = Math.max(maxActive, active);
|
||||
// Record the token we own while we hold it, then yield to interleave with
|
||||
// any other contender that might (wrongly) believe it owns the lock too.
|
||||
tokens.push(await tokenOf());
|
||||
await new Promise((res) => setTimeout(res, 0));
|
||||
active -= 1;
|
||||
await guard.release();
|
||||
};
|
||||
|
||||
try {
|
||||
// Three breakers race the single stale lock simultaneously.
|
||||
await Promise.all([restartOnce(), restartOnce(), restartOnce()]);
|
||||
|
||||
// Mutual exclusion held: never two owners at once despite concurrent breaks.
|
||||
expect(maxActive).toBe(1);
|
||||
// Each acquire owned with its own distinct token — no two ever shared it.
|
||||
expect(new Set(tokens).size).toBe(3);
|
||||
// The lock is fully released at the end.
|
||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('lets exactly one of two breakers take over a stale lock while the other waits', async () => {
|
||||
const home = await tempDir();
|
||||
const lockPath = restartLockPath(home);
|
||||
await mkdir(dirname(lockPath), { recursive: true });
|
||||
|
||||
// A single stale lock both contenders will judge stale at the same instant.
|
||||
// Every transition runs under the registry mutex, so only one may take the
|
||||
// lock over; the other must observe a now-fresh owner and WAIT/re-evaluate
|
||||
// rather than also taking over. (A content-blind clobber let both believe
|
||||
// they owned it — this asserts the mutex-gated CAS takeover instead.)
|
||||
await writeFile(
|
||||
lockPath,
|
||||
`4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
|
||||
);
|
||||
|
||||
// Barrier the winner holds against until the loser has observed the lock
|
||||
// fresh and waited at least once — forcing the exact interleaving where one
|
||||
// proceeds while the other waits, deterministically rather than by timing.
|
||||
let resolveLoserWaited: () => void = () => {};
|
||||
const loserWaited = new Promise<void>((res) => {
|
||||
resolveLoserWaited = res;
|
||||
});
|
||||
let sleeps = 0;
|
||||
const sleepFn: SleepFn = async () => {
|
||||
sleeps += 1;
|
||||
resolveLoserWaited();
|
||||
await new Promise((res) => setTimeout(res, 0));
|
||||
};
|
||||
|
||||
let active = 0;
|
||||
let maxActive = 0;
|
||||
const tokens: string[] = [];
|
||||
const tokenOf = async (): Promise<string> => {
|
||||
const raw = await readFile(lockPath, 'utf8');
|
||||
return raw.split('\n')[2]?.trim() ?? '';
|
||||
};
|
||||
|
||||
let firstOwner = true;
|
||||
const restartOnce = async (): Promise<void> => {
|
||||
const guard = await acquireRestartLock(home, sleepFn);
|
||||
active += 1;
|
||||
maxActive = Math.max(maxActive, active);
|
||||
tokens.push(await tokenOf());
|
||||
if (firstOwner) {
|
||||
// Winner: keep holding the lock until the loser has waited once, so the
|
||||
// loser is guaranteed to see a FRESH owner (not the stale one) and back
|
||||
// off — proving it could not also take over.
|
||||
firstOwner = false;
|
||||
await loserWaited;
|
||||
} else {
|
||||
await new Promise((res) => setTimeout(res, 0));
|
||||
}
|
||||
active -= 1;
|
||||
await guard.release();
|
||||
};
|
||||
|
||||
try {
|
||||
// Exactly two breakers race the single stale lock.
|
||||
await Promise.all([restartOnce(), restartOnce()]);
|
||||
|
||||
// Mutual exclusion: never two owners at once (if both took over the stale
|
||||
// lock, this would be 2).
|
||||
expect(maxActive).toBe(1);
|
||||
// Both eventually owned, each with its own distinct token.
|
||||
expect(new Set(tokens).size).toBe(2);
|
||||
// The loser observed the winner's fresh lock and waited — it did NOT also
|
||||
// take over the stale lock.
|
||||
expect(sleeps).toBeGreaterThanOrEqual(1);
|
||||
// The lock is fully released at the end.
|
||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
} finally {
|
||||
await rm(home, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
|
||||
const home = await tempDir();
|
||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
import { constants } from 'node:fs';
|
||||
import { access, chmod, copyFile, mkdir, readFile, unlink, writeFile } from 'node:fs/promises';
|
||||
import {
|
||||
access,
|
||||
chmod,
|
||||
copyFile,
|
||||
mkdir,
|
||||
open,
|
||||
readFile,
|
||||
stat,
|
||||
unlink,
|
||||
writeFile,
|
||||
} from 'node:fs/promises';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { homedir, hostname, userInfo } from 'node:os';
|
||||
import { dirname, join, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
@@ -533,6 +544,295 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
|
||||
return ['systemctl', '--user', action, service];
|
||||
}
|
||||
|
||||
/** Poll interval (ms) while waiting for an in-flight restart's lock to clear. */
|
||||
export const RESTART_LOCK_POLL_INTERVAL_MS = 250;
|
||||
/**
|
||||
* Maximum time (ms) a re-entrant restart waits for the in-flight restart to
|
||||
* finish before it breaks the lock and proceeds anyway. A bound is required so
|
||||
* a crashed holder of the lock can never deadlock the fleet permanently.
|
||||
*/
|
||||
export const RESTART_LOCK_MAX_WAIT_MS = 30_000;
|
||||
/**
|
||||
* Age (ms) past which a restart lock is treated as stale (its owner died
|
||||
* without releasing it) and is broken immediately rather than waited on.
|
||||
*/
|
||||
export const RESTART_LOCK_STALE_MS = 60_000;
|
||||
|
||||
/**
 * Builds the path of the cross-process restart lock for a Mosaic home:
 * `<mosaicHome>/fleet/run/restart.lock`.
 *
 * The location is derived only from `mosaicHome` (never from the heartbeat env
 * override) so the lock is scoped to the same fleet the restart acts on.
 *
 * @param mosaicHome Root directory of the Mosaic installation.
 * @returns The joined lock-file path.
 */
export function restartLockPath(mosaicHome: string): string {
  const lockSegments = ['fleet', 'run', 'restart.lock'];
  return join(mosaicHome, ...lockSegments);
}
|
||||
|
||||
/**
 * Handle representing a held restart lock.
 */
interface RestartGuard {
  /** Removes the lock file if and only if this holder still owns it. */
  release(): Promise<void>;
}
|
||||
|
||||
/**
 * Renders the lock-file payload as three newline-terminated lines:
 * the current process id (informational), the current `Date.now()` timestamp,
 * and the caller's unique owner token.
 *
 * @param token Unique owner token written on the third line.
 * @returns The text to write into the lock file.
 */
function formatRestartLockContent(token: string): string {
  const fields = [process.pid, Date.now(), token];
  return `${fields.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Reads the owner token — the third line — from a restart lock file.
 *
 * The token is what makes release and break ownership-safe: a process only
 * ever acts on a lock whose token matches its own.
 *
 * @param lockPath Path of the lock file to inspect.
 * @returns The trimmed token, or `null` when the file cannot be read or has no
 *   non-empty third line.
 */
async function readRestartLockToken(lockPath: string): Promise<string | null> {
  try {
    const contents = await readFile(lockPath, 'utf8');
    // Line layout is pid / timestamp / token; only the third entry matters here.
    const [, , tokenLine] = contents.split('\n');
    const ownerToken = tokenLine?.trim();
    return ownerToken || null;
  } catch {
    // Any read failure (missing file, permissions, ...) is reported as "no token".
    return null;
  }
}
|
||||
|
||||
/**
 * Decides from a lock file's text whether the lock is stale.
 *
 * A lock is stale when the timestamp on its second line is at least
 * RESTART_LOCK_STALE_MS older than `now`, or when that line does not parse as
 * an integer (a corrupt or partially written lock left by a crashed owner).
 *
 * @param raw Full text of the lock file.
 * @param now Current time in epoch milliseconds.
 * @returns `true` if the lock should be treated as abandoned.
 */
function isRestartLockContentStale(raw: string, now: number): boolean {
  const [, stampLine = ''] = raw.split('\n');
  const writtenAt = Number.parseInt(stampLine.trim(), 10);
  // NaN (unparseable stamp) short-circuits to "stale".
  return !Number.isFinite(writtenAt) || now - writtenAt >= RESTART_LOCK_STALE_MS;
}
|
||||
|
||||
/**
 * Derives the path of the short-lived registry mutex that protects every
 * transition of the restart lock (acquire, release, takeover). The mutex is
 * held only across a few filesystem operations and never across the restart
 * itself, so contention on it clears almost immediately.
 *
 * @param lockPath Path of the restart lock the mutex guards.
 * @returns `lockPath` with a `.mutex` suffix.
 */
function restartMutexPath(lockPath: string): string {
  return lockPath + '.mutex';
}
|
||||
|
||||
/**
 * Pause (ms) between attempts to take the registry mutex. Kept short because
 * the mutex is only ever held for a handful of filesystem operations.
 */
const RESTART_MUTEX_RETRY_MS = 20;
|
||||
|
||||
/**
 * Staleness test for the internal mutex / reclaim locks, based on the file's
 * mtime instead of its content.
 *
 * `open(path, 'wx')` creates the file (with a fresh mtime) before any
 * token/timestamp has been written into it. A content-based check could
 * therefore briefly see an empty file, call it corrupt-and-stale, and reap a
 * lock that another contender is still in the middle of acquiring. Judging by
 * mtime avoids that: a just-created lock always reads as live, and only a lock
 * whose holder stopped touching it ages past RESTART_LOCK_STALE_MS. These locks
 * are never held across the restart itself, so an mtime that old is taken to
 * mean the holder is dead.
 *
 * @param path Path of the mutex or reclaim lock file.
 * @param now Current time in epoch milliseconds.
 * @returns `true` only when the file exists, can be stat'ed, and is old enough.
 */
async function isRestartLockPathStale(path: string, now: number): Promise<boolean> {
  // Every stat failure reports "not stale": a missing file simply means the
  // caller should re-contend, and an un-stat-able one is treated as live so the
  // caller backs off rather than reaping it.
  const info = await stat(path).catch(() => null);
  if (info === null) {
    return false;
  }
  return now - info.mtimeMs >= RESTART_LOCK_STALE_MS;
}
|
||||
|
||||
/**
 * Derives the path of the reclaim lock, which serializes the reaping of a
 * registry mutex abandoned by a crashed holder.
 *
 * @param mutexPath Path of the registry mutex being reclaimed.
 * @returns `mutexPath` with a `.reclaim` suffix.
 */
function restartReclaimPath(mutexPath: string): string {
  return mutexPath + '.reclaim';
}
|
||||
|
||||
/**
 * Reap a registry mutex left behind by a process that CRASHED mid-transition —
 * one whose file has aged past RESTART_LOCK_STALE_MS. Because the mutex is held
 * only for a couple of filesystem ops (no sleeps, never across the restart), a
 * mutex this old is assumed to belong to a dead holder.
 *
 * The reap removes the dead mutex but never CREATES or holds it: acquisition
 * stays the single `open('wx')` create in {@link acquireRestartMutex}. The
 * removal is made conditional by a dedicated reclaim lock; while that lock is
 * held the mutex is re-checked and unlinked only if it is STILL stale by mtime,
 * so a live holder's fresh mutex is left alone.
 *
 * This function makes at most one attempt and reports nothing back: when
 * another reclaimer already holds the reclaim lock it returns without touching
 * the mutex, and the caller is expected to re-contend.
 *
 * @param mutexPath Path of the registry mutex to reap if it is stale.
 * @throws Any error from creating the reclaim lock other than `EEXIST`, and any
 *   error from closing the reclaim lock's file handle.
 */
async function reclaimStaleRestartMutex(mutexPath: string): Promise<void> {
  const reclaimPath = restartReclaimPath(mutexPath);
  let handle: Awaited<ReturnType<typeof open>>;
  try {
    // Exclusive create: exactly one contender becomes the reclaimer.
    handle = await open(reclaimPath, 'wx');
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
      throw err;
    }
    // Someone is already reclaiming. If their reclaim lock is itself stale by
    // mtime, its holder crashed mid-reap (the lock spans only a stat + unlink)
    // — clear it so a later pass can retry. Otherwise a live reclaimer has it;
    // back off. Either way we do not reap the mutex this pass.
    if (await isRestartLockPathStale(reclaimPath, Date.now())) {
      await unlink(reclaimPath).catch(() => {});
    }
    return;
  }
  try {
    // Re-check the mutex UNDER the reclaim lock and remove it only if it is
    // still stale by mtime; a fresh (live) mutex is left untouched.
    if (await isRestartLockPathStale(mutexPath, Date.now())) {
      await unlink(mutexPath).catch(() => {});
    }
  } finally {
    // Always remove our reclaim lock, even if closing the handle fails.
    // Otherwise a close error would leak the lock file and block every other
    // reclaimer until it aged past the stale threshold.
    try {
      await handle.close();
    } finally {
      await unlink(reclaimPath).catch(() => {});
    }
  }
}
|
||||
|
||||
/**
 * Acquire the registry mutex, BLOCKING (with brief back-offs) until held, and
 * return a token-gated release function.
 *
 * This is the single point of mutual exclusion for the restart lock: acquire,
 * release, and stale/timeout takeover all run under it, so "read the lock, then
 * mutate it" cannot interleave with another acquirer, releaser, or breaker. A
 * mutex left by a crashed holder is reclaimed once it ages past the stale
 * threshold.
 *
 * @param mutexPath Path of the registry mutex file.
 * @param token Owner token written into the mutex; the returned release only
 *   unlinks the mutex while this token is still the one on disk.
 * @returns A function that releases the mutex if the caller still owns it.
 * @throws Any error from creating the mutex other than `EEXIST`, any error
 *   propagated from the reclaim step, and any error from writing or closing the
 *   freshly created mutex (which is removed again before rethrowing).
 */
async function acquireRestartMutex(
  mutexPath: string,
  token: string,
): Promise<RestartGuard['release']> {
  const backOff = (): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, RESTART_MUTEX_RETRY_MS));
  // True when the previous pass attempted a reap; a second consecutive stale
  // sighting means that reap did not clear the mutex.
  let reapedLastPass = false;

  for (;;) {
    let handle: Awaited<ReturnType<typeof open>>;
    try {
      handle = await open(mutexPath, 'wx');
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
        throw err;
      }
      // Staleness is judged by mtime, not content, so a mutex that exists but
      // has not yet had its token written (the open-before-write window) reads
      // as live and is never wrongly reaped.
      if (!(await isRestartLockPathStale(mutexPath, Date.now()))) {
        // A live holder has it — it should be gone shortly. Back off briefly.
        reapedLastPass = false;
        await backOff();
        continue;
      }
      if (reapedLastPass) {
        // The previous reap left the stale mutex in place (another reclaimer
        // holds the reclaim lock, or the unlink failed). Pause before retrying
        // instead of spinning on the filesystem until something changes.
        await backOff();
      }
      await reclaimStaleRestartMutex(mutexPath);
      reapedLastPass = true;
      continue;
    }

    // We created the mutex. Populate it with our token; if writing fails, clean
    // up our own file so we never leak an empty mutex that a peer must reap.
    try {
      await handle.writeFile(formatRestartLockContent(token));
      await handle.close();
    } catch (err) {
      await handle.close().catch(() => {});
      await unlink(mutexPath).catch(() => {});
      throw err;
    }

    return async (): Promise<void> => {
      // Token-gated: never remove a mutex that now belongs to someone else.
      if ((await readRestartLockToken(mutexPath)) !== token) return;
      await unlink(mutexPath).catch(() => {});
    };
  }
}
|
||||
|
||||
/**
 * Take the fleet restart lock so that concurrent `mosaic fleet restart`
 * invocations in different processes run one after another.
 *
 * A restart tears the tmux holder (and the agent sessions inside it) down and
 * brings it back up; without this guard a re-entrant restart would relaunch
 * agents against a half-torn-down holder, fail, and tight-loop. A caller that
 * finds the lock held by a fresh owner polls every RESTART_LOCK_POLL_INTERVAL_MS
 * until it is released. A lock whose content is stale or unreadable is broken
 * immediately, and once RESTART_LOCK_MAX_WAIT_MS has elapsed a still-held lock
 * is broken too, so waiting is always bounded.
 *
 * Invariant: EVERY transition of the lock — taking a free lock, taking over a
 * stale or timed-out one, and releasing — happens while the registry mutex is
 * held. The inspection of the lock and the write that follows it are therefore
 * atomic with respect to every other acquirer, releaser, and breaker, which is
 * what makes takeover a compare-and-swap rather than a blind overwrite, and
 * what keeps the original owner's token-checked `release()` from dropping a
 * lock that a breaker has already taken over.
 *
 * @param mosaicHome Mosaic home whose fleet the restart acts on.
 * @param sleepFn Sleep used between polls while another restart holds the lock.
 * @returns A guard whose `release()` removes the lock if the caller still owns it.
 */
export async function acquireRestartLock(
  mosaicHome: string,
  sleepFn: SleepFn,
): Promise<RestartGuard> {
  const token = randomUUID();
  const lockPath = restartLockPath(mosaicHome);
  const mutexPath = restartMutexPath(lockPath);
  await mkdir(dirname(lockPath), { recursive: true });

  const release = async (): Promise<void> => {
    // Runs under the mutex and checks the token: if a breaker took the lock
    // over in the meantime, the token differs and their lock is left intact.
    const unlockMutex = await acquireRestartMutex(mutexPath, token);
    try {
      const holder = await readRestartLockToken(lockPath);
      if (holder === token) {
        await unlink(lockPath).catch(() => {});
      }
    } finally {
      await unlockMutex();
    }
  };

  const deadline = Date.now() + RESTART_LOCK_MAX_WAIT_MS;

  /**
   * One acquisition attempt; must be called with the registry mutex held.
   * Resolves to true when the lock file now carries our token.
   */
  const tryTakeLock = async (): Promise<boolean> => {
    let contents: string | null = null;
    let present = true;
    try {
      contents = await readFile(lockPath, 'utf8');
    } catch (readErr) {
      // ENOENT means the lock is free; any other failure leaves `contents`
      // null, which is handled as a stale lock below.
      present = (readErr as NodeJS.ErrnoException).code !== 'ENOENT';
    }

    const now = Date.now();
    if (present) {
      const stale = contents === null || isRestartLockContentStale(contents, now);
      if (!stale && now < deadline) {
        // A fresh restart owns the lock — the caller sleeps and re-evaluates.
        return false;
      }
      process.stderr.write(
        stale
          ? 'Breaking stale fleet restart lock.\n'
          : `Timed out after ${RESTART_LOCK_MAX_WAIT_MS}ms waiting for the in-flight fleet ` +
              'restart; breaking the lock.\n',
      );
    }

    // Either the lock was free or we are taking it over. A plain overwrite is
    // safe because the mutex keeps everyone else away between read and write.
    await writeFile(lockPath, formatRestartLockContent(token));
    return true;
  };

  for (;;) {
    let owned = false;
    const unlockMutex = await acquireRestartMutex(mutexPath, token);
    try {
      owned = await tryTakeLock();
    } finally {
      await unlockMutex();
    }
    if (owned) {
      return { release };
    }
    await sleepFn(RESTART_LOCK_POLL_INTERVAL_MS);
  }
}
|
||||
|
||||
/**
|
||||
* Returns the systemctl --user enable command for a given unit.
|
||||
* Used by the install auto-enable step to persist units across reboots.
|
||||
@@ -1172,6 +1472,7 @@ export function isSendAccepted(capturedOutput: string): SendVerifyResult {
|
||||
|
||||
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
|
||||
const runner = deps.runner ?? runCommand;
|
||||
const sleepFn = deps.sleepFn ?? defaultSleep;
|
||||
const paths = resolveFleetPaths(deps.mosaicHome);
|
||||
const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
|
||||
|
||||
@@ -1285,9 +1586,22 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
||||
.command(`${action} [agent]`)
|
||||
.description(`${action} the fleet holder or one agent`)
|
||||
.action(async (agent?: string) => {
|
||||
const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>();
|
||||
const activePaths = resolveFleetPaths(commandOpts.mosaicHome);
|
||||
const roster = await loadRosterForCommand(cmd);
|
||||
if (agent) {
|
||||
getRosterAgent(roster, agent);
|
||||
// Single-agent restart is guarded too: it can race a full restart that
|
||||
// is tearing the shared holder down.
|
||||
if (action === 'restart') {
|
||||
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
||||
try {
|
||||
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
||||
} finally {
|
||||
await guard.release();
|
||||
}
|
||||
return;
|
||||
}
|
||||
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
||||
return;
|
||||
}
|
||||
@@ -1298,6 +1612,21 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (action === 'restart') {
|
||||
// Serialize the holder+agents teardown/relaunch behind the restart lock
|
||||
// so a re-entrant restart waits for clean shutdown before relaunching,
|
||||
// instead of racing a half-torn-down holder into a tight loop.
|
||||
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
||||
try {
|
||||
await runChecked(runner, buildFleetServiceCommand(action));
|
||||
for (const rosterAgent of roster.agents) {
|
||||
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
||||
}
|
||||
} finally {
|
||||
await guard.release();
|
||||
}
|
||||
return;
|
||||
}
|
||||
await runChecked(runner, buildFleetServiceCommand(action));
|
||||
for (const rosterAgent of roster.agents) {
|
||||
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
||||
|
||||
Reference in New Issue
Block a user