Compare commits
1 Commits
e483d976e4
...
b38b9846c1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b38b9846c1 |
@@ -1,10 +1,8 @@
|
|||||||
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||||
import {
|
import {
|
||||||
createPgliteDb,
|
createPgliteDb,
|
||||||
insights,
|
|
||||||
missionTasks,
|
missionTasks,
|
||||||
missions,
|
missions,
|
||||||
preferences,
|
|
||||||
projects,
|
projects,
|
||||||
runPgliteMigrations,
|
runPgliteMigrations,
|
||||||
teams,
|
teams,
|
||||||
@@ -27,21 +25,13 @@ const TASK_FILTER: FederationScopeQueryFilter = {
|
|||||||
const SUBJECT_USER_ID = 'fed-m3-05-subject';
|
const SUBJECT_USER_ID = 'fed-m3-05-subject';
|
||||||
const OTHER_USER_ID = 'fed-m3-05-other';
|
const OTHER_USER_ID = 'fed-m3-05-other';
|
||||||
const TEAM_ID = '05000000-0000-4000-8000-000000000001';
|
const TEAM_ID = '05000000-0000-4000-8000-000000000001';
|
||||||
const UNAUTHORIZED_TEAM_ID = '05000000-0000-4000-8000-000000000002';
|
|
||||||
const PERSONAL_PROJECT_ID = '05000000-0000-4000-8000-000000000101';
|
const PERSONAL_PROJECT_ID = '05000000-0000-4000-8000-000000000101';
|
||||||
const TEAM_PROJECT_ID = '05000000-0000-4000-8000-000000000102';
|
const TEAM_PROJECT_ID = '05000000-0000-4000-8000-000000000102';
|
||||||
const UNAUTHORIZED_PROJECT_ID = '05000000-0000-4000-8000-000000000103';
|
|
||||||
const PERSONAL_MISSION_ID = '05000000-0000-4000-8000-000000000201';
|
const PERSONAL_MISSION_ID = '05000000-0000-4000-8000-000000000201';
|
||||||
const TEAM_MISSION_ID = '05000000-0000-4000-8000-000000000202';
|
const TEAM_MISSION_ID = '05000000-0000-4000-8000-000000000202';
|
||||||
const UNAUTHORIZED_MISSION_ID = '05000000-0000-4000-8000-000000000203';
|
|
||||||
const SUBJECT_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000301';
|
const SUBJECT_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000301';
|
||||||
const OTHER_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000302';
|
const OTHER_TEAM_NOTE_ID = '05000000-0000-4000-8000-000000000302';
|
||||||
const SUBJECT_PERSONAL_NOTE_ID = '05000000-0000-4000-8000-000000000303';
|
const SUBJECT_PERSONAL_NOTE_ID = '05000000-0000-4000-8000-000000000303';
|
||||||
const SUBJECT_UNAUTHORIZED_NOTE_ID = '05000000-0000-4000-8000-000000000304';
|
|
||||||
const INSIGHT_ONE_ID = '05000000-0000-4000-8000-000000000401';
|
|
||||||
const INSIGHT_TWO_ID = '05000000-0000-4000-8000-000000000402';
|
|
||||||
const PREFERENCE_ONE_ID = '05000000-0000-4000-8000-000000000501';
|
|
||||||
const PREFERENCE_TWO_ID = '05000000-0000-4000-8000-000000000502';
|
|
||||||
|
|
||||||
let dbHandle: DbHandle | undefined;
|
let dbHandle: DbHandle | undefined;
|
||||||
|
|
||||||
@@ -76,22 +66,13 @@ async function seedNotesFixture() {
|
|||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await dbHandle.db.insert(teams).values([
|
await dbHandle.db.insert(teams).values({
|
||||||
{
|
id: TEAM_ID,
|
||||||
id: TEAM_ID,
|
name: 'FED-M3-05 Team',
|
||||||
name: 'FED-M3-05 Team',
|
slug: 'fed-m3-05-team',
|
||||||
slug: 'fed-m3-05-team',
|
ownerId: SUBJECT_USER_ID,
|
||||||
ownerId: SUBJECT_USER_ID,
|
managerId: SUBJECT_USER_ID,
|
||||||
managerId: SUBJECT_USER_ID,
|
});
|
||||||
},
|
|
||||||
{
|
|
||||||
id: UNAUTHORIZED_TEAM_ID,
|
|
||||||
name: 'FED-M3-05 Unauthorized Team',
|
|
||||||
slug: 'fed-m3-05-unauthorized-team',
|
|
||||||
ownerId: OTHER_USER_ID,
|
|
||||||
managerId: OTHER_USER_ID,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
await dbHandle.db.insert(projects).values([
|
await dbHandle.db.insert(projects).values([
|
||||||
{
|
{
|
||||||
@@ -106,12 +87,6 @@ async function seedNotesFixture() {
|
|||||||
teamId: TEAM_ID,
|
teamId: TEAM_ID,
|
||||||
ownerType: 'team',
|
ownerType: 'team',
|
||||||
},
|
},
|
||||||
{
|
|
||||||
id: UNAUTHORIZED_PROJECT_ID,
|
|
||||||
name: 'FED-M3-05 Unauthorized Project',
|
|
||||||
teamId: UNAUTHORIZED_TEAM_ID,
|
|
||||||
ownerType: 'team',
|
|
||||||
},
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await dbHandle.db.insert(missions).values([
|
await dbHandle.db.insert(missions).values([
|
||||||
@@ -127,12 +102,6 @@ async function seedNotesFixture() {
|
|||||||
projectId: TEAM_PROJECT_ID,
|
projectId: TEAM_PROJECT_ID,
|
||||||
userId: SUBJECT_USER_ID,
|
userId: SUBJECT_USER_ID,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
id: UNAUTHORIZED_MISSION_ID,
|
|
||||||
name: 'FED-M3-05 Unauthorized Mission',
|
|
||||||
projectId: UNAUTHORIZED_PROJECT_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
},
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await dbHandle.db.insert(missionTasks).values([
|
await dbHandle.db.insert(missionTasks).values([
|
||||||
@@ -160,53 +129,6 @@ async function seedNotesFixture() {
|
|||||||
createdAt: new Date('2026-06-24T01:00:00.000Z'),
|
createdAt: new Date('2026-06-24T01:00:00.000Z'),
|
||||||
updatedAt: new Date('2026-06-24T01:00:00.000Z'),
|
updatedAt: new Date('2026-06-24T01:00:00.000Z'),
|
||||||
},
|
},
|
||||||
{
|
|
||||||
id: SUBJECT_UNAUTHORIZED_NOTE_ID,
|
|
||||||
missionId: UNAUTHORIZED_MISSION_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
notes: 'subject note outside grant-visible missions',
|
|
||||||
createdAt: new Date('2026-06-24T04:00:00.000Z'),
|
|
||||||
updatedAt: new Date('2026-06-24T04:00:00.000Z'),
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
const memoryCreatedAt = new Date('2026-06-24T05:00:00.000Z');
|
|
||||||
await dbHandle.db.insert(insights).values([
|
|
||||||
{
|
|
||||||
id: INSIGHT_ONE_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
content: 'first insight',
|
|
||||||
source: 'agent',
|
|
||||||
createdAt: memoryCreatedAt,
|
|
||||||
updatedAt: memoryCreatedAt,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: INSIGHT_TWO_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
content: 'second insight',
|
|
||||||
source: 'agent',
|
|
||||||
createdAt: memoryCreatedAt,
|
|
||||||
updatedAt: memoryCreatedAt,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
await dbHandle.db.insert(preferences).values([
|
|
||||||
{
|
|
||||||
id: PREFERENCE_ONE_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
key: 'fed-m3-05-pref-1',
|
|
||||||
value: { enabled: true },
|
|
||||||
createdAt: memoryCreatedAt,
|
|
||||||
updatedAt: memoryCreatedAt,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: PREFERENCE_TWO_ID,
|
|
||||||
userId: SUBJECT_USER_ID,
|
|
||||||
key: 'fed-m3-05-pref-2',
|
|
||||||
value: { enabled: false },
|
|
||||||
createdAt: memoryCreatedAt,
|
|
||||||
updatedAt: memoryCreatedAt,
|
|
||||||
},
|
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -321,31 +243,6 @@ describe('FederationListQueryService', () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('throws when a truncated page cannot encode a resumable cursor', async () => {
|
|
||||||
const service = makeService();
|
|
||||||
stubRows(service, [
|
|
||||||
{ id: '2', createdAt: 'not-a-date' },
|
|
||||||
{ id: '1', createdAt: 'not-a-date' },
|
|
||||||
]);
|
|
||||||
|
|
||||||
await expect(service.list({ filter: { ...TASK_FILTER, limit: 1 } })).rejects.toThrow(
|
|
||||||
'Federation list cursor cannot be encoded',
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('throws on unsupported resources instead of crashing pagination', async () => {
|
|
||||||
const service = makeService();
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
service.list({
|
|
||||||
filter: {
|
|
||||||
...TASK_FILTER,
|
|
||||||
resource: 'unknown-resource' as FederationScopeQueryFilter['resource'],
|
|
||||||
},
|
|
||||||
}),
|
|
||||||
).rejects.toThrow('Unsupported federation list resource');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('does not leak another user mission task notes through team-scoped note reads', async () => {
|
it('does not leak another user mission task notes through team-scoped note reads', async () => {
|
||||||
const service = makeDbService();
|
const service = makeDbService();
|
||||||
|
|
||||||
@@ -381,48 +278,4 @@ describe('FederationListQueryService', () => {
|
|||||||
|
|
||||||
expect(result.items.map((item) => item['id'])).not.toContain(SUBJECT_PERSONAL_NOTE_ID);
|
expect(result.items.map((item) => item['id'])).not.toContain(SUBJECT_PERSONAL_NOTE_ID);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does not return subject notes from missions outside the grant-visible project set', async () => {
|
|
||||||
const service = makeDbService();
|
|
||||||
|
|
||||||
const result = await service.list({
|
|
||||||
filter: {
|
|
||||||
resource: 'notes',
|
|
||||||
subjectUserId: SUBJECT_USER_ID,
|
|
||||||
includePersonal: true,
|
|
||||||
teamIds: [TEAM_ID],
|
|
||||||
limit: 10,
|
|
||||||
maxRowsPerQuery: 10,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const ids = result.items.map((item) => item['id']);
|
|
||||||
expect(ids).toContain(SUBJECT_PERSONAL_NOTE_ID);
|
|
||||||
expect(ids).toContain(SUBJECT_TEAM_NOTE_ID);
|
|
||||||
expect(ids).not.toContain(SUBJECT_UNAUTHORIZED_NOTE_ID);
|
|
||||||
expect(ids).not.toContain(OTHER_TEAM_NOTE_ID);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('paginates memory deterministically across insights and preferences', async () => {
|
|
||||||
const service = makeDbService();
|
|
||||||
const filter: FederationScopeQueryFilter = {
|
|
||||||
resource: 'memory',
|
|
||||||
subjectUserId: SUBJECT_USER_ID,
|
|
||||||
includePersonal: true,
|
|
||||||
teamIds: [],
|
|
||||||
limit: 2,
|
|
||||||
maxRowsPerQuery: 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
const firstPage = await service.list({ filter });
|
|
||||||
const secondPage = await service.list({ filter, cursor: firstPage.nextCursor });
|
|
||||||
const firstPageIds = firstPage.items.map((item) => item['id']);
|
|
||||||
const secondPageIds = secondPage.items.map((item) => item['id']);
|
|
||||||
const allIds = [...firstPageIds, ...secondPageIds];
|
|
||||||
|
|
||||||
expect(firstPage).toMatchObject({ truncated: true, nextCursor: expect.any(String) });
|
|
||||||
expect(firstPageIds).toEqual([INSIGHT_TWO_ID, INSIGHT_ONE_ID]);
|
|
||||||
expect(secondPageIds).toEqual([PREFERENCE_TWO_ID, PREFERENCE_ONE_ID]);
|
|
||||||
expect(new Set(allIds).size).toBe(allIds.length);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -120,24 +120,6 @@ describe('ListController', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns a federation error envelope when auth guard context is missing', async () => {
|
|
||||||
const { controller, scope, query } = makeController();
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
controller.list('tasks', {} as unknown as FastifyRequest, {}),
|
|
||||||
).rejects.toMatchObject({
|
|
||||||
response: {
|
|
||||||
error: {
|
|
||||||
code: 'unauthorized',
|
|
||||||
message: 'Federation context missing',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
status: 401,
|
|
||||||
});
|
|
||||||
expect(scope.evaluateAccess).not.toHaveBeenCalled();
|
|
||||||
expect(query.list).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns a federation error envelope when scope evaluation denies access', async () => {
|
it('returns a federation error envelope when scope evaluation denies access', async () => {
|
||||||
const { controller, query } = makeController({
|
const { controller, query } = makeController({
|
||||||
scopeResult: {
|
scopeResult: {
|
||||||
|
|||||||
@@ -44,29 +44,23 @@ export interface FederationListQueryResult<T extends object = Record<string, unk
|
|||||||
readonly truncated: boolean;
|
readonly truncated: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
type CursorSource = 'insights' | 'preferences';
|
type RowObject = Record<string, unknown>;
|
||||||
const CURSOR_SOURCE = Symbol('federationCursorSource');
|
|
||||||
|
|
||||||
type RowObject = Record<string, unknown> & { readonly [CURSOR_SOURCE]?: CursorSource };
|
|
||||||
|
|
||||||
interface KeysetCursor {
|
interface KeysetCursor {
|
||||||
readonly createdAt: Date;
|
readonly createdAt: Date;
|
||||||
readonly id: string;
|
readonly id: string;
|
||||||
readonly source?: CursorSource;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function encodeCursor(row: RowObject): string {
|
function encodeCursor(row: RowObject): string | undefined {
|
||||||
const createdAt = row['createdAt'];
|
const createdAt = row['createdAt'];
|
||||||
const id = row['id'];
|
const id = row['id'];
|
||||||
if (!(createdAt instanceof Date) || typeof id !== 'string') {
|
if (!(createdAt instanceof Date) || typeof id !== 'string') {
|
||||||
throw new Error('Federation list cursor cannot be encoded');
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
const source = row[CURSOR_SOURCE];
|
return Buffer.from(JSON.stringify({ createdAt: createdAt.toISOString(), id }), 'utf8').toString(
|
||||||
return Buffer.from(
|
'base64url',
|
||||||
JSON.stringify({ createdAt: createdAt.toISOString(), id, ...(source ? { source } : {}) }),
|
);
|
||||||
'utf8',
|
|
||||||
).toString('base64url');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function decodeCursor(cursor: string | undefined): KeysetCursor | undefined {
|
function decodeCursor(cursor: string | undefined): KeysetCursor | undefined {
|
||||||
@@ -80,24 +74,17 @@ function decodeCursor(cursor: string | undefined): KeysetCursor | undefined {
|
|||||||
throw new Error('cursor must be an object');
|
throw new Error('cursor must be an object');
|
||||||
}
|
}
|
||||||
|
|
||||||
const { createdAt, id, source } = parsed as {
|
const { createdAt, id } = parsed as { createdAt?: unknown; id?: unknown };
|
||||||
createdAt?: unknown;
|
|
||||||
id?: unknown;
|
|
||||||
source?: unknown;
|
|
||||||
};
|
|
||||||
if (typeof createdAt !== 'string' || typeof id !== 'string' || id.length === 0) {
|
if (typeof createdAt !== 'string' || typeof id !== 'string' || id.length === 0) {
|
||||||
throw new Error('cursor is missing createdAt or id');
|
throw new Error('cursor is missing createdAt or id');
|
||||||
}
|
}
|
||||||
if (source !== undefined && source !== 'insights' && source !== 'preferences') {
|
|
||||||
throw new Error('cursor source is invalid');
|
|
||||||
}
|
|
||||||
|
|
||||||
const date = new Date(createdAt);
|
const date = new Date(createdAt);
|
||||||
if (Number.isNaN(date.getTime())) {
|
if (Number.isNaN(date.getTime())) {
|
||||||
throw new Error('cursor createdAt is invalid');
|
throw new Error('cursor createdAt is invalid');
|
||||||
}
|
}
|
||||||
|
|
||||||
return { createdAt: date, id, ...(source ? { source } : {}) };
|
return { createdAt: date, id };
|
||||||
} catch {
|
} catch {
|
||||||
throw new Error('Invalid federation list cursor');
|
throw new Error('Invalid federation list cursor');
|
||||||
}
|
}
|
||||||
@@ -115,15 +102,6 @@ function paginate<T extends RowObject>(rows: T[], limit: number): FederationList
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function markCursorSource<T extends RowObject>(row: T, source: CursorSource): T {
|
|
||||||
Object.defineProperty(row, CURSOR_SOURCE, {
|
|
||||||
value: source,
|
|
||||||
enumerable: false,
|
|
||||||
configurable: false,
|
|
||||||
});
|
|
||||||
return row;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sortRows(rows: RowObject[]): RowObject[] {
|
function sortRows(rows: RowObject[]): RowObject[] {
|
||||||
return [...rows].sort((a, b) => {
|
return [...rows].sort((a, b) => {
|
||||||
const aTime = a['createdAt'] instanceof Date ? a['createdAt'].getTime() : 0;
|
const aTime = a['createdAt'] instanceof Date ? a['createdAt'].getTime() : 0;
|
||||||
@@ -181,8 +159,6 @@ export class FederationListQueryService implements FederationNativeRbacEvaluator
|
|||||||
case 'credentials':
|
case 'credentials':
|
||||||
case 'api_keys':
|
case 'api_keys':
|
||||||
return [];
|
return [];
|
||||||
default:
|
|
||||||
throw new Error(`Unsupported federation list resource: ${String(filter.resource)}`);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -335,25 +311,22 @@ export class FederationListQueryService implements FederationNativeRbacEvaluator
|
|||||||
if (!filter.includePersonal) {
|
if (!filter.includePersonal) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
if (cursor && cursor.source === undefined) {
|
|
||||||
throw new Error('Invalid federation list cursor');
|
|
||||||
}
|
|
||||||
|
|
||||||
const rows: RowObject[] = [];
|
const insightCursorClause = cursor
|
||||||
|
? or(
|
||||||
|
lt(insights.createdAt, cursor.createdAt),
|
||||||
|
and(eq(insights.createdAt, cursor.createdAt), lt(insights.id, cursor.id)),
|
||||||
|
)
|
||||||
|
: undefined;
|
||||||
|
const preferenceCursorClause = cursor
|
||||||
|
? or(
|
||||||
|
lt(preferences.createdAt, cursor.createdAt),
|
||||||
|
and(eq(preferences.createdAt, cursor.createdAt), lt(preferences.id, cursor.id)),
|
||||||
|
)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
// Memory spans two physical tables. To keep pagination deterministic and
|
const [insightRows, preferenceRows] = await Promise.all([
|
||||||
// resumable without a SQL UNION, M3 emits a fixed block order: all insights
|
this.db
|
||||||
// first, then preferences. The opaque cursor records which table produced
|
|
||||||
// the boundary row, so the next page never re-applies one table's keyset to
|
|
||||||
// the other table (which could duplicate/skip rows at equal timestamps).
|
|
||||||
if (cursor?.source !== 'preferences') {
|
|
||||||
const insightCursorClause = cursor
|
|
||||||
? or(
|
|
||||||
lt(insights.createdAt, cursor.createdAt),
|
|
||||||
and(eq(insights.createdAt, cursor.createdAt), lt(insights.id, cursor.id)),
|
|
||||||
)
|
|
||||||
: undefined;
|
|
||||||
const insightRows = await this.db
|
|
||||||
.select({
|
.select({
|
||||||
id: insights.id,
|
id: insights.id,
|
||||||
kind: insights.source,
|
kind: insights.source,
|
||||||
@@ -367,42 +340,24 @@ export class FederationListQueryService implements FederationNativeRbacEvaluator
|
|||||||
.from(insights)
|
.from(insights)
|
||||||
.where(and(eq(insights.userId, filter.subjectUserId), insightCursorClause))
|
.where(and(eq(insights.userId, filter.subjectUserId), insightCursorClause))
|
||||||
.orderBy(desc(insights.createdAt), desc(insights.id))
|
.orderBy(desc(insights.createdAt), desc(insights.id))
|
||||||
.limit(rowLimit);
|
.limit(rowLimit),
|
||||||
|
this.db
|
||||||
|
.select({
|
||||||
|
id: preferences.id,
|
||||||
|
kind: preferences.category,
|
||||||
|
key: preferences.key,
|
||||||
|
value: preferences.value,
|
||||||
|
source: preferences.source,
|
||||||
|
mutable: preferences.mutable,
|
||||||
|
createdAt: preferences.createdAt,
|
||||||
|
updatedAt: preferences.updatedAt,
|
||||||
|
})
|
||||||
|
.from(preferences)
|
||||||
|
.where(and(eq(preferences.userId, filter.subjectUserId), preferenceCursorClause))
|
||||||
|
.orderBy(desc(preferences.createdAt), desc(preferences.id))
|
||||||
|
.limit(rowLimit),
|
||||||
|
]);
|
||||||
|
|
||||||
rows.push(...(insightRows as RowObject[]).map((row) => markCursorSource(row, 'insights')));
|
return sortRows([...(insightRows as RowObject[]), ...(preferenceRows as RowObject[])]);
|
||||||
}
|
|
||||||
|
|
||||||
const remaining = rowLimit - rows.length;
|
|
||||||
if (remaining <= 0) {
|
|
||||||
return rows;
|
|
||||||
}
|
|
||||||
|
|
||||||
const preferenceCursorClause =
|
|
||||||
cursor?.source === 'preferences'
|
|
||||||
? or(
|
|
||||||
lt(preferences.createdAt, cursor.createdAt),
|
|
||||||
and(eq(preferences.createdAt, cursor.createdAt), lt(preferences.id, cursor.id)),
|
|
||||||
)
|
|
||||||
: undefined;
|
|
||||||
const preferenceRows = await this.db
|
|
||||||
.select({
|
|
||||||
id: preferences.id,
|
|
||||||
kind: preferences.category,
|
|
||||||
key: preferences.key,
|
|
||||||
value: preferences.value,
|
|
||||||
source: preferences.source,
|
|
||||||
mutable: preferences.mutable,
|
|
||||||
createdAt: preferences.createdAt,
|
|
||||||
updatedAt: preferences.updatedAt,
|
|
||||||
})
|
|
||||||
.from(preferences)
|
|
||||||
.where(and(eq(preferences.userId, filter.subjectUserId), preferenceCursorClause))
|
|
||||||
.orderBy(desc(preferences.createdAt), desc(preferences.id))
|
|
||||||
.limit(remaining);
|
|
||||||
|
|
||||||
rows.push(
|
|
||||||
...(preferenceRows as RowObject[]).map((row) => markCursorSource(row, 'preferences')),
|
|
||||||
);
|
|
||||||
return rows;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import type { FastifyRequest } from 'fastify';
|
|||||||
import {
|
import {
|
||||||
FederationInvalidRequestError,
|
FederationInvalidRequestError,
|
||||||
FederationScopeViolationError,
|
FederationScopeViolationError,
|
||||||
FederationUnauthorizedError,
|
|
||||||
SOURCE_LOCAL,
|
SOURCE_LOCAL,
|
||||||
tagWithSource,
|
tagWithSource,
|
||||||
type FederationListResponse,
|
type FederationListResponse,
|
||||||
@@ -94,10 +93,7 @@ export class ListController {
|
|||||||
@Body() body?: FederationListRequestBody,
|
@Body() body?: FederationListRequestBody,
|
||||||
): Promise<FederationListResponse<FederatedRow>> {
|
): Promise<FederationListResponse<FederatedRow>> {
|
||||||
if (!request.federationContext) {
|
if (!request.federationContext) {
|
||||||
throw new HttpException(
|
throw new Error('Federation context missing after auth guard');
|
||||||
new FederationUnauthorizedError('Federation context missing').toEnvelope(),
|
|
||||||
401,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const requestedLimit = parseLimit(body);
|
const requestedLimit = parseLimit(body);
|
||||||
|
|||||||
@@ -28,12 +28,10 @@ Implement `POST /api/federation/v1/list/:resource`.
|
|||||||
- `credentials` / `api_keys`: denied by native RBAC in M3 even if present in scope; sensitive-resource implementation is not part of FED-M3-05.
|
- `credentials` / `api_keys`: denied by native RBAC in M3 even if present in scope; sensitive-resource implementation is not part of FED-M3-05.
|
||||||
- Cursor pagination uses an opaque base64url keyset cursor over `(createdAt, id)`; DB reads fetch at most `limit + 1` rows per resource query.
|
- Cursor pagination uses an opaque base64url keyset cursor over `(createdAt, id)`; DB reads fetch at most `limit + 1` rows per resource query.
|
||||||
- Reviewer isolation fix: `mission_tasks.notes` rows are always constrained by `missionTasks.userId = subjectUserId` and accessible mission IDs; team scope narrows missions but never widens to other users' mission task notes.
|
- Reviewer isolation fix: `mission_tasks.notes` rows are always constrained by `missionTasks.userId = subjectUserId` and accessible mission IDs; team scope narrows missions but never widens to other users' mission task notes.
|
||||||
- Follow-up review fix: memory listing now uses deterministic table-block pagination (`insights` first, then `preferences`) with cursor source metadata, so one table's cursor is never applied to the other.
|
|
||||||
- Follow-up hardening: missing auth-guard context returns a structured federation `unauthorized` envelope; unsupported resources and non-encodable truncated cursors throw instead of silently crashing/truncating.
|
|
||||||
|
|
||||||
## Tests
|
## Tests
|
||||||
|
|
||||||
- `pnpm --filter @mosaicstack/gateway test -- list.controller.spec.ts list-query.service.spec.ts` — PASS (16 tests, including PGlite regression coverage for team-scoped notes isolation, unauthorized mission notes exclusion, `includePersonal: false`, deterministic memory pagination, missing context envelope, unsupported resource, and cursor encode failure).
|
- `pnpm --filter @mosaicstack/gateway test -- list.controller.spec.ts list-query.service.spec.ts` — PASS (11 tests, including PGlite regression coverage for team-scoped notes isolation and `includePersonal: false`).
|
||||||
- `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
|
- `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
|
||||||
- `pnpm --filter @mosaicstack/gateway lint` — PASS.
|
- `pnpm --filter @mosaicstack/gateway lint` — PASS.
|
||||||
- `pnpm format:check` — PASS.
|
- `pnpm format:check` — PASS.
|
||||||
@@ -43,8 +41,8 @@ Implement `POST /api/federation/v1/list/:resource`.
|
|||||||
|
|
||||||
## Review evidence
|
## Review evidence
|
||||||
|
|
||||||
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS after follow-up remediation; approve, no findings.
|
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS after remediation; approve, no findings.
|
||||||
- `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS after follow-up remediation; risk level none, no findings.
|
- `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS after cursor + notes isolation remediation; risk level none, no findings.
|
||||||
- Security-review note: read-path audit logging remains intentionally deferred to M4 per orchestrator clarification and FED-M3-05 scope.
|
- Security-review note: read-path audit logging remains intentionally deferred to M4 per orchestrator clarification and FED-M3-05 scope.
|
||||||
|
|
||||||
## Risks / follow-up
|
## Risks / follow-up
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import { dirname, join, resolve } from 'node:path';
|
|||||||
import { Command } from 'commander';
|
import { Command } from 'commander';
|
||||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||||
import {
|
import {
|
||||||
acquireRestartLock,
|
|
||||||
addAgentToRoster,
|
addAgentToRoster,
|
||||||
buildAgentSendCommand,
|
buildAgentSendCommand,
|
||||||
buildAgentWatchAttachCommand,
|
buildAgentWatchAttachCommand,
|
||||||
@@ -46,8 +45,6 @@ import {
|
|||||||
removeAgentFromRoster,
|
removeAgentFromRoster,
|
||||||
resolveFleetPaths,
|
resolveFleetPaths,
|
||||||
resolvePresetFilename,
|
resolvePresetFilename,
|
||||||
restartLockPath,
|
|
||||||
RESTART_LOCK_STALE_MS,
|
|
||||||
RUNTIME_ACCEPTABLE_COMMANDS,
|
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||||
serializeRosterToYaml,
|
serializeRosterToYaml,
|
||||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||||
@@ -681,364 +678,6 @@ describe('fleet command construction', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('waits for an in-flight restart to clear before relaunching (re-entry guard)', async () => {
|
|
||||||
const home = await tempDir();
|
|
||||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
|
||||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
|
||||||
await writeFile(
|
|
||||||
rosterPath,
|
|
||||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
|
||||||
'\n',
|
|
||||||
),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Simulate another `mosaic fleet restart` process mid-teardown: a fresh lock
|
|
||||||
// (recent timestamp, so it is NOT treated as stale) already held.
|
|
||||||
const lockPath = restartLockPath(home);
|
|
||||||
await mkdir(dirname(lockPath), { recursive: true });
|
|
||||||
await writeFile(lockPath, `4242\n${Date.now()}\n`);
|
|
||||||
|
|
||||||
const events: string[] = [];
|
|
||||||
const runner: CommandRunner = async (command, args) => {
|
|
||||||
events.push(`run:${args[args.length - 1]}`);
|
|
||||||
return { stdout: '', stderr: '', exitCode: 0 };
|
|
||||||
};
|
|
||||||
// The injected sleep stands in for time passing while we wait; the in-flight
|
|
||||||
// restart "finishes" (releases its lock) after the first poll.
|
|
||||||
let sleeps = 0;
|
|
||||||
const sleepFn: SleepFn = async () => {
|
|
||||||
sleeps += 1;
|
|
||||||
events.push(`sleep:${sleeps}`);
|
|
||||||
await rm(lockPath, { force: true });
|
|
||||||
};
|
|
||||||
|
|
||||||
const program = new Command();
|
|
||||||
program.exitOverride();
|
|
||||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
|
||||||
|
|
||||||
try {
|
|
||||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
|
||||||
|
|
||||||
// It must have waited at least once before issuing any systemctl restart.
|
|
||||||
expect(sleeps).toBeGreaterThan(0);
|
|
||||||
const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
|
|
||||||
const firstRun = events.findIndex((e) => e.startsWith('run:'));
|
|
||||||
expect(firstSleep).toBeGreaterThanOrEqual(0);
|
|
||||||
expect(firstRun).toBeGreaterThan(firstSleep);
|
|
||||||
|
|
||||||
// And it still performs the full restart once the lock clears.
|
|
||||||
expect(events).toContain('run:mosaic-tmux-holder.service');
|
|
||||||
expect(events).toContain('run:mosaic-agent@coder0.service');
|
|
||||||
|
|
||||||
// The lock is released after the restart completes.
|
|
||||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
|
||||||
} finally {
|
|
||||||
await rm(home, { recursive: true, force: true });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it('breaks a stale restart lock and proceeds without waiting', async () => {
|
|
||||||
const home = await tempDir();
|
|
||||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
|
||||||
await mkdir(join(home, 'fleet'), { recursive: true });
|
|
||||||
await writeFile(
|
|
||||||
rosterPath,
|
|
||||||
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
|
||||||
'\n',
|
|
||||||
),
|
|
||||||
);
|
|
||||||
|
|
||||||
// A lock left behind by a crashed owner: timestamp older than the stale window.
|
|
||||||
const lockPath = restartLockPath(home);
|
|
||||||
await mkdir(dirname(lockPath), { recursive: true });
|
|
||||||
await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n`);
|
|
||||||
|
|
||||||
const calls: string[][] = [];
|
|
||||||
const runner: CommandRunner = async (command, args) => {
|
|
||||||
calls.push([command, ...args]);
|
|
||||||
return { stdout: '', stderr: '', exitCode: 0 };
|
|
||||||
};
|
|
||||||
const sleepFn = vi.fn<SleepFn>(async () => {});
|
|
||||||
|
|
||||||
const program = new Command();
|
|
||||||
program.exitOverride();
|
|
||||||
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
|
||||||
|
|
||||||
try {
|
|
||||||
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
|
||||||
|
|
||||||
// Stale lock is broken immediately — no waiting.
|
|
||||||
expect(sleepFn).not.toHaveBeenCalled();
|
|
||||||
expect(calls).toEqual([
|
|
||||||
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
|
||||||
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
|
||||||
]);
|
|
||||||
// The stale lock is gone once the restart completes.
|
|
||||||
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
|
||||||
} finally {
|
|
||||||
await rm(home, { recursive: true, force: true });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it('releases the restart lock so a subsequent restart is not blocked', async () => {
  // Arrange: a throwaway Mosaic home containing a one-agent roster.
  const home = await tempDir();
  const fleetDir = join(home, 'fleet');
  await mkdir(fleetDir, { recursive: true });
  // NOTE(review): roster line indentation is reproduced exactly as it appears here — confirm
  // against the checked-in spec.
  const rosterLines = [
    'version: 1',
    'transport: tmux',
    'agents:',
    ' - name: coder0',
    ' runtime: codex',
  ];
  await writeFile(join(fleetDir, 'roster.yaml'), rosterLines.join('\n'));

  // Record every command the CLI tries to run; each one "succeeds".
  const invocations: string[][] = [];
  const runner: CommandRunner = async (command, args) => {
    invocations.push([command, ...args]);
    return { stdout: '', stderr: '', exitCode: 0 };
  };
  const sleepFn = vi.fn<SleepFn>(async () => {});

  const program = new Command();
  program.exitOverride();
  registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });

  // The commands one full `fleet restart` issues: holder first, then each agent.
  const oneFullRestart = [
    ['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
    ['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
  ];

  try {
    // Act: two back-to-back restarts in the same process.
    for (let pass = 0; pass < 2; pass += 1) {
      await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
    }

    // Two sequential restarts both run fully and neither has to wait: the first
    // must have released its lock, otherwise the second would have slept.
    expect(sleepFn).not.toHaveBeenCalled();
    expect(invocations).toEqual([...oneFullRestart, ...oneFullRestart]);
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
|
||||||
|
|
||||||
it('guards the single-agent restart path behind the in-flight restart lock', async () => {
  // Arrange: a throwaway Mosaic home containing a one-agent roster.
  const home = await tempDir();
  const fleetDir = join(home, 'fleet');
  await mkdir(fleetDir, { recursive: true });
  // NOTE(review): roster line indentation is reproduced exactly as it appears here — confirm
  // against the checked-in spec.
  const rosterLines = [
    'version: 1',
    'transport: tmux',
    'agents:',
    ' - name: coder0',
    ' runtime: codex',
  ];
  await writeFile(join(fleetDir, 'roster.yaml'), rosterLines.join('\n'));

  // A full restart is mid-flight (lock held); a single-agent restart re-enters.
  // The lock carries a current timestamp, so it reads as fresh rather than stale.
  const lockPath = restartLockPath(home);
  await mkdir(dirname(lockPath), { recursive: true });
  await writeFile(lockPath, `4242\n${Date.now()}\n`);

  // One shared timeline of sleeps and service commands, so ordering can be asserted.
  const timeline: string[] = [];
  const runner: CommandRunner = async (_command, args) => {
    const unit = args[args.length - 1];
    timeline.push(`run:${unit}`);
    return { stdout: '', stderr: '', exitCode: 0 };
  };
  let sleeps = 0;
  const sleepFn: SleepFn = async () => {
    sleeps += 1;
    timeline.push(`sleep:${sleeps}`);
    // Simulate the in-flight restart finishing while the waiter sleeps.
    await rm(lockPath, { force: true });
  };

  const program = new Command();
  program.exitOverride();
  registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });

  try {
    await program.parseAsync(['node', 'mosaic', 'fleet', 'restart', 'coder0']);

    // The single-agent restart waits for the in-flight restart before acting.
    expect(sleeps).toBeGreaterThan(0);
    const firstSleep = timeline.findIndex((entry) => entry.startsWith('sleep:'));
    const firstRun = timeline.findIndex((entry) => entry.startsWith('run:'));
    expect(firstSleep).toBeGreaterThanOrEqual(0);
    expect(firstRun).toBeGreaterThan(firstSleep);

    // Only the named agent is restarted; the holder is untouched.
    expect(timeline).toContain('run:mosaic-agent@coder0.service');
    expect(timeline).not.toContain('run:mosaic-tmux-holder.service');
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
|
||||||
|
|
||||||
it('does not let a timed-out owner drop a lock another restart broke and re-owned', async () => {
  const home = await tempDir();
  // Cleanup lives in `finally` so a failed assertion cannot leak the temp dir.
  try {
    const runDir = join(home, 'fleet', 'run');
    await mkdir(runDir, { recursive: true });
    const lockPath = restartLockPath(home);
    // The owner token is the third line of the lock file.
    const tokenOf = async (): Promise<string> => {
      const raw = await readFile(lockPath, 'utf8');
      return raw.split('\n')[2]?.trim() ?? '';
    };
    const sleepFn = vi.fn<SleepFn>(async () => {});

    // R1 acquires the lock and begins a restart that then hangs.
    const r1 = await acquireRestartLock(home, sleepFn);
    const tokenR1 = await tokenOf();
    expect(tokenR1).not.toBe('');

    // The hung R1 leaves a stale lock: rewrite its timestamp into the past while
    // preserving R1's token — exactly the on-disk state a stuck owner leaves.
    await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n${tokenR1}\n`);

    // R2 re-enters, sees the stale lock, and takes ownership without waiting.
    const r2 = await acquireRestartLock(home, sleepFn);
    const tokenR2 = await tokenOf();
    expect(tokenR2).not.toBe(tokenR1);
    expect(sleepFn).not.toHaveBeenCalled();

    // R1 finally finishes and releases. It must NOT delete R2's lock — otherwise
    // a third restart (R3) could acquire and interleave with R2 still running.
    await r1.release();
    expect(await tokenOf()).toBe(tokenR2);

    // R2 releases cleanly and the lock is gone.
    await r2.release();
    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
|
||||||
|
|
||||||
it('lets only one of several concurrent breakers proceed past a stale lock', async () => {
  const home = await tempDir();
  const lockPath = restartLockPath(home);
  await mkdir(dirname(lockPath), { recursive: true });

  // A stale lock left by a crashed owner: every concurrent re-entrant restart
  // will judge it stale and try to break it at the same instant. Breaking must
  // NOT grant ownership to more than one contender, so exactly one can ever hold
  // the lock at a time. (The v2 fix wrote our own token during the break and read
  // it back, so two breakers each saw their own token and BOTH proceeded; this
  // guards that regression.)
  await writeFile(
    lockPath,
    `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
  );

  // Yielding sleep so a waiting contender lets the current owner finish and
  // release before it re-contends, instead of spinning the microtask queue.
  const sleepFn: SleepFn = async () => {
    await new Promise((res) => setTimeout(res, 0));
  };

  let active = 0;
  let maxActive = 0;
  const tokens: string[] = [];
  // The owner token is the third line of the lock file.
  const tokenOf = async (): Promise<string> => {
    const raw = await readFile(lockPath, 'utf8');
    return raw.split('\n')[2]?.trim() ?? '';
  };

  // One "restart" = acquire the lock, do work in the critical section, release.
  const restartOnce = async (): Promise<void> => {
    const guard = await acquireRestartLock(home, sleepFn);
    active += 1;
    maxActive = Math.max(maxActive, active);
    try {
      // Record the token we own while we hold it, then yield to interleave with
      // any other contender that might (wrongly) believe it owns the lock too.
      tokens.push(await tokenOf());
      await new Promise((res) => setTimeout(res, 0));
    } finally {
      // Always leave the critical section and release, even if reading the token
      // failed — otherwise the remaining contenders keep polling a lock nobody
      // will ever drop.
      active -= 1;
      await guard.release();
    }
  };

  try {
    // Three breakers race the single stale lock simultaneously.
    await Promise.all([restartOnce(), restartOnce(), restartOnce()]);

    // Mutual exclusion held: never two owners at once despite concurrent breaks.
    expect(maxActive).toBe(1);
    // Each acquire owned with its own distinct token — no two ever shared it.
    expect(new Set(tokens).size).toBe(3);
    // The lock is fully released at the end.
    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
|
||||||
|
|
||||||
it('lets exactly one of two breakers take over a stale lock while the other waits', async () => {
  const home = await tempDir();
  const lockPath = restartLockPath(home);
  await mkdir(dirname(lockPath), { recursive: true });

  // A single stale lock both contenders will judge stale at the same instant.
  // Every transition runs under the registry mutex, so only one may take the
  // lock over; the other must observe a now-fresh owner and WAIT/re-evaluate
  // rather than also taking over. (A content-blind clobber let both believe
  // they owned it — this asserts the mutex-gated takeover instead.)
  await writeFile(
    lockPath,
    `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
  );

  // Barrier the winner holds against until the loser has observed the lock
  // fresh and waited at least once — forcing the exact interleaving where one
  // proceeds while the other waits, deterministically rather than by timing.
  let resolveLoserWaited: () => void = () => {};
  const loserWaited = new Promise<void>((res) => {
    resolveLoserWaited = res;
  });
  let sleeps = 0;
  const sleepFn: SleepFn = async () => {
    sleeps += 1;
    resolveLoserWaited();
    await new Promise((res) => setTimeout(res, 0));
  };

  let active = 0;
  let maxActive = 0;
  const tokens: string[] = [];
  // The owner token is the third line of the lock file.
  const tokenOf = async (): Promise<string> => {
    const raw = await readFile(lockPath, 'utf8');
    return raw.split('\n')[2]?.trim() ?? '';
  };

  let firstOwner = true;
  const restartOnce = async (): Promise<void> => {
    const guard = await acquireRestartLock(home, sleepFn);
    active += 1;
    maxActive = Math.max(maxActive, active);
    try {
      tokens.push(await tokenOf());
      if (firstOwner) {
        // Winner: keep holding the lock until the loser has waited once, so the
        // loser is guaranteed to see a FRESH owner (not the stale one) and back
        // off — proving it could not also take over.
        firstOwner = false;
        await loserWaited;
      } else {
        await new Promise((res) => setTimeout(res, 0));
      }
    } finally {
      // Always leave the critical section and release, even if the body threw —
      // otherwise the other contender keeps polling a lock nobody will drop.
      active -= 1;
      await guard.release();
    }
  };

  try {
    // Exactly two breakers race the single stale lock.
    await Promise.all([restartOnce(), restartOnce()]);

    // Mutual exclusion: never two owners at once (if both took over the stale
    // lock, this would be 2).
    expect(maxActive).toBe(1);
    // Both eventually owned, each with its own distinct token.
    expect(new Set(tokens).size).toBe(2);
    // The loser observed the winner's fresh lock and waited — it did NOT also
    // take over the stale lock.
    expect(sleeps).toBeGreaterThanOrEqual(1);
    // The lock is fully released at the end.
    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
|
||||||
|
|
||||||
it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
|
it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
|
||||||
const home = await tempDir();
|
const home = await tempDir();
|
||||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
|||||||
@@ -1,16 +1,5 @@
|
|||||||
import { constants } from 'node:fs';
|
import { constants } from 'node:fs';
|
||||||
import {
|
import { access, chmod, copyFile, mkdir, readFile, unlink, writeFile } from 'node:fs/promises';
|
||||||
access,
|
|
||||||
chmod,
|
|
||||||
copyFile,
|
|
||||||
mkdir,
|
|
||||||
open,
|
|
||||||
readFile,
|
|
||||||
stat,
|
|
||||||
unlink,
|
|
||||||
writeFile,
|
|
||||||
} from 'node:fs/promises';
|
|
||||||
import { randomUUID } from 'node:crypto';
|
|
||||||
import { homedir, hostname, userInfo } from 'node:os';
|
import { homedir, hostname, userInfo } from 'node:os';
|
||||||
import { dirname, join, resolve } from 'node:path';
|
import { dirname, join, resolve } from 'node:path';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
@@ -544,295 +533,6 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
|
|||||||
return ['systemctl', '--user', action, service];
|
return ['systemctl', '--user', action, service];
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Delay (ms) between checks while a restart waits for another restart's lock to clear. */
export const RESTART_LOCK_POLL_INTERVAL_MS = 250;

/**
 * Longest time (ms) a re-entrant restart keeps waiting on the in-flight restart.
 * Once this elapses the waiter breaks the lock and proceeds, so a lock whose
 * holder crashed can never block the fleet forever.
 */
export const RESTART_LOCK_MAX_WAIT_MS = 30 * 1_000;

/**
 * Age (ms) at which a restart lock counts as stale — its owner is presumed to
 * have died without releasing it — and is broken at once instead of waited on.
 */
export const RESTART_LOCK_STALE_MS = 60 * 1_000;
|
|
||||||
|
|
||||||
/**
 * Builds the location of the cross-process restart lock for one Mosaic home.
 *
 * The file always lives at `<mosaicHome>/fleet/run/restart.lock`; it is derived
 * only from `mosaicHome`, so the lock is scoped to the fleet the restart acts on.
 *
 * @param mosaicHome Root directory of the Mosaic installation.
 * @returns Path of the restart lock file.
 */
export function restartLockPath(mosaicHome: string): string {
  const relativeSegments = ['fleet', 'run', 'restart.lock'];
  return join(mosaicHome, ...relativeSegments);
}
|
|
||||||
|
|
||||||
/** Handle to a restart lock that the caller currently holds. */
interface RestartGuard {
  /** Gives the lock back: the lock file is removed only if this holder still owns it. */
  release(): Promise<void>;
}
|
|
||||||
|
|
||||||
/**
 * Renders the text stored in a lock file: three newline-terminated lines holding
 * the current process id (informational), the current epoch-millisecond
 * timestamp, and the caller's unique owner token.
 *
 * @param token Owner token identifying whoever is writing the lock.
 * @returns The lock-file contents, ending with a trailing newline.
 */
function formatRestartLockContent(token: string): string {
  const fields = [process.pid, Date.now(), token];
  return fields.join('\n') + '\n';
}
|
|
||||||
|
|
||||||
/**
 * Extracts the owner token — the third line — from a lock file.
 *
 * Callers compare the result with their own token before releasing, so a
 * process only ever removes a lock it still owns.
 *
 * @param lockPath Path of the lock file to inspect.
 * @returns The trimmed token, or `null` when the file cannot be read or its
 *   third line is missing or blank.
 */
async function readRestartLockToken(lockPath: string): Promise<string | null> {
  try {
    const lines = (await readFile(lockPath, 'utf8')).split('\n');
    const owner = (lines[2] ?? '').trim();
    return owner === '' ? null : owner;
  } catch {
    // Any read failure (missing file, permissions, …) means "no known owner".
    return null;
  }
}
|
|
||||||
|
|
||||||
/**
 * Decides from a lock file's text whether the lock is stale.
 *
 * The timestamp is parsed from the second line. A lock is stale when that
 * timestamp does not parse as a number (e.g. a corrupt or partially written
 * file) or when it is at least RESTART_LOCK_STALE_MS older than `now`.
 *
 * @param raw Full contents of the lock file.
 * @param now Current time in epoch milliseconds.
 * @returns `true` when the lock should be treated as stale.
 */
function isRestartLockContentStale(raw: string, now: number): boolean {
  const [, stampField = ''] = raw.split('\n');
  const writtenAt = Number.parseInt(stampField.trim(), 10);
  return !Number.isFinite(writtenAt) || now - writtenAt >= RESTART_LOCK_STALE_MS;
}
|
|
||||||
|
|
||||||
/**
 * Derives the path of the registry mutex file that accompanies a restart lock:
 * the lock path with a `.mutex` suffix. The mutex is taken around each
 * transition of the restart lock (acquire, release, takeover) and is meant to be
 * held only for a few filesystem operations, never across the restart itself.
 *
 * @param lockPath Path of the restart lock file.
 * @returns Path of the matching mutex file.
 */
function restartMutexPath(lockPath: string): string {
  return lockPath + '.mutex';
}
|
|
||||||
|
|
||||||
/**
 * Pause (ms) between attempts to take the registry mutex while another holder
 * has it. Kept short because the mutex is only held for a few filesystem ops.
 */
const RESTART_MUTEX_RETRY_MS = 20;
|
|
||||||
|
|
||||||
/**
 * Staleness test for the internal mutex / reclaim lock files, based on the
 * file's modification time instead of its contents.
 *
 * These files are created with `open(path, 'wx')` and only afterwards filled
 * in, so a content-based check could see a just-created, still-empty file as
 * corrupt and reap a lock that a contender is in the middle of acquiring. A
 * freshly created file has a fresh mtime and therefore always reads as live.
 *
 * @param path File to examine.
 * @param now Current time in epoch milliseconds.
 * @returns `true` only when the file exists and its mtime is at least
 *   RESTART_LOCK_STALE_MS old. A missing file, or one that cannot be stat'ed,
 *   yields `false` so the caller backs off or re-contends rather than reaping.
 */
async function isRestartLockPathStale(path: string, now: number): Promise<boolean> {
  let modifiedAt: number;
  try {
    ({ mtimeMs: modifiedAt } = await stat(path));
  } catch {
    // Gone (ENOENT) or unreadable: either way, not something to reap.
    return false;
  }
  return now - modifiedAt >= RESTART_LOCK_STALE_MS;
}
|
|
||||||
|
|
||||||
/**
 * Derives the path of the reclaim lock that serializes reaping of a dead
 * holder's mutex: the mutex path with a `.reclaim` suffix.
 *
 * @param mutexPath Path of the registry mutex file.
 * @returns Path of the matching reclaim lock file.
 */
function restartReclaimPath(mutexPath: string): string {
  return mutexPath + '.reclaim';
}
|
|
||||||
|
|
||||||
/**
 * Reap a registry mutex left behind by a process that crashed mid-transition —
 * one whose file mtime has aged past RESTART_LOCK_STALE_MS. The mutex is meant
 * to be held only for a couple of filesystem ops, so one that old is presumed to
 * belong to a dead holder.
 *
 * This function only ever REMOVES the dead mutex; it never creates or holds it.
 * Ownership of the mutex is still decided solely by the exclusive `open('wx')`
 * in the acquire path. The removal is serialized by a dedicated reclaim lock
 * (also created with `open('wx')`): under that lock the mutex is re-checked and
 * unlinked only if it is STILL stale, so a fresh mutex created by a live holder
 * in the meantime is left untouched.
 *
 * If another reclaimer already holds the reclaim lock, this pass does not reap.
 * It only clears the reclaim lock when that lock is itself stale by mtime, so a
 * later pass can retry.
 *
 * @param mutexPath Path of the registry mutex to reap if still stale.
 * @throws Any error from creating the reclaim lock other than `EEXIST`, and any
 *   error from closing its handle.
 */
async function reclaimStaleRestartMutex(mutexPath: string): Promise<void> {
  const reclaimPath = restartReclaimPath(mutexPath);
  let handle: Awaited<ReturnType<typeof open>>;
  try {
    // Exclusive create: succeeds for exactly one reclaimer at a time.
    handle = await open(reclaimPath, 'wx');
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
      throw err;
    }
    // Someone is already reclaiming. If their reclaim lock is itself stale by
    // mtime, its holder is presumed to have crashed mid-reap — clear it so a
    // later pass can retry. Otherwise a live reclaimer has it; back off. Either
    // way we do not reap the mutex this pass.
    if (await isRestartLockPathStale(reclaimPath, Date.now())) {
      await unlink(reclaimPath).catch(() => {});
    }
    return;
  }
  try {
    // Re-check the mutex UNDER the reclaim lock and remove it only if it is STILL
    // stale by mtime. A live holder's mutex is fresh and is left untouched.
    if (await isRestartLockPathStale(mutexPath, Date.now())) {
      await unlink(mutexPath).catch(() => {});
    }
  } finally {
    // Remove the reclaim lock even when closing the handle fails; otherwise a
    // close error would strand the lock file and block every reclaimer until it
    // aged past the stale threshold. The close error still propagates.
    try {
      await handle.close();
    } finally {
      await unlink(reclaimPath).catch(() => {});
    }
  }
}
|
|
||||||
|
|
||||||
/**
 * Takes the registry mutex, retrying until it is held, and hands back a function
 * that releases it.
 *
 * The mutex is a file created with the exclusive `'wx'` flag, so at most one
 * caller can create it at a time. When the file already exists:
 *   - if its mtime is not yet stale, a live holder is assumed and the call waits
 *     RESTART_MUTEX_RETRY_MS before trying again;
 *   - otherwise the holder is presumed dead and a reap of the stale mutex is
 *     attempted before trying again.
 * Staleness is judged by mtime, not content, so a mutex that exists but has not
 * had its token written yet is never mistaken for a dead one.
 *
 * @param mutexPath Path of the mutex file.
 * @param token Owner token written into the mutex and checked again on release.
 * @returns A release function that unlinks the mutex only if it still carries
 *   `token`.
 * @throws Any `open` error other than `EEXIST`, or a failure to write the token
 *   (in which case the just-created mutex file is removed first).
 */
async function acquireRestartMutex(
  mutexPath: string,
  token: string,
): Promise<RestartGuard['release']> {
  let created: Awaited<ReturnType<typeof open>> | undefined;
  while (created === undefined) {
    try {
      created = await open(mutexPath, 'wx');
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
        throw err;
      }
      if (await isRestartLockPathStale(mutexPath, Date.now())) {
        // Aged past the stale threshold: try to reap it, then contend again.
        await reclaimStaleRestartMutex(mutexPath);
      } else {
        // A live holder has it. Back off briefly before the next attempt.
        await new Promise<void>((wake) => setTimeout(wake, RESTART_MUTEX_RETRY_MS));
      }
    }
  }

  // We created the mutex. Stamp it with our token; if that fails, remove our own
  // file so no empty mutex is left behind for a peer to reap.
  const handle = created;
  try {
    await handle.writeFile(formatRestartLockContent(token));
    await handle.close();
  } catch (err) {
    await handle.close().catch(() => {});
    await unlink(mutexPath).catch(() => {});
    throw err;
  }

  return async (): Promise<void> => {
    // Token-gated: leave the file alone if it no longer carries our token.
    const holder = await readRestartLockToken(mutexPath);
    if (holder === token) {
      await unlink(mutexPath).catch(() => {});
    }
  };
}
|
|
||||||
|
|
||||||
/**
 * Takes the fleet restart lock so that concurrent `mosaic fleet restart`
 * invocations — including ones in other processes — run one after another.
 *
 * Behaviour on each attempt, all performed while holding the registry mutex:
 *   - no lock file: write our own lock and return;
 *   - lock file unreadable, or stale by its embedded timestamp: announce it on
 *     stderr, overwrite it with our lock and return;
 *   - lock file fresh but we have already waited RESTART_LOCK_MAX_WAIT_MS:
 *     announce the timeout on stderr, overwrite the lock and return, so a hung
 *     or crashed owner can never block the fleet for good;
 *   - otherwise: drop the mutex, sleep RESTART_LOCK_POLL_INTERVAL_MS via
 *     `sleepFn`, and evaluate again.
 *
 * Because the read of the lock and the write that follows both happen under the
 * mutex, a takeover is a check-then-write that no other acquirer, releaser or
 * breaker using this mutex can interleave with. The returned guard's `release()`
 * is likewise mutex-gated and token-checked, so an owner whose lock was taken
 * over cannot delete the new owner's lock.
 *
 * @param mosaicHome Root directory of the Mosaic installation; the lock lives
 *   beneath it and its parent directory is created if needed.
 * @param sleepFn Sleep used between polls while another restart holds the lock.
 * @returns A guard whose `release()` removes the lock if we still own it.
 */
export async function acquireRestartLock(
  mosaicHome: string,
  sleepFn: SleepFn,
): Promise<RestartGuard> {
  const token = randomUUID();
  const lockPath = restartLockPath(mosaicHome);
  const mutexPath = restartMutexPath(lockPath);
  await mkdir(dirname(lockPath), { recursive: true });

  const guard: RestartGuard = {
    release: async (): Promise<void> => {
      // Mutex-gated and token-gated: only remove the lock if it is still ours.
      // If another caller took it over (after a stale/timeout break) the token
      // no longer matches and their lock is left intact.
      const unlockRegistry = await acquireRestartMutex(mutexPath, token);
      try {
        const holder = await readRestartLockToken(lockPath);
        if (holder === token) {
          await unlink(lockPath).catch(() => {});
        }
      } finally {
        await unlockRegistry();
      }
    },
  };

  const giveUpAt = Date.now() + RESTART_LOCK_MAX_WAIT_MS;
  for (;;) {
    const unlockRegistry = await acquireRestartMutex(mutexPath, token);
    let claimed = false;
    try {
      // Inspect and (if appropriate) rewrite the lock while the mutex is held.
      let contents: string | null;
      let exists = true;
      try {
        contents = await readFile(lockPath, 'utf8');
      } catch (readErr) {
        // ENOENT means the lock is free; any other failure leaves `contents`
        // null with `exists` true, which is handled as a stale lock below.
        contents = null;
        exists = (readErr as NodeJS.ErrnoException).code !== 'ENOENT';
      }
      const now = Date.now();

      // Why an existing lock may be broken, if at all. Staleness wins over timeout.
      let breakReason: 'stale' | 'timeout' | null = null;
      if (exists) {
        if (contents === null || isRestartLockContentStale(contents, now)) {
          breakReason = 'stale';
        } else if (now >= giveUpAt) {
          breakReason = 'timeout';
        }
      }

      if (breakReason !== null) {
        process.stderr.write(
          breakReason === 'stale'
            ? 'Breaking stale fleet restart lock.\n'
            : `Timed out after ${RESTART_LOCK_MAX_WAIT_MS}ms waiting for the in-flight fleet ` +
                'restart; breaking the lock.\n',
        );
      }
      if (!exists || breakReason !== null) {
        // Free lock or takeover: both are a plain write, safe because the mutex
        // is held between the read above and this write.
        await writeFile(lockPath, formatRestartLockContent(token));
        claimed = true;
      }
      // Otherwise a fresh restart owns the lock — wait below and re-evaluate.
    } finally {
      await unlockRegistry();
    }
    if (claimed) {
      return guard;
    }
    await sleepFn(RESTART_LOCK_POLL_INTERVAL_MS);
  }
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the systemctl --user enable command for a given unit.
|
* Returns the systemctl --user enable command for a given unit.
|
||||||
* Used by the install auto-enable step to persist units across reboots.
|
* Used by the install auto-enable step to persist units across reboots.
|
||||||
@@ -1472,7 +1172,6 @@ export function isSendAccepted(capturedOutput: string): SendVerifyResult {
|
|||||||
|
|
||||||
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
|
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
|
||||||
const runner = deps.runner ?? runCommand;
|
const runner = deps.runner ?? runCommand;
|
||||||
const sleepFn = deps.sleepFn ?? defaultSleep;
|
|
||||||
const paths = resolveFleetPaths(deps.mosaicHome);
|
const paths = resolveFleetPaths(deps.mosaicHome);
|
||||||
const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
|
const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
|
||||||
|
|
||||||
@@ -1586,22 +1285,9 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
.command(`${action} [agent]`)
|
.command(`${action} [agent]`)
|
||||||
.description(`${action} the fleet holder or one agent`)
|
.description(`${action} the fleet holder or one agent`)
|
||||||
.action(async (agent?: string) => {
|
.action(async (agent?: string) => {
|
||||||
const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>();
|
|
||||||
const activePaths = resolveFleetPaths(commandOpts.mosaicHome);
|
|
||||||
const roster = await loadRosterForCommand(cmd);
|
const roster = await loadRosterForCommand(cmd);
|
||||||
if (agent) {
|
if (agent) {
|
||||||
getRosterAgent(roster, agent);
|
getRosterAgent(roster, agent);
|
||||||
// Single-agent restart is guarded too: it can race a full restart that
|
|
||||||
// is tearing the shared holder down.
|
|
||||||
if (action === 'restart') {
|
|
||||||
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
|
||||||
try {
|
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
|
||||||
} finally {
|
|
||||||
await guard.release();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1612,21 +1298,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (action === 'restart') {
|
|
||||||
// Serialize the holder+agents teardown/relaunch behind the restart lock
|
|
||||||
// so a re-entrant restart waits for clean shutdown before relaunching,
|
|
||||||
// instead of racing a half-torn-down holder into a tight loop.
|
|
||||||
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
|
||||||
try {
|
|
||||||
await runChecked(runner, buildFleetServiceCommand(action));
|
|
||||||
for (const rosterAgent of roster.agents) {
|
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
await guard.release();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
await runChecked(runner, buildFleetServiceCommand(action));
|
await runChecked(runner, buildFleetServiceCommand(action));
|
||||||
for (const rosterAgent of roster.agents) {
|
for (const rosterAgent of roster.agents) {
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
||||||
|
|||||||
161
tools/install.sh
161
tools/install.sh
@@ -16,10 +16,6 @@
|
|||||||
# --framework Install/upgrade framework only (skip npm CLI)
|
# --framework Install/upgrade framework only (skip npm CLI)
|
||||||
# --cli Install/upgrade npm CLI only (skip framework)
|
# --cli Install/upgrade npm CLI only (skip framework)
|
||||||
# --ref <branch> Git ref for framework archive (default: main)
|
# --ref <branch> Git ref for framework archive (default: main)
|
||||||
# --dev Build CLI + gateway FROM SOURCE at --ref instead of the
|
|
||||||
# registry @latest. Zero registry writes — packs local
|
|
||||||
# tarballs and installs them globally. Use to test a branch
|
|
||||||
# end-to-end before cutting a release.
|
|
||||||
# --yes Accept all defaults; headless/non-interactive install
|
# --yes Accept all defaults; headless/non-interactive install
|
||||||
# --no-auto-launch Skip automatic mosaic wizard + gateway install on first install
|
# --no-auto-launch Skip automatic mosaic wizard + gateway install on first install
|
||||||
# --uninstall Reverse the install: remove framework dir, CLI package, and npmrc line
|
# --uninstall Reverse the install: remove framework dir, CLI package, and npmrc line
|
||||||
@@ -31,7 +27,6 @@
|
|||||||
# MOSAIC_PREFIX — npm global prefix (default: ~/.npm-global)
|
# MOSAIC_PREFIX — npm global prefix (default: ~/.npm-global)
|
||||||
# MOSAIC_NO_COLOR — disable colour (set to 1)
|
# MOSAIC_NO_COLOR — disable colour (set to 1)
|
||||||
# MOSAIC_REF — git ref for framework (default: main)
|
# MOSAIC_REF — git ref for framework (default: main)
|
||||||
# MOSAIC_DEV — equivalent to --dev (set to 1)
|
|
||||||
# MOSAIC_ASSUME_YES — equivalent to --yes (set to 1)
|
# MOSAIC_ASSUME_YES — equivalent to --yes (set to 1)
|
||||||
# ──────────────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────────────
|
||||||
#
|
#
|
||||||
@@ -48,7 +43,6 @@ FLAG_CLI=true
|
|||||||
FLAG_NO_AUTO_LAUNCH=false
|
FLAG_NO_AUTO_LAUNCH=false
|
||||||
FLAG_YES=false
|
FLAG_YES=false
|
||||||
FLAG_UNINSTALL=false
|
FLAG_UNINSTALL=false
|
||||||
FLAG_DEV=false
|
|
||||||
GIT_REF="${MOSAIC_REF:-main}"
|
GIT_REF="${MOSAIC_REF:-main}"
|
||||||
|
|
||||||
# MOSAIC_ASSUME_YES env var acts the same as --yes
|
# MOSAIC_ASSUME_YES env var acts the same as --yes
|
||||||
@@ -56,18 +50,12 @@ if [[ "${MOSAIC_ASSUME_YES:-0}" == "1" ]]; then
|
|||||||
FLAG_YES=true
|
FLAG_YES=true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# MOSAIC_DEV env var acts the same as --dev
|
|
||||||
if [[ "${MOSAIC_DEV:-0}" == "1" ]]; then
|
|
||||||
FLAG_DEV=true
|
|
||||||
fi
|
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--check) FLAG_CHECK=true; shift ;;
|
--check) FLAG_CHECK=true; shift ;;
|
||||||
--framework) FLAG_CLI=false; shift ;;
|
--framework) FLAG_CLI=false; shift ;;
|
||||||
--cli) FLAG_FRAMEWORK=false; shift ;;
|
--cli) FLAG_FRAMEWORK=false; shift ;;
|
||||||
--ref) GIT_REF="${2:-main}"; shift 2 ;;
|
--ref) GIT_REF="${2:-main}"; shift 2 ;;
|
||||||
--dev) FLAG_DEV=true; shift ;;
|
|
||||||
--yes|-y) FLAG_YES=true; shift ;;
|
--yes|-y) FLAG_YES=true; shift ;;
|
||||||
--no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
|
--no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
|
||||||
--uninstall) FLAG_UNINSTALL=true; shift ;;
|
--uninstall) FLAG_UNINSTALL=true; shift ;;
|
||||||
@@ -84,17 +72,6 @@ CLI_PKG="${SCOPE}/mosaic"
|
|||||||
REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
|
REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
|
||||||
ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"
|
ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"
|
||||||
|
|
||||||
# In dev (build-from-source) mode the gateway is installed globally from a
|
|
||||||
# locally-built tarball. Tell the wizard / gateway-config stage NOT to overwrite
|
|
||||||
# it with the registry @latest build (honored by gatewayConfigStage).
|
|
||||||
if [[ "$FLAG_DEV" == "true" ]]; then
|
|
||||||
export MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Shared monorepo checkout (populated on demand by ensure_monorepo).
|
|
||||||
WORK_DIR=""
|
|
||||||
EXTRACTED_DIR=""
|
|
||||||
|
|
||||||
# ─── uninstall path ───────────────────────────────────────────────────────────
|
# ─── uninstall path ───────────────────────────────────────────────────────────
|
||||||
# Shell-level uninstall for when the CLI is broken or not available.
|
# Shell-level uninstall for when the CLI is broken or not available.
|
||||||
# Handles: framework directory, npm CLI package, npmrc scope line.
|
# Handles: framework directory, npm CLI package, npmrc scope line.
|
||||||
@@ -262,99 +239,6 @@ framework_version() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Download + extract the monorepo archive at $GIT_REF exactly once per run.
|
|
||||||
# Sets the script-level EXTRACTED_DIR to the repo root. Reused by both the
|
|
||||||
# framework install (Part 1) and the dev build-from-source path (Part 2).
|
|
||||||
ensure_monorepo() {
|
|
||||||
if [[ -n "$EXTRACTED_DIR" ]] && [[ -d "$EXTRACTED_DIR" ]]; then
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
require_cmd tar
|
|
||||||
|
|
||||||
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
|
|
||||||
# shellcheck disable=SC2317
|
|
||||||
cleanup_work() { [[ -n "$WORK_DIR" ]] && rm -rf "$WORK_DIR"; }
|
|
||||||
trap cleanup_work EXIT
|
|
||||||
|
|
||||||
info "Downloading source from ${GIT_REF}…"
|
|
||||||
if command -v curl &>/dev/null; then
|
|
||||||
curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
|
||||||
elif command -v wget &>/dev/null; then
|
|
||||||
wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
|
||||||
else
|
|
||||||
fail "curl or wget required to download source."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Gitea archives extract to <repo-name>/ inside the work dir
|
|
||||||
EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
|
|
||||||
if [[ -z "$EXTRACTED_DIR" ]] || [[ ! -d "$EXTRACTED_DIR" ]]; then
|
|
||||||
fail "Could not locate extracted source in archive."
|
|
||||||
ls -la "$WORK_DIR" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Build @mosaicstack/mosaic + @mosaicstack/gateway from source and install both
|
|
||||||
# globally from locally-packed tarballs. ZERO registry writes. Workspace deps
|
|
||||||
# (brain/config/db/…) are pulled from the registry at the versions pinned in
|
|
||||||
# each package.json — `pnpm pack` rewrites `workspace:*` to those versions.
|
|
||||||
install_cli_from_source() {
|
|
||||||
local src="$EXTRACTED_DIR"
|
|
||||||
local out_dir="$WORK_DIR/dist-tarballs"
|
|
||||||
mkdir -p "$out_dir"
|
|
||||||
|
|
||||||
# pnpm via corepack (ships with Node >= 16.9; required by Node >= 20 preflight).
|
|
||||||
# Pin to the repo's packageManager version so the build matches CI. Surface
|
|
||||||
# corepack failures so the fresh-machine case gives an actionable error
|
|
||||||
# instead of a bare "command not found".
|
|
||||||
if ! command -v pnpm &>/dev/null; then
|
|
||||||
info "Activating pnpm via corepack…"
|
|
||||||
corepack enable 2>&1 | sed 's/^/ /' || warn "corepack enable failed — pnpm may need manual install."
|
|
||||||
corepack prepare pnpm@10.6.2 --activate 2>&1 | sed 's/^/ /' \
|
|
||||||
|| warn "corepack prepare failed — pnpm may need manual install."
|
|
||||||
fi
|
|
||||||
if ! command -v pnpm &>/dev/null; then
|
|
||||||
fail "pnpm not available after corepack activation."
|
|
||||||
echo " Install pnpm manually (https://pnpm.io/installation) and re-run with --dev."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
info "Installing workspace dependencies (pnpm install)…"
|
|
||||||
( cd "$src" && pnpm install ) 2>&1 | sed 's/^/ /'
|
|
||||||
|
|
||||||
info "Building CLI + gateway from source…"
|
|
||||||
( cd "$src" && pnpm --filter "@mosaicstack/mosaic..." --filter "@mosaicstack/gateway..." run build ) 2>&1 | sed 's/^/ /'
|
|
||||||
|
|
||||||
info "Packing local tarballs…"
|
|
||||||
( cd "$src/packages/mosaic" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/ /'
|
|
||||||
( cd "$src/apps/gateway" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/ /'
|
|
||||||
|
|
||||||
local cli_tgz gw_tgz
|
|
||||||
cli_tgz="$(ls -1t "$out_dir"/mosaicstack-mosaic-*.tgz 2>/dev/null | head -1)"
|
|
||||||
gw_tgz="$(ls -1t "$out_dir"/mosaicstack-gateway-*.tgz 2>/dev/null | head -1)"
|
|
||||||
|
|
||||||
if [[ ! -f "$cli_tgz" ]]; then
|
|
||||||
fail "CLI tarball was not produced by pnpm pack."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [[ ! -f "$gw_tgz" ]]; then
|
|
||||||
fail "Gateway tarball was not produced by pnpm pack."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Gateway first so it is present globally before the CLI's wizard runs (which
|
|
||||||
# skips its own gateway install via MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1).
|
|
||||||
info "Installing gateway from source tarball (global)…"
|
|
||||||
npm install -g "$gw_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/ /'
|
|
||||||
|
|
||||||
info "Installing CLI from source tarball (global)…"
|
|
||||||
npm install -g "$cli_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/ /'
|
|
||||||
|
|
||||||
ok "Installed from source: CLI $(installed_cli_version)"
|
|
||||||
}
|
|
||||||
|
|
||||||
# ─── preflight ────────────────────────────────────────────────────────────────
|
# ─── preflight ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
require_cmd node
|
require_cmd node
|
||||||
@@ -398,8 +282,25 @@ if [[ "$FLAG_FRAMEWORK" == "true" ]]; then
|
|||||||
warn "Framework not installed."
|
warn "Framework not installed."
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# Download repo archive and extract framework (shared with the dev build)
|
# Download repo archive and extract framework
|
||||||
ensure_monorepo
|
require_cmd tar
|
||||||
|
|
||||||
|
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
|
||||||
|
cleanup_work() { rm -rf "$WORK_DIR"; }
|
||||||
|
trap cleanup_work EXIT
|
||||||
|
|
||||||
|
info "Downloading framework from ${GIT_REF}…"
|
||||||
|
if command -v curl &>/dev/null; then
|
||||||
|
curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
||||||
|
elif command -v wget &>/dev/null; then
|
||||||
|
wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
||||||
|
else
|
||||||
|
fail "curl or wget required to download framework."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Gitea archives extract to <repo-name>/ inside the work dir
|
||||||
|
EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
|
||||||
FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"
|
FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"
|
||||||
|
|
||||||
if [[ ! -d "$FRAMEWORK_SRC" ]]; then
|
if [[ ! -d "$FRAMEWORK_SRC" ]]; then
|
||||||
@@ -455,11 +356,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
CURRENT="$(installed_cli_version)"
|
CURRENT="$(installed_cli_version)"
|
||||||
if [[ "$FLAG_DEV" == "true" ]]; then
|
LATEST="$(latest_cli_version)"
|
||||||
LATEST=""
|
|
||||||
else
|
|
||||||
LATEST="$(latest_cli_version)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n "$CURRENT" ]]; then
|
if [[ -n "$CURRENT" ]]; then
|
||||||
dim " Installed: ${CLI_PKG}@${CURRENT}"
|
dim " Installed: ${CLI_PKG}@${CURRENT}"
|
||||||
@@ -467,9 +364,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
dim " Installed: (none)"
|
dim " Installed: (none)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$FLAG_DEV" == "true" ]]; then
|
if [[ -n "$LATEST" ]]; then
|
||||||
dim " Source: ${REPO_BASE} (ref: ${GIT_REF}, build-from-source)"
|
|
||||||
elif [[ -n "$LATEST" ]]; then
|
|
||||||
dim " Latest: ${CLI_PKG}@${LATEST}"
|
dim " Latest: ${CLI_PKG}@${LATEST}"
|
||||||
else
|
else
|
||||||
dim " Latest: (registry unreachable)"
|
dim " Latest: (registry unreachable)"
|
||||||
@@ -477,9 +372,7 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
if [[ "$FLAG_CHECK" == "true" ]]; then
|
if [[ "$FLAG_CHECK" == "true" ]]; then
|
||||||
if [[ "$FLAG_DEV" == "true" ]]; then
|
if [[ -z "$LATEST" ]]; then
|
||||||
info "Dev mode: installed version is ${CURRENT:-(none)} (no registry comparison)."
|
|
||||||
elif [[ -z "$LATEST" ]]; then
|
|
||||||
warn "Could not reach registry."
|
warn "Could not reach registry."
|
||||||
elif [[ -z "$CURRENT" ]]; then
|
elif [[ -z "$CURRENT" ]]; then
|
||||||
warn "Not installed."
|
warn "Not installed."
|
||||||
@@ -490,16 +383,6 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
else
|
else
|
||||||
ok "Up to date (or ahead of registry)."
|
ok "Up to date (or ahead of registry)."
|
||||||
fi
|
fi
|
||||||
elif [[ "$FLAG_DEV" == "true" ]]; then
|
|
||||||
info "Dev mode — building CLI + gateway from source at ref ${GIT_REF}…"
|
|
||||||
ensure_monorepo
|
|
||||||
install_cli_from_source
|
|
||||||
|
|
||||||
# PATH check for npm prefix
|
|
||||||
if [[ ":$PATH:" != *":$PREFIX/bin:"* ]]; then
|
|
||||||
warn "$PREFIX/bin is not on your PATH"
|
|
||||||
dim " Add to your shell rc: export PATH=\"$PREFIX/bin:\$PATH\""
|
|
||||||
fi
|
|
||||||
else
|
else
|
||||||
if [[ -z "$LATEST" ]]; then
|
if [[ -z "$LATEST" ]]; then
|
||||||
warn "Could not reach registry at $REGISTRY — skipping npm CLI."
|
warn "Could not reach registry at $REGISTRY — skipping npm CLI."
|
||||||
|
|||||||
Reference in New Issue
Block a user