Compare commits
9 Commits
chore/fed-
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 67135d3822 | |||
| adb153428b | |||
| c739256a2c | |||
| fc2970916f | |||
| 79eae2ffce | |||
| 7035cd23bf | |||
| 6b94d014a8 | |||
| 838c44086c | |||
| 3eeed04e17 |
@@ -0,0 +1,255 @@
|
|||||||
|
import 'reflect-metadata';
|
||||||
|
import { describe, expect, it, vi } from 'vitest';
|
||||||
|
import type { Db } from '@mosaicstack/db';
|
||||||
|
import type { FederationListResponse } from '@mosaicstack/types';
|
||||||
|
import {
|
||||||
|
FederationClientError,
|
||||||
|
type FederationClientService,
|
||||||
|
} from '../federation-client.service.js';
|
||||||
|
import { type QuerySourceError, QuerySourceService } from '../query-source.service.js';
|
||||||
|
|
||||||
|
/** Minimal row shape used as the generic payload type in these tests. */
interface TestRow {
  /** Row identifier; the fixtures use values such as `local-1` and `remote-1`. */
  id: string;
  /** Display title of the row. */
  title: string;
}
|
||||||
|
|
||||||
|
/** Shape of a peer row handed to `makeDb` and resolved by the mocked query chain. */
interface PeerRow {
  /** Peer identifier; forwarded to the federation client as `peerId`. */
  id: string;
  /** Peer common name; the service uses it as the `_source` tag on rows. */
  commonName: string;
  /** Peer endpoint URL, or `null` when none is recorded. */
  endpointUrl: string | null;
  /** Client key material, or `null` when none is recorded. */
  clientKeyPem: string | null;
  /** Peer lifecycle state. */
  state: 'active' | 'pending' | 'suspended' | 'revoked';
}
|
||||||
|
|
||||||
|
/** Default rows returned by the local executor when a test does not supply its own. */
const LOCAL_ROWS: TestRow[] = [
  { id: 'local-1', title: 'Local One' },
  { id: 'local-2', title: 'Local Two' },
];
|
||||||
|
|
||||||
|
/** Active peer fixture reachable at `peer-a.example.com` (no explicit port). */
const PEER_A: PeerRow = {
  id: 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa',
  commonName: 'peer-a',
  endpointUrl: 'https://peer-a.example.com',
  clientKeyPem: 'sealed-key-a',
  state: 'active',
};
|
||||||
|
|
||||||
|
/** Second active peer fixture, reachable at `peer-b.example.com`. */
const PEER_B: PeerRow = {
  id: 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
  commonName: 'peer-b',
  endpointUrl: 'https://peer-b.example.com',
  clientKeyPem: 'sealed-key-b',
  state: 'active',
};
|
||||||
|
|
||||||
|
/** Active peer fixture whose endpoint uses a non-default port (`localhost:3001`). */
const PEER_LOCALHOST: PeerRow = {
  id: 'cccccccc-cccc-cccc-cccc-cccccccccccc',
  commonName: 'peer-localhost',
  endpointUrl: 'https://localhost:3001',
  clientKeyPem: 'sealed-key-c',
  state: 'active',
};
|
||||||
|
|
||||||
|
/**
 * Builds a `Db` stub whose `select().from().where().orderBy()` chain resolves
 * to the supplied peer rows.
 *
 * The write-side members (`insert`, `update`, `delete`, `transaction`) are bare
 * mocks with no behavior.
 *
 * @param activePeers Rows the terminal `orderBy` call resolves to.
 * @returns The stub, cast to `Db`.
 */
function makeDb(activePeers: PeerRow[]): Db {
  // Each link of the query chain hands back the next link.
  const ordered = { orderBy: vi.fn().mockResolvedValue(activePeers) };
  const filtered = { where: vi.fn().mockReturnValue(ordered) };
  const sourced = { from: vi.fn().mockReturnValue(filtered) };

  const stub = {
    select: vi.fn().mockReturnValue(sourced),
    insert: vi.fn(),
    update: vi.fn(),
    delete: vi.fn(),
    transaction: vi.fn(),
  };

  return stub as unknown as Db;
}
|
||||||
|
|
||||||
|
/**
 * Wraps a `list` implementation in an object typed as `FederationClientService`.
 *
 * Only `list` is provided; no other member exists on the returned stub.
 *
 * @param list Implementation (typically a `vi.fn`) invoked for remote list calls.
 * @returns A stub exposing just `list`.
 */
function makeFederationClient(
  list: (
    peerId: string,
    resource: string,
    request: Record<string, unknown>,
  ) => Promise<FederationListResponse<TestRow>>,
): FederationClientService {
  const stub = {
    list: list as unknown as FederationClientService['list'],
  };
  return stub as FederationClientService;
}
|
||||||
|
|
||||||
|
/**
 * Produces a resolved local list envelope.
 *
 * @param rows Rows to place under `items`; defaults to `LOCAL_ROWS`.
 * @returns A promise resolving to `{ items: rows }`.
 */
async function makeLocalResponse(
  rows: TestRow[] = LOCAL_ROWS,
): Promise<FederationListResponse<TestRow>> {
  return { items: rows };
}
|
||||||
|
|
||||||
|
/**
 * Behavioral tests for QuerySourceService.list: routing per `source` selector,
 * row tagging, fan-out, partial/truncated flags, and peer lookup failure.
 */
describe('QuerySourceService', () => {
  it('routes source="local" to the local executor and tags rows as local', async () => {
    const list = vi.fn(async (): Promise<FederationListResponse<TestRow>> => ({ items: [] }));
    const service = new QuerySourceService(makeDb([PEER_A]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'local',
      resource: 'tasks',
      request: { cursor: 'ignored-for-local-test' },
      local: () => makeLocalResponse(),
    });

    expect(result).toEqual({
      items: [
        { id: 'local-1', title: 'Local One', _source: 'local' },
        { id: 'local-2', title: 'Local Two', _source: 'local' },
      ],
    });
    // A local read must never reach the federation client.
    expect(list).not.toHaveBeenCalled();
  });

  it('routes source="federated:<host>" to the matching active peer and tags rows with peer commonName', async () => {
    const list = vi.fn(
      async (): Promise<FederationListResponse<TestRow>> => ({
        items: [{ id: 'remote-1', title: 'Remote One' }],
      }),
    );
    const service = new QuerySourceService(makeDb([PEER_A, PEER_B]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'federated:peer-b.example.com',
      resource: 'tasks',
      request: { status: 'open' },
      local: () => makeLocalResponse(),
    });

    expect(result).toEqual({
      items: [{ id: 'remote-1', title: 'Remote One', _source: 'peer-b' }],
    });
    expect(list).toHaveBeenCalledWith(PEER_B.id, 'tasks', { status: 'open' });
  });

  it('matches federated hosts by endpoint host including non-default port', async () => {
    const list = vi.fn(
      async (): Promise<FederationListResponse<TestRow>> => ({
        items: [{ id: 'remote-port', title: 'Remote Port' }],
      }),
    );
    const service = new QuerySourceService(makeDb([PEER_LOCALHOST]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'federated:localhost:3001',
      resource: 'tasks',
      request: {},
      local: () => makeLocalResponse(),
    });

    expect(result).toEqual({
      items: [{ id: 'remote-port', title: 'Remote Port', _source: 'peer-localhost' }],
    });
    expect(list).toHaveBeenCalledWith(PEER_LOCALHOST.id, 'tasks', {});
  });

  it('fans out source="all" to local plus every active outbound peer in parallel and merges tagged rows', async () => {
    // Records both the start and the end of every sub-query. Start markers alone
    // cannot prove parallelism: a sequential implementation would emit the same
    // start order, just with each end marker between consecutive starts.
    const callOrder: string[] = [];
    const list = vi.fn(async (peerId: string): Promise<FederationListResponse<TestRow>> => {
      callOrder.push(`remote-start:${peerId}`);
      await Promise.resolve();
      callOrder.push(`remote-end:${peerId}`);
      return {
        items: [{ id: `remote-${peerId.slice(0, 1)}`, title: `Remote ${peerId.slice(0, 1)}` }],
      };
    });
    const service = new QuerySourceService(makeDb([PEER_A, PEER_B]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'all',
      resource: 'tasks',
      request: { limit: 25 },
      local: async () => {
        callOrder.push('local-start');
        await Promise.resolve();
        callOrder.push('local-end');
        return { items: [{ id: 'local-1', title: 'Local One' }] };
      },
    });

    expect(result).toEqual({
      items: [
        { id: 'local-1', title: 'Local One', _source: 'local' },
        { id: 'remote-a', title: 'Remote a', _source: 'peer-a' },
        { id: 'remote-b', title: 'Remote b', _source: 'peer-b' },
      ],
    });
    expect(list).toHaveBeenCalledTimes(2);

    // All three sub-queries must be in flight before any of them completes.
    expect(callOrder.slice(0, 3)).toEqual([
      'local-start',
      `remote-start:${PEER_A.id}`,
      `remote-start:${PEER_B.id}`,
    ]);
    // Completion order is a scheduling detail; only require that each one finished.
    expect([...callOrder.slice(3)].sort()).toEqual(
      ['local-end', `remote-end:${PEER_A.id}`, `remote-end:${PEER_B.id}`].sort(),
    );
  });

  it('marks source="all" as partial and truncated when any subquery returns a cursor', async () => {
    const list = vi.fn(
      async (): Promise<FederationListResponse<TestRow>> => ({
        items: [{ id: 'remote-a', title: 'Remote A' }],
        nextCursor: 'remote-next',
      }),
    );
    const service = new QuerySourceService(makeDb([PEER_A]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'all',
      resource: 'tasks',
      request: {},
      local: () => makeLocalResponse([{ id: 'local-1', title: 'Local One' }]),
    });

    expect(result).toEqual({
      items: [
        { id: 'local-1', title: 'Local One', _source: 'local' },
        { id: 'remote-a', title: 'Remote A', _source: 'peer-a' },
      ],
      _partial: true,
      _truncated: true,
    });
  });

  it('returns _partial=true for source="all" when one peer fails without dropping successful sources', async () => {
    const list = vi.fn(async (peerId: string): Promise<FederationListResponse<TestRow>> => {
      // Only PEER_B fails; PEER_A must still contribute its rows.
      if (peerId === PEER_B.id) {
        throw new FederationClientError({
          code: 'NETWORK',
          message: 'peer unavailable',
          peerId,
        });
      }
      return { items: [{ id: 'remote-a', title: 'Remote A' }] };
    });
    const service = new QuerySourceService(makeDb([PEER_A, PEER_B]), makeFederationClient(list));

    const result = await service.list<TestRow>({
      source: 'all',
      resource: 'tasks',
      request: {},
      local: () => makeLocalResponse([{ id: 'local-1', title: 'Local One' }]),
    });

    expect(result).toEqual({
      items: [
        { id: 'local-1', title: 'Local One', _source: 'local' },
        { id: 'remote-a', title: 'Remote A', _source: 'peer-a' },
      ],
      _partial: true,
    });
  });

  it('throws QuerySourceError when a federated host does not match an active outbound peer', async () => {
    const list = vi.fn(async (): Promise<FederationListResponse<TestRow>> => ({ items: [] }));
    const service = new QuerySourceService(makeDb([PEER_A]), makeFederationClient(list));

    await expect(
      service.list<TestRow>({
        source: 'federated:missing.example.com',
        resource: 'tasks',
        request: {},
        local: () => makeLocalResponse(),
      }),
    ).rejects.toMatchObject({
      name: 'QuerySourceError',
      code: 'PEER_NOT_FOUND',
    } satisfies Partial<QuerySourceError>);
  });
});
|
||||||
@@ -11,3 +11,13 @@ export {
|
|||||||
type FederationClientErrorCode,
|
type FederationClientErrorCode,
|
||||||
type FederationClientErrorOptions,
|
type FederationClientErrorOptions,
|
||||||
} from './federation-client.service.js';
|
} from './federation-client.service.js';
|
||||||
|
export {
|
||||||
|
QuerySourceService,
|
||||||
|
QuerySourceError,
|
||||||
|
type QuerySource,
|
||||||
|
type QuerySourceErrorCode,
|
||||||
|
type QuerySourceErrorOptions,
|
||||||
|
type QuerySourceListOptions,
|
||||||
|
type QuerySourceListResponse,
|
||||||
|
type LocalListExecutor,
|
||||||
|
} from './query-source.service.js';
|
||||||
|
|||||||
261
apps/gateway/src/federation/client/query-source.service.ts
Normal file
261
apps/gateway/src/federation/client/query-source.service.ts
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
/**
|
||||||
|
* QuerySourceService — gateway query source router (FED-M3-09).
|
||||||
|
*
|
||||||
|
* Accepts the federation query-layer `source` selector and routes list-style
|
||||||
|
* reads to local storage, one federated peer, or all active outbound peers.
|
||||||
|
*
|
||||||
|
* `source: "all"` is intentionally tolerant of per-peer failures: local data
|
||||||
|
* and successful peer responses are returned, and the envelope is marked
|
||||||
|
* `_partial: true`. Local failures still reject because there is no safe local
|
||||||
|
* fallback and the gateway's own storage is expected to be authoritative.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Inject, Injectable, Logger } from '@nestjs/common';
|
||||||
|
import { and, eq, federationPeers, isNotNull, type Db } from '@mosaicstack/db';
|
||||||
|
import {
|
||||||
|
SOURCE_LOCAL,
|
||||||
|
tagWithSource,
|
||||||
|
type FederationListResponse,
|
||||||
|
type SourceTag,
|
||||||
|
} from '@mosaicstack/types';
|
||||||
|
import { DB } from '../../database/database.module.js';
|
||||||
|
import { FederationClientService } from './federation-client.service.js';
|
||||||
|
|
||||||
|
/**
 * Source selector accepted by `QuerySourceService.list`:
 * `'local'`, `'all'`, or `'federated:<host>'`.
 */
export type QuerySource =
  | 'local'
  | 'all'
  | `federated:${string}`;
|
||||||
|
|
||||||
|
/** Machine-readable failure codes carried by `QuerySourceError`. */
export type QuerySourceErrorCode =
  | 'INVALID_SOURCE'
  | 'PEER_NOT_FOUND';
|
||||||
|
|
||||||
|
/** Constructor options for `QuerySourceError`. */
export interface QuerySourceErrorOptions {
  /** Failure category. */
  code: QuerySourceErrorCode;
  /** Human-readable description; becomes the error's `message`. */
  message: string;
  /** The source selector string the failure relates to. */
  source: string;
}
|
||||||
|
|
||||||
|
/**
 * Error raised when a source selector is malformed or does not resolve to a peer.
 *
 * Carries the failure `code` and the offending `source` string alongside the
 * standard `message`; `name` is always `'QuerySourceError'`.
 */
export class QuerySourceError extends Error {
  readonly code: QuerySourceErrorCode;
  readonly source: string;

  /**
   * @param opts Failure code, message, and the source selector involved.
   */
  constructor(opts: QuerySourceErrorOptions) {
    super(opts.message);
    const { code, source } = opts;
    this.code = code;
    this.source = source;
    this.name = 'QuerySourceError';
  }
}
|
||||||
|
|
||||||
|
/**
 * Callback that performs the local read.
 *
 * It may resolve to a full list envelope or to a bare array of rows; the
 * service wraps bare arrays as `{ items }` before tagging.
 */
export type LocalListExecutor<T extends object> = () => Promise<
  FederationListResponse<T> | T[]
>;
|
||||||
|
|
||||||
|
/** Arguments for `QuerySourceService.list`. */
export interface QuerySourceListOptions<T extends object> {
  /** Which source(s) to read from. */
  source: QuerySource;
  /** Resource name forwarded to the federation client for remote reads. */
  resource: string;
  /** Request payload forwarded to peers; treated as `{}` when omitted. */
  request?: Record<string, unknown>;
  /** Executor used for the local portion of the read. */
  local: LocalListExecutor<T>;
}
|
||||||
|
|
||||||
|
/** List envelope whose rows each carry a source tag in addition to `T`. */
export type QuerySourceListResponse<T extends object> = FederationListResponse<
  T & SourceTag
>;
|
||||||
|
|
||||||
|
/** Projection of a federation peer row that is usable for outbound queries. */
interface OutboundPeer {
  /** Peer identifier passed to the federation client. */
  id: string;
  /** Peer common name; used as the source tag and as a host-match candidate. */
  commonName: string;
  /** Endpoint URL; always a string here, rows without one are filtered out. */
  endpointUrl: string;
}
|
||||||
|
|
||||||
|
/** Internal intermediate: one source's rows after tagging, plus its flags. */
interface TaggedList<T extends object> {
  /** Rows with the source tag applied. */
  items: Array<T & SourceTag>;
  /** True when the source reported `_partial: true`. */
  partial: boolean;
  /** True when the source reported `_truncated: true` or returned a cursor. */
  truncated: boolean;
  /** Pagination cursor returned by the source, if any. */
  nextCursor?: string;
}
|
||||||
|
|
||||||
|
/**
 * Routes list-style reads to the local executor, to one federated peer, or to
 * the local executor plus every active outbound peer, tagging each returned
 * row with the source it came from.
 */
@Injectable()
export class QuerySourceService {
  private readonly logger = new Logger(QuerySourceService.name);

  constructor(
    @Inject(DB) private readonly db: Db,
    @Inject(FederationClientService) private readonly federationClient: FederationClientService,
  ) {}

  /**
   * Executes a list read against the source named by `options.source`.
   *
   * @param options Source selector, resource name, optional request payload,
   *   and the local executor.
   * @returns The tagged list envelope.
   * @throws QuerySourceError `INVALID_SOURCE` for an unsupported selector or an
   *   empty host after `federated:`; `PEER_NOT_FOUND` when no active outbound
   *   peer matches the host.
   */
  async list<T extends object>(
    options: QuerySourceListOptions<T>,
  ): Promise<QuerySourceListResponse<T>> {
    const { source, resource, local } = options;
    const request = options.request ?? {};

    switch (source) {
      case 'local': {
        const localResponse = await this.runLocal(local);
        return this.toResponse(this.tagList(localResponse, SOURCE_LOCAL));
      }
      case 'all':
        return this.listAll(resource, request, local);
      default:
        break;
    }

    const federatedPrefix = 'federated:';
    if (!source.startsWith(federatedPrefix)) {
      throw new QuerySourceError({
        code: 'INVALID_SOURCE',
        message: `Unsupported query source: ${source}`,
        source,
      });
    }

    const host = source.slice(federatedPrefix.length).trim();
    if (host.length === 0) {
      throw new QuerySourceError({
        code: 'INVALID_SOURCE',
        message: 'Federated source must include a host after federated:',
        source,
      });
    }

    const peer = await this.findPeerByHost(host, source);
    const remoteResponse = await this.federationClient.list<T>(peer.id, resource, request);
    return this.toResponse(this.tagList(remoteResponse, peer.commonName));
  }

  /**
   * Reads from the local executor and every active outbound peer, then merges.
   *
   * Peer failures are logged and reported via `_partial`; a local failure
   * rejects the whole call because it is awaited without a catch.
   */
  private async listAll<T extends object>(
    resource: string,
    request: Record<string, unknown>,
    local: LocalListExecutor<T>,
  ): Promise<QuerySourceListResponse<T>> {
    const peers = await this.listActiveOutboundPeers();

    // Start the local read first, then every peer read; nothing is awaited
    // until all of them have been started.
    const localPromise = this.runLocal(local).then((response) =>
      this.tagList(response, SOURCE_LOCAL),
    );
    const remotePromises = peers.map((peer: OutboundPeer) =>
      this.queryPeer<T>(peer, resource, request),
    );

    const [localResult, ...remoteResults] = await Promise.all([localPromise, ...remotePromises]);

    const collected: Array<TaggedList<T>> = [localResult];
    let peerFailure = false;
    for (const remote of remoteResults) {
      if (remote === null) {
        peerFailure = true;
      } else {
        collected.push(remote);
      }
    }

    return this.mergeTaggedLists(collected, peerFailure);
  }

  /**
   * Queries one peer and tags its rows; resolves to `null` (after logging a
   * warning) instead of rejecting when the peer call fails.
   */
  private async queryPeer<T extends object>(
    peer: OutboundPeer,
    resource: string,
    request: Record<string, unknown>,
  ): Promise<TaggedList<T> | null> {
    try {
      const response = await this.federationClient.list<T>(peer.id, resource, request);
      return this.tagList(response, peer.commonName);
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.warn(
        `Federated query to peer ${peer.commonName} (${peer.id}) failed; returning partial all-source response: ${reason}`,
      );
      return null;
    }
  }

  /** Runs the local executor and normalizes a bare array result to `{ items }`. */
  private async runLocal<T extends object>(
    local: LocalListExecutor<T>,
  ): Promise<FederationListResponse<T>> {
    const result = await local();
    return Array.isArray(result) ? { items: result } : result;
  }

  /**
   * Tags every row of a response with `source` and captures its flags.
   * A present `nextCursor` also counts as truncation.
   */
  private tagList<T extends object>(
    response: FederationListResponse<T>,
    source: string,
  ): TaggedList<T> {
    const { nextCursor } = response;
    const hasCursor = nextCursor !== undefined;

    return {
      items: tagWithSource(response.items, source),
      partial: response._partial === true,
      truncated: response._truncated === true || hasCursor,
      nextCursor,
    };
  }

  /**
   * Concatenates tagged lists in order and derives the merged flags.
   * The merged envelope never carries a `nextCursor`; instead, any per-source
   * cursor marks the result as partial.
   */
  private mergeTaggedLists<T extends object>(
    lists: Array<TaggedList<T>>,
    peerFailure: boolean,
  ): QuerySourceListResponse<T> {
    const items: TaggedList<T>['items'] = [];
    let partial = peerFailure;
    let truncated = false;

    for (const list of lists) {
      for (const item of list.items) {
        items.push(item);
      }
      if (list.partial || list.nextCursor !== undefined) {
        partial = true;
      }
      if (list.truncated) {
        truncated = true;
      }
    }

    const response: QuerySourceListResponse<T> = { items };
    if (partial) {
      response._partial = true;
    }
    if (truncated) {
      response._truncated = true;
    }
    return response;
  }

  /** Converts a single tagged list to the public envelope, omitting unset flags. */
  private toResponse<T extends object>(tagged: TaggedList<T>): QuerySourceListResponse<T> {
    const { items, nextCursor, partial, truncated } = tagged;

    const response: QuerySourceListResponse<T> = { items };
    if (nextCursor !== undefined) {
      response.nextCursor = nextCursor;
    }
    if (partial) {
      response._partial = true;
    }
    if (truncated) {
      response._truncated = true;
    }
    return response;
  }

  /**
   * Finds the first active outbound peer whose common name or endpoint host
   * equals `sourceHost` (compared after trimming and lower-casing).
   *
   * @throws QuerySourceError `PEER_NOT_FOUND` when nothing matches.
   */
  private async findPeerByHost(sourceHost: string, source: string): Promise<OutboundPeer> {
    const wanted = normalizeHost(sourceHost);
    const peers = await this.listActiveOutboundPeers();

    for (const candidate of peers) {
      if (normalizeHost(candidate.commonName) === wanted) {
        return candidate;
      }
      const endpointHosts = endpointHostKeys(candidate.endpointUrl).map((endpointHost: string) =>
        normalizeHost(endpointHost),
      );
      if (endpointHosts.includes(wanted)) {
        return candidate;
      }
    }

    throw new QuerySourceError({
      code: 'PEER_NOT_FOUND',
      message: `No active outbound federation peer matches source ${source}`,
      source,
    });
  }

  /**
   * Loads peers in state `active` that have both an endpoint URL and a client
   * key, ordered by common name.
   */
  private async listActiveOutboundPeers(): Promise<OutboundPeer[]> {
    const projection = {
      id: federationPeers.id,
      commonName: federationPeers.commonName,
      endpointUrl: federationPeers.endpointUrl,
    };

    const rows = await this.db
      .select(projection)
      .from(federationPeers)
      .where(
        and(
          eq(federationPeers.state, 'active'),
          isNotNull(federationPeers.endpointUrl),
          isNotNull(federationPeers.clientKeyPem),
        ),
      )
      .orderBy(federationPeers.commonName);

    // The column type is nullable, so narrow at runtime as well.
    return rows.filter((row): row is OutboundPeer => typeof row.endpointUrl === 'string');
  }
}
|
||||||
|
|
||||||
|
/**
 * Canonicalizes a host string for comparison.
 *
 * @param host Raw host text.
 * @returns `host` with surrounding whitespace removed, in lower case.
 */
function normalizeHost(host: string): string {
  const trimmed = host.trim();
  return trimmed.toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Lists the host strings under which an endpoint URL can be matched.
 *
 * Keys, in order and without duplicates:
 *  1. `url.host` (hostname plus any explicit non-default port),
 *  2. `url.hostname` (no port),
 *  3. `hostname:<default port>` for http/https endpoints on their default
 *     port. URL parsing drops a scheme-default port from `host`, so without
 *     this key `peer.example.com:443` would never match
 *     `https://peer.example.com`.
 *
 * @param endpointUrl Absolute endpoint URL.
 * @returns The match keys, or `[]` when `endpointUrl` cannot be parsed.
 */
function endpointHostKeys(endpointUrl: string): string[] {
  let url: URL;
  try {
    url = new URL(endpointUrl);
  } catch {
    // Unparseable endpoints simply contribute no match keys.
    return [];
  }

  const keys = new Set<string>();
  for (const candidate of [url.host, url.hostname]) {
    if (candidate.length > 0) {
      keys.add(candidate);
    }
  }

  // An empty `port` means the URL uses the scheme's default port.
  if (url.hostname.length > 0 && url.port === '') {
    let defaultPort: string | undefined;
    if (url.protocol === 'https:') {
      defaultPort = '443';
    } else if (url.protocol === 'http:') {
      defaultPort = '80';
    }
    if (defaultPort !== undefined) {
      keys.add(`${url.hostname}:${defaultPort}`);
    }
  }

  return Array.from(keys);
}
|
||||||
@@ -4,26 +4,31 @@ import { CaService } from './ca.service.js';
|
|||||||
import { EnrollmentController } from './enrollment.controller.js';
|
import { EnrollmentController } from './enrollment.controller.js';
|
||||||
import { EnrollmentService } from './enrollment.service.js';
|
import { EnrollmentService } from './enrollment.service.js';
|
||||||
import { FederationController } from './federation.controller.js';
|
import { FederationController } from './federation.controller.js';
|
||||||
|
import { CapabilitiesController } from './server/verbs/capabilities.controller.js';
|
||||||
import { GrantsService } from './grants.service.js';
|
import { GrantsService } from './grants.service.js';
|
||||||
import { FederationClientService } from './client/index.js';
|
import { FederationClientService, QuerySourceService } from './client/index.js';
|
||||||
import { FederationAuthGuard } from './server/index.js';
|
import { FederationAuthGuard, FederationScopeService } from './server/index.js';
|
||||||
|
|
||||||
/**
 * Nest module wiring for federation: enrollment, federation, and capabilities
 * controllers plus the CA, enrollment, grants, client, query-source, auth-guard
 * and scope providers. Every provider except `AdminGuard` is also exported.
 */
@Module({
  controllers: [
    EnrollmentController,
    FederationController,
    CapabilitiesController,
  ],
  providers: [
    AdminGuard,
    CaService,
    EnrollmentService,
    GrantsService,
    FederationClientService,
    QuerySourceService,
    FederationAuthGuard,
    FederationScopeService,
  ],
  exports: [
    CaService,
    EnrollmentService,
    GrantsService,
    FederationClientService,
    QuerySourceService,
    FederationAuthGuard,
    FederationScopeService,
  ],
})
export class FederationModule {}
|
||||||
|
|||||||
@@ -0,0 +1,324 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for FederationScopeService (FED-M3-04).
|
||||||
|
*
|
||||||
|
* Coverage:
|
||||||
|
* - resource allowlist deny
|
||||||
|
* - excluded resource deny
|
||||||
|
* - invalid scope deny
|
||||||
|
* - invalid requested limit deny
|
||||||
|
* - native RBAC deny as subjectUserId
|
||||||
|
* - scope/native filter intersection for personal and team rows
|
||||||
|
* - native RBAC personal deny wins over scope include_personal allow/default
|
||||||
|
* - max_rows_per_query cap
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import { FederationScopeService, type FederationNativeRbacEvaluator } from '../scope.service.js';
|
||||||
|
import type { FederationContext } from '../federation-context.js';
|
||||||
|
|
||||||
|
// Fixed identifiers shared by every FederationContext built in this file.
/** Grant identifier placed in the test context. */
const GRANT_ID = 'grant-1';
/** Peer identifier placed in the test context. */
const PEER_ID = 'peer-1';
/** Subject user identifier placed in the test context. */
const SUBJECT_USER_ID = 'user-1';
|
||||||
|
|
||||||
|
/**
 * Builds a FederationContext with the fixed grant, peer, and subject ids.
 *
 * @param scope Raw grant scope object to attach unchanged.
 * @returns The assembled context.
 */
function makeContext(scope: Record<string, unknown>): FederationContext {
  const context: FederationContext = {
    grantId: GRANT_ID,
    peerId: PEER_ID,
    subjectUserId: SUBJECT_USER_ID,
    scope,
  };
  return context;
}
|
||||||
|
|
||||||
|
/**
 * Builds a native-RBAC evaluator stub whose `evaluateReadAccess` is a mock
 * resolving to a fixed result.
 *
 * @param result Value every `evaluateReadAccess` call resolves to.
 * @returns The stub evaluator; the mock is available for call assertions.
 */
function makeNativeRbac(
  result: Awaited<ReturnType<FederationNativeRbacEvaluator['evaluateReadAccess']>>,
): FederationNativeRbacEvaluator {
  const evaluateReadAccess = vi.fn().mockResolvedValue(result);
  return { evaluateReadAccess };
}
|
||||||
|
|
||||||
|
describe('FederationScopeService', () => {
|
||||||
|
let service: FederationScopeService;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
service = new FederationScopeService();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('allows a granted resource and returns a capped query filter', async () => {
|
||||||
|
const nativeRbac = makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: ['team-1', 'team-2'] },
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({
|
||||||
|
resources: ['tasks'],
|
||||||
|
filters: { tasks: { include_teams: ['team-1', 'team-3'], include_personal: true } },
|
||||||
|
max_rows_per_query: 50,
|
||||||
|
}),
|
||||||
|
resource: 'tasks',
|
||||||
|
requestedLimit: 500,
|
||||||
|
nativeRbac,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toEqual({
|
||||||
|
allowed: true,
|
||||||
|
filter: {
|
||||||
|
resource: 'tasks',
|
||||||
|
subjectUserId: SUBJECT_USER_ID,
|
||||||
|
includePersonal: true,
|
||||||
|
teamIds: ['team-1'],
|
||||||
|
limit: 50,
|
||||||
|
maxRowsPerQuery: 50,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(nativeRbac.evaluateReadAccess).toHaveBeenCalledWith({
|
||||||
|
grantId: GRANT_ID,
|
||||||
|
peerId: PEER_ID,
|
||||||
|
subjectUserId: SUBJECT_USER_ID,
|
||||||
|
resource: 'tasks',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('defaults absent resource filters to native RBAC personal and team visibility', async () => {
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({ resources: ['notes'], max_rows_per_query: 100 }),
|
||||||
|
resource: 'notes',
|
||||||
|
nativeRbac: makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: ['team-1', 'team-2'] },
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: true,
|
||||||
|
filter: {
|
||||||
|
includePersonal: true,
|
||||||
|
teamIds: ['team-1', 'team-2'],
|
||||||
|
limit: 100,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('honors include_personal false even when native RBAC allows personal rows', async () => {
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({
|
||||||
|
resources: ['memory'],
|
||||||
|
filters: { memory: { include_personal: false } },
|
||||||
|
max_rows_per_query: 25,
|
||||||
|
}),
|
||||||
|
resource: 'memory',
|
||||||
|
nativeRbac: makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: [] },
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: true,
|
||||||
|
filter: {
|
||||||
|
includePersonal: false,
|
||||||
|
teamIds: [],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not leak personal rows when scope allows personal but native RBAC denies personal', async () => {
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({
|
||||||
|
resources: ['tasks'],
|
||||||
|
filters: { tasks: { include_personal: true } },
|
||||||
|
max_rows_per_query: 25,
|
||||||
|
}),
|
||||||
|
resource: 'tasks',
|
||||||
|
nativeRbac: makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: false, teamIds: ['team-1'] },
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: true,
|
||||||
|
filter: {
|
||||||
|
includePersonal: false,
|
||||||
|
teamIds: ['team-1'],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not widen native RBAC when scope includes teams the user cannot access', async () => {
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({
|
||||||
|
resources: ['tasks'],
|
||||||
|
filters: { tasks: { include_teams: ['team-2'], include_personal: false } },
|
||||||
|
max_rows_per_query: 25,
|
||||||
|
}),
|
||||||
|
resource: 'tasks',
|
||||||
|
nativeRbac: makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: ['team-1'] },
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: true,
|
||||||
|
filter: {
|
||||||
|
includePersonal: false,
|
||||||
|
teamIds: [],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('denies invalid grant scope before RBAC evaluation', async () => {
|
||||||
|
const nativeRbac = makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: [] },
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({ resources: [], max_rows_per_query: 100 }),
|
||||||
|
resource: 'tasks',
|
||||||
|
nativeRbac,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: false,
|
||||||
|
deny: {
|
||||||
|
code: 'invalid_scope',
|
||||||
|
stage: 'scope_parse',
|
||||||
|
statusCode: 400,
|
||||||
|
grantId: GRANT_ID,
|
||||||
|
subjectUserId: SUBJECT_USER_ID,
|
||||||
|
resource: 'tasks',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('denies unsupported resource names before RBAC evaluation', async () => {
|
||||||
|
const nativeRbac = makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: [] },
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({ resources: ['tasks'], max_rows_per_query: 100 }),
|
||||||
|
resource: 'unknown_resource',
|
||||||
|
nativeRbac,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: false,
|
||||||
|
deny: {
|
||||||
|
code: 'invalid_resource',
|
||||||
|
stage: 'resource_allowlist',
|
||||||
|
statusCode: 403,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('denies resources explicitly present in excluded_resources before allowlist miss', async () => {
|
||||||
|
const nativeRbac = makeNativeRbac({
|
||||||
|
allowed: true,
|
||||||
|
access: { includePersonal: true, teamIds: [] },
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await service.evaluateAccess({
|
||||||
|
context: makeContext({
|
||||||
|
resources: ['tasks'],
|
||||||
|
excluded_resources: ['credentials'],
|
||||||
|
max_rows_per_query: 100,
|
||||||
|
}),
|
||||||
|
resource: 'credentials',
|
||||||
|
nativeRbac,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toMatchObject({
|
||||||
|
allowed: false,
|
||||||
|
deny: {
|
||||||
|
code: 'resource_excluded',
|
||||||
|
stage: 'resource_exclusion',
|
||||||
|
statusCode: 403,
|
||||||
|
resource: 'credentials',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(nativeRbac.evaluateReadAccess).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('denies supported resources that are not granted by scope', async () => {
  const rbacStub = makeNativeRbac({
    allowed: true,
    access: { includePersonal: true, teamIds: [] },
  });
  // Only 'tasks' is granted; the request below asks for 'notes'.
  const grantContext = makeContext({ resources: ['tasks'], max_rows_per_query: 100 });

  const outcome = await service.evaluateAccess({
    context: grantContext,
    resource: 'notes',
    nativeRbac: rbacStub,
  });

  const expectedDeny = {
    code: 'resource_not_granted',
    stage: 'resource_allowlist',
    statusCode: 403,
    resource: 'notes',
  };
  expect(outcome).toMatchObject({ allowed: false, deny: expectedDeny });
  expect(rbacStub.evaluateReadAccess).not.toHaveBeenCalled();
});
|
||||||
|
|
||||||
|
it('denies invalid requested row limits before RBAC evaluation', async () => {
  const rbacStub = makeNativeRbac({
    allowed: true,
    access: { includePersonal: true, teamIds: [] },
  });
  const grantContext = makeContext({ resources: ['tasks'], max_rows_per_query: 100 });

  // The resource itself is granted; only the zero limit should trigger the denial.
  const outcome = await service.evaluateAccess({
    context: grantContext,
    resource: 'tasks',
    requestedLimit: 0,
    nativeRbac: rbacStub,
  });

  const expectedDeny = {
    code: 'invalid_limit',
    stage: 'row_cap',
    statusCode: 400,
    details: { requestedLimit: 0 },
  };
  expect(outcome).toMatchObject({ allowed: false, deny: expectedDeny });
  expect(rbacStub.evaluateReadAccess).not.toHaveBeenCalled();
});
|
||||||
|
|
||||||
|
it('denies when native RBAC rejects subjectUserId access to the resource', async () => {
  // Scope grants 'tasks', so the only thing standing in the way is the RBAC stub's refusal.
  const rejectingRbac = makeNativeRbac({
    allowed: false,
    reason: 'read:tasks denied',
    details: { permission: 'tasks:read' },
  });
  const grantContext = makeContext({ resources: ['tasks'], max_rows_per_query: 100 });

  const outcome = await service.evaluateAccess({
    context: grantContext,
    resource: 'tasks',
    nativeRbac: rejectingRbac,
  });

  // Strict equality: the evaluator's reason and details are expected to be
  // forwarded verbatim next to the grant/peer/subject identifiers.
  expect(outcome).toEqual({
    allowed: false,
    deny: {
      code: 'native_rbac_denied',
      stage: 'native_rbac',
      statusCode: 403,
      message: 'read:tasks denied',
      grantId: GRANT_ID,
      peerId: PEER_ID,
      subjectUserId: SUBJECT_USER_ID,
      resource: 'tasks',
      details: { permission: 'tasks:read' },
    },
  });
});
|
||||||
|
});
|
||||||
@@ -10,4 +10,22 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export { FederationAuthGuard } from './federation-auth.guard.js';
|
export { FederationAuthGuard } from './federation-auth.guard.js';
|
||||||
|
export { FederationScopeService } from './scope.service.js';
|
||||||
export type { FederationContext } from './federation-context.js';
|
export type { FederationContext } from './federation-context.js';
|
||||||
|
export type {
|
||||||
|
FederationNativeRbacAccess,
|
||||||
|
FederationNativeRbacAllowedResult,
|
||||||
|
FederationNativeRbacDeniedResult,
|
||||||
|
FederationNativeRbacEvaluator,
|
||||||
|
FederationNativeRbacRequest,
|
||||||
|
FederationNativeRbacResult,
|
||||||
|
FederationScopeAllowedResult,
|
||||||
|
FederationScopeDeniedResult,
|
||||||
|
FederationScopeDenyCode,
|
||||||
|
FederationScopeDenyDetails,
|
||||||
|
FederationScopeDenyReason,
|
||||||
|
FederationScopeDenyStage,
|
||||||
|
FederationScopeEvaluationInput,
|
||||||
|
FederationScopeEvaluationResult,
|
||||||
|
FederationScopeQueryFilter,
|
||||||
|
} from './scope.service.js';
|
||||||
|
|||||||
272
apps/gateway/src/federation/server/scope.service.ts
Normal file
272
apps/gateway/src/federation/server/scope.service.ts
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
/**
|
||||||
|
* FederationScopeService — M3 server-side scope enforcement pipeline.
|
||||||
|
*
|
||||||
|
* Pure trust-boundary service: it validates the grant scope, asks an injected
|
||||||
|
* native RBAC evaluator what the subject user can read locally, intersects that
|
||||||
|
* answer with the federation scope filters, and returns a query filter for the
|
||||||
|
* verb controllers. The service performs no DB calls directly.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Injectable } from '@nestjs/common';
|
||||||
|
import {
|
||||||
|
FEDERATION_RESOURCE_VALUES,
|
||||||
|
type FederationResource,
|
||||||
|
FederationScopeError,
|
||||||
|
parseFederationScope,
|
||||||
|
} from '../scope-schema.js';
|
||||||
|
import type { FederationContext } from './federation-context.js';
|
||||||
|
|
||||||
|
// Membership set built once from FEDERATION_RESOURCE_VALUES; typed as a set of plain strings so
// an arbitrary requested resource name can be checked with `.has()`.
const federationResourceSet: ReadonlySet<string> = new Set<string>(FEDERATION_RESOURCE_VALUES);
|
||||||
|
|
||||||
|
/** Pipeline stage that a denial is attributed to. */
export type FederationScopeDenyStage =
  | 'scope_parse'
  | 'resource_allowlist'
  | 'resource_exclusion'
  | 'native_rbac'
  | 'row_cap';

/** Machine-readable code identifying why a request was denied. */
export type FederationScopeDenyCode =
  | 'invalid_scope'
  | 'invalid_resource'
  | 'resource_not_granted'
  | 'resource_excluded'
  | 'native_rbac_denied'
  | 'invalid_limit';

/** Status codes a denial may carry. */
export type FederationScopeDenyStatus = 400 | 403;

/** Free-form supplementary values attached to a denial; values are primitives or string lists. */
export interface FederationScopeDenyDetails {
  readonly [key: string]: string | number | boolean | ReadonlyArray<string>;
}

/** Structured description of a denial, including the identifiers of the request it applies to. */
export interface FederationScopeDenyReason {
  readonly code: FederationScopeDenyCode;
  readonly stage: FederationScopeDenyStage;
  readonly statusCode: FederationScopeDenyStatus;
  readonly message: string;
  readonly grantId: string;
  readonly peerId: string;
  readonly subjectUserId: string;
  /** Resource name exactly as requested; it may not be a supported resource. */
  readonly resource: string;
  readonly details?: FederationScopeDenyDetails;
}
|
||||||
|
|
||||||
|
/** Question put to the native RBAC evaluator: may this subject read this resource? */
export interface FederationNativeRbacRequest {
  readonly grantId: string;
  readonly peerId: string;
  readonly subjectUserId: string;
  readonly resource: FederationResource;
}

/** Read access the subject user holds locally for one resource. */
export interface FederationNativeRbacAccess {
  /** Whether this user may read personal rows for this resource. */
  readonly includePersonal: boolean;

  /** Team IDs this user may read for this resource under native RBAC. */
  readonly teamIds: ReadonlyArray<string>;
}

/** Evaluator answer when the read is permitted. */
export interface FederationNativeRbacAllowedResult {
  readonly allowed: true;
  readonly access: FederationNativeRbacAccess;
}

/** Evaluator answer when the read is refused; reason and details are optional. */
export interface FederationNativeRbacDeniedResult {
  readonly allowed: false;
  readonly reason?: string;
  readonly details?: FederationScopeDenyDetails;
}

/** Evaluator answer, discriminated on `allowed`. */
export type FederationNativeRbacResult =
  | FederationNativeRbacAllowedResult
  | FederationNativeRbacDeniedResult;

/** Contract for the injected component that answers native RBAC read questions. */
export interface FederationNativeRbacEvaluator {
  evaluateReadAccess(request: FederationNativeRbacRequest): Promise<FederationNativeRbacResult>;
}
|
||||||
|
|
||||||
|
/** Everything `FederationScopeService.evaluateAccess` needs for one request. */
export interface FederationScopeEvaluationInput {
  readonly context: FederationContext;
  /** Requested resource name; unvalidated at this point. */
  readonly resource: string;
  /** Caller-requested row limit, if any. */
  readonly requestedLimit?: number;
  readonly nativeRbac: FederationNativeRbacEvaluator;
}

/** Query constraints returned for an allowed request. */
export interface FederationScopeQueryFilter {
  readonly resource: FederationResource;
  readonly subjectUserId: string;
  readonly includePersonal: boolean;
  readonly teamIds: ReadonlyArray<string>;
  /** Effective row limit for this query. */
  readonly limit: number;
  /** The scope's `max_rows_per_query` value. */
  readonly maxRowsPerQuery: number;
}

/** Evaluation outcome when access is allowed. */
export interface FederationScopeAllowedResult {
  readonly allowed: true;
  readonly filter: FederationScopeQueryFilter;
}

/** Evaluation outcome when access is denied. */
export interface FederationScopeDeniedResult {
  readonly allowed: false;
  readonly deny: FederationScopeDenyReason;
}

/** Evaluation outcome, discriminated on `allowed`. */
export type FederationScopeEvaluationResult =
  | FederationScopeAllowedResult
  | FederationScopeDeniedResult;
|
||||||
|
|
||||||
|
/**
 * Type guard: reports whether `resource` is a member of `federationResourceSet`,
 * narrowing it to `FederationResource` when it is.
 */
function isFederationResource(resource: string): resource is FederationResource {
  const isSupported = federationResourceSet.has(resource);
  return isSupported;
}
|
||||||
|
|
||||||
|
/**
 * Removes duplicate strings, keeping the first occurrence of each value in its
 * original position.
 *
 * @param values - Strings that may contain repeats.
 * @returns A new array holding each distinct value once.
 */
function uniqueStrings(values: readonly string[]): readonly string[] {
  const seen = new Set<string>();
  const distinct: string[] = [];

  for (const value of values) {
    if (seen.has(value)) {
      continue;
    }
    seen.add(value);
    distinct.push(value);
  }

  return distinct;
}
|
||||||
|
|
||||||
|
/**
 * Combines the team IDs permitted by native RBAC with the optional team list
 * from the federation scope filter.
 *
 * @param nativeTeamIds - Team IDs the native RBAC evaluator allows.
 * @param scopedTeamIds - Team IDs listed by the scope filter, or `undefined`
 *   when the scope has no team list.
 * @returns With no scope list: the de-duplicated native IDs. Otherwise: the
 *   de-duplicated scope IDs that native RBAC also allows, in scope-list order.
 */
function intersectTeamIds(
  nativeTeamIds: readonly string[],
  scopedTeamIds: readonly string[] | undefined,
): readonly string[] {
  const nativeDistinct = uniqueStrings(nativeTeamIds);
  if (scopedTeamIds === undefined) {
    return nativeDistinct;
  }

  const permittedByRbac = new Set<string>(nativeDistinct);
  const permittedByBoth: string[] = [];
  for (const teamId of uniqueStrings(scopedTeamIds)) {
    if (permittedByRbac.has(teamId)) {
      permittedByBoth.push(teamId);
    }
  }
  return permittedByBoth;
}
|
||||||
|
|
||||||
|
/**
 * Builds a denied evaluation result.
 *
 * The grant, peer and subject identifiers are copied from `params.context`.
 * `statusCode` falls back to 403 when omitted, and the `details` key is only
 * present on the result when details were supplied.
 *
 * @param params - Deny code, stage, message, request context, requested
 *   resource name, plus optional status code and details.
 * @returns A result with `allowed: false` and the assembled deny reason.
 */
function makeDenyReason(params: {
  readonly code: FederationScopeDenyCode;
  readonly stage: FederationScopeDenyStage;
  readonly statusCode?: FederationScopeDenyStatus;
  readonly message: string;
  readonly context: FederationContext;
  readonly resource: string;
  readonly details?: FederationScopeDenyDetails;
}): FederationScopeDeniedResult {
  const { code, stage, message, context, resource, details } = params;

  const base: FederationScopeDenyReason = {
    code,
    stage,
    statusCode: params.statusCode ?? 403,
    message,
    grantId: context.grantId,
    peerId: context.peerId,
    subjectUserId: context.subjectUserId,
    resource,
  };

  // Append `details` only when supplied so the key never appears with an undefined value.
  const deny: FederationScopeDenyReason = details === undefined ? base : { ...base, details };

  return { allowed: false, deny };
}
|
||||||
|
|
||||||
|
/**
 * Evaluates whether a federated read request is permitted by the grant scope
 * and by the injected native RBAC evaluator.
 */
@Injectable()
export class FederationScopeService {
  /**
   * Runs the scope checks in a fixed order and returns the first denial, or a
   * query filter when every check passes.
   *
   * Order: parse the grant scope, check the resource name is supported, check
   * it is not excluded, check it is granted, validate the requested limit, ask
   * native RBAC, then combine the RBAC answer with the scope filter and row cap.
   *
   * @param input - Request context, requested resource name, optional row
   *   limit, and the native RBAC evaluator to consult.
   * @returns `{ allowed: true, filter }` or `{ allowed: false, deny }`.
   */
  async evaluateAccess(
    input: FederationScopeEvaluationInput,
  ): Promise<FederationScopeEvaluationResult> {
    const { context, resource, requestedLimit, nativeRbac } = input;

    // 1. The scope stored on the grant must parse.
    let scope: ReturnType<typeof parseFederationScope>;
    try {
      scope = parseFederationScope(context.scope);
    } catch (error: unknown) {
      let message = 'Federation grant scope could not be parsed';
      if (error instanceof FederationScopeError) {
        message = 'Federation grant scope is invalid';
      }

      if (error instanceof Error) {
        return makeDenyReason({
          code: 'invalid_scope',
          stage: 'scope_parse',
          statusCode: 400,
          message,
          context,
          resource,
          details: { reason: error.message },
        });
      }
      return makeDenyReason({
        code: 'invalid_scope',
        stage: 'scope_parse',
        statusCode: 400,
        message,
        context,
        resource,
      });
    }

    // 2. The resource name must be one of the supported federation resources.
    if (!isFederationResource(resource)) {
      return makeDenyReason({
        code: 'invalid_resource',
        stage: 'resource_allowlist',
        message: 'Requested federation resource is not supported',
        context,
        resource,
        details: { supportedResources: FEDERATION_RESOURCE_VALUES },
      });
    }

    // 3. Exclusion is checked before the grant list.
    const isExcluded = scope.excluded_resources.includes(resource);
    if (isExcluded) {
      return makeDenyReason({
        code: 'resource_excluded',
        stage: 'resource_exclusion',
        message: 'Requested federation resource is explicitly excluded by grant scope',
        context,
        resource,
      });
    }

    // 4. The resource must be granted by the scope.
    const isGranted = scope.resources.includes(resource);
    if (!isGranted) {
      return makeDenyReason({
        code: 'resource_not_granted',
        stage: 'resource_allowlist',
        message: 'Requested federation resource is not granted by scope',
        context,
        resource,
        details: { grantedResources: scope.resources },
      });
    }

    // 5. A supplied limit must be an integer of at least 1.
    if (
      requestedLimit !== undefined &&
      !(Number.isInteger(requestedLimit) && requestedLimit >= 1)
    ) {
      return makeDenyReason({
        code: 'invalid_limit',
        stage: 'row_cap',
        statusCode: 400,
        message: 'Requested row limit must be a positive integer',
        context,
        resource,
        details: { requestedLimit },
      });
    }

    // 6. Ask the native RBAC evaluator only after every scope check has passed.
    const rbacRequest: FederationNativeRbacRequest = {
      grantId: context.grantId,
      peerId: context.peerId,
      subjectUserId: context.subjectUserId,
      resource,
    };
    const nativeResult = await nativeRbac.evaluateReadAccess(rbacRequest);

    if (!nativeResult.allowed) {
      const message = nativeResult.reason ?? 'Subject user is not allowed to read this resource';
      if (nativeResult.details !== undefined) {
        return makeDenyReason({
          code: 'native_rbac_denied',
          stage: 'native_rbac',
          message,
          context,
          resource,
          details: nativeResult.details,
        });
      }
      return makeDenyReason({
        code: 'native_rbac_denied',
        stage: 'native_rbac',
        message,
        context,
        resource,
      });
    }

    // 7. Combine the RBAC answer with the scope filter; personal rows need both to agree.
    const scopeFilter = scope.filters?.[resource];
    const scopeAllowsPersonal = Boolean(scopeFilter?.include_personal ?? true);
    const includePersonal = scopeAllowsPersonal && nativeResult.access.includePersonal;
    const teamIds = intersectTeamIds(nativeResult.access.teamIds, scopeFilter?.include_teams);

    // 8. The effective limit never exceeds the scope's row cap.
    const rowCap = scope.max_rows_per_query;
    const limit = requestedLimit === undefined ? rowCap : Math.min(requestedLimit, rowCap);

    const filter: FederationScopeQueryFilter = {
      resource,
      subjectUserId: context.subjectUserId,
      includePersonal,
      teamIds,
      limit,
      maxRowsPerQuery: rowCap,
    };
    return { allowed: true, filter };
  }
}
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
import 'reflect-metadata';
|
||||||
|
import { RequestMethod } from '@nestjs/common';
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import type { FastifyRequest } from 'fastify';
|
||||||
|
import { FederationCapabilitiesResponseSchema, FEDERATION_VERBS } from '@mosaicstack/types';
|
||||||
|
import { FederationScopeError } from '../../../scope-schema.js';
|
||||||
|
import { FederationAuthGuard } from '../../federation-auth.guard.js';
|
||||||
|
import { CapabilitiesController } from '../capabilities.controller.js';
|
||||||
|
|
||||||
|
// Scope fixture that sets every field the capabilities response reports.
const VALID_SCOPE = {
  resources: ['tasks', 'notes'],
  excluded_resources: ['credentials'],
  max_rows_per_query: 250,
} as const;
|
||||||
|
|
||||||
|
// Scope fixture that omits `excluded_resources`, used to observe the value filled in for it.
const DEFAULTED_SCOPE = {
  resources: ['memory'],
  max_rows_per_query: 10,
} as const;
|
||||||
|
|
||||||
|
/**
 * Builds a minimal request stub carrying only a `federationContext` with fixed
 * grant, peer and subject identifiers and the given scope.
 *
 * @param scope - Raw scope object to place on the federation context.
 * @returns The stub, cast to `FastifyRequest`.
 */
function makeRequest(scope: Record<string, unknown>): FastifyRequest {
  const federationContext = {
    grantId: 'grant-1',
    peerId: 'peer-1',
    subjectUserId: 'user-1',
    scope,
  };

  return { federationContext } as FastifyRequest;
}
|
||||||
|
|
||||||
|
describe('CapabilitiesController', () => {
  it('declares GET /api/federation/v1/capabilities', () => {
    // Read the routing metadata recorded on the class and on the handler.
    const handler = CapabilitiesController.prototype.getCapabilities;
    const controllerPath: unknown = Reflect.getMetadata('path', CapabilitiesController);
    const handlerPath: unknown = Reflect.getMetadata('path', handler);
    const handlerMethod: unknown = Reflect.getMetadata('method', handler);

    expect(controllerPath).toBe('api/federation/v1/capabilities');
    expect(handlerPath).toBe('/');
    expect(handlerMethod).toBe(RequestMethod.GET);
  });

  it('is protected only by FederationAuthGuard', () => {
    const registeredGuards = Reflect.getMetadata('__guards__', CapabilitiesController) as unknown[];

    expect(registeredGuards).toEqual([FederationAuthGuard]);
  });

  it('returns resources, excluded resources, max rows, and M3 supported verbs from the active grant scope', () => {
    const sut = new CapabilitiesController();

    const body = sut.getCapabilities(makeRequest(VALID_SCOPE));

    expect(body).toEqual({
      resources: ['tasks', 'notes'],
      excluded_resources: ['credentials'],
      max_rows_per_query: 250,
      supported_verbs: [...FEDERATION_VERBS],
    });

    // The payload must also satisfy the shared wire-format schema.
    const parsed = FederationCapabilitiesResponseSchema.safeParse(body);
    expect(parsed.success).toBe(true);
  });

  it('applies scope defaults without RBAC or resource filtering', () => {
    const sut = new CapabilitiesController();

    const body = sut.getCapabilities(makeRequest(DEFAULTED_SCOPE));

    // `excluded_resources` was omitted from the fixture and is expected back as an empty list.
    expect(body).toEqual({
      resources: ['memory'],
      excluded_resources: [],
      max_rows_per_query: 10,
      supported_verbs: ['list', 'get', 'capabilities'],
    });
  });

  it('rejects invalid scope state instead of returning an invalid capabilities contract', () => {
    const sut = new CapabilitiesController();
    const invalidScope = {
      resources: [],
      max_rows_per_query: 0,
    };

    const act = (): unknown => sut.getCapabilities(makeRequest(invalidScope));

    expect(act).toThrow(FederationScopeError);
  });
});
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
/**
|
||||||
|
* Federation capabilities verb (FED-M3-07).
|
||||||
|
*
|
||||||
|
* Returns the read-only capability envelope for the active grant attached by
|
||||||
|
* FederationAuthGuard. This endpoint intentionally does not invoke native RBAC
|
||||||
|
* or ScopeService: an active grant is enough to ask what the grant allows.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Controller, Get, Req, UseGuards } from '@nestjs/common';
|
||||||
|
import type { FastifyRequest } from 'fastify';
|
||||||
|
import {
|
||||||
|
FEDERATION_VERBS,
|
||||||
|
type FederationCapabilitiesResponse,
|
||||||
|
type FederationVerb,
|
||||||
|
} from '@mosaicstack/types';
|
||||||
|
import { parseFederationScope } from '../../scope-schema.js';
|
||||||
|
import { FederationAuthGuard } from '../federation-auth.guard.js';
|
||||||
|
import '../federation-context.js';
|
||||||
|
|
||||||
|
/**
 * Serves `GET api/federation/v1/capabilities` behind `FederationAuthGuard`.
 */
@Controller('api/federation/v1/capabilities')
@UseGuards(FederationAuthGuard)
export class CapabilitiesController {
  /**
   * Reports what the grant scope on the request's federation context allows.
   *
   * @param request - Incoming request; its `federationContext` must be set.
   * @returns The scope's resources, excluded resources and row cap, plus the
   *   list of federation verbs.
   * @throws Error when the request carries no federation context.
   * @throws Whatever `parseFederationScope` throws for a scope it rejects.
   */
  @Get()
  getCapabilities(@Req() request: FastifyRequest): FederationCapabilitiesResponse {
    const { federationContext } = request;
    if (!federationContext) {
      throw new Error('Federation context missing after auth guard');
    }

    const parsedScope = parseFederationScope(federationContext.scope);

    // Arrays are copied so the response does not share references with the parsed scope.
    const capabilities: FederationCapabilitiesResponse = {
      resources: [...parsedScope.resources],
      excluded_resources: [...parsedScope.excluded_resources],
      max_rows_per_query: parsedScope.max_rows_per_query,
      supported_verbs: [...FEDERATION_VERBS] satisfies FederationVerb[],
    };
    return capabilities;
  }
}
|
||||||
@@ -91,22 +91,22 @@ Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass
|
|||||||
>
|
>
|
||||||
> **Tracking issue:** #462.
|
> **Tracking issue:** #462.
|
||||||
|
|
||||||
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| FED-M3-01 | done | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462 | sonnet | feat/federation-m3-types | — | 4K | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS. |
|
| FED-M3-01 | done | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462 | sonnet | feat/federation-m3-types | — | 4K | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS. |
|
||||||
| FED-M3-02 | done | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use. | #462 | sonnet | feat/federation-m3-harness | DEPLOY-04 (soft) | 8K | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+. |
|
| FED-M3-02 | done | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use. | #462 | sonnet | feat/federation-m3-harness | DEPLOY-04 (soft) | 8K | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+. |
|
||||||
| FED-M3-03 | done | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request. | #462 | sonnet | feat/federation-m3-auth-guard | M3-01 | 8K | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant. |
|
| FED-M3-03 | done | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request. | #462 | sonnet | feat/federation-m3-auth-guard | M3-01 | 8K | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant. |
|
||||||
| FED-M3-04 | in-progress | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected. | #462 | sonnet | feat/federation-m3-scope-service | M3-01 | 10K | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
|
| FED-M3-04 | in-progress | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected. | #462 | sonnet | feat/federation-m3-scope-service | M3-01 | 10K | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
|
||||||
| FED-M3-05 | not-started | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param. | #462 | sonnet | feat/federation-m3-verb-list | M3-03, M3-04 | 6K | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4. |
|
| FED-M3-05 | in-progress | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param. | #462 | sonnet | feat/federation-m3-verb-list | M3-03, M3-04 | 6K | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4. |
|
||||||
| FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way. | #462 | sonnet | feat/federation-m3-verb-get | M3-03, M3-04 | 6K | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns. |
|
| FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way. | #462 | sonnet | feat/federation-m3-verb-get | M3-03, M3-04 | 6K | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns. |
|
||||||
| FED-M3-07 | in-progress | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval. | #462 | sonnet | feat/federation-m3-verb-capabilities | M3-03 | 4K | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end. |
|
| FED-M3-07 | done | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval. | #462 | sonnet | feat/federation-m3-verb-capabilities | M3-03 | 4K | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end. |
|
||||||
| FED-M3-08 | done | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`. | #462 | sonnet | feat/federation-m3-client | M3-01 | 8K | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic. |
|
| FED-M3-08 | done | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`. | #462 | sonnet | feat/federation-m3-client | M3-01 | 8K | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic. |
|
||||||
| FED-M3-09 | in-progress | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`. | #462 | sonnet | feat/federation-m3-query-source | M3-08 | 8K | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top. |
|
| FED-M3-09 | done | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`. | #462 | sonnet | feat/federation-m3-query-source | M3-08 | 8K | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top. |
|
||||||
| FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim). | #462 | sonnet | feat/federation-m3-integration | M3-05, M3-06 | 8K | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required. |
|
| FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim). | #462 | sonnet | feat/federation-m3-integration | M3-05, M3-06 | 8K | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required. |
|
||||||
| FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants. | #462 | sonnet | feat/federation-m3-e2e | M3-02, M3-09 | 12K | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution. |
|
| FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants. | #462 | sonnet | feat/federation-m3-e2e | M3-02, M3-04, M3-05, M3-06, M3-09 | 12K | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution. |
|
||||||
| FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage when scope deny. | #462 | sonnet | feat/federation-m3-security-review | M3-11 | 10K | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage. |
|
| FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage when scope deny. | #462 | sonnet | feat/federation-m3-security-review | M3-11 | 10K | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage. |
|
||||||
| FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred. | #462 | haiku | feat/federation-m3-docs | M3-12 | 5K | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths. |
|
| FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred. | #462 | haiku | feat/federation-m3-docs | M3-12 | 5K | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths. |
|
||||||
| FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins. | #462 | sonnet | chore/federation-m3-close | M3-13 | 3K | Same close pattern as M1-12 / M2-13. |
|
| FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins. | #462 | sonnet | chore/federation-m3-close | M3-13 | 3K | Same close pattern as M1-12 / M2-13. |
|
||||||
|
|
||||||
**M3 estimate:** ~100K tokens (vs MILESTONES.md 40K — same per-task breakdown pattern as M1/M2: tests, review, and docs split out from implementation cost). Largest milestone in the federation mission.
|
**M3 estimate:** ~100K tokens (vs MILESTONES.md 40K — same per-task breakdown pattern as M1/M2: tests, review, and docs split out from implementation cost). Largest milestone in the federation mission.
|
||||||
|
|
||||||
@@ -120,6 +120,8 @@ Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass
|
|||||||
|
|
||||||
**Backlog sync — 2026-06-24 (orchestrator):** Status reconciled against `origin/main` (release 0.0.48). Landed on main: **FED-M3-01** (DTOs, PR #506), **FED-M3-02** (harness scaffold, PR #505), **FED-M3-03** (mTLS auth-guard, PR #509 — CRIT-1/2 + HIGH-1..4 remediated in-PR), **FED-M3-08** (outbound mTLS client, PR #508). With M3-01/03/08 merged, three cards became dependency-clear and were dispatched to the idle coder lane: **FED-M3-04** scope.service → coder0 (`feat/federation-m3-scope-service`); **FED-M3-09** query-source + **FED-M3-07** capabilities verb → coder1 (`feat/federation-m3-query-source` first). Reviewer warmed for the M3 trust-boundary PRs. Remaining blocked-by-DAG: M3-05/06 (await M3-04), M3-10 (await M3-05/06), M3-11 (await M3-09), M3-12→14 (tail). Deploy chain (DEPLOY-IMG-FIX → 03/04) still independent of M3 code — harness local docker-compose fallback covers M3-11.
|
**Backlog sync — 2026-06-24 (orchestrator):** Status reconciled against `origin/main` (release 0.0.48). Landed on main: **FED-M3-01** (DTOs, PR #506), **FED-M3-02** (harness scaffold, PR #505), **FED-M3-03** (mTLS auth-guard, PR #509 — CRIT-1/2 + HIGH-1..4 remediated in-PR), **FED-M3-08** (outbound mTLS client, PR #508). With M3-01/03/08 merged, three cards became dependency-clear and were dispatched to the idle coder lane: **FED-M3-04** scope.service → coder0 (`feat/federation-m3-scope-service`); **FED-M3-09** query-source + **FED-M3-07** capabilities verb → coder1 (`feat/federation-m3-query-source` first). Reviewer warmed for the M3 trust-boundary PRs. Remaining blocked-by-DAG: M3-05/06 (await M3-04), M3-10 (await M3-05/06), M3-11 (await M3-09), M3-12→14 (tail). Deploy chain (DEPLOY-IMG-FIX → 03/04) still independent of M3 code — harness local docker-compose fallback covers M3-11.
|
||||||
|
|
||||||
|
**Backlog sync #2 — 2026-06-24 (orchestrator):** **FED-M3-09** (query-source) merged via PR #673 and **FED-M3-07** (capabilities) merged via PR #674 — both squash-merged on independent agent review-of-record + green CI (formal Gitea approve unavailable under the shared service account; merge is not gated by the self-approve guard). **FED-M3-05** (list verb) dispatched to coder1 (based on the M3-04 branch, rebase onto main once #672 lands). **FED-M3-04** (scope.service, PR #672) is in review-changes (one include_personal no-leak test outstanding). **DAG fix:** corrected `FED-M3-11` depends_on from `M3-02, M3-09` → `M3-02, M3-04, M3-05, M3-06, M3-09` — the E2E acceptance cases (#1–#5, #8–#10) exercise list/get over mTLS, so the server verbs + scope service are hard prerequisites; the original edge set omitted them and caused a premature M3-11 dispatch. Note: M3 read-path invariant for M3-11 is **no-persist + existing enrollment audit only** — read-verb audit-log writes are deferred to M4 (see M3-05/06 notes), so M3-11 must not assert read-audit-log entries.
|
||||||
|
|
||||||
## Milestone 4 — search + audit + rate limit (FED-M4)
|
## Milestone 4 — search + audit + rate limit (FED-M4)
|
||||||
|
|
||||||
_Deferred. Issue #463._
|
_Deferred. Issue #463._
|
||||||
|
|||||||
60
docs/scratchpads/462-fed-m3-04-scope-service.md
Normal file
60
docs/scratchpads/462-fed-m3-04-scope-service.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# Scratchpad — FED-M3-04 Scope Service
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Implement `apps/gateway/src/federation/server/scope.service.ts` for the M3 inbound federation scope-enforcement pipeline.
|
||||||
|
|
||||||
|
## Scope / Constraints
|
||||||
|
|
||||||
|
- Task: FED-M3-04, issue #462.
|
||||||
|
- Branch: `feat/federation-m3-scope-service` from `origin/main` @ 0.0.48.
|
||||||
|
- Pure service: no direct DB access; native RBAC/data access is injected per evaluation call.
|
||||||
|
- Reuse `parseFederationScope` from M2-03.
|
||||||
|
- Workers do not edit `docs/federation/TASKS.md` per repo AGENTS.md.
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
1. Resource allowlist and `excluded_resources` enforced.
|
||||||
|
2. Native RBAC evaluated as `subjectUserId` through an injected evaluator.
|
||||||
|
3. Scope filter intersection supports `include_teams` and `include_personal` without widening native RBAC.
|
||||||
|
4. `max_rows_per_query` caps requested limits.
|
||||||
|
5. Service returns `{ allowed: true, filter }` or a structured deny reason usable by M4 audit.
|
||||||
|
6. Unit tests cover every deny path.
|
||||||
|
|
||||||
|
## Plan
|
||||||
|
|
||||||
|
1. Inspect existing federation scope/schema/auth guard contracts.
|
||||||
|
2. Add pure `FederationScopeService` plus typed result/filter/deny interfaces.
|
||||||
|
3. Add focused unit tests for happy paths, filter intersection, row cap, and deny paths.
|
||||||
|
4. Export/register service for future verb controllers.
|
||||||
|
5. Run situational tests, baseline gates, code review, then PR.
|
||||||
|
|
||||||
|
## Budget
|
||||||
|
|
||||||
|
- Provided model tier: sonnet.
|
||||||
|
- Estimate from task row: 10K tokens.
|
||||||
|
- Working cap assumption: keep implementation focused to FED-M3-04 surfaces only.
|
||||||
|
|
||||||
|
## Progress
|
||||||
|
|
||||||
|
- Intake complete; dirty base worktree avoided by creating isolated worktree at `/home/jarvis/src/mosaic-mono-v1-fed-m3-04`.
|
||||||
|
- Project PRD and federation task spec reviewed.
|
||||||
|
- Added `FederationScopeService` with structured allow/deny result types and injected native RBAC evaluator contract.
|
||||||
|
- Added unit coverage for happy path, row cap, filter intersection, and every deny path.
|
||||||
|
- Exported/registered the service for upcoming M3 verb controllers.
|
||||||
|
|
||||||
|
## Verification Evidence
|
||||||
|
|
||||||
|
- `pnpm --filter @mosaicstack/gateway test -- src/federation/server/__tests__/scope.service.spec.ts` — pass (10 tests before review update; 11 tests after adding include_personal no-leak coverage).
|
||||||
|
- `pnpm build` — pass (23 successful tasks).
|
||||||
|
- `pnpm typecheck` — pass (41 successful tasks; re-run after review update).
|
||||||
|
- `pnpm lint` — pass (23 successful tasks; re-run after review update).
|
||||||
|
- `pnpm format:check` — pass (re-run after review update).
|
||||||
|
- `pnpm test` — pass after starting local `postgres`/`valkey` and running `pnpm --filter @mosaicstack/db db:push` for the DB-backed cross-user isolation suite (41 successful tasks; gateway 477 passed / 11 skipped).
|
||||||
|
- Code review: `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — approve, 0 findings.
|
||||||
|
- Security review: `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — risk none, 0 findings.
|
||||||
|
|
||||||
|
## Risks / Blockers
|
||||||
|
|
||||||
|
- Issue #462 already shows as closed in the provider output, which is likely a milestone-tracking mismatch. The PR body will still reference #462 unless the orchestrator redirects.
|
||||||
|
- Local full-test setup required `docker compose up -d postgres valkey` + `db:push`; containers were stopped with `docker compose down` after verification.
|
||||||
25
docs/scratchpads/672-fleet-personas-timeout.md
Normal file
25
docs/scratchpads/672-fleet-personas-timeout.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Scratchpad — fleet-personas spec timeout
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Raise the `@mosaicstack/mosaic` Vitest timeout to 30s at config level so filesystem-backed fleet drift-guard specs (`fleet-personas`, `fleet-profiles`, and siblings) stop failing spuriously (false-red results) under contended CI.
|
||||||
|
|
||||||
|
## Plan
|
||||||
|
|
||||||
|
1. Move timeout policy into `packages/mosaic/vitest.config.ts` with `testTimeout: 30_000`.
|
||||||
|
2. Remove the narrower `fleet-personas.spec.ts` local override so PR #677 fixes the whole class of affected specs, not just one file.
|
||||||
|
3. Run targeted fleet specs plus typecheck/lint/format gates.
|
||||||
|
4. Commit, queue guard, push, PR update.
|
||||||
|
|
||||||
|
## Evidence
|
||||||
|
|
||||||
|
- `pnpm --filter @mosaicstack/mosaic test -- src/commands/fleet-personas.spec.ts` — pass (8 tests; initial narrow fix).
|
||||||
|
- `pnpm typecheck` — pass (41 tasks; initial narrow fix).
|
||||||
|
- `pnpm lint` — pass (23 tasks; initial narrow fix).
|
||||||
|
- `pnpm format:check` — pass after formatting this scratchpad (initial narrow fix).
|
||||||
|
- Package-wide timeout follow-up:
|
||||||
|
- `pnpm --filter @mosaicstack/mosaic test -- src/commands/fleet-personas.spec.ts src/commands/fleet-profiles.spec.ts` — pass (24 tests).
|
||||||
|
- `pnpm --filter @mosaicstack/mosaic test` — pass (44 files / 618 tests).
|
||||||
|
- `pnpm typecheck` — pass (41 tasks).
|
||||||
|
- `pnpm lint` — pass (23 tasks).
|
||||||
|
- `pnpm format:check` — pass.
|
||||||
65
docs/scratchpads/FED-M3-07-capabilities.md
Normal file
65
docs/scratchpads/FED-M3-07-capabilities.md
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# FED-M3-07 — Capabilities Verb Scratchpad
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Implement `GET /api/federation/v1/capabilities` in `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Add read-only capabilities controller under federation server verbs.
|
||||||
|
- Use `FederationAuthGuard` only; active grant is sufficient and no native RBAC/scope-service eval runs.
|
||||||
|
- Response shape: `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope.
|
||||||
|
- Register controller in `FederationModule`.
|
||||||
|
- Unit-test happy path, defaults, no-context guard seam, and invalid scope handling.
|
||||||
|
|
||||||
|
## Constraints / assumptions
|
||||||
|
|
||||||
|
- Issue: #462.
|
||||||
|
- Branch: `feat/federation-m3-verb-capabilities` from `origin/main` (`3eeed04e`).
|
||||||
|
- Depends on M3-03 auth guard; guard attaches `request.federationContext.scope` after active-grant validation.
|
||||||
|
- ASSUMPTION: `supported_verbs` is the M3 verb set from `@mosaicstack/types` (`list`, `get`, `capabilities`).
|
||||||
|
- ASSUMPTION: `filters`/`rate_limit` are intentionally omitted for FED-M3-07 because the card’s response shape lists only the four required fields.
|
||||||
|
- Budget: no explicit hard cap from orchestrator; working cap ~4K-8K tokens for card implementation + tests + PR cycle.
|
||||||
|
|
||||||
|
## Plan
|
||||||
|
|
||||||
|
1. Write controller unit tests first.
|
||||||
|
2. Implement controller and module registration.
|
||||||
|
3. Run scoped tests + typecheck/lint/format.
|
||||||
|
4. Run Codex code/security review and remediate.
|
||||||
|
5. Commit, queue guard, push, PR via wrapper.
|
||||||
|
|
||||||
|
## Progress
|
||||||
|
|
||||||
|
- 2026-06-24: Intake complete; fresh worktree created from origin/main.
|
||||||
|
- 2026-06-24: Added `CapabilitiesController`, registered it in `FederationModule`, and added 5 unit tests.
|
||||||
|
- 2026-06-24: Code/security reviews passed with no findings.
|
||||||
|
|
||||||
|
## Tests run
|
||||||
|
|
||||||
|
- `pnpm --filter @mosaicstack/gateway test -- capabilities.controller.spec.ts` — PASS (5 tests).
|
||||||
|
- `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
|
||||||
|
- `pnpm --filter @mosaicstack/gateway lint` — PASS.
|
||||||
|
- `pnpm format:check` — PASS.
|
||||||
|
- `pnpm typecheck` — PASS (41/41 turbo tasks).
|
||||||
|
- `pnpm lint` — PASS (23/23 turbo tasks).
|
||||||
|
- `pnpm test` — FAIL in pre-existing/live-DB integration suite: `apps/gateway/src/__tests__/cross-user-isolation.test.ts` cleanup hit PostgreSQL connection/schema state for the `messages` table. Changed capabilities tests passed; failure is outside FED-M3-07 surface. No `fleet-personas.spec` flake encountered.
|
||||||
|
|
||||||
|
## Review evidence
|
||||||
|
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS/approve, no findings.
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS, risk level none, no findings.
|
||||||
|
|
||||||
|
## Risks / blockers
|
||||||
|
|
||||||
|
- Full repo `pnpm test` may hit known `fleet-personas.spec` flake per orchestrator; ignore that specific flake if encountered.
|
||||||
|
- Previous card saw local DB schema issue in `cross-user-isolation.test.ts`; scoped capabilities tests should be authoritative for this surface.
|
||||||
|
|
||||||
|
## Acceptance evidence mapping
|
||||||
|
|
||||||
|
| Acceptance criterion | Evidence |
|
||||||
|
| -------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| GET `/api/federation/v1/capabilities` exists | Route metadata test in `capabilities.controller.spec.ts`; scoped test PASS |
|
||||||
|
| Uses active-grant auth guard and no RBAC eval | Guard metadata test confirms only `FederationAuthGuard`; controller has no service injections/RBAC calls; scoped test PASS |
|
||||||
|
| Response enumerates resources/excluded/max rows/supported verbs from scope | Happy-path/default scope tests + response schema parse; scoped test PASS |
|
||||||
|
| Read-only/no persistence side effects | Controller only parses request `federationContext.scope` and returns a DTO; no DB/service dependency; code review PASS |
|
||||||
67
docs/scratchpads/FED-M3-09-query-source.md
Normal file
67
docs/scratchpads/FED-M3-09-query-source.md
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# FED-M3-09 — Query Source Service Scratchpad
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Implement `apps/gateway/src/federation/client/query-source.service.ts` for `source: "local" | "federated:<host>" | "all"` routing.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Add QuerySourceService in gateway federation client layer.
|
||||||
|
- Unit-test local-only, single federated peer, all-source fan-out/merge, and per-peer partial failures.
|
||||||
|
- Keep `docs/federation/TASKS.md` read-only per project agent guidance.
|
||||||
|
|
||||||
|
## Constraints / assumptions
|
||||||
|
|
||||||
|
- Issue: #462.
|
||||||
|
- Branch: `feat/federation-m3-query-source` from `origin/main` (`e0e7be70`).
|
||||||
|
- ASSUMPTION: `federated:<host>` should match active outbound peers by `commonName` first and by `endpointUrl` host/hostname as compatibility fallback; source tags use `peer.commonName` per `@mosaicstack/types` source-tag docs.
|
||||||
|
- ASSUMPTION: QuerySourceService provides list/fan-out behavior; get/source routing can be layered later because card acceptance says merge rows.
|
||||||
|
- ASSUMPTION: `source: "all"` cannot safely return a single continuation cursor for multiple sub-sources; any subquery cursor marks the merged response `_partial: true` + `_truncated: true` while omitting `nextCursor`.
|
||||||
|
- Budget: no explicit hard cap from orchestrator; working cap ~8K-12K tokens for card 1 implementation + tests + PR cycle.
|
||||||
|
- OpenBrain unavailable: credential loader failed with missing `/home/jarvis/.config/mosaic/credentials.json`; not blocking code delivery.
|
||||||
|
|
||||||
|
## Plan
|
||||||
|
|
||||||
|
1. Review federation client/types/db patterns.
|
||||||
|
2. Write unit tests for source behavior.
|
||||||
|
3. Implement QuerySourceService and export/register it in FederationModule.
|
||||||
|
4. Run scoped tests, typecheck, lint, format.
|
||||||
|
5. Run codex uncommitted review and remediate.
|
||||||
|
6. Commit, queue guard, push, PR via wrapper.
|
||||||
|
|
||||||
|
## Progress
|
||||||
|
|
||||||
|
- 2026-06-24: Intake complete; using isolated worktree to avoid dirty orchestrator files in original checkout.
|
||||||
|
- 2026-06-24: Added QuerySourceService, module export, barrel export, and 7 unit tests.
|
||||||
|
- 2026-06-24: First Codex review found pagination and port-host matching issues; both remediated with tests.
|
||||||
|
|
||||||
|
## Tests run
|
||||||
|
|
||||||
|
- `pnpm --filter @mosaicstack/gateway test -- query-source.service.spec.ts` — PASS (7 tests).
|
||||||
|
- `pnpm --filter @mosaicstack/gateway typecheck` — PASS.
|
||||||
|
- `pnpm --filter @mosaicstack/gateway lint` — PASS.
|
||||||
|
- `pnpm format:check` — PASS.
|
||||||
|
- `pnpm typecheck` — PASS (41/41 turbo tasks).
|
||||||
|
- `pnpm lint` — PASS (23/23 turbo tasks).
|
||||||
|
- `pnpm test` — FAIL in pre-existing/live-DB integration suite: `apps/gateway/src/__tests__/cross-user-isolation.test.ts` cleanup hit `relation "messages" does not exist` against local PostgreSQL. Changed QuerySource unit tests passed; failure is outside FED-M3-09 surface and appears tied to local DB schema state.
|
||||||
|
|
||||||
|
## Review evidence
|
||||||
|
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — first pass request-changes, 2 should-fix findings (all-source cursor handling; endpoint port host matching).
|
||||||
|
- Remediation: `_partial` + `_truncated` when any all-source subquery has `nextCursor`; endpoint match accepts URL `host` and `hostname`; added tests for both.
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — PASS/approve, no findings.
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted` — PASS, risk level none, no findings.
|
||||||
|
|
||||||
|
## Risks / blockers
|
||||||
|
|
||||||
|
- Federation query layer is not yet wired; service API needs to be stable and easy to compose.
|
||||||
|
- Must avoid hard-failing `source: all` on remote peer failures.
|
||||||
|
|
||||||
|
## Acceptance evidence mapping
|
||||||
|
|
||||||
|
| Acceptance criterion | Evidence |
|
||||||
|
| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- |
|
||||||
|
| local source returns local rows tagged `_source: local` | `query-source.service.spec.ts` local test; scoped test PASS |
|
||||||
|
| `federated:<host>` queries selected peer and tags rows with peer source | `query-source.service.spec.ts` commonName/endpoint-host tests; scoped test PASS |
|
||||||
|
| `all` fans out local + active outbound peers in parallel and merges tagged rows | `query-source.service.spec.ts` all-source call-order/merge test; scoped test PASS |
|
||||||
|
| per-peer failure on `all` returns `_partial: true`, not throw | `query-source.service.spec.ts` peer failure test; scoped test PASS |
|
||||||
@@ -30,6 +30,7 @@ export default tseslint.config(
|
|||||||
'apps/gateway/vitest.config.ts',
|
'apps/gateway/vitest.config.ts',
|
||||||
'packages/db/vitest.config.ts',
|
'packages/db/vitest.config.ts',
|
||||||
'packages/storage/vitest.config.ts',
|
'packages/storage/vitest.config.ts',
|
||||||
|
'packages/mosaic/vitest.config.ts',
|
||||||
'packages/mosaic/__tests__/*.ts',
|
'packages/mosaic/__tests__/*.ts',
|
||||||
'tools/federation-harness/*.ts',
|
'tools/federation-harness/*.ts',
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import { dirname, join, resolve } from 'node:path';
|
|||||||
import { Command } from 'commander';
|
import { Command } from 'commander';
|
||||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||||
import {
|
import {
|
||||||
|
acquireRestartLock,
|
||||||
addAgentToRoster,
|
addAgentToRoster,
|
||||||
buildAgentSendCommand,
|
buildAgentSendCommand,
|
||||||
buildAgentWatchAttachCommand,
|
buildAgentWatchAttachCommand,
|
||||||
@@ -45,6 +46,8 @@ import {
|
|||||||
removeAgentFromRoster,
|
removeAgentFromRoster,
|
||||||
resolveFleetPaths,
|
resolveFleetPaths,
|
||||||
resolvePresetFilename,
|
resolvePresetFilename,
|
||||||
|
restartLockPath,
|
||||||
|
RESTART_LOCK_STALE_MS,
|
||||||
RUNTIME_ACCEPTABLE_COMMANDS,
|
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||||
serializeRosterToYaml,
|
serializeRosterToYaml,
|
||||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||||
@@ -678,6 +681,364 @@ describe('fleet command construction', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('waits for an in-flight restart to clear before relaunching (re-entry guard)', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
rosterPath,
|
||||||
|
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||||
|
'\n',
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Simulate another `mosaic fleet restart` process mid-teardown: a fresh lock
|
||||||
|
// (recent timestamp, so it is NOT treated as stale) already held.
|
||||||
|
const lockPath = restartLockPath(home);
|
||||||
|
await mkdir(dirname(lockPath), { recursive: true });
|
||||||
|
await writeFile(lockPath, `4242\n${Date.now()}\n`);
|
||||||
|
|
||||||
|
const events: string[] = [];
|
||||||
|
const runner: CommandRunner = async (command, args) => {
|
||||||
|
events.push(`run:${args[args.length - 1]}`);
|
||||||
|
return { stdout: '', stderr: '', exitCode: 0 };
|
||||||
|
};
|
||||||
|
// The injected sleep stands in for time passing while we wait; the in-flight
|
||||||
|
// restart "finishes" (releases its lock) after the first poll.
|
||||||
|
let sleeps = 0;
|
||||||
|
const sleepFn: SleepFn = async () => {
|
||||||
|
sleeps += 1;
|
||||||
|
events.push(`sleep:${sleeps}`);
|
||||||
|
await rm(lockPath, { force: true });
|
||||||
|
};
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
program.exitOverride();
|
||||||
|
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||||
|
|
||||||
|
try {
|
||||||
|
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||||
|
|
||||||
|
// It must have waited at least once before issuing any systemctl restart.
|
||||||
|
expect(sleeps).toBeGreaterThan(0);
|
||||||
|
const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
|
||||||
|
const firstRun = events.findIndex((e) => e.startsWith('run:'));
|
||||||
|
expect(firstSleep).toBeGreaterThanOrEqual(0);
|
||||||
|
expect(firstRun).toBeGreaterThan(firstSleep);
|
||||||
|
|
||||||
|
// And it still performs the full restart once the lock clears.
|
||||||
|
expect(events).toContain('run:mosaic-tmux-holder.service');
|
||||||
|
expect(events).toContain('run:mosaic-agent@coder0.service');
|
||||||
|
|
||||||
|
// The lock is released after the restart completes.
|
||||||
|
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||||
|
} finally {
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('breaks a stale restart lock and proceeds without waiting', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
rosterPath,
|
||||||
|
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||||
|
'\n',
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// A lock left behind by a crashed owner: timestamp older than the stale window.
|
||||||
|
const lockPath = restartLockPath(home);
|
||||||
|
await mkdir(dirname(lockPath), { recursive: true });
|
||||||
|
await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n`);
|
||||||
|
|
||||||
|
const calls: string[][] = [];
|
||||||
|
const runner: CommandRunner = async (command, args) => {
|
||||||
|
calls.push([command, ...args]);
|
||||||
|
return { stdout: '', stderr: '', exitCode: 0 };
|
||||||
|
};
|
||||||
|
const sleepFn = vi.fn<SleepFn>(async () => {});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
program.exitOverride();
|
||||||
|
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||||
|
|
||||||
|
try {
|
||||||
|
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||||
|
|
||||||
|
// Stale lock is broken immediately — no waiting.
|
||||||
|
expect(sleepFn).not.toHaveBeenCalled();
|
||||||
|
expect(calls).toEqual([
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||||
|
]);
|
||||||
|
// The stale lock is gone once the restart completes.
|
||||||
|
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||||
|
} finally {
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('releases the restart lock so a subsequent restart is not blocked', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
rosterPath,
|
||||||
|
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||||
|
'\n',
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
const calls: string[][] = [];
|
||||||
|
const runner: CommandRunner = async (command, args) => {
|
||||||
|
calls.push([command, ...args]);
|
||||||
|
return { stdout: '', stderr: '', exitCode: 0 };
|
||||||
|
};
|
||||||
|
const sleepFn = vi.fn<SleepFn>(async () => {});
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
program.exitOverride();
|
||||||
|
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||||
|
|
||||||
|
try {
|
||||||
|
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||||
|
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart']);
|
||||||
|
|
||||||
|
// Two sequential restarts both run fully and neither has to wait.
|
||||||
|
expect(sleepFn).not.toHaveBeenCalled();
|
||||||
|
expect(calls).toEqual([
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-tmux-holder.service'],
|
||||||
|
['systemctl', '--user', 'restart', 'mosaic-agent@coder0.service'],
|
||||||
|
]);
|
||||||
|
} finally {
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('guards the single-agent restart path behind the in-flight restart lock', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
await mkdir(join(home, 'fleet'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
rosterPath,
|
||||||
|
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
|
||||||
|
'\n',
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// A full restart is mid-flight (lock held); a single-agent restart re-enters.
|
||||||
|
const lockPath = restartLockPath(home);
|
||||||
|
await mkdir(dirname(lockPath), { recursive: true });
|
||||||
|
await writeFile(lockPath, `4242\n${Date.now()}\n`);
|
||||||
|
|
||||||
|
const events: string[] = [];
|
||||||
|
const runner: CommandRunner = async (command, args) => {
|
||||||
|
events.push(`run:${args[args.length - 1]}`);
|
||||||
|
return { stdout: '', stderr: '', exitCode: 0 };
|
||||||
|
};
|
||||||
|
let sleeps = 0;
|
||||||
|
const sleepFn: SleepFn = async () => {
|
||||||
|
sleeps += 1;
|
||||||
|
events.push(`sleep:${sleeps}`);
|
||||||
|
await rm(lockPath, { force: true });
|
||||||
|
};
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
program.exitOverride();
|
||||||
|
registerFleetCommand(program, { runner, sleepFn, mosaicHome: home });
|
||||||
|
|
||||||
|
try {
|
||||||
|
await program.parseAsync(['node', 'mosaic', 'fleet', 'restart', 'coder0']);
|
||||||
|
|
||||||
|
// The single-agent restart waits for the in-flight restart before acting.
|
||||||
|
expect(sleeps).toBeGreaterThan(0);
|
||||||
|
const firstSleep = events.findIndex((e) => e.startsWith('sleep:'));
|
||||||
|
const firstRun = events.findIndex((e) => e.startsWith('run:'));
|
||||||
|
expect(firstSleep).toBeGreaterThanOrEqual(0);
|
||||||
|
expect(firstRun).toBeGreaterThan(firstSleep);
|
||||||
|
// Only the named agent is restarted; the holder is untouched.
|
||||||
|
expect(events).toContain('run:mosaic-agent@coder0.service');
|
||||||
|
expect(events).not.toContain('run:mosaic-tmux-holder.service');
|
||||||
|
} finally {
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not let a timed-out owner drop a lock another restart broke and re-owned', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const runDir = join(home, 'fleet', 'run');
|
||||||
|
await mkdir(runDir, { recursive: true });
|
||||||
|
const lockPath = restartLockPath(home);
|
||||||
|
const tokenOf = async (): Promise<string> => {
|
||||||
|
const raw = await readFile(lockPath, 'utf8');
|
||||||
|
return raw.split('\n')[2]?.trim() ?? '';
|
||||||
|
};
|
||||||
|
const sleepFn = vi.fn<SleepFn>(async () => {});
|
||||||
|
|
||||||
|
// R1 acquires the lock and begins a restart that then hangs.
|
||||||
|
const r1 = await acquireRestartLock(home, sleepFn);
|
||||||
|
const tokenR1 = await tokenOf();
|
||||||
|
expect(tokenR1).not.toBe('');
|
||||||
|
|
||||||
|
// The hung R1 leaves a stale lock: rewrite its timestamp into the past while
|
||||||
|
// preserving R1's token — exactly the on-disk state a stuck owner leaves.
|
||||||
|
await writeFile(lockPath, `4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\n${tokenR1}\n`);
|
||||||
|
|
||||||
|
// R2 re-enters, sees the stale lock, and atomically takes ownership.
|
||||||
|
const r2 = await acquireRestartLock(home, sleepFn);
|
||||||
|
const tokenR2 = await tokenOf();
|
||||||
|
expect(tokenR2).not.toBe(tokenR1);
|
||||||
|
expect(sleepFn).not.toHaveBeenCalled();
|
||||||
|
|
||||||
|
// R1 finally finishes and releases. It must NOT delete R2's lock — otherwise
|
||||||
|
// a third restart (R3) could acquire and interleave with R2 still running.
|
||||||
|
await r1.release();
|
||||||
|
expect(await tokenOf()).toBe(tokenR2);
|
||||||
|
|
||||||
|
// R2 releases cleanly and the lock is gone.
|
||||||
|
await r2.release();
|
||||||
|
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||||
|
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lets only one of several concurrent breakers proceed past a stale lock', async () => {
|
||||||
|
const home = await tempDir();
|
||||||
|
const lockPath = restartLockPath(home);
|
||||||
|
await mkdir(dirname(lockPath), { recursive: true });
|
||||||
|
|
||||||
|
// A stale lock left by a crashed owner: every concurrent re-entrant restart
|
||||||
|
// will judge it stale and try to break it at the same instant. Breaking must
|
||||||
|
// NOT grant ownership — only the atomic re-create may — so exactly one
|
||||||
|
// contender can ever hold the lock at a time. (The v2 fix wrote our own token
|
||||||
|
// during the break and read it back, so two breakers each saw their own token
|
||||||
|
// and BOTH proceeded; this guards against that regression.)
|
||||||
|
await writeFile(
|
||||||
|
lockPath,
|
||||||
|
`4242\n${Date.now() - RESTART_LOCK_STALE_MS - 1_000}\nstale-owner-token\n`,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Yielding sleep so a waiting contender lets the current owner finish and
|
||||||
|
// release before it re-contends, instead of spinning the microtask queue.
|
||||||
|
const sleepFn: SleepFn = async () => {
|
||||||
|
await new Promise((res) => setTimeout(res, 0));
|
||||||
|
};
|
||||||
|
|
||||||
|
let active = 0;
|
||||||
|
let maxActive = 0;
|
||||||
|
const tokens: string[] = [];
|
||||||
|
const tokenOf = async (): Promise<string> => {
|
||||||
|
const raw = await readFile(lockPath, 'utf8');
|
||||||
|
return raw.split('\n')[2]?.trim() ?? '';
|
||||||
|
};
|
||||||
|
|
||||||
|
// One "restart" = acquire the lock, do work in the critical section, release.
|
||||||
|
const restartOnce = async (): Promise<void> => {
|
||||||
|
const guard = await acquireRestartLock(home, sleepFn);
|
||||||
|
active += 1;
|
||||||
|
maxActive = Math.max(maxActive, active);
|
||||||
|
// Record the token we own while we hold it, then yield to interleave with
|
||||||
|
// any other contender that might (wrongly) believe it owns the lock too.
|
||||||
|
tokens.push(await tokenOf());
|
||||||
|
await new Promise((res) => setTimeout(res, 0));
|
||||||
|
active -= 1;
|
||||||
|
await guard.release();
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Three breakers race the single stale lock simultaneously.
|
||||||
|
await Promise.all([restartOnce(), restartOnce(), restartOnce()]);
|
||||||
|
|
||||||
|
// Mutual exclusion held: never two owners at once despite concurrent breaks.
|
||||||
|
expect(maxActive).toBe(1);
|
||||||
|
// Each acquire owned with its own distinct token — no two ever shared it.
|
||||||
|
expect(new Set(tokens).size).toBe(3);
|
||||||
|
// The lock is fully released at the end.
|
||||||
|
await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
|
||||||
|
} finally {
|
||||||
|
await rm(home, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lets exactly one of two breakers take over a stale lock while the other waits', async () => {
  const home = await tempDir();
  const lockPath = restartLockPath(home);
  await mkdir(dirname(lockPath), { recursive: true });

  // Seed one stale lock that both contenders will judge stale at the same
  // instant. Lock transitions run under the registry mutex, so only one of them
  // may take it over; the other has to find a now-fresh owner and wait, then
  // re-evaluate. (A content-blind clobber let both believe they owned it.)
  const staleStamp = Date.now() - RESTART_LOCK_STALE_MS - 1_000;
  await writeFile(lockPath, `4242\n${staleStamp}\nstale-owner-token\n`);

  const yieldToTimers = (): Promise<void> => new Promise((done) => setTimeout(done, 0));

  // Barrier: the winner keeps holding the lock until the loser has slept at
  // least once, which forces the "one proceeds while the other waits"
  // interleaving deterministically instead of relying on timing.
  let signalLoserWaited: () => void = () => {};
  const loserWaited = new Promise<void>((open) => {
    signalLoserWaited = open;
  });
  let sleeps = 0;
  const sleepFn: SleepFn = async () => {
    sleeps += 1;
    signalLoserWaited();
    await yieldToTimers();
  };

  // `now` = contenders currently inside the critical section, `peak` = the
  // highest value `now` ever reached.
  const holders = { now: 0, peak: 0 };
  const seenTokens: string[] = [];

  // Owner token is the third line of the lock file.
  const currentToken = async (): Promise<string> => {
    const raw = await readFile(lockPath, 'utf8');
    const [, , tokenLine] = raw.split('\n');
    return tokenLine?.trim() ?? '';
  };

  let winnerSlotOpen = true;
  const restartOnce = async (): Promise<void> => {
    const guard = await acquireRestartLock(home, sleepFn);
    holders.now += 1;
    holders.peak = Math.max(holders.peak, holders.now);
    const held = await currentToken();
    seenTokens.push(held);
    // The first contender through is the winner: it holds the lock until the
    // loser has waited once, so the loser is guaranteed to have seen a FRESH
    // owner (not the stale one) and backed off. The second just yields once.
    const isWinner = winnerSlotOpen;
    winnerSlotOpen = false;
    await (isWinner ? loserWaited : yieldToTimers());
    holders.now -= 1;
    await guard.release();
  };

  try {
    // Exactly two breakers race the single stale lock.
    await Promise.all([restartOnce(), restartOnce()]);

    // Mutual exclusion: had both taken over the stale lock, the peak would be 2.
    expect(holders.peak).toBe(1);
    // Both eventually owned the lock, each under its own distinct token.
    expect(new Set(seenTokens).size).toBe(2);
    // The loser saw the winner's fresh lock and slept — it did not take over too.
    expect(sleeps).toBeGreaterThanOrEqual(1);
    // Nothing is left behind once both have released.
    await expect(readFile(lockPath, 'utf8')).rejects.toMatchObject({ code: 'ENOENT' });
  } finally {
    await rm(home, { recursive: true, force: true });
  }
});
|
||||||
|
|
||||||
it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
|
it('attempts every agent and the holder during fleet stop even when an agent stop fails', async () => {
|
||||||
const home = await tempDir();
|
const home = await tempDir();
|
||||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
|
|||||||
@@ -1,5 +1,16 @@
|
|||||||
import { constants } from 'node:fs';
|
import { constants } from 'node:fs';
|
||||||
import { access, chmod, copyFile, mkdir, readFile, unlink, writeFile } from 'node:fs/promises';
|
import {
|
||||||
|
access,
|
||||||
|
chmod,
|
||||||
|
copyFile,
|
||||||
|
mkdir,
|
||||||
|
open,
|
||||||
|
readFile,
|
||||||
|
stat,
|
||||||
|
unlink,
|
||||||
|
writeFile,
|
||||||
|
} from 'node:fs/promises';
|
||||||
|
import { randomUUID } from 'node:crypto';
|
||||||
import { homedir, hostname, userInfo } from 'node:os';
|
import { homedir, hostname, userInfo } from 'node:os';
|
||||||
import { dirname, join, resolve } from 'node:path';
|
import { dirname, join, resolve } from 'node:path';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
@@ -533,6 +544,295 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
|
|||||||
return ['systemctl', '--user', action, service];
|
return ['systemctl', '--user', action, service];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Delay (ms) between successive checks of the restart lock while waiting on it. */
export const RESTART_LOCK_POLL_INTERVAL_MS = 250;

/**
 * Upper bound (ms) on how long a re-entrant restart waits for the in-flight
 * restart before breaking the lock and proceeding anyway. The bound exists so
 * that a crashed lock holder can never deadlock the fleet permanently.
 */
export const RESTART_LOCK_MAX_WAIT_MS = 30 * 1_000;

/**
 * Age (ms) at which a restart lock counts as stale — its owner died without
 * releasing it — and is broken immediately instead of being waited on.
 */
export const RESTART_LOCK_STALE_MS = 60 * 1_000;
|
||||||
|
|
||||||
|
/**
 * Builds the path of the cross-process restart lock for a Mosaic home:
 * `<mosaicHome>/fleet/run/restart.lock`. The location is derived from the home
 * directory alone (not from the heartbeat env override) so the lock is scoped
 * to the same fleet the restart acts on.
 *
 * @param mosaicHome Root directory of the Mosaic installation.
 * @returns The lock file path.
 */
export function restartLockPath(mosaicHome: string): string {
  const relativeSegments = ['fleet', 'run', 'restart.lock'];
  return join(mosaicHome, ...relativeSegments);
}
|
||||||
|
|
||||||
|
/** Handle on a held restart lock; `release()` deletes the lock file only if we still own it. */
interface RestartGuard {
  release(): Promise<void>;
}

/**
 * Serializes a lock file body as three newline-terminated lines: the current
 * process id (informational), the current epoch-millisecond timestamp, and the
 * owner token.
 *
 * @param token Unique token identifying the owner of the lock.
 * @returns The text to write into the lock file.
 */
function formatRestartLockContent(token: string): string {
  const pidLine = String(process.pid);
  const stampLine = String(Date.now());
  // The trailing empty element yields the final newline.
  return [pidLine, stampLine, token, ''].join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Extracts the owner token — the third line — from a lock file.
 *
 * The token is what makes release and takeover ownership-safe: a process only
 * acts on a lock whose token equals its own.
 *
 * @param lockPath Path of the lock file to inspect.
 * @returns The trimmed token, or `null` when the file cannot be read or has no
 *   non-empty third line.
 */
async function readRestartLockToken(lockPath: string): Promise<string | null> {
  // Any read failure (missing file, permissions, …) is reported as "no token".
  const contents = await readFile(lockPath, 'utf8').catch(() => null);
  if (contents === null) {
    return null;
  }
  const [, , tokenLine] = contents.split('\n');
  const token = tokenLine?.trim();
  return token || null;
}
|
||||||
|
|
||||||
|
/**
 * Decides from a lock file's text whether the lock is stale.
 *
 * The second line is parsed as a base-10 epoch-millisecond timestamp. The lock
 * is stale when that line is missing or does not parse to a finite number (a
 * corrupt or partially written lock), or when the timestamp is at least
 * RESTART_LOCK_STALE_MS older than `now`.
 *
 * @param raw Full text of the lock file.
 * @param now Current time in epoch milliseconds.
 * @returns `true` if the lock should be treated as stale.
 */
function isRestartLockContentStale(raw: string, now: number): boolean {
  const [, stampLine = ''] = raw.split('\n');
  const stamp = Number.parseInt(stampLine.trim(), 10);
  return !Number.isFinite(stamp) || now - stamp >= RESTART_LOCK_STALE_MS;
}
|
||||||
|
|
||||||
|
/**
 * Derives the path of the short-lived registry mutex guarding every transition
 * of the restart lock (acquire, release, takeover): the lock path with a
 * `.mutex` suffix. The mutex is meant to be held only across a few filesystem
 * operations, never across the restart itself.
 *
 * @param lockPath Path of the restart lock file.
 * @returns Path of the companion mutex file.
 */
function restartMutexPath(lockPath: string): string {
  return lockPath + '.mutex';
}

/** Pause (ms) between attempts to acquire the registry mutex while another holder has it. */
const RESTART_MUTEX_RETRY_MS = 20;
|
||||||
|
|
||||||
|
/**
 * Staleness check for the internal mutex / reclaim files, based on the file's
 * mtime instead of its content.
 *
 * Callers create these files with `open(path, 'wx')` and write the token
 * afterwards, so a content-based check could see a just-created, still-empty
 * file as corrupt-and-stale and reap a lock another contender is still
 * acquiring. Judging by mtime avoids that window: a freshly created file reads
 * as live, and only one whose mtime is at least RESTART_LOCK_STALE_MS old is
 * reported stale.
 *
 * @param path File to inspect.
 * @param now Current time in epoch milliseconds.
 * @returns `true` only when the file exists, can be stat'ed, and is old enough.
 */
async function isRestartLockPathStale(path: string, now: number): Promise<boolean> {
  let mtimeMs: number;
  try {
    ({ mtimeMs } = await stat(path));
  } catch {
    // ENOENT: the file is gone, not stale — the caller will re-contend.
    // Any other stat failure: treat as live and back off rather than reap.
    return false;
  }
  return now - mtimeMs >= RESTART_LOCK_STALE_MS;
}
|
||||||
|
|
||||||
|
/**
 * Derives the path of the reclaim lock that serializes reaping of a stale
 * registry mutex: the mutex path with a `.reclaim` suffix.
 */
function restartReclaimPath(mutexPath: string): string {
  return mutexPath + '.reclaim';
}
|
||||||
|
|
||||||
|
/**
 * Reap a registry mutex left behind by a process that crashed mid-transition —
 * one whose file has aged past RESTART_LOCK_STALE_MS by mtime.
 *
 * The reap only ever REMOVES the mutex; it never creates or holds it, so
 * acquisition remains the single `open('wx')` create in
 * {@link acquireRestartMutex}. Removal is serialized by a dedicated reclaim
 * lock (created exclusively with `open('wx')`): the mutex is re-checked while
 * that lock is held and unlinked only if it is STILL stale, so a fresh mutex
 * created in the meantime is left untouched.
 *
 * When another reclaimer already holds the reclaim lock this call does not reap
 * the mutex; it only clears the reclaim lock itself if that lock is stale by
 * mtime, so a later pass can retry.
 *
 * @param mutexPath Path of the registry mutex to reap if stale.
 * @throws Any error from creating the reclaim lock other than `EEXIST`, or from
 *   closing its file handle.
 */
async function reclaimStaleRestartMutex(mutexPath: string): Promise<void> {
  const reclaimPath = restartReclaimPath(mutexPath);
  let handle: Awaited<ReturnType<typeof open>>;
  try {
    handle = await open(reclaimPath, 'wx');
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
      throw err;
    }
    // Someone is already reclaiming. If their reclaim lock is itself stale by
    // mtime, clear it so a later pass can retry; otherwise back off. Either way
    // the mutex is not reaped on this pass.
    if (await isRestartLockPathStale(reclaimPath, Date.now())) {
      await unlink(reclaimPath).catch(() => {});
    }
    return;
  }
  try {
    // Re-check the mutex UNDER the reclaim lock and remove it only if it is
    // still stale by mtime; a fresh mutex is left untouched.
    if (await isRestartLockPathStale(mutexPath, Date.now())) {
      await unlink(mutexPath).catch(() => {});
    }
  } finally {
    // Always remove the reclaim lock, even if closing the handle rejects;
    // otherwise a failed close would leave the reclaim lock on disk and block
    // every reclaimer until it aged past the stale threshold.
    try {
      await handle.close();
    } finally {
      await unlink(reclaimPath).catch(() => {});
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Acquire the registry mutex, retrying until it is held, and hand back a
 * token-gated release function.
 *
 * Acquisition is an exclusive create (`open(mutexPath, 'wx')`). When the file
 * already exists, the mutex is either live — sleep RESTART_MUTEX_RETRY_MS and
 * retry — or stale by mtime — ask {@link reclaimStaleRestartMutex} to reap it
 * and retry. Staleness is judged by mtime rather than content, so a mutex that
 * exists but has not had its token written yet is treated as live.
 *
 * @param mutexPath Path of the registry mutex file.
 * @param token Owner token written into the mutex and checked again on release.
 * @returns A release function that unlinks the mutex only if it still carries `token`.
 * @throws Any `open` error other than `EEXIST`, or an error while writing the
 *   token / closing the handle (the created file is removed first).
 */
async function acquireRestartMutex(
  mutexPath: string,
  token: string,
): Promise<RestartGuard['release']> {
  // Release is a no-op unless the mutex on disk still carries our token.
  const releaseIfOurs = async (): Promise<void> => {
    const holder = await readRestartLockToken(mutexPath);
    if (holder === token) {
      await unlink(mutexPath).catch(() => {});
    }
  };

  for (;;) {
    let created: Awaited<ReturnType<typeof open>> | undefined;
    try {
      created = await open(mutexPath, 'wx');
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') {
        throw err;
      }
    }

    if (created === undefined) {
      // Somebody else's mutex is in the way.
      const stale = await isRestartLockPathStale(mutexPath, Date.now());
      if (stale) {
        await reclaimStaleRestartMutex(mutexPath);
      } else {
        // A live holder has it; back off briefly before trying again.
        await new Promise((wake) => setTimeout(wake, RESTART_MUTEX_RETRY_MS));
      }
      continue;
    }

    // We created the mutex: stamp it with our token. If that fails, remove our
    // own file so no empty mutex is leaked for a peer to reap.
    try {
      await created.writeFile(formatRestartLockContent(token));
      await created.close();
    } catch (err) {
      await created.close().catch(() => {});
      await unlink(mutexPath).catch(() => {});
      throw err;
    }
    return releaseIfOurs;
  }
}
|
||||||
|
|
||||||
|
/**
 * Acquire the fleet restart lock, serializing concurrent `mosaic fleet restart`
 * invocations across processes.
 *
 * Each attempt runs entirely under the registry mutex: the lock file is read
 * and, when appropriate, written while the mutex is held, so the check and the
 * mutation that follows cannot interleave with another acquirer, releaser, or
 * breaker. Under the mutex the caller:
 *  - takes the lock if the file does not exist;
 *  - takes it over (overwrites it) if it is stale — unreadable, unparseable, or
 *    older than RESTART_LOCK_STALE_MS — or if this call has been waiting for
 *    RESTART_LOCK_MAX_WAIT_MS, writing a notice to stderr in both cases;
 *  - otherwise leaves it alone, then sleeps RESTART_LOCK_POLL_INTERVAL_MS via
 *    `sleepFn` after dropping the mutex and tries again.
 *
 * The returned guard's `release()` is mutex-gated and token-gated as well: it
 * removes the lock file only if the file still carries this call's token, so a
 * lock that was taken over by another caller is left intact.
 *
 * @param mosaicHome Mosaic home whose restart lock is acquired.
 * @param sleepFn Sleep implementation used between polls.
 * @returns A guard whose `release()` drops the lock if still owned.
 */
export async function acquireRestartLock(
  mosaicHome: string,
  sleepFn: SleepFn,
): Promise<RestartGuard> {
  const token = randomUUID();
  const lockPath = restartLockPath(mosaicHome);
  const mutexPath = restartMutexPath(lockPath);
  await mkdir(dirname(lockPath), { recursive: true });

  const release = async (): Promise<void> => {
    const unlockMutex = await acquireRestartMutex(mutexPath, token);
    try {
      // Only delete the lock while it is still ours; after a stale/timeout
      // takeover the token differs and the new owner's lock stays in place.
      const holder = await readRestartLockToken(lockPath);
      if (holder === token) {
        await unlink(lockPath).catch(() => {});
      }
    } finally {
      await unlockMutex();
    }
  };

  const giveUpAt = Date.now() + RESTART_LOCK_MAX_WAIT_MS;
  for (;;) {
    const unlockMutex = await acquireRestartMutex(mutexPath, token);
    let claimed = false;
    try {
      // `undefined` = no lock file; `{ raw: null }` = exists but unreadable.
      let lockFile: { raw: string | null } | undefined;
      try {
        lockFile = { raw: await readFile(lockPath, 'utf8') };
      } catch (readErr) {
        const missing = (readErr as NodeJS.ErrnoException).code === 'ENOENT';
        lockFile = missing ? undefined : { raw: null };
      }

      const now = Date.now();
      // Why an existing lock may be broken, if at all. Staleness wins over the
      // timeout when both apply.
      let breakReason: 'stale' | 'timeout' | undefined;
      if (lockFile !== undefined) {
        if (lockFile.raw === null || isRestartLockContentStale(lockFile.raw, now)) {
          breakReason = 'stale';
        } else if (now >= giveUpAt) {
          breakReason = 'timeout';
        }
      }

      if (breakReason !== undefined) {
        process.stderr.write(
          breakReason === 'stale'
            ? 'Breaking stale fleet restart lock.\n'
            : `Timed out after ${RESTART_LOCK_MAX_WAIT_MS}ms waiting for the in-flight fleet ` +
                'restart; breaking the lock.\n',
        );
      }

      // Free lock or permitted takeover: a plain (over)write is enough because
      // the mutex is held between the read above and this write.
      if (lockFile === undefined || breakReason !== undefined) {
        await writeFile(lockPath, formatRestartLockContent(token));
        claimed = true;
      }
      // Otherwise a fresh restart owns the lock — wait below and re-evaluate.
    } finally {
      await unlockMutex();
    }

    if (claimed) {
      return { release };
    }
    await sleepFn(RESTART_LOCK_POLL_INTERVAL_MS);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the systemctl --user enable command for a given unit.
|
* Returns the systemctl --user enable command for a given unit.
|
||||||
* Used by the install auto-enable step to persist units across reboots.
|
* Used by the install auto-enable step to persist units across reboots.
|
||||||
@@ -1172,6 +1472,7 @@ export function isSendAccepted(capturedOutput: string): SendVerifyResult {
|
|||||||
|
|
||||||
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
|
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
|
||||||
const runner = deps.runner ?? runCommand;
|
const runner = deps.runner ?? runCommand;
|
||||||
|
const sleepFn = deps.sleepFn ?? defaultSleep;
|
||||||
const paths = resolveFleetPaths(deps.mosaicHome);
|
const paths = resolveFleetPaths(deps.mosaicHome);
|
||||||
const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
|
const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();
|
||||||
|
|
||||||
@@ -1285,9 +1586,22 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
.command(`${action} [agent]`)
|
.command(`${action} [agent]`)
|
||||||
.description(`${action} the fleet holder or one agent`)
|
.description(`${action} the fleet holder or one agent`)
|
||||||
.action(async (agent?: string) => {
|
.action(async (agent?: string) => {
|
||||||
|
const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>();
|
||||||
|
const activePaths = resolveFleetPaths(commandOpts.mosaicHome);
|
||||||
const roster = await loadRosterForCommand(cmd);
|
const roster = await loadRosterForCommand(cmd);
|
||||||
if (agent) {
|
if (agent) {
|
||||||
getRosterAgent(roster, agent);
|
getRosterAgent(roster, agent);
|
||||||
|
// Single-agent restart is guarded too: it can race a full restart that
|
||||||
|
// is tearing the shared holder down.
|
||||||
|
if (action === 'restart') {
|
||||||
|
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
||||||
|
try {
|
||||||
|
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
||||||
|
} finally {
|
||||||
|
await guard.release();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
await runChecked(runner, buildFleetServiceCommand(action, agent));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1298,6 +1612,21 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (action === 'restart') {
|
||||||
|
// Serialize the holder+agents teardown/relaunch behind the restart lock
|
||||||
|
// so a re-entrant restart waits for clean shutdown before relaunching,
|
||||||
|
// instead of racing a half-torn-down holder into a tight loop.
|
||||||
|
const guard = await acquireRestartLock(activePaths.mosaicHome, sleepFn);
|
||||||
|
try {
|
||||||
|
await runChecked(runner, buildFleetServiceCommand(action));
|
||||||
|
for (const rosterAgent of roster.agents) {
|
||||||
|
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
await guard.release();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
await runChecked(runner, buildFleetServiceCommand(action));
|
await runChecked(runner, buildFleetServiceCommand(action));
|
||||||
for (const rosterAgent of roster.agents) {
|
for (const rosterAgent of roster.agents) {
|
||||||
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
|
||||||
|
|||||||
@@ -4,5 +4,6 @@ export default defineConfig({
|
|||||||
test: {
|
test: {
|
||||||
globals: true,
|
globals: true,
|
||||||
environment: 'node',
|
environment: 'node',
|
||||||
|
testTimeout: 30_000,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
161
tools/install.sh
161
tools/install.sh
@@ -16,6 +16,10 @@
|
|||||||
# --framework Install/upgrade framework only (skip npm CLI)
|
# --framework Install/upgrade framework only (skip npm CLI)
|
||||||
# --cli Install/upgrade npm CLI only (skip framework)
|
# --cli Install/upgrade npm CLI only (skip framework)
|
||||||
# --ref <branch> Git ref for framework archive (default: main)
|
# --ref <branch> Git ref for framework archive (default: main)
|
||||||
|
# --dev Build CLI + gateway FROM SOURCE at --ref instead of the
|
||||||
|
# registry @latest. Zero registry writes — packs local
|
||||||
|
# tarballs and installs them globally. Use to test a branch
|
||||||
|
# end-to-end before cutting a release.
|
||||||
# --yes Accept all defaults; headless/non-interactive install
|
# --yes Accept all defaults; headless/non-interactive install
|
||||||
# --no-auto-launch Skip automatic mosaic wizard + gateway install on first install
|
# --no-auto-launch Skip automatic mosaic wizard + gateway install on first install
|
||||||
# --uninstall Reverse the install: remove framework dir, CLI package, and npmrc line
|
# --uninstall Reverse the install: remove framework dir, CLI package, and npmrc line
|
||||||
@@ -27,6 +31,7 @@
|
|||||||
# MOSAIC_PREFIX — npm global prefix (default: ~/.npm-global)
|
# MOSAIC_PREFIX — npm global prefix (default: ~/.npm-global)
|
||||||
# MOSAIC_NO_COLOR — disable colour (set to 1)
|
# MOSAIC_NO_COLOR — disable colour (set to 1)
|
||||||
# MOSAIC_REF — git ref for framework (default: main)
|
# MOSAIC_REF — git ref for framework (default: main)
|
||||||
|
# MOSAIC_DEV — equivalent to --dev (set to 1)
|
||||||
# MOSAIC_ASSUME_YES — equivalent to --yes (set to 1)
|
# MOSAIC_ASSUME_YES — equivalent to --yes (set to 1)
|
||||||
# ──────────────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────────────
|
||||||
#
|
#
|
||||||
@@ -43,6 +48,7 @@ FLAG_CLI=true
|
|||||||
FLAG_NO_AUTO_LAUNCH=false
|
FLAG_NO_AUTO_LAUNCH=false
|
||||||
FLAG_YES=false
|
FLAG_YES=false
|
||||||
FLAG_UNINSTALL=false
|
FLAG_UNINSTALL=false
|
||||||
|
FLAG_DEV=false
|
||||||
GIT_REF="${MOSAIC_REF:-main}"
|
GIT_REF="${MOSAIC_REF:-main}"
|
||||||
|
|
||||||
# MOSAIC_ASSUME_YES env var acts the same as --yes
|
# MOSAIC_ASSUME_YES env var acts the same as --yes
|
||||||
@@ -50,12 +56,18 @@ if [[ "${MOSAIC_ASSUME_YES:-0}" == "1" ]]; then
|
|||||||
FLAG_YES=true
|
FLAG_YES=true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# MOSAIC_DEV env var acts the same as --dev
|
||||||
|
if [[ "${MOSAIC_DEV:-0}" == "1" ]]; then
|
||||||
|
FLAG_DEV=true
|
||||||
|
fi
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--check) FLAG_CHECK=true; shift ;;
|
--check) FLAG_CHECK=true; shift ;;
|
||||||
--framework) FLAG_CLI=false; shift ;;
|
--framework) FLAG_CLI=false; shift ;;
|
||||||
--cli) FLAG_FRAMEWORK=false; shift ;;
|
--cli) FLAG_FRAMEWORK=false; shift ;;
|
||||||
--ref) GIT_REF="${2:-main}"; shift 2 ;;
|
--ref) GIT_REF="${2:-main}"; shift 2 ;;
|
||||||
|
--dev) FLAG_DEV=true; shift ;;
|
||||||
--yes|-y) FLAG_YES=true; shift ;;
|
--yes|-y) FLAG_YES=true; shift ;;
|
||||||
--no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
|
--no-auto-launch) FLAG_NO_AUTO_LAUNCH=true; shift ;;
|
||||||
--uninstall) FLAG_UNINSTALL=true; shift ;;
|
--uninstall) FLAG_UNINSTALL=true; shift ;;
|
||||||
@@ -72,6 +84,17 @@ CLI_PKG="${SCOPE}/mosaic"
|
|||||||
REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
|
REPO_BASE="https://git.mosaicstack.dev/mosaicstack/stack"
|
||||||
ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"
|
ARCHIVE_URL="${REPO_BASE}/archive/${GIT_REF}.tar.gz"
|
||||||
|
|
||||||
|
# In dev (build-from-source) mode the gateway is installed globally from a
|
||||||
|
# locally-built tarball. Tell the wizard / gateway-config stage NOT to overwrite
|
||||||
|
# it with the registry @latest build (honored by gatewayConfigStage).
|
||||||
|
if [[ "$FLAG_DEV" == "true" ]]; then
|
||||||
|
export MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Shared monorepo checkout (populated on demand by ensure_monorepo).
|
||||||
|
WORK_DIR=""
|
||||||
|
EXTRACTED_DIR=""
|
||||||
|
|
||||||
# ─── uninstall path ───────────────────────────────────────────────────────────
|
# ─── uninstall path ───────────────────────────────────────────────────────────
|
||||||
# Shell-level uninstall for when the CLI is broken or not available.
|
# Shell-level uninstall for when the CLI is broken or not available.
|
||||||
# Handles: framework directory, npm CLI package, npmrc scope line.
|
# Handles: framework directory, npm CLI package, npmrc scope line.
|
||||||
@@ -239,6 +262,99 @@ framework_version() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Download + extract the monorepo archive at $GIT_REF exactly once per run.
# Sets the script-level EXTRACTED_DIR to the repo root. Reused by both the
# framework install (Part 1) and the dev build-from-source path (Part 2).
#
# Reads:  ARCHIVE_URL, GIT_REF, TMPDIR (optional).
# Writes: WORK_DIR (fresh temp dir, removed by an EXIT trap), EXTRACTED_DIR.
# Exits 1 when neither curl nor wget exists, when the download/extraction
# fails, or when no directory is found inside the extracted archive.
ensure_monorepo() {
  # Already populated by an earlier call in this run — nothing to do.
  if [[ -n "$EXTRACTED_DIR" ]] && [[ -d "$EXTRACTED_DIR" ]]; then
    return 0
  fi

  require_cmd tar

  WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
  # shellcheck disable=SC2317
  cleanup_work() { [[ -n "$WORK_DIR" ]] && rm -rf "$WORK_DIR"; }
  trap cleanup_work EXIT

  info "Downloading source from ${GIT_REF}…"
  # Wrapping each pipeline in `if !` reports a failed download/extraction
  # explicitly instead of aborting silently or falling through to the
  # less specific "could not locate extracted source" error below.
  if command -v curl &>/dev/null; then
    if ! curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"; then
      fail "Failed to download or extract ${ARCHIVE_URL}."
      exit 1
    fi
  elif command -v wget &>/dev/null; then
    if ! wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"; then
      fail "Failed to download or extract ${ARCHIVE_URL}."
      exit 1
    fi
  else
    fail "curl or wget required to download source."
    exit 1
  fi

  # Gitea archives extract to <repo-name>/ inside the work dir
  EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
  if [[ -z "$EXTRACTED_DIR" ]] || [[ ! -d "$EXTRACTED_DIR" ]]; then
    fail "Could not locate extracted source in archive."
    # Show what was actually extracted to help diagnose the failure.
    ls -la "$WORK_DIR" >&2
    exit 1
  fi
}
|
||||||
|
|
||||||
|
# Build @mosaicstack/mosaic + @mosaicstack/gateway from source and install both
# globally from locally-packed tarballs. ZERO registry writes. Workspace deps
# (brain/config/db/…) are pulled from the registry at the versions pinned in
# each package.json — `pnpm pack` rewrites `workspace:*` to those versions.
#
# Reads:  EXTRACTED_DIR (repo root), WORK_DIR (scratch dir), PREFIX (npm prefix).
# Exits 1 when the source checkout is missing, pnpm cannot be made available,
# or either tarball is not produced by `pnpm pack`.
install_cli_from_source() {
  local src="$EXTRACTED_DIR"

  # Fail fast if the checkout was never populated: `cd ""` can succeed and
  # would otherwise run the build in the caller's working directory.
  if [[ -z "$src" ]] || [[ ! -d "$src" ]]; then
    fail "Source checkout not available — cannot build from source."
    exit 1
  fi

  local out_dir="$WORK_DIR/dist-tarballs"
  mkdir -p "$out_dir"

  # pnpm via corepack (ships with Node >= 16.9; required by Node >= 20 preflight).
  # Pin to the repo's packageManager version so the build matches CI. Surface
  # corepack failures so the fresh-machine case gives an actionable error
  # instead of a bare "command not found".
  if ! command -v pnpm &>/dev/null; then
    info "Activating pnpm via corepack…"
    corepack enable 2>&1 | sed 's/^/  /' || warn "corepack enable failed — pnpm may need manual install."
    corepack prepare pnpm@10.6.2 --activate 2>&1 | sed 's/^/  /' \
      || warn "corepack prepare failed — pnpm may need manual install."
  fi
  if ! command -v pnpm &>/dev/null; then
    fail "pnpm not available after corepack activation."
    echo "  Install pnpm manually (https://pnpm.io/installation) and re-run with --dev."
    exit 1
  fi

  info "Installing workspace dependencies (pnpm install)…"
  ( cd "$src" && pnpm install ) 2>&1 | sed 's/^/  /'

  info "Building CLI + gateway from source…"
  ( cd "$src" && pnpm --filter "@mosaicstack/mosaic..." --filter "@mosaicstack/gateway..." run build ) 2>&1 | sed 's/^/  /'

  info "Packing local tarballs…"
  ( cd "$src/packages/mosaic" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'
  ( cd "$src/apps/gateway" && pnpm pack --pack-destination "$out_dir" ) 2>&1 | sed 's/^/  /'

  # Locate the packed tarballs with globs rather than parsing `ls` output, so
  # the "not produced" checks below are always reached and do not depend on
  # the exit status of `ls` when nothing matches. An unmatched glob stays a
  # literal pattern (or expands to nothing under nullglob); either way the -f
  # test fails and the friendly error is printed.
  local cli_tgz gw_tgz
  local -a cli_matches gw_matches
  cli_matches=("$out_dir"/mosaicstack-mosaic-*.tgz)
  gw_matches=("$out_dir"/mosaicstack-gateway-*.tgz)
  cli_tgz="${cli_matches[0]:-}"
  gw_tgz="${gw_matches[0]:-}"

  if [[ ! -f "$cli_tgz" ]]; then
    fail "CLI tarball was not produced by pnpm pack."
    exit 1
  fi
  if [[ ! -f "$gw_tgz" ]]; then
    fail "Gateway tarball was not produced by pnpm pack."
    exit 1
  fi

  # Gateway first so it is present globally before the CLI's wizard runs (which
  # skips its own gateway install via MOSAIC_GATEWAY_SKIP_NPM_INSTALL=1).
  info "Installing gateway from source tarball (global)…"
  npm install -g "$gw_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'

  info "Installing CLI from source tarball (global)…"
  npm install -g "$cli_tgz" --prefix="$PREFIX" 2>&1 | sed 's/^/  /'

  ok "Installed from source: CLI $(installed_cli_version)"
}
|
||||||
|
|
||||||
# ─── preflight ────────────────────────────────────────────────────────────────
|
# ─── preflight ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
require_cmd node
|
require_cmd node
|
||||||
@@ -282,25 +398,8 @@ if [[ "$FLAG_FRAMEWORK" == "true" ]]; then
|
|||||||
warn "Framework not installed."
|
warn "Framework not installed."
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# Download repo archive and extract framework
|
# Download repo archive and extract framework (shared with the dev build)
|
||||||
require_cmd tar
|
ensure_monorepo
|
||||||
|
|
||||||
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-install-XXXXXX")"
|
|
||||||
cleanup_work() { rm -rf "$WORK_DIR"; }
|
|
||||||
trap cleanup_work EXIT
|
|
||||||
|
|
||||||
info "Downloading framework from ${GIT_REF}…"
|
|
||||||
if command -v curl &>/dev/null; then
|
|
||||||
curl -fsSL "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
|
||||||
elif command -v wget &>/dev/null; then
|
|
||||||
wget -qO- "$ARCHIVE_URL" | tar xz -C "$WORK_DIR"
|
|
||||||
else
|
|
||||||
fail "curl or wget required to download framework."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Gitea archives extract to <repo-name>/ inside the work dir
|
|
||||||
EXTRACTED_DIR="$(find "$WORK_DIR" -maxdepth 1 -mindepth 1 -type d | head -1)"
|
|
||||||
FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"
|
FRAMEWORK_SRC="$EXTRACTED_DIR/packages/mosaic/framework"
|
||||||
|
|
||||||
if [[ ! -d "$FRAMEWORK_SRC" ]]; then
|
if [[ ! -d "$FRAMEWORK_SRC" ]]; then
|
||||||
@@ -356,7 +455,11 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
CURRENT="$(installed_cli_version)"
|
CURRENT="$(installed_cli_version)"
|
||||||
LATEST="$(latest_cli_version)"
|
if [[ "$FLAG_DEV" == "true" ]]; then
|
||||||
|
LATEST=""
|
||||||
|
else
|
||||||
|
LATEST="$(latest_cli_version)"
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ -n "$CURRENT" ]]; then
|
if [[ -n "$CURRENT" ]]; then
|
||||||
dim " Installed: ${CLI_PKG}@${CURRENT}"
|
dim " Installed: ${CLI_PKG}@${CURRENT}"
|
||||||
@@ -364,7 +467,9 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
dim " Installed: (none)"
|
dim " Installed: (none)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -n "$LATEST" ]]; then
|
if [[ "$FLAG_DEV" == "true" ]]; then
|
||||||
|
dim " Source: ${REPO_BASE} (ref: ${GIT_REF}, build-from-source)"
|
||||||
|
elif [[ -n "$LATEST" ]]; then
|
||||||
dim " Latest: ${CLI_PKG}@${LATEST}"
|
dim " Latest: ${CLI_PKG}@${LATEST}"
|
||||||
else
|
else
|
||||||
dim " Latest: (registry unreachable)"
|
dim " Latest: (registry unreachable)"
|
||||||
@@ -372,7 +477,9 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
if [[ "$FLAG_CHECK" == "true" ]]; then
|
if [[ "$FLAG_CHECK" == "true" ]]; then
|
||||||
if [[ -z "$LATEST" ]]; then
|
if [[ "$FLAG_DEV" == "true" ]]; then
|
||||||
|
info "Dev mode: installed version is ${CURRENT:-(none)} (no registry comparison)."
|
||||||
|
elif [[ -z "$LATEST" ]]; then
|
||||||
warn "Could not reach registry."
|
warn "Could not reach registry."
|
||||||
elif [[ -z "$CURRENT" ]]; then
|
elif [[ -z "$CURRENT" ]]; then
|
||||||
warn "Not installed."
|
warn "Not installed."
|
||||||
@@ -383,6 +490,16 @@ if [[ "$FLAG_CLI" == "true" ]]; then
|
|||||||
else
|
else
|
||||||
ok "Up to date (or ahead of registry)."
|
ok "Up to date (or ahead of registry)."
|
||||||
fi
|
fi
|
||||||
|
elif [[ "$FLAG_DEV" == "true" ]]; then
|
||||||
|
info "Dev mode — building CLI + gateway from source at ref ${GIT_REF}…"
|
||||||
|
ensure_monorepo
|
||||||
|
install_cli_from_source
|
||||||
|
|
||||||
|
# PATH check for npm prefix
|
||||||
|
if [[ ":$PATH:" != *":$PREFIX/bin:"* ]]; then
|
||||||
|
warn "$PREFIX/bin is not on your PATH"
|
||||||
|
dim " Add to your shell rc: export PATH=\"$PREFIX/bin:\$PATH\""
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
if [[ -z "$LATEST" ]]; then
|
if [[ -z "$LATEST" ]]; then
|
||||||
warn "Could not reach registry at $REGISTRY — skipping npm CLI."
|
warn "Could not reach registry at $REGISTRY — skipping npm CLI."
|
||||||
|
|||||||
Reference in New Issue
Block a user