1574 lines
52 KiB
TypeScript
1574 lines
52 KiB
TypeScript
import { constants } from 'node:fs';
|
|
import { access, chmod, copyFile, mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
import { homedir, hostname, userInfo } from 'node:os';
|
|
import { dirname, join, resolve } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { spawn } from 'node:child_process';
|
|
import type { Command } from 'commander';
|
|
import YAML from 'yaml';
|
|
|
|
/**
 * Runs a command with stdio inherited from the current process (TTY passthrough).
 * Intended for interactive commands such as `tmux attach` that need a real terminal.
 *
 * @param command Executable to spawn.
 * @param args Arguments handed to the executable.
 * @returns A promise that resolves with the process exit code.
 */
export type InteractiveRunner = (command: string, args: string[]) => Promise<number>;
|
|
|
|
/** Outcome of a non-interactive command run, as returned by a `CommandRunner`. */
export interface CommandResult {
  /** Text the command wrote to standard output. */
  stdout: string;
  /** Text the command wrote to standard error. */
  stderr: string;
  /** Exit code reported for the command. */
  exitCode: number;
}
|
|
|
|
/**
 * Runs a command non-interactively and resolves with its captured
 * stdout, stderr and exit code.
 */
export type CommandRunner = (command: string, args: string[]) => Promise<CommandResult>;
|
|
|
|
/**
 * Injectable sleep helper used by the send --verify polling loop.
 * Production uses the default setTimeout-backed implementation; tests
 * substitute a stub so they do not incur real delays.
 *
 * @param ms Duration to wait, in milliseconds.
 */
export type SleepFn = (ms: number) => Promise<void>;
|
|
|
|
/**
 * Optional dependency overrides accepted by the fleet command registrars.
 * Every field may be omitted, in which case the registrar uses its own default.
 */
export interface FleetCommandDeps {
  /** Runner for non-interactive commands whose output is captured. */
  runner?: CommandRunner;
  /** Injectable interactive runner for commands needing inherited TTY (e.g., `tmux attach`). */
  interactiveRunner?: InteractiveRunner;
  /**
   * Injectable sleep function for the send --verify polling loop.
   * Defaults to a real setTimeout-based sleep; tests stub it to avoid
   * real delays.
   */
  sleepFn?: SleepFn;
  /** Mosaic home directory used to resolve fleet paths. */
  mosaicHome?: string;
  /** Framework root; `fleet init` reads example rosters from `<frameworkRoot>/fleet/examples`. */
  frameworkRoot?: string;
}
|
|
|
|
/**
 * Unvalidated roster shape. Leaf values are typed `unknown`, and most keys are
 * declared in both snake_case and camelCase spellings.
 * NOTE(review): presumably this is what the roster parser yields before
 * normalization into `FleetRoster` — confirm against the parsing helpers.
 */
interface RawFleetRoster {
  version?: unknown;
  transport?: unknown;
  /** tmux socket and holder-session settings. */
  tmux?: {
    socket_name?: unknown;
    socketName?: unknown;
    holder_session?: unknown;
    holderSession?: unknown;
  };
  /** Roster-wide defaults. */
  defaults?: {
    working_directory?: unknown;
    workingDirectory?: unknown;
  };
  /** Per-runtime settings keyed by runtime name. */
  runtimes?: Record<string, { reset_command?: unknown; resetCommand?: unknown }>;
  /** Agent entries as written in the roster file. */
  agents?: Array<{
    name?: unknown;
    runtime?: unknown;
    class?: unknown;
    working_directory?: unknown;
    workingDirectory?: unknown;
    model_hint?: unknown;
    modelHint?: unknown;
    persistent_persona?: unknown;
    persistentPersona?: unknown;
    reset_between_tasks?: unknown;
    resetBetweenTasks?: unknown;
    kickstart_template?: unknown;
    kickstartTemplate?: unknown;
  }>;
}
|
|
|
|
/** A single agent entry of a `FleetRoster`. */
export interface FleetAgent {
  /** Agent name; also used as the tmux session name and systemd unit instance. */
  name: string;
  /** Runtime the agent is declared to run (e.g. `claude`, `codex`, `opencode`, `pi`). */
  runtime: string;
  /** Agent class label. */
  className: string;
  /** Per-agent working directory; the roster default applies when omitted. */
  workingDirectory?: string;
  modelHint?: string;
  persistentPersona?: boolean | string;
  resetBetweenTasks?: boolean;
  kickstartTemplate?: string;
}
|
|
|
|
/** Roster shape returned by `loadFleetRoster` and consumed by the fleet commands. */
export interface FleetRoster {
  /** Roster schema version; only `1` is representable. */
  version: 1;
  /** Transport used to reach agents; only `tmux` is representable. */
  transport: 'tmux';
  tmux: {
    /** Name passed to `tmux -L` for every fleet tmux invocation. */
    socketName: string;
    /** Name of the holder session checked by `fleet verify`. */
    holderSession: string;
  };
  defaults: {
    /** Working directory for agents that do not declare their own. */
    workingDirectory: string;
  };
  /** Per-runtime settings keyed by runtime name. */
  runtimes: Record<string, { resetCommand: string }>;
  /** Agents managed by the fleet. */
  agents: FleetAgent[];
}
|
|
|
|
/** Filesystem locations used by the fleet commands, as produced by `resolveFleetPaths`. */
export interface FleetPaths {
  /** Mosaic home directory all other Mosaic paths hang off. */
  mosaicHome: string;
  /** `<mosaicHome>/fleet/roster.yaml`. */
  rosterPath: string;
  /** `<mosaicHome>/tools`. */
  toolsDir: string;
  /** `<mosaicHome>/tools/fleet`. */
  fleetToolsDir: string;
  /** `<mosaicHome>/tools/tmux`. */
  tmuxToolsDir: string;
  /** `~/.config/systemd/user` (based on the OS home directory, not `mosaicHome`). */
  systemdUserDir: string;
  /** `<mosaicHome>/fleet/agents`. */
  agentEnvDir: string;
}
|
|
|
|
/** `systemctl --user` verbs that `buildFleetServiceCommand` can emit. */
type FleetServiceAction = 'start' | 'stop' | 'restart' | 'status';
|
|
|
|
/** tmux socket name (`tmux -L`) used when a caller does not supply one. */
const DEFAULT_SOCKET_NAME = 'mosaic-factory';
/** Default holder session name. */
const DEFAULT_HOLDER_SESSION = '_holder';
/** Default agent working directory (home-relative). */
const DEFAULT_WORKING_DIRECTORY = '~/src';
|
|
|
|
/**
 * Default interval, in milliseconds, between capture-pane checks in `send --verify`.
 * Short enough to react quickly, long enough not to hammer tmux on busy hosts.
 */
export const VERIFY_POLL_INTERVAL_MS = 400;
|
|
|
|
/**
 * Default overall timeout, in milliseconds, for the `send --verify` polling loop.
 * Overridable per invocation with `--verify-timeout <ms>` on `agent send`.
 */
export const VERIFY_DEFAULT_TIMEOUT_MS = 6_000;
|
|
/**
 * Built-in reset command per known runtime.
 * NOTE(review): presumably used as the fallback when a roster does not declare
 * `runtimes` — confirm against the roster normalization code.
 */
const DEFAULT_RUNTIME_RESETS: Record<string, { resetCommand: string }> = {
  claude: { resetCommand: '/clear' },
  codex: { resetCommand: '/clear' },
  opencode: { resetCommand: '/clear' },
  pi: { resetCommand: '/new' },
};
|
|
|
|
/**
 * Derives every fleet-related filesystem location from a Mosaic home directory.
 *
 * @param mosaicHome Mosaic home directory; defaults to `defaultMosaicHome()`.
 * @returns The resolved paths. `systemdUserDir` is always based on the OS home
 *   directory rather than on `mosaicHome`.
 */
export function resolveFleetPaths(mosaicHome = defaultMosaicHome()): FleetPaths {
  const fleetDir = join(mosaicHome, 'fleet');
  const toolsDir = join(mosaicHome, 'tools');
  const systemdUserDir = join(homedir(), '.config', 'systemd', 'user');

  return {
    mosaicHome,
    rosterPath: join(fleetDir, 'roster.yaml'),
    toolsDir,
    fleetToolsDir: join(toolsDir, 'fleet'),
    tmuxToolsDir: join(toolsDir, 'tmux'),
    systemdUserDir,
    agentEnvDir: join(fleetDir, 'agents'),
  };
}
|
|
|
|
/** Returns the default Mosaic home directory: `<OS home>/.config/mosaic`. */
function defaultMosaicHome(): string {
  const userConfigDir = join(homedir(), '.config');
  return join(userConfigDir, 'mosaic');
}
|
|
|
|
/**
 * Guards systemd installation against a non-default Mosaic home.
 *
 * @param mosaicHome Mosaic home directory the caller intends to use.
 * @throws Error when `mosaicHome` does not resolve to the default Mosaic home;
 *   per the error message, the user systemd units use `%h/.config/mosaic` paths.
 */
function assertDefaultMosaicHomeForSystemd(mosaicHome: string): void {
  const expectedHome = defaultMosaicHome();
  // Compare resolved absolute paths so equivalent spellings are accepted.
  if (resolve(mosaicHome) === resolve(expectedHome)) {
    return;
  }
  throw new Error(
    `install-systemd only supports the default Mosaic home (${expectedHome}) because the user systemd units use %h/.config/mosaic paths.`,
  );
}
|
|
|
|
/**
 * Reads a roster file as UTF-8, parses it, and normalizes it.
 *
 * @param path Location of the roster file.
 * @returns The normalized roster.
 */
export async function loadFleetRoster(path: string): Promise<FleetRoster> {
  const text = await readFile(path, 'utf8');
  // The path is handed to the parser alongside the text.
  return normalizeRoster(parseRosterText(text, path));
}
|
|
|
|
/**
 * Looks up an agent by exact name.
 *
 * @param roster Roster to search.
 * @param name Agent name to find.
 * @returns The first agent whose `name` equals `name`.
 * @throws Error when no agent in the roster has that name.
 */
export function getRosterAgent(roster: FleetRoster, name: string): FleetAgent {
  for (const candidate of roster.agents) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  throw new Error(`Agent "${name}" is not in the fleet roster.`);
}
|
|
|
|
/**
 * Renders the generated environment-file content for one agent.
 *
 * Emits `MOSAIC_AGENT_NAME`, `MOSAIC_AGENT_RUNTIME`, `MOSAIC_AGENT_WORKDIR` and
 * `MOSAIC_TMUX_SOCKET`, one `KEY=value` per line, with a trailing newline.
 *
 * @param roster Roster supplying the default working directory and tmux socket name.
 * @param agent Agent the file is generated for.
 * @returns The env-file text.
 */
export function generateAgentEnv(roster: FleetRoster, agent: FleetAgent): string {
  const workingDirectory = agent.workingDirectory ?? roster.defaults.workingDirectory;
  const rendered: string[] = [];
  const emit = (key: string, value: string): void => {
    rendered.push(`${key}=${shellEnvValue(value)}`);
  };

  emit('MOSAIC_AGENT_NAME', agent.name);
  emit('MOSAIC_AGENT_RUNTIME', agent.runtime);
  // The working directory goes through expandHome before being written.
  emit('MOSAIC_AGENT_WORKDIR', expandHome(workingDirectory));
  emit('MOSAIC_TMUX_SOCKET', roster.tmux.socketName);

  return `${rendered.join('\n')}\n`;
}
|
|
|
|
/**
 * Merges freshly generated env content with an existing env file.
 *
 * Generated content always comes first. Lines from the existing file are
 * appended unless they are blank or assign a key that the generated content
 * already defines; lines without a `KEY=` prefix (e.g. comments) are preserved.
 *
 * @param generatedEnv Newly generated env text.
 * @param existingEnv Current env-file text, if any.
 * @returns `generatedEnv` unchanged when nothing is preserved; otherwise the
 *   generated text (trailing whitespace trimmed), the preserved lines, and a
 *   trailing newline.
 */
export function mergeAgentEnv(generatedEnv: string, existingEnv?: string): string {
  const assignedKey = (line: string): string | undefined =>
    line.match(/^([A-Za-z_][A-Za-z0-9_]*)=/)?.[1];

  if (existingEnv == null || existingEnv.trim() === '') {
    return generatedEnv;
  }

  const managedKeys = new Set<string>();
  for (const line of generatedEnv.split('\n')) {
    const key = assignedKey(line);
    if (key !== undefined) {
      managedKeys.add(key);
    }
  }

  const carriedOver: string[] = [];
  for (const line of existingEnv.split('\n')) {
    if (line.trim() === '') {
      continue;
    }
    const key = assignedKey(line);
    if (key !== undefined && managedKeys.has(key)) {
      continue;
    }
    carriedOver.push(line);
  }

  return carriedOver.length === 0
    ? generatedEnv
    : `${generatedEnv.trimEnd()}\n${carriedOver.join('\n')}\n`;
}
|
|
|
|
/**
 * Builds the `systemctl --user` argv for a fleet service action.
 *
 * @param action systemctl verb to run.
 * @param agentName Agent whose `mosaic-agent@<name>.service` unit is targeted; when
 *   omitted or empty, the `mosaic-tmux-holder.service` unit is targeted instead.
 * @returns The command followed by its arguments.
 */
export function buildFleetServiceCommand(action: FleetServiceAction, agentName?: string): string[] {
  let unit = 'mosaic-tmux-holder.service';
  if (agentName) {
    unit = `mosaic-agent@${agentName}.service`;
  }
  return ['systemctl', '--user', action, unit];
}
|
|
|
|
/**
 * Builds the argv that invokes `agent-send.sh` from the tmux tools directory.
 *
 * @param paths Fleet paths; only `tmuxToolsDir` is read.
 * @param agentName Value passed via `-s`.
 * @param message Value passed via `-m`.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @param sourceLabel Value passed via `-S`; defaults to `getDefaultOperatorSourceLabel()`.
 * @returns The script path followed by its arguments.
 */
export function buildAgentSendCommand(
  paths: FleetPaths,
  agentName: string,
  message: string,
  socketName = DEFAULT_SOCKET_NAME,
  sourceLabel = getDefaultOperatorSourceLabel(),
): string[] {
  const script = join(paths.tmuxToolsDir, 'agent-send.sh');
  const flagPairs: Array<[string, string]> = [
    ['-L', socketName],
    ['-S', sourceLabel],
    ['-s', agentName],
    ['-m', message],
  ];
  return [script, ...flagPairs.flatMap(([flag, value]) => [flag, value])];
}
|
|
|
|
/**
 * Returns the default source label for operator-sent messages:
 * `<short hostname>:operator`, where the short hostname is the part of the OS
 * hostname before the first dot, or `localhost` when that part is empty.
 */
export function getDefaultOperatorSourceLabel(): string {
  const [firstLabel] = hostname().split('.');
  const shortName = firstLabel || 'localhost';
  return [shortName, 'operator'].join(':');
}
|
|
|
|
/**
 * Builds the argv that invokes `send-message.sh` to deliver a runtime reset command.
 *
 * @param paths Fleet paths; only `tmuxToolsDir` is read.
 * @param agentName Agent session name; passed to `-t` with the `=` prefix.
 * @param resetCommand Text passed via `-m` (e.g. `/clear`).
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The script path followed by its arguments.
 */
export function buildAgentResetCommand(
  paths: FleetPaths,
  agentName: string,
  resetCommand: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const script = join(paths.tmuxToolsDir, 'send-message.sh');
  const target = `=${agentName}`;
  return [script, '-L', socketName, '-t', target, '-m', resetCommand];
}
|
|
|
|
/**
 * Builds the `tmux capture-pane` argv that prints recent output of an agent's pane.
 *
 * @param agentName Agent session name; pane `=<agentName>:0.0` is captured.
 * @param lines Number of lines; rendered as the `-S -<lines>` start-line argument.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The command followed by its arguments.
 */
export function buildAgentTailCommand(
  agentName: string,
  lines: number,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const pane = `=${agentName}:0.0`;
  const startLine = `-${lines}`;
  return ['tmux', '-L', socketName, 'capture-pane', '-t', pane, '-p', '-S', startLine];
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Fleet ps — phase 2 observability helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Heartbeat interval in milliseconds, used as the base of the staleness threshold. */
export const HEARTBEAT_INTERVAL_MS = 15_000;
/** A heartbeat older than this many intervals is reported as stale. */
export const HEARTBEAT_HEALTHY_MULTIPLIER = 3;
|
|
|
|
/** Parsed view of an agent heartbeat file, as produced by `parseHeartbeat`. */
export interface HeartbeatInfo {
  /** Timestamp from the `ts=` line, or null when absent or unparseable. */
  ts: Date | null;
  /** Process id from the `pid=` line, or null when absent or unparseable. */
  pid: number | null;
  /** Value of the `status=` line when it is `ok` or `busy`; otherwise null. */
  status: 'ok' | 'busy' | null;
  /** healthy | stale | unknown */
  health: 'healthy' | 'stale' | 'unknown';
  /** Age of `ts` relative to the evaluation time, in milliseconds; null without `ts`. */
  ageMs: number | null;
}
|
|
|
|
/** One row of `fleet ps` output; also the element shape of `fleet ps --json`. */
export interface AgentPsRow {
  /** Agent name from the roster. */
  name: string;
  /** Tenant identifier (defaults to the OS username). */
  tenant_id: string;
  /** Short hostname of the machine producing the row. */
  host: string;
  /** Runtime declared for the agent in the roster. */
  runtime: string;
  /** systemd `ActiveState` of the agent unit. */
  systemdActive: string;
  /** systemd `UnitFileState` of the agent unit. */
  systemdEnabled: string;
  /** False when the pane is reported dead or no pane line was returned. */
  paneAlive: boolean;
  panePid: number | null;
  paneCommand: string | null;
  /** Whole seconds since the pane's last activity, when known. */
  idleSeconds: number | null;
  heartbeat: HeartbeatInfo;
  /** roster runtime !== actual pane command */
  driftFlag: boolean;
  /** active but UnitFileState=disabled */
  bootEnableWarning: boolean;
}
|
|
|
|
/**
 * Builds the `systemctl --user show` argv that queries an agent unit's
 * `ActiveState`, `SubState` and `UnitFileState` properties.
 *
 * @param agentName Agent whose `mosaic-agent@<name>.service` unit is queried.
 * @returns The command followed by its arguments.
 */
export function buildSystemdShowCommand(agentName: string): string[] {
  const properties = ['ActiveState', 'SubState', 'UnitFileState'];
  const propertyArgs = properties.flatMap((property) => ['-p', property]);
  return ['systemctl', '--user', 'show', `mosaic-agent@${agentName}.service`, ...propertyArgs];
}
|
|
|
|
/**
 * Builds the `tmux list-panes` argv for an agent's pane (`=<agentName>:0.0`).
 * Output format: `#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}`.
 *
 * @param agentName Agent session name.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The command followed by its arguments.
 */
export function buildTmuxListPanesCommand(
  agentName: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const pane = `=${agentName}:0.0`;
  const format = '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}';
  return ['tmux', '-L', socketName, 'list-panes', '-t', pane, '-F', format];
}
|
|
|
|
/**
 * Returns the heartbeat file path for an agent: `<mosaicHome>/fleet/run/<agentName>.hb`.
 *
 * @param agentName Agent name used as the file stem.
 * @param mosaicHome Mosaic home directory; defaults to `defaultMosaicHome()`.
 */
export function heartbeatPath(agentName: string, mosaicHome = defaultMosaicHome()): string {
  const fileName = `${agentName}.hb`;
  return join(mosaicHome, 'fleet', 'run', fileName);
}
|
|
|
|
/**
 * Parses heartbeat file contents into a `HeartbeatInfo`.
 *
 * File format (one key=value per line):
 *   ts=<iso8601>
 *   pid=<pid>
 *   status=<ok|busy>
 *
 * Unknown keys and malformed values are ignored. When a key appears more than
 * once, the last valid occurrence wins.
 *
 * @param content File contents, or null when the file could not be read.
 * @param nowMs Evaluation time in epoch milliseconds; defaults to `Date.now()`.
 * @returns Parsed fields. `health` is `unknown` when no valid `ts` was found,
 *   `healthy` when the timestamp's age is within
 *   `HEARTBEAT_INTERVAL_MS * HEARTBEAT_HEALTHY_MULTIPLIER`, and `stale` otherwise.
 */
export function parseHeartbeat(content: string | null, nowMs = Date.now()): HeartbeatInfo {
  const info: HeartbeatInfo = { ts: null, pid: null, status: null, health: 'unknown', ageMs: null };
  if (content === null) {
    return info;
  }

  for (const line of content.split('\n')) {
    // Split on the first '=' only, so values may themselves contain '='.
    const eq = line.indexOf('=');
    const key = eq === -1 ? line : line.slice(0, eq);
    const val = eq === -1 ? '' : line.slice(eq + 1).trim();

    switch (key) {
      case 'ts': {
        if (val) {
          const parsed = new Date(val);
          if (!Number.isNaN(parsed.getTime())) {
            info.ts = parsed;
          }
        }
        break;
      }
      case 'pid': {
        if (val) {
          const parsed = Number.parseInt(val, 10);
          if (Number.isFinite(parsed)) {
            info.pid = parsed;
          }
        }
        break;
      }
      case 'status': {
        if (val === 'ok' || val === 'busy') {
          info.status = val;
        }
        break;
      }
      default:
        break;
    }
  }

  if (info.ts !== null) {
    const age = nowMs - info.ts.getTime();
    info.ageMs = age;
    info.health = age <= HEARTBEAT_INTERVAL_MS * HEARTBEAT_HEALTHY_MULTIPLIER ? 'healthy' : 'stale';
  }
  return info;
}
|
|
|
|
/**
 * Parses the `KEY=value` output of
 * `systemctl --user show ... -p ActiveState -p SubState -p UnitFileState`.
 *
 * Each line is split at its first `=`; values are trimmed, and lines without
 * `=` are ignored. A repeated key keeps its last value.
 *
 * @param output Raw stdout of the systemctl call.
 * @returns The three properties, each `'unknown'` when missing from the output.
 */
export function parseSystemdShow(output: string): {
  ActiveState: string;
  SubState: string;
  UnitFileState: string;
} {
  const properties = new Map<string, string>();
  output.split('\n').forEach((line) => {
    const at = line.indexOf('=');
    if (at === -1) {
      return;
    }
    properties.set(line.slice(0, at), line.slice(at + 1).trim());
  });

  const read = (name: string): string => properties.get(name) ?? 'unknown';
  return {
    ActiveState: read('ActiveState'),
    SubState: read('SubState'),
    UnitFileState: read('UnitFileState'),
  };
}
|
|
|
|
/**
 * Parses the first line of
 * `tmux list-panes -F '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}'`.
 *
 * The pid is the first field and the dead flag and activity epoch are the last
 * two, so everything in between is treated as the command. This keeps the
 * fixed fields aligned when the command name itself contains spaces.
 * Lines with fewer than four fields are read positionally.
 *
 * @param output Raw stdout of the list-panes call; only the first line is used.
 * @param nowMs Evaluation time in epoch milliseconds; defaults to `Date.now()`.
 * @returns `pid` (null when missing or non-numeric), `command`, `dead`, and
 *   `idleSeconds` (whole seconds since `pane_activity`, a Unix epoch in seconds;
 *   null when the epoch is missing, non-numeric or not positive). Empty output
 *   yields a dead pane with all other fields null.
 */
export function parseTmuxListPanes(
  output: string,
  nowMs = Date.now(),
): { pid: number | null; command: string | null; dead: boolean; idleSeconds: number | null } {
  const line = output.trim().split('\n')[0];
  if (!line) {
    return { pid: null, command: null, dead: true, idleSeconds: null };
  }

  const parts = line.split(' ');
  let pidText: string | undefined;
  let commandText: string | undefined;
  let deadText: string | undefined;
  let activityText: string | undefined;
  if (parts.length >= 4) {
    // Anchor the fixed fields at both ends; the middle is the command.
    pidText = parts[0];
    commandText = parts.slice(1, -2).join(' ');
    deadText = parts[parts.length - 2];
    activityText = parts[parts.length - 1];
  } else {
    [pidText, commandText, deadText, activityText] = parts;
  }

  const pidValue = pidText ? Number(pidText) : Number.NaN;
  const pid = Number.isFinite(pidValue) ? pidValue : null;
  const command = commandText ?? null;
  const dead = deadText === '1';

  const activityEpoch = activityText ? Number(activityText) : Number.NaN;
  const idleSeconds =
    Number.isFinite(activityEpoch) && activityEpoch > 0
      ? Math.floor((nowMs - activityEpoch * 1000) / 1000)
      : null;

  return { pid, command, dead, idleSeconds };
}
|
|
|
|
/**
 * Canonical pane command(s) per known runtime:
 *   claude   → claude
 *   codex    → codex
 *   opencode → opencode
 *   pi       → pi
 *
 * Held in a Map so that lookups only ever see these entries; a plain object
 * would also resolve inherited keys such as `constructor` or `toString`.
 */
const CANONICAL_PANE_COMMANDS: ReadonlyMap<string, readonly string[]> = new Map([
  ['claude', ['claude']],
  ['codex', ['codex']],
  ['opencode', ['opencode']],
  ['pi', ['pi']],
]);

/**
 * Determines whether there is runtime drift: the roster declares one runtime
 * but the pane is running a command that is not canonical for it (e.g.
 * python3/dogfood-agent.py for an agent whose roster runtime is "pi").
 *
 * @param rosterRuntime Runtime declared for the agent in the roster.
 * @param paneCommand Command currently running in the pane, if known.
 * @returns `true` only when the runtime is a known one and the pane command is
 *   not among its canonical commands. Returns `false` when the pane command is
 *   null or empty, or when the runtime is unknown (including names that happen
 *   to match built-in object properties).
 */
export function detectDrift(rosterRuntime: string, paneCommand: string | null): boolean {
  if (!paneCommand) return false;
  const expected = CANONICAL_PANE_COMMANDS.get(rosterRuntime);
  if (expected === undefined) return false;
  return !expected.includes(paneCommand);
}
|
|
|
|
/**
 * Returns the default tenant_id (OS username) and host (short hostname).
 * These MUST appear in every --json record for multi-tenant/multi-host zero-foreclosure.
 *
 * If the OS user lookup throws, the tenant falls back to `$USER`, then
 * `$LOGNAME`, then `'unknown'`. The host is the part of the hostname before the
 * first dot, or `'localhost'` when that part is empty.
 */
export function getDefaultTenantAndHost(): { tenant_id: string; host: string } {
  const lookupTenant = (): string => {
    try {
      return userInfo().username;
    } catch {
      return process.env['USER'] ?? process.env['LOGNAME'] ?? 'unknown';
    }
  };

  const tenant_id = lookupTenant();
  const [firstLabel] = hostname().split('.');
  return { tenant_id, host: firstLabel || 'localhost' };
}
|
|
|
|
/**
 * Builds the command that creates a detached, grouped viewer session targeting
 * an agent session. A grouped session shares the target's windows but gets
 * INDEPENDENT sizing, so attaching the viewer never resizes the agent's window.
 *
 * The viewer session name is derived from the agent name and a unique suffix
 * (typically the caller's PID) so concurrent watchers don't collide.
 *
 * Usage sequence:
 *   1. Run buildAgentWatchCreateViewerCommand → create grouped session (via capturing runner).
 *   2. Run buildAgentWatchAttachCommand → attach -r to the viewer session (via interactiveRunner).
 *   3. Run buildAgentWatchKillViewerCommand → kill the viewer session on detach (via capturing runner).
 *
 * @param agentName Agent session to group with; passed to `-t` with the `=` prefix.
 * @param viewerSessionName Name of the new viewer session (`-s`).
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The command followed by its arguments.
 */
export function buildAgentWatchCreateViewerCommand(
  agentName: string,
  viewerSessionName: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const groupTarget = `=${agentName}`;
  return ['tmux', '-L', socketName, 'new-session', '-d', '-t', groupTarget, '-s', viewerSessionName];
}
|
|
|
|
/**
 * Builds the interactive, read-only attach command for a viewer session.
 * Must be run via interactiveRunner (stdio: 'inherit').
 *
 * The target is given tmux's `=` exact-match prefix, like the other session
 * targets in this file, so tmux cannot fall back to prefix matching and attach
 * to a different session whose name merely starts with `viewerSessionName`.
 *
 * @param viewerSessionName Viewer session to attach to; a name that already
 *   starts with `=` is passed through unchanged.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The command followed by its arguments.
 */
export function buildAgentWatchAttachCommand(
  viewerSessionName: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const target = viewerSessionName.startsWith('=') ? viewerSessionName : `=${viewerSessionName}`;
  return ['tmux', '-L', socketName, 'attach', '-r', '-t', target];
}
|
|
|
|
/**
 * Builds the kill-session command that cleans up a viewer session after detach.
 * Keeps the agent session intact.
 *
 * The target is given tmux's `=` exact-match prefix. Without it, if the viewer
 * session is already gone, tmux may prefix-match and kill another watcher's
 * session (e.g. `coder-watch-12` matching `coder-watch-123`).
 *
 * @param viewerSessionName Viewer session to kill; a name that already starts
 *   with `=` is passed through unchanged.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @returns The command followed by its arguments.
 */
export function buildAgentWatchKillViewerCommand(
  viewerSessionName: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const target = viewerSessionName.startsWith('=') ? viewerSessionName : `=${viewerSessionName}`;
  return ['tmux', '-L', socketName, 'kill-session', '-t', target];
}
|
|
|
|
/**
 * Returns a viewer session name for the given agent: `<agentName>-watch-<pid>`.
 * The current process id is the suffix, so concurrent watcher processes
 * produce distinct names.
 */
export function buildViewerSessionName(agentName: string): string {
  return [agentName, 'watch', String(process.pid)].join('-');
}
|
|
|
|
/**
 * Builds a read-only attach command that targets the agent session directly.
 *
 * @deprecated Use buildAgentWatchCreateViewerCommand + buildAgentWatchAttachCommand +
 * buildAgentWatchKillViewerCommand instead. This bare attach targets the agent session
 * directly and can resize it when the viewer terminal is smaller than the agent's window.
 *
 * Kept for backward compatibility only.
 *
 * @param agentName Agent session name; passed to `-t` with the `=` prefix.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 */
export function buildAgentWatchCommand(
  agentName: string,
  socketName = DEFAULT_SOCKET_NAME,
): string[] {
  const target = `=${agentName}`;
  return ['tmux', '-L', socketName, 'attach', '-r', '-t', target];
}
|
|
|
|
/**
 * Builds the capture-pane command used to verify that an agent send was
 * accepted (not left as an unsubmitted draft). The captured tail of the pane
 * is what the draft heuristic is checked against.
 *
 * @param agentName Agent session name; pane `=<agentName>:0.0` is captured.
 * @param socketName Value passed via `-L`; defaults to the default socket name.
 * @param lines Number of lines; rendered as the `-S -<lines>` start-line argument (default 5).
 * @returns The command followed by its arguments.
 */
export function buildAgentVerifyAcceptedCommand(
  agentName: string,
  socketName = DEFAULT_SOCKET_NAME,
  lines = 5,
): string[] {
  const pane = `=${agentName}:0.0`;
  const captureArgs = ['capture-pane', '-t', pane, '-p', '-S', `-${lines}`];
  return ['tmux', '-L', socketName, ...captureArgs];
}
|
|
|
|
/**
 * Result of a send-verify check.
 * - 'accepted': positive evidence that the message was accepted (response content present).
 * - 'draft': the last non-empty line matches the draft heuristic (unsubmitted input).
 * - 'unverifiable': the pane did not change after the send (stale or blank), so acceptance
 *   cannot be determined; fails closed per FR-5.
 */
export type SendVerifyResult = 'accepted' | 'draft' | 'unverifiable';
|
|
|
|
/**
 * Classifies a send-verify check by comparing pane snapshots taken BEFORE and
 * AFTER the send. This is the primary classifier for `send --verify`.
 *
 * It guards against the stale-pane false success: if the pane content did not
 * change after the send, the message never registered in the TUI (wedged pane,
 * dropped send, etc.).
 *
 * Classification:
 *   'unverifiable' — AFTER has no non-blank line, or AFTER is identical to BEFORE.
 *   'draft'        — AFTER differs from BEFORE and its last non-blank line starts with
 *                    the draft pattern ("> "); the message was typed but not submitted.
 *   'accepted'     — AFTER differs from BEFORE and does not end in a draft line.
 *
 * Blank AFTER: full-screen TUIs (claude, codex, opencode, pi) render blank for
 * `tmux capture-pane`, which is indistinguishable from a wedged pane, so it is
 * always 'unverifiable' (fail-closed).
 *
 * Definitive acceptance: Phase-2 can only observe the pane surface; there is no
 * runtime acknowledgement yet. Pane change is the best available signal against
 * an opaque TUI; definitive acceptance requires the Phase-3 heartbeat-ack.
 *
 * Draft heuristic: after stripping ANSI colour escapes, a last non-blank line
 * that starts with ">" followed by whitespace is treated as unsubmitted input.
 * The pattern is specific to pi/claude TUIs; detection for codex/opencode is
 * best-effort only.
 *
 * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified.
 *
 * @param before Pane snapshot captured BEFORE the send command.
 * @param after  Pane snapshot captured AFTER the send command (after the delay).
 */
export function classifySendResult(before: string, after: string): SendVerifyResult {
  // Walk backwards to find the last line with visible content.
  const rows = after.split('\n');
  let tail: string | undefined;
  for (let i = rows.length - 1; i >= 0; i -= 1) {
    const row = rows[i];
    if (row !== undefined && row.trim().length > 0) {
      tail = row;
      break;
    }
  }

  // Nothing visible: blank full-screen TUI capture or wedged pane.
  if (tail === undefined) return 'unverifiable';
  // Identical snapshots: the send left no trace in the pane.
  if (after === before) return 'unverifiable';

  const visible = tail.replace(/\x1b\[[0-9;]*m/g, '').trim();
  // pi/claude show pending user input as "> <text>"; other TUIs may not.
  return /^>\s/.test(visible) ? 'draft' : 'accepted';
}
|
|
|
|
/**
 * Checks whether a send was accepted (not left as a draft) using only the AFTER snapshot.
 *
 * @deprecated Prefer classifySendResult(before, after), which guards against stale-pane
 * false successes. This single-snapshot variant cannot detect a wedged pane that still
 * shows old non-empty content; it returns 'accepted' in that case.
 *
 * Retained for unit-test compatibility with single-snapshot assertions.
 *
 * @param capturedOutput Pane text captured after the send.
 * @returns
 *   'unverifiable' — blank/empty capture (full-screen TUIs render blank; we cannot tell).
 *   'draft'        — last non-empty line matches the draft heuristic.
 *   'accepted'     — non-blank and not a draft line (but may be stale — see above).
 */
export function isSendAccepted(capturedOutput: string): SendVerifyResult {
  const lastVisible = capturedOutput
    .split('\n')
    .reverse()
    .find((row) => row.trim().length > 0);

  // Known-unverifiable case; fail closed rather than report success.
  if (lastVisible === undefined) {
    return 'unverifiable';
  }

  // Drop ANSI colour escapes before applying the pi/claude "> " draft heuristic.
  // Detection for codex/opencode TUIs is best-effort only.
  const plain = lastVisible.replace(/\x1b\[[0-9;]*m/g, '').trim();
  if (/^>\s/.test(plain)) {
    return 'draft';
  }
  return 'accepted';
}
|
|
|
|
/**
 * Registers the `fleet` command and its subcommands on `program`:
 * `init`, `install`, `install-systemd`, `start`, `stop`, `restart`, `status`,
 * `verify` and `ps`.
 *
 * @param program Commander command that receives the `fleet` subcommand.
 * @param deps Optional overrides: `runner` replaces the default command runner,
 *   `mosaicHome` seeds the default of `--mosaic-home`, and `frameworkRoot`
 *   replaces the resolved framework root.
 * @returns The created `fleet` command.
 */
export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
  const runner = deps.runner ?? runCommand;
  const paths = resolveFleetPaths(deps.mosaicHome);
  const frameworkRoot = deps.frameworkRoot ?? resolveFrameworkRoot();

  const cmd = program
    .command('fleet')
    .description('Manage the local Mosaic tmux fleet canary')
    .option('--mosaic-home <path>', 'Mosaic home directory', paths.mosaicHome)
    .option('--roster <path>', 'Fleet roster path');

  type FleetGlobalOptions = { mosaicHome: string; roster?: string };
  // Options are read at action time so command-line values are honoured.
  const readGlobalOptions = (): FleetGlobalOptions => cmd.opts<FleetGlobalOptions>();

  // fleet init: print an example roster, or write it with --write.
  cmd
    .command('init')
    .description('Initialize a local fleet roster')
    .option('--profile <name>', 'Roster profile: minimal or local-canary', 'minimal')
    .option('--write', 'Write the roster to Mosaic home')
    .option('--force', 'Overwrite an existing roster when used with --write')
    .action(async (opts: { profile: string; write?: boolean; force?: boolean }) => {
      const globalOptions = readGlobalOptions();
      const activePaths = resolveFleetPaths(globalOptions.mosaicHome);
      const profile = parseInitProfile(opts.profile);
      const examplePath = join(frameworkRoot, 'fleet', 'examples', `${profile}.yaml`);
      const content = await readFile(examplePath, 'utf8');

      if (!opts.write) {
        console.log(content.trimEnd());
        return;
      }

      const destination = globalOptions.roster ?? activePaths.rosterPath;
      // Refuse to clobber an existing roster unless --force was given.
      if (!opts.force && (await canRead(destination))) {
        throw new Error(
          `Fleet roster already exists: ${destination}. Re-run with --force to overwrite.`,
        );
      }
      await mkdir(dirname(destination), { recursive: true });
      await writeFile(destination, content);
      console.log(`Wrote fleet roster: ${destination}`);
    });

  // fleet install / install-systemd: both names run the same installer.
  for (const installName of ['install', 'install-systemd']) {
    cmd
      .command(installName)
      .description('Install local fleet tools and user systemd units')
      .action(async () => installFleet(cmd, frameworkRoot));
  }

  // fleet start|stop|restart [agent]
  for (const action of ['start', 'stop', 'restart'] as const) {
    cmd
      .command(`${action} [agent]`)
      .description(`${action} the fleet holder or one agent`)
      .action(async (agent?: string) => {
        const roster = await loadRosterForCommand(cmd);

        if (agent) {
          // Throws when the agent is not in the roster.
          getRosterAgent(roster, agent);
          await runChecked(runner, buildFleetServiceCommand(action, agent));
          return;
        }

        if (action === 'stop') {
          const agentNames = roster.agents.map((rosterAgent) => rosterAgent.name);
          await stopFleetBestEffort(runner, agentNames);
          return;
        }

        // Whole-fleet start/restart: holder first, then each agent in roster order.
        await runChecked(runner, buildFleetServiceCommand(action));
        for (const rosterAgent of roster.agents) {
          await runChecked(runner, buildFleetServiceCommand(action, rosterAgent.name));
        }
      });
  }

  // fleet status [agent]
  cmd
    .command('status [agent]')
    .description('Show fleet holder or agent systemd status')
    .option('--json', 'Print JSON status')
    .action(async (agent: string | undefined, opts: { json?: boolean }) => {
      if (agent) {
        const roster = await loadRosterForCommand(cmd);
        getRosterAgent(roster, agent);
      }

      const result = await runner(...splitCommand(buildFleetServiceCommand('status', agent)));
      if (!opts.json) {
        writeCommandOutput(result);
        return;
      }

      const { exitCode, stdout, stderr } = result;
      console.log(JSON.stringify({ exitCode, stdout, stderr }));
      setExitCodeFromResult(result);
    });

  // fleet verify: the holder session and every roster agent session must exist.
  cmd
    .command('verify')
    .description('Verify the local canary holder and roster sessions on the isolated socket')
    .action(async () => {
      const roster = await loadRosterForCommand(cmd);
      const socketName = roster.tmux.socketName;
      const sessionNames = [roster.tmux.holderSession, ...roster.agents.map((agent) => agent.name)];

      for (const sessionName of sessionNames) {
        await runChecked(runner, [
          'tmux',
          '-L',
          socketName,
          'has-session',
          '-t',
          `=${sessionName}:0.0`,
        ]);
      }
      console.log(`Verified fleet on tmux socket ${socketName}.`);
    });

  /** Gathers systemd, tmux and heartbeat state for one agent into a ps row. */
  const inspectAgent = async (
    agent: FleetAgent,
    socketName: string,
    mosaicHome: string,
    identity: { tenant_id: string; host: string },
    nowMs: number,
  ): Promise<AgentPsRow> => {
    // systemd show
    const showResult = await runner(...splitCommand(buildSystemdShowCommand(agent.name)));
    const sysInfo = parseSystemdShow(showResult.stdout);

    // tmux list-panes
    const panesResult = await runner(
      ...splitCommand(buildTmuxListPanesCommand(agent.name, socketName)),
    );
    const paneInfo = parseTmuxListPanes(panesResult.stdout, nowMs);

    // heartbeat: an unreadable file is reported as unknown health, not an error.
    const hbFile = heartbeatPath(agent.name, mosaicHome);
    let hbContent: string | null;
    try {
      hbContent = await readFile(hbFile, 'utf8');
    } catch {
      hbContent = null;
    }
    const heartbeat = parseHeartbeat(hbContent, nowMs);

    return {
      name: agent.name,
      tenant_id: identity.tenant_id,
      host: identity.host,
      runtime: agent.runtime,
      systemdActive: sysInfo.ActiveState,
      systemdEnabled: sysInfo.UnitFileState,
      paneAlive: !paneInfo.dead,
      panePid: paneInfo.pid,
      paneCommand: paneInfo.command,
      idleSeconds: paneInfo.idleSeconds,
      heartbeat,
      driftFlag: detectDrift(agent.runtime, paneInfo.command),
      // Running now, but would not come back after a reboot.
      bootEnableWarning: sysInfo.ActiveState === 'active' && sysInfo.UnitFileState === 'disabled',
    };
  };

  // Widths of the padded ps table columns; the final FLAGS column is unpadded.
  const psColumnWidths = [18, 12, 12, 10, 16, 8, 8, 8, 12];
  const renderPsLine = (paddedCells: string[], lastCell: string): string =>
    [
      ...paddedCells.map((cell, index) => cell.padEnd(psColumnWidths[index] ?? 0)),
      lastCell,
    ].join(' ');

  /** Prints the human-readable ps table. */
  const printPsTable = (rows: AgentPsRow[]): void => {
    const header = renderPsLine(
      ['NAME', 'TENANT', 'HOST', 'RUNTIME', 'SYSTEMD', 'PANE', 'PID', 'IDLE', 'HB'],
      'FLAGS',
    );
    console.log(header);
    console.log('-'.repeat(header.length));

    for (const row of rows) {
      const systemd = `${row.systemdActive}/${row.systemdEnabled}`;
      const pane = row.paneAlive ? 'alive' : 'dead';
      const pid = row.panePid !== null ? String(row.panePid) : '-';
      const idle = row.idleSeconds !== null ? `${row.idleSeconds}s` : '-';
      const hbAge =
        row.heartbeat.ageMs !== null
          ? `${Math.round(row.heartbeat.ageMs / 1000)}s/${row.heartbeat.health}`
          : `unknown`;

      const flags: string[] = [];
      if (row.driftFlag) flags.push('DRIFT');
      if (row.bootEnableWarning) flags.push('BOOT-ENABLE');

      console.log(
        renderPsLine(
          [row.name, row.tenant_id, row.host, row.runtime, systemd, pane, pid, idle, hbAge],
          flags.join(','),
        ),
      );
    }
  };

  // fleet ps: per-agent systemd + tmux + heartbeat snapshot.
  cmd
    .command('ps')
    .description('Show real-time status for all roster agents (systemd + tmux + heartbeat)')
    .option('--json', 'Print JSON array')
    .action(async (opts: { json?: boolean }) => {
      const activePaths = resolveFleetPaths(readGlobalOptions().mosaicHome);
      const roster = await loadRosterForCommand(cmd);
      const identity = getDefaultTenantAndHost();
      // One timestamp for the whole run so all ages use the same reference.
      const nowMs = Date.now();

      const rows: AgentPsRow[] = [];
      for (const agent of roster.agents) {
        rows.push(
          await inspectAgent(
            agent,
            roster.tmux.socketName,
            activePaths.mosaicHome,
            identity,
            nowMs,
          ),
        );
      }

      if (opts.json) {
        console.log(JSON.stringify(rows, null, 2));
        return;
      }
      printPsTable(rows);
    });

  return cmd;
}
|
|
|
|
/**
 * Registers the per-agent subcommands (`roster`, `status`, `send`, `watch`,
 * `reset`, `tail`) on the given commander command.
 *
 * @param agentCommand Parent command that receives the subcommands. Its global
 *   options are consulted when loading the roster.
 * @param deps Optional injectable dependencies. Any runner or sleep function
 *   left unset falls back to the module defaults (`runCommand`,
 *   `spawnInteractive`, `defaultSleep`).
 */
export function registerFleetAgentCommands(
  agentCommand: Command,
  deps: FleetCommandDeps = {},
): void {
  const runner = deps.runner ?? runCommand;
  const iRunner = deps.interactiveRunner ?? spawnInteractive;
  const sleepFn = deps.sleepFn ?? defaultSleep;

  agentCommand
    .command('roster')
    .description('List agents from the local fleet roster')
    .option('--json', 'Print JSON')
    .action(async (opts: { json?: boolean }) => {
      const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
      if (opts.json) {
        console.log(JSON.stringify(roster, null, 2));
        return;
      }
      for (const agent of roster.agents) {
        console.log(`${agent.name}\t${agent.runtime}\t${agent.className}`);
      }
    });

  agentCommand
    .command('status [agent]')
    .description('Show tmux status for the local fleet or one agent')
    .option('--json', 'Print JSON')
    .action(async (agent: string | undefined, opts: { json?: boolean }) => {
      const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
      if (agent) {
        // Return value is unused; presumably this rejects names missing from the roster.
        getRosterAgent(roster, agent);
      }
      const command = agent
        ? ['tmux', '-L', roster.tmux.socketName, 'has-session', '-t', `=${agent}:0.0`]
        : ['tmux', '-L', roster.tmux.socketName, 'ls'];
      const result = await runner(...splitCommand(command));
      if (opts.json) {
        console.log(
          JSON.stringify({
            exitCode: result.exitCode,
            stdout: result.stdout,
            stderr: result.stderr,
          }),
        );
        setExitCodeFromResult(result);
        return;
      }
      writeCommandOutput(result);
    });

  agentCommand
    .command('send <agent>')
    .description('Send a message to a local fleet agent')
    .requiredOption('--message <text>', 'Message text')
    .option('--source-label <label>', 'Source label for the message preamble')
    .option('--source <label>', 'Alias for --source-label')
    .option(
      '--verify',
      'Verify message was accepted (not left as a draft); exit non-zero if unverifiable',
    )
    .option(
      '--verify-timeout <ms>',
      `Maximum time (ms) to poll for pane change when --verify is set (default: ${VERIFY_DEFAULT_TIMEOUT_MS})`,
      String(VERIFY_DEFAULT_TIMEOUT_MS),
    )
    .action(
      async (
        agent: string,
        opts: {
          message: string;
          sourceLabel?: string;
          source?: string;
          verify?: boolean;
          verifyTimeout?: string;
        },
      ) => {
        const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
        getRosterAgent(roster, agent);
        const paths = resolveFleetPaths(
          resolveMosaicHomeFromCommand(agentCommand, deps.mosaicHome),
        );
        const sourceLabel = opts.sourceLabel ?? opts.source ?? getDefaultOperatorSourceLabel();
        if (opts.verify) {
          // An unparsable timeout falls back to the default; negatives clamp to 0.
          const parsedTimeout =
            opts.verifyTimeout !== undefined ? Number.parseInt(opts.verifyTimeout, 10) : Number.NaN;
          const timeoutMs = Number.isFinite(parsedTimeout)
            ? Math.max(0, parsedTimeout)
            : VERIFY_DEFAULT_TIMEOUT_MS;

          // Capture BEFORE snapshot so we can detect stale-pane false-successes.
          // A wedged pane that still shows old non-empty content must not be reported
          // as 'accepted' — we compare BEFORE vs AFTER to guard against that case.
          const beforeResult = await runner(
            ...splitCommand(buildAgentVerifyAcceptedCommand(agent, roster.tmux.socketName)),
          );
          if (beforeResult.exitCode !== 0) {
            throw new Error(
              `send --verify: could not capture pane output before send (tmux exited ${beforeResult.exitCode}).`,
            );
          }
          const beforeSnapshot = beforeResult.stdout;

          await runChecked(
            runner,
            buildAgentSendCommand(paths, agent, opts.message, roster.tmux.socketName, sourceLabel),
          );

          // Bounded polling loop: poll capture-pane every VERIFY_POLL_INTERVAL_MS up to
          // timeoutMs. Return immediately when the pane shows 'accepted' or 'draft';
          // keep polling while 'unverifiable' (no pane change yet). Fail closed after
          // timeout with the existing "no pane change after send" message.
          const deadline = Date.now() + timeoutMs;
          let verifyResult: SendVerifyResult = 'unverifiable';

          while (true) {
            await sleepFn(VERIFY_POLL_INTERVAL_MS);
            const afterResult = await runner(
              ...splitCommand(buildAgentVerifyAcceptedCommand(agent, roster.tmux.socketName)),
            );
            if (afterResult.exitCode !== 0) {
              throw new Error(
                `send --verify: could not capture pane output to verify acceptance (tmux exited ${afterResult.exitCode}).`,
              );
            }
            verifyResult = classifySendResult(beforeSnapshot, afterResult.stdout);
            // Definitive result — stop polling immediately.
            if (verifyResult === 'accepted' || verifyResult === 'draft') {
              break;
            }
            // Still unverifiable — check if we have time left to poll again.
            if (Date.now() >= deadline) {
              break;
            }
          }

          if (verifyResult === 'draft') {
            process.exitCode = 1;
            process.stderr.write(
              `send --verify: message left as unsubmitted draft in agent "${agent}".\n`,
            );
          } else if (verifyResult === 'unverifiable') {
            process.exitCode = 1;
            process.stderr.write(
              `send --verify: could not verify delivery (no pane change after send) for agent "${agent}".\n`,
            );
          }
        } else {
          await runChecked(
            runner,
            buildAgentSendCommand(paths, agent, opts.message, roster.tmux.socketName, sourceLabel),
          );
        }
      },
    );

  agentCommand
    .command('watch <agent>')
    .description('Open a read-only view of a fleet agent tmux session (cannot send keystrokes)')
    .action(async (agent: string) => {
      const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
      getRosterAgent(roster, agent);

      // Use a GROUPED VIEWER SESSION to prevent the observer from resizing the agent's
      // window. A bare `tmux attach -r` against the agent session itself still lets the
      // client shrink the session to its terminal size; a grouped session gets INDEPENDENT
      // sizing so the agent's window is never affected by the viewer's terminal dimensions.
      //
      // Sequence:
      //   1. Create a throwaway grouped session targeting the agent (capturing runner).
      //   2. Attach -r (read-only) to the viewer session (interactiveRunner / TTY).
      //   3. Kill the viewer session on detach so stale sessions don't accumulate.
      const viewerName = buildViewerSessionName(agent);
      const socketName = roster.tmux.socketName;

      await runChecked(runner, buildAgentWatchCreateViewerCommand(agent, viewerName, socketName));

      let exitCode = 0;
      try {
        const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName));
        exitCode = await iRunner(bin, args);
      } finally {
        // Best-effort cleanup of the viewer session regardless of how the attach ended,
        // including when the interactive runner rejects. Both a non-zero kill result and
        // a rejected runner promise are intentionally suppressed — the agent session is
        // unaffected either way, and an attach error must not be masked by cleanup.
        try {
          await runner(...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)));
        } catch {
          // Intentionally ignored: cleanup is best-effort.
        }
      }

      if (exitCode !== 0) {
        process.exitCode = exitCode;
      }
    });

  agentCommand
    .command('reset <agent>')
    .description('Reset a local fleet agent by sending the runtime reset command')
    .option('--clear', 'Send /clear')
    .option('--new', 'Send /new')
    .action(async (agent: string, opts: { clear?: boolean; new?: boolean }) => {
      const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
      const rosterAgent = getRosterAgent(roster, agent);
      const paths = resolveFleetPaths(resolveMosaicHomeFromCommand(agentCommand, deps.mosaicHome));
      // Precedence: --clear, then --new, then the runtime's configured reset command,
      // then '/clear' when the runtime has no entry.
      const resetCommand = opts.clear
        ? '/clear'
        : opts.new
          ? '/new'
          : (roster.runtimes[rosterAgent.runtime]?.resetCommand ?? '/clear');
      await runChecked(
        runner,
        buildAgentResetCommand(paths, agent, resetCommand, roster.tmux.socketName),
      );
    });

  agentCommand
    .command('tail <agent>')
    .description('Print recent pane output for a local fleet agent')
    .option('-n, --lines <number>', 'Number of pane history lines', '80')
    .action(async (agent: string, opts: { lines: string }) => {
      const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
      getRosterAgent(roster, agent);
      // A non-numeric --lines value falls back to 80.
      const lines = Number.parseInt(opts.lines, 10);
      const result = await runner(
        ...splitCommand(
          buildAgentTailCommand(agent, Number.isFinite(lines) ? lines : 80, roster.tmux.socketName),
        ),
      );
      writeCommandOutput(result);
    });
}
|
|
|
|
/**
 * Installs the fleet files for the roster selected by `cmd`.
 *
 * Creates the tool, systemd-user and agent-env directories, copies the helper
 * scripts (marking them executable) and the two systemd unit files out of
 * `frameworkRoot`, then writes one `<agent>.env` file per roster agent, merging
 * with any env file that is already readable on disk.
 *
 * @param cmd Command whose options carry `mosaicHome` (and the roster location).
 * @param frameworkRoot Directory containing the `tools/` and `systemd/` sources.
 */
async function installFleet(cmd: Command, frameworkRoot: string): Promise<void> {
  const { mosaicHome } = cmd.opts<{ mosaicHome: string }>();
  const activePaths = resolveFleetPaths(mosaicHome);
  assertDefaultMosaicHomeForSystemd(activePaths.mosaicHome);
  const roster = await loadRosterForCommand(cmd);

  const requiredDirs = [
    activePaths.fleetToolsDir,
    activePaths.tmuxToolsDir,
    activePaths.systemdUserDir,
    activePaths.agentEnvDir,
  ];
  for (const dir of requiredDirs) {
    await mkdir(dir, { recursive: true });
  }

  // Helper scripts: copied first, then all made executable.
  const scripts = [
    {
      from: join(frameworkRoot, 'tools', 'fleet', 'start-agent-session.sh'),
      to: join(activePaths.fleetToolsDir, 'start-agent-session.sh'),
    },
    {
      from: join(frameworkRoot, 'tools', 'tmux', 'send-message.sh'),
      to: join(activePaths.tmuxToolsDir, 'send-message.sh'),
    },
    {
      from: join(frameworkRoot, 'tools', 'tmux', 'agent-send.sh'),
      to: join(activePaths.tmuxToolsDir, 'agent-send.sh'),
    },
  ];
  for (const script of scripts) {
    await copyFile(script.from, script.to);
  }
  for (const script of scripts) {
    await chmod(script.to, 0o755);
  }

  // systemd user units keep their file name at the destination.
  for (const unitFile of ['mosaic-tmux-holder.service', 'mosaic-agent@.service']) {
    await copyFile(
      join(frameworkRoot, 'systemd', 'user', unitFile),
      join(activePaths.systemdUserDir, unitFile),
    );
  }

  for (const agent of roster.agents) {
    const envPath = join(activePaths.agentEnvDir, `${agent.name}.env`);
    let previousEnv: string | undefined;
    if (await canRead(envPath)) {
      previousEnv = await readFile(envPath, 'utf8');
    }
    const generatedEnv = generateAgentEnv(roster, agent);
    await writeFile(envPath, mergeAgentEnv(generatedEnv, previousEnv));
  }

  console.log(`Installed fleet files for ${roster.agents.length} agent(s).`);
}
|
|
|
|
/**
 * Loads the fleet roster addressed by the command's own `mosaicHome` and
 * optional `roster` options.
 */
async function loadRosterForCommand(cmd: Command): Promise<FleetRoster> {
  const { mosaicHome, roster: explicitRoster } = cmd.opts<{
    mosaicHome: string;
    roster?: string;
  }>();
  const rosterPath = await resolveRosterPath(mosaicHome, explicitRoster);
  return loadFleetRoster(rosterPath);
}
|
|
|
|
/**
 * Loads the fleet roster for an agent subcommand, reading options merged with
 * those of its parent commands.
 *
 * The Mosaic home is taken from the `mosaicHome` option when set, otherwise from
 * `mosaicHomeOverride`, otherwise from `defaultMosaicHome()`.
 */
async function loadRosterFromAgentCommand(
  command: Command,
  mosaicHomeOverride?: string,
): Promise<FleetRoster> {
  const { mosaicHome: optionHome, roster: explicitRoster } = command.optsWithGlobals<{
    mosaicHome?: string;
    roster?: string;
  }>();
  const effectiveHome = optionHome ?? mosaicHomeOverride ?? defaultMosaicHome();
  const rosterPath = await resolveRosterPath(effectiveHome, explicitRoster);
  return loadFleetRoster(rosterPath);
}
|
|
|
|
/**
 * Picks the Mosaic home directory for a command: the `mosaicHome` option
 * (including inherited globals) wins, then `override`, then
 * `defaultMosaicHome()`.
 */
function resolveMosaicHomeFromCommand(command: Command, override?: string): string {
  const { mosaicHome } = command.optsWithGlobals<{ mosaicHome?: string }>();
  if (mosaicHome != null) {
    return mosaicHome;
  }
  if (override != null) {
    return override;
  }
  return defaultMosaicHome();
}
|
|
|
|
/**
 * Parses roster file contents. Files whose path ends in `.json` are parsed as
 * JSON; everything else is parsed as YAML. The text is trimmed first and the
 * result is returned unvalidated.
 */
function parseRosterText(text: string, path: string): RawFleetRoster {
  const body = text.trim();
  const isJson = path.endsWith('.json');
  const parsed: unknown = isJson ? JSON.parse(body) : YAML.parse(body);
  return parsed as RawFleetRoster;
}
|
|
|
|
/**
 * Validates a parsed roster and converts it to the normalized `FleetRoster`
 * shape.
 *
 * Checks run in a fixed order: unknown keys at the root and in the `tmux`,
 * `defaults` and per-runtime sections, then `version === 1`,
 * `transport === 'tmux'`, and a non-empty `agents` array. Agents are then
 * normalized and checked for duplicate names before the remaining string
 * fields are read. Both snake_case and camelCase keys are accepted, with
 * snake_case taking precedence.
 *
 * @throws Error describing the first violation encountered.
 */
function normalizeRoster(raw: RawFleetRoster): FleetRoster {
  assertObject(raw, 'Fleet roster');
  const rootKeys = ['version', 'transport', 'tmux', 'defaults', 'runtimes', 'agents'];
  assertKnownKeys(raw, 'Fleet roster', rootKeys);

  const { tmux, defaults, runtimes } = raw;

  if (tmux !== undefined) {
    assertObject(tmux, 'Fleet roster tmux');
    const tmuxKeys = ['socket_name', 'socketName', 'holder_session', 'holderSession'];
    assertKnownKeys(tmux, 'Fleet roster tmux', tmuxKeys);
  }

  if (defaults !== undefined) {
    assertObject(defaults, 'Fleet roster defaults');
    assertKnownKeys(defaults, 'Fleet roster defaults', ['working_directory', 'workingDirectory']);
  }

  if (runtimes !== undefined) {
    assertObject(runtimes, 'Fleet roster runtimes');
    Object.entries(runtimes).forEach(([runtime, config]) => {
      const runtimeLabel = `Fleet roster runtime "${runtime}"`;
      assertObject(config, runtimeLabel);
      assertKnownKeys(config, runtimeLabel, ['reset_command', 'resetCommand']);
    });
  }

  if (raw.version !== 1) {
    throw new Error('Fleet roster version must be 1.');
  }
  if (raw.transport !== 'tmux') {
    throw new Error('Fleet roster transport must be "tmux".');
  }
  if (!Array.isArray(raw.agents) || raw.agents.length === 0) {
    throw new Error('Fleet roster must define at least one agent.');
  }

  const agents = raw.agents.map((entry) => normalizeAgent(entry));
  assertUniqueAgentNames(agents);

  // Evaluated in the same order as the fields of the returned object.
  const socketName = stringValue(
    tmux?.socket_name ?? tmux?.socketName,
    DEFAULT_SOCKET_NAME,
    'Fleet roster tmux socket_name',
  );
  const holderSession = stringValue(
    tmux?.holder_session ?? tmux?.holderSession,
    DEFAULT_HOLDER_SESSION,
    'Fleet roster tmux holder_session',
  );
  const workingDirectory = stringValue(
    defaults?.working_directory ?? defaults?.workingDirectory,
    DEFAULT_WORKING_DIRECTORY,
    'Fleet roster defaults working_directory',
  );
  const normalizedRuntimes = normalizeRuntimes(raw.runtimes as RawFleetRoster['runtimes']);

  return {
    version: 1,
    transport: 'tmux',
    tmux: { socketName, holderSession },
    defaults: { workingDirectory },
    runtimes: normalizedRuntimes,
    agents,
  };
}
|
|
|
|
/**
 * Validates one raw roster agent entry and converts it to a `FleetAgent`.
 *
 * The entry must be an object with only known keys, a name made of letters,
 * digits, `_`, `.` or `-`, and a non-empty runtime. `class` defaults to
 * `'worker'`; the remaining fields are optional and accept either snake_case or
 * camelCase keys, with snake_case taking precedence.
 *
 * @throws Error when a field has the wrong type, or the name or runtime is invalid.
 */
function normalizeAgent(raw: NonNullable<RawFleetRoster['agents']>[number]): FleetAgent {
  assertObject(raw, 'Fleet roster agent');
  assertKnownKeys(raw, 'Fleet roster agent', [
    'name',
    'runtime',
    'class',
    'working_directory',
    'workingDirectory',
    'model_hint',
    'modelHint',
    'persistent_persona',
    'persistentPersona',
    'reset_between_tasks',
    'resetBetweenTasks',
    'kickstart_template',
    'kickstartTemplate',
  ]);

  const name = stringValue(raw.name, '', 'Fleet roster agent name');
  const runtimeLabel = `Fleet roster agent "${name || '<unknown>'}" runtime`;
  const runtime = stringValue(raw.runtime, '', runtimeLabel);

  const nameIsValid = name !== '' && /^[A-Za-z0-9_.-]+$/.test(name);
  if (!nameIsValid) {
    throw new Error(`Invalid fleet agent name: ${name || '<empty>'}`);
  }
  if (runtime === '') {
    throw new Error(`Fleet agent "${name}" must define a runtime.`);
  }

  // Builds the error label for one field of this agent.
  const fieldLabel = (field: string): string => `Fleet roster agent "${name}" ${field}`;

  const className = stringValue(raw.class, 'worker', fieldLabel('class'));
  const workingDirectory = optionalString(
    raw.working_directory ?? raw.workingDirectory,
    fieldLabel('working_directory'),
  );
  const modelHint = optionalString(raw.model_hint ?? raw.modelHint, fieldLabel('model_hint'));
  const persistentPersona = optionalBooleanOrString(
    raw.persistent_persona ?? raw.persistentPersona,
    fieldLabel('persistent_persona'),
  );
  const resetBetweenTasks = optionalBoolean(
    raw.reset_between_tasks ?? raw.resetBetweenTasks,
    fieldLabel('reset_between_tasks'),
  );
  const kickstartTemplate = optionalString(
    raw.kickstart_template ?? raw.kickstartTemplate,
    fieldLabel('kickstart_template'),
  );

  return {
    name,
    runtime,
    className,
    workingDirectory,
    modelHint,
    persistentPersona,
    resetBetweenTasks,
    kickstartTemplate,
  };
}
|
|
|
|
/**
 * Builds the runtime → reset-command table: starts from a copy of
 * `DEFAULT_RUNTIME_RESETS` and overlays every runtime present in `raw`.
 * A runtime entry without a reset command gets `'/clear'`.
 *
 * @throws Error when a configured reset command is not a string.
 */
function normalizeRuntimes(
  raw: RawFleetRoster['runtimes'] | undefined,
): Record<string, { resetCommand: string }> {
  const merged: Record<string, { resetCommand: string }> = { ...DEFAULT_RUNTIME_RESETS };
  Object.entries(raw ?? {}).forEach(([runtime, config]) => {
    const resetCommand = stringValue(
      config.reset_command ?? config.resetCommand,
      '/clear',
      `Fleet roster runtime "${runtime}" reset_command`,
    );
    merged[runtime] = { resetCommand };
  });
  return merged;
}
|
|
|
|
/**
 * Asserts that `value` is a non-null, non-array object.
 *
 * @throws Error `"<label> must be an object."` otherwise.
 */
function assertObject(value: unknown, label: string): asserts value is Record<string, unknown> {
  const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (isPlainObject) {
    return;
  }
  throw new Error(`${label} must be an object.`);
}
|
|
|
|
/**
 * Asserts that every own enumerable key of `value` appears in `allowedKeys`.
 *
 * @throws Error listing the offending keys, in object key order.
 */
function assertKnownKeys(
  value: Record<string, unknown>,
  label: string,
  allowedKeys: readonly string[],
): void {
  const unexpected: string[] = [];
  for (const key of Object.keys(value)) {
    if (!allowedKeys.includes(key)) {
      unexpected.push(key);
    }
  }
  if (unexpected.length === 0) {
    return;
  }
  throw new Error(`${label} has unknown field(s): ${unexpected.join(', ')}.`);
}
|
|
|
|
/**
 * Asserts that no two agents share a name.
 *
 * @throws Error naming the first repeated agent name.
 */
function assertUniqueAgentNames(agents: FleetAgent[]): void {
  const taken = new Set<string>();
  agents.forEach(({ name }) => {
    const sizeBefore = taken.size;
    taken.add(name);
    // The set only grows when the name is new.
    if (taken.size === sizeBefore) {
      throw new Error(`Fleet roster has duplicate agent name: ${name}.`);
    }
  });
}
|
|
|
|
/**
 * Reads a string setting: returns `fallback` when `value` is `undefined`, and
 * `value` itself when it is a string.
 *
 * @throws Error `"<label> must be a string."` for any other type (including `null`).
 */
function stringValue(value: unknown, fallback = '', label = 'Value'): string {
  switch (typeof value) {
    case 'undefined':
      return fallback;
    case 'string':
      return value;
    default:
      throw new Error(`${label} must be a string.`);
  }
}
|
|
|
|
/**
 * Reads an optional string setting; `undefined` passes through unchanged.
 *
 * @throws Error `"<label> must be a string."` for any other non-string value.
 */
function optionalString(value: unknown, label = 'Value'): string | undefined {
  if (value === undefined || typeof value === 'string') {
    return value;
  }
  throw new Error(`${label} must be a string.`);
}
|
|
|
|
/**
 * Reads an optional boolean setting; `undefined` passes through unchanged.
 *
 * @throws Error `"<label> must be a boolean."` for any other non-boolean value.
 */
function optionalBoolean(value: unknown, label = 'Value'): boolean | undefined {
  if (value === undefined || typeof value === 'boolean') {
    return value;
  }
  throw new Error(`${label} must be a boolean.`);
}
|
|
|
|
/**
 * Reads an optional setting that may be either a boolean or a string;
 * `undefined` passes through unchanged.
 *
 * @throws Error `"<label> must be a boolean or string."` for any other type.
 */
function optionalBooleanOrString(value: unknown, label = 'Value'): boolean | string | undefined {
  switch (typeof value) {
    case 'undefined':
    case 'boolean':
    case 'string':
      return value;
    default:
      throw new Error(`${label} must be a boolean or string.`);
  }
}
|
|
|
|
/**
 * Expands a leading `~` or `~/` to the current user's home directory.
 * Any other path (including `~user/...`) is returned unchanged.
 */
function expandHome(path: string): string {
  const refersToHome = path === '~' || path.startsWith('~/');
  if (!refersToHome) {
    return path;
  }
  // Drop the "~/" prefix; for a bare "~" this leaves an empty remainder.
  const remainder = path.slice(2);
  return join(homedir(), remainder);
}
|
|
|
|
/**
 * Formats a value for a shell-style env file. Values made only of
 * `A-Z a-z 0-9 _ . / : = @ + -` are emitted bare; anything else (including the
 * empty string) is wrapped in single quotes, with embedded single quotes
 * written as `'"'"'`.
 */
function shellEnvValue(value: string): string {
  const safeUnquoted = /^[A-Za-z0-9_./:=@+-]+$/;
  if (safeUnquoted.test(value)) {
    return value;
  }
  // Close the quote, emit a double-quoted single quote, reopen the quote.
  const escaped = value.split("'").join("'\"'\"'");
  return "'" + escaped + "'";
}
|
|
|
|
/**
 * Runs the `stop` service command for every named agent and then once more
 * with no agent name, continuing past failures.
 *
 * Stdout of successful commands is forwarded. Failures are collected, and a
 * single error summarising all of them is thrown only after every stop has
 * been attempted.
 *
 * @throws Error when at least one stop command exited non-zero.
 */
async function stopFleetBestEffort(runner: CommandRunner, agentNames: string[]): Promise<void> {
  const failures: string[] = [];

  // Runs one stop command and records a failure description if it exits non-zero.
  const attempt = async (command: string[]): Promise<void> => {
    const outcome = await runner(...splitCommand(command));
    writeSuccessfulCommandOutput(outcome);
    if (outcome.exitCode === 0) {
      return;
    }
    failures.push(outcome.stderr || outcome.stdout || `Command failed: ${command.join(' ')}`);
  };

  for (const agentName of agentNames) {
    await attempt(buildFleetServiceCommand('stop', agentName));
  }
  await attempt(buildFleetServiceCommand('stop'));

  if (failures.length === 0) {
    return;
  }
  throw new Error(
    `Fleet stop completed with ${failures.length} failure(s): ${failures.join('; ')}`,
  );
}
|
|
|
|
/**
 * Runs a command and requires it to succeed. On success any stdout is
 * forwarded to this process's stdout.
 *
 * @throws Error carrying the command's stderr, else its stdout, else a generic
 *   "Command failed" message, when the exit code is non-zero.
 */
async function runChecked(runner: CommandRunner, command: string[]): Promise<void> {
  const [bin, args] = splitCommand(command);
  const { exitCode, stdout, stderr } = await runner(bin, args);
  if (exitCode !== 0) {
    throw new Error(stderr || stdout || `Command failed: ${command.join(' ')}`);
  }
  if (stdout) {
    process.stdout.write(stdout);
  }
}
|
|
|
|
/**
 * Splits an argv-style array into `[binary, remainingArgs]`.
 *
 * @throws Error when the array is empty or its first element is an empty string.
 */
function splitCommand(command: string[]): [string, string[]] {
  const bin = command[0];
  if (!bin) {
    throw new Error('Cannot run an empty command.');
  }
  return [bin, command.slice(1)];
}
|
|
|
|
/**
 * Validates a fleet init profile name.
 *
 * @returns The profile, narrowed to one of the supported literals.
 * @throws Error for any name other than `minimal` or `local-canary`.
 */
function parseInitProfile(profile: string): 'minimal' | 'local-canary' {
  const supported = ['minimal', 'local-canary'] as const;
  const match = supported.find((candidate) => candidate === profile);
  if (match === undefined) {
    throw new Error(`Unsupported fleet profile "${profile}". Use: minimal, local-canary.`);
  }
  return match;
}
|
|
|
|
/**
 * Forwards a finished command's output to this process and mirrors its exit
 * code.
 *
 * Stdout and stderr are each forwarded to the matching stream when non-empty.
 * Stderr is forwarded even when stdout is present, so a command that printed
 * partial output before failing does not end with a non-zero exit code and no
 * diagnostic.
 *
 * @param result Captured result of the command.
 */
function writeCommandOutput(result: CommandResult): void {
  if (result.stdout) {
    process.stdout.write(result.stdout);
  }
  if (result.stderr) {
    process.stderr.write(result.stderr);
  }
  setExitCodeFromResult(result);
}
|
|
|
|
/**
 * Forwards stdout of a command to this process's stdout, but only when the
 * command exited with code 0 and produced output. Stderr is never written.
 */
function writeSuccessfulCommandOutput(result: CommandResult): void {
  const { exitCode, stdout } = result;
  if (exitCode === 0 && stdout) {
    process.stdout.write(stdout);
  }
}
|
|
|
|
/**
 * Copies a non-zero command exit code onto `process.exitCode`. A zero exit
 * code leaves `process.exitCode` untouched, so an earlier failure is preserved.
 */
function setExitCodeFromResult(result: CommandResult): void {
  const { exitCode } = result;
  if (exitCode === 0) {
    return;
  }
  process.exitCode = exitCode;
}
|
|
|
|
/**
 * Default `CommandRunner`: spawns `command` with stdin ignored and captures
 * stdout and stderr as UTF-8 text.
 *
 * The returned promise never rejects. A spawn failure resolves with exit code
 * 127 and the error message as stderr; otherwise it resolves on `close` with
 * the child's exit code, or 1 when the child reports none.
 *
 * @param command Executable to run.
 * @param args Arguments passed to the executable.
 */
function runCommand(command: string, args: string[]): Promise<CommandResult> {
  return new Promise((resolvePromise) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    // Decode at the stream level so a multi-byte UTF-8 sequence that straddles
    // two pipe chunks is reassembled instead of being decoded per chunk into
    // replacement characters.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk: string) => {
      stdout += chunk;
    });
    child.stderr.on('data', (chunk: string) => {
      stderr += chunk;
    });
    child.on('error', (error) => {
      resolvePromise({ stdout, stderr: error.message, exitCode: 127 });
    });
    child.on('close', (code) => {
      // If 'error' already resolved the promise this second resolve is a no-op.
      resolvePromise({ stdout, stderr, exitCode: code ?? 1 });
    });
  });
}
|
|
|
|
/**
 * Returns the absolute path of the `framework` directory located two levels
 * above the directory that contains this module file.
 */
function resolveFrameworkRoot(): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const hops = ['..', '..', 'framework'];
  return resolve(moduleDir, ...hops);
}
|
|
|
|
/**
 * Default InteractiveRunner: runs the command with stdio inherited from this
 * process, so full-screen interactive programs such as `tmux attach` get the
 * real terminal instead of a pipe.
 *
 * The promise never rejects. It resolves with 127 when the process cannot be
 * spawned, otherwise with the child's exit code (1 when none is reported).
 */
function spawnInteractive(command: string, args: string[]): Promise<number> {
  return new Promise<number>((settle) => {
    spawn(command, args, { stdio: 'inherit' })
      .on('error', () => {
        settle(127);
      })
      .on('close', (exitCode) => {
        settle(exitCode ?? 1);
      });
  });
}
|
|
|
|
/**
 * Default SleepFn: resolves after `ms` milliseconds using setTimeout.
 * The send --verify polling loop uses it unless a stub is injected.
 */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
|
|
/**
 * Reports whether the current process has read access to `path`.
 * Any failure of the access check (missing file, permissions, …) yields `false`.
 */
async function canRead(path: string): Promise<boolean> {
  return access(path, constants.R_OK).then(
    () => true,
    () => false,
  );
}
|
|
|
|
/**
 * Decides which roster file to load.
 *
 * A non-empty `explicitPath` is returned as-is. Otherwise the roster path from
 * `resolveFleetPaths(mosaicHome)` is used when readable, falling back to
 * `<mosaicHome>/fleet/roster.json`. The fallback is returned without checking
 * that it exists.
 */
export async function resolveRosterPath(
  mosaicHome: string,
  explicitPath?: string,
): Promise<string> {
  if (explicitPath) {
    return explicitPath;
  }
  const { rosterPath: yamlCandidate } = resolveFleetPaths(mosaicHome);
  const yamlReadable = await canRead(yamlCandidate);
  return yamlReadable ? yamlCandidate : join(mosaicHome, 'fleet', 'roster.json');
}
|