Compare commits
1 Commits
feat/h2-sy
...
fix/h2-rea
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f91bbeea48 |
70
docs/scratchpads/h2-readiness-available.md
Normal file
70
docs/scratchpads/h2-readiness-available.md
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# H2 — readiness semantics: available, not stuck
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Correct fleet readiness semantics so a healthy long-idle agent is reported as `available` (good/assignable) instead of `stuck` (fault). Reserve `stuck` in the type/JSON value space for future positive block evidence.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- `packages/mosaic/src/commands/fleet.ts`
|
||||||
|
- replace `idle` readiness state with `available`
|
||||||
|
- keep `stuck` in the union but stop emitting it from idle-only heuristics
|
||||||
|
- remove stuck threshold helper/env handling
|
||||||
|
- remove IDLE/STUCK alarm flags from table rendering
|
||||||
|
- `packages/mosaic/src/commands/fleet.spec.ts`
|
||||||
|
- update classifier branch/boundary tests
|
||||||
|
- assert very long idle maps to `available`, not `stuck`
|
||||||
|
- update table/JSON assertions for available with no alarm flags
|
||||||
|
- remove stuck threshold helper tests
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- `classifyReadiness()` remains pure/total/never-throw and maps:
|
||||||
|
- dead/stale/unknown unchanged
|
||||||
|
- heartbeat status `busy`, or null/undefined/non-finite idle seconds, to `working`
|
||||||
|
- idle >= activity threshold to `available`
|
||||||
|
- idle < activity threshold to `working`
|
||||||
|
- No idle-derived path emits `stuck`.
|
||||||
|
- `MOSAIC_HEARTBEAT_IDLE_THRESHOLD` remains backward compatible as the working→available activity threshold.
|
||||||
|
- `MOSAIC_HEARTBEAT_STUCK_THRESHOLD` and helper/default are removed.
|
||||||
|
- `fleet ps` keeps the idle-seconds column header `IDLE`, renders `available` in HB label, and does not add IDLE/STUCK warning flags.
|
||||||
|
- Local gates green: build precheck, typecheck, lint, format:check, fleet vitest.
|
||||||
|
- PR opened against `main`; no merge by worker.
|
||||||
|
|
||||||
|
## Constraints / Assumptions
|
||||||
|
|
||||||
|
- Source branch: `origin/main` @ `1020cfa`.
|
||||||
|
- `docs/TASKS.md` is orchestrator-owned; worker will not modify it.
|
||||||
|
- Documentation impact is captured in this scratchpad and the PR description; there are no user/admin guide behavior changes beyond the CLI readiness label semantics.
|
||||||
|
|
||||||
|
## Plan
|
||||||
|
|
||||||
|
1. Install dependencies with requested PNPM environment.
|
||||||
|
2. Inspect current H1/H1b readiness implementation and tests.
|
||||||
|
3. Update classifier types/helpers/rendering.
|
||||||
|
4. Update focused tests.
|
||||||
|
5. Run build precheck + required gates.
|
||||||
|
6. Run automated code review, remediate any findings.
|
||||||
|
7. Queue guard, push, open PR.
|
||||||
|
|
||||||
|
## Progress
|
||||||
|
|
||||||
|
- 2026-06-24: Branch created from `origin/main` @ `1020cfa`.
|
||||||
|
- 2026-06-24: Replaced idle-derived `idle`/`stuck` outputs with `available`; retained `stuck` in type union for future positive block evidence.
|
||||||
|
- 2026-06-24: Removed stuck threshold env/helper plumbing and IDLE/STUCK alarm flags.
|
||||||
|
- 2026-06-24: Updated classifier and table-render tests for available semantics.
|
||||||
|
|
||||||
|
## Verification Evidence
|
||||||
|
|
||||||
|
- `pnpm install --store-dir "$HOME/.pnpm-store"` — pass.
|
||||||
|
- `npx turbo build --filter=@mosaicstack/mosaic^...` — pass, 12/12 tasks successful.
|
||||||
|
- `pnpm typecheck` — pass, 41/41 tasks successful.
|
||||||
|
- `pnpm lint` — pass, 23/23 tasks successful.
|
||||||
|
- `pnpm format:check` — pass, all matched files use Prettier style.
|
||||||
|
- `pnpm --filter @mosaicstack/mosaic exec vitest run src/commands/fleet.spec.ts` — pass, 177 tests.
|
||||||
|
- `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` — approve, 0 findings (reviewed supplied diff; sandbox file-inspection limitation noted by tool).
|
||||||
|
|
||||||
|
## Risks / Blockers
|
||||||
|
|
||||||
|
- No current blocker.
|
||||||
|
- The review tool could not inspect repo files directly due to a sandbox wrapper limitation, but it reviewed the supplied diff and approved with no findings.
|
||||||
@@ -27,7 +27,6 @@ import {
|
|||||||
enableFleetUnits,
|
enableFleetUnits,
|
||||||
FLEET_PROFILES,
|
FLEET_PROFILES,
|
||||||
HEARTBEAT_IDLE_THRESHOLD_SECONDS,
|
HEARTBEAT_IDLE_THRESHOLD_SECONDS,
|
||||||
HEARTBEAT_STUCK_THRESHOLD_SECONDS,
|
|
||||||
generateAgentEnv,
|
generateAgentEnv,
|
||||||
getDefaultOperatorSourceLabel,
|
getDefaultOperatorSourceLabel,
|
||||||
getDefaultTenantAndHost,
|
getDefaultTenantAndHost,
|
||||||
@@ -48,7 +47,6 @@ import {
|
|||||||
resolvePresetFilename,
|
resolvePresetFilename,
|
||||||
RUNTIME_ACCEPTABLE_COMMANDS,
|
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||||
serializeRosterToYaml,
|
serializeRosterToYaml,
|
||||||
stuckThresholdSeconds,
|
|
||||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||||
VERIFY_POLL_INTERVAL_MS,
|
VERIFY_POLL_INTERVAL_MS,
|
||||||
type AgentPsRow,
|
type AgentPsRow,
|
||||||
@@ -940,42 +938,33 @@ describe('fleet ps — heartbeat parsing', () => {
|
|||||||
|
|
||||||
describe('fleet ps — readiness thresholds', () => {
|
describe('fleet ps — readiness thresholds', () => {
|
||||||
const savedIdle = process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
const savedIdle = process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
||||||
const savedStuck = process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD;
|
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
if (savedIdle === undefined) delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
if (savedIdle === undefined) delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
||||||
else process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = savedIdle;
|
else process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = savedIdle;
|
||||||
if (savedStuck === undefined) delete process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD;
|
|
||||||
else process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD = savedStuck;
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('uses default readiness thresholds when env is unset', () => {
|
it('uses the default activity threshold when env is unset', () => {
|
||||||
delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
||||||
delete process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD;
|
|
||||||
|
|
||||||
expect(idleThresholdSeconds()).toBe(HEARTBEAT_IDLE_THRESHOLD_SECONDS);
|
expect(idleThresholdSeconds()).toBe(HEARTBEAT_IDLE_THRESHOLD_SECONDS);
|
||||||
expect(stuckThresholdSeconds()).toBe(HEARTBEAT_STUCK_THRESHOLD_SECONDS);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('honors positive integer readiness thresholds from env', () => {
|
it('honors a positive integer activity threshold from env', () => {
|
||||||
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '120';
|
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '120';
|
||||||
process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD = '480';
|
|
||||||
|
|
||||||
expect(idleThresholdSeconds()).toBe(120);
|
expect(idleThresholdSeconds()).toBe(120);
|
||||||
expect(stuckThresholdSeconds()).toBe(480);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('falls back to defaults for invalid readiness thresholds', () => {
|
it('falls back to the default for invalid activity thresholds', () => {
|
||||||
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '0';
|
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '0';
|
||||||
process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD = 'not-a-number';
|
|
||||||
|
|
||||||
expect(idleThresholdSeconds()).toBe(HEARTBEAT_IDLE_THRESHOLD_SECONDS);
|
expect(idleThresholdSeconds()).toBe(HEARTBEAT_IDLE_THRESHOLD_SECONDS);
|
||||||
expect(stuckThresholdSeconds()).toBe(HEARTBEAT_STUCK_THRESHOLD_SECONDS);
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('fleet ps — readiness classification', () => {
|
describe('fleet ps — readiness classification', () => {
|
||||||
const thresholds = { idleThresholdSeconds: 300, stuckThresholdSeconds: 900 };
|
const thresholds = { idleThresholdSeconds: 300 };
|
||||||
|
|
||||||
it('reports dead when the pane is not alive', () => {
|
it('reports dead when the pane is not alive', () => {
|
||||||
expect(
|
expect(
|
||||||
@@ -1004,7 +993,7 @@ describe('fleet ps — readiness classification', () => {
|
|||||||
).toBe('stale');
|
).toBe('stale');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports working when heartbeat status is busy, even past stuck threshold', () => {
|
it('reports working when heartbeat status is busy, even after the activity threshold', () => {
|
||||||
expect(
|
expect(
|
||||||
classifyReadiness(
|
classifyReadiness(
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'busy', idleSeconds: 2_000 },
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'busy', idleSeconds: 2_000 },
|
||||||
@@ -1013,7 +1002,7 @@ describe('fleet ps — readiness classification', () => {
|
|||||||
).toBe('working');
|
).toBe('working');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports working when pane idle seconds are unavailable', () => {
|
it('reports working when pane idle seconds are null', () => {
|
||||||
expect(
|
expect(
|
||||||
classifyReadiness(
|
classifyReadiness(
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: null },
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: null },
|
||||||
@@ -1022,25 +1011,31 @@ describe('fleet ps — readiness classification', () => {
|
|||||||
).toBe('working');
|
).toBe('working');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports stuck at the stuck threshold boundary', () => {
|
it('reports working when pane idle seconds are undefined', () => {
|
||||||
expect(
|
expect(
|
||||||
classifyReadiness(
|
classifyReadiness({ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok' }, thresholds),
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 900 },
|
).toBe('working');
|
||||||
thresholds,
|
|
||||||
),
|
|
||||||
).toBe('stuck');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports idle at the idle threshold boundary', () => {
|
it('reports working when pane idle seconds are non-finite', () => {
|
||||||
|
expect(
|
||||||
|
classifyReadiness(
|
||||||
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: Number.NaN },
|
||||||
|
thresholds,
|
||||||
|
),
|
||||||
|
).toBe('working');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports available at the activity threshold boundary', () => {
|
||||||
expect(
|
expect(
|
||||||
classifyReadiness(
|
classifyReadiness(
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 300 },
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 300 },
|
||||||
thresholds,
|
thresholds,
|
||||||
),
|
),
|
||||||
).toBe('idle');
|
).toBe('available');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('reports working below the idle threshold', () => {
|
it('reports working below the activity threshold', () => {
|
||||||
expect(
|
expect(
|
||||||
classifyReadiness(
|
classifyReadiness(
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 299 },
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 299 },
|
||||||
@@ -1049,13 +1044,14 @@ describe('fleet ps — readiness classification', () => {
|
|||||||
).toBe('working');
|
).toBe('working');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('checks stuck before idle when thresholds are inverted', () => {
|
it('reports very long idle as available, not stuck', () => {
|
||||||
expect(
|
const readiness = classifyReadiness(
|
||||||
classifyReadiness(
|
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 100_000 },
|
||||||
{ paneAlive: true, hbHealth: 'healthy', hbStatus: 'ok', idleSeconds: 350 },
|
thresholds,
|
||||||
{ idleThresholdSeconds: 900, stuckThresholdSeconds: 300 },
|
);
|
||||||
),
|
|
||||||
).toBe('stuck');
|
expect(readiness).toBe('available');
|
||||||
|
expect(readiness).not.toBe('stuck');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1554,7 +1550,7 @@ describe('fleet ps — command sequences issued', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe('fleet ps — readiness table output', () => {
|
describe('fleet ps — readiness table output', () => {
|
||||||
it('renders readiness in HB column and flags idle/stuck rows', async () => {
|
it('renders available in HB column without idle/stuck alarm flags', async () => {
|
||||||
const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
|
const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
|
||||||
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
const rosterPath = join(home, 'fleet', 'roster.yaml');
|
||||||
const runDir = join(home, 'fleet', 'run');
|
const runDir = join(home, 'fleet', 'run');
|
||||||
@@ -1565,36 +1561,34 @@ describe('fleet ps — readiness table output', () => {
|
|||||||
'version: 1',
|
'version: 1',
|
||||||
'transport: tmux',
|
'transport: tmux',
|
||||||
'agents:',
|
'agents:',
|
||||||
' - name: idle-agent',
|
' - name: working-agent',
|
||||||
' runtime: pi',
|
' runtime: pi',
|
||||||
' - name: stuck-agent',
|
' - name: available-agent',
|
||||||
' runtime: pi',
|
' runtime: pi',
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
);
|
);
|
||||||
|
|
||||||
const nowMs = 1_700_000_000_000;
|
const nowMs = 1_700_000_000_000;
|
||||||
const idleActivityEpoch = Math.floor((nowMs - 10_000) / 1000);
|
const workingActivityEpoch = Math.floor((nowMs - 2_000) / 1000);
|
||||||
const stuckActivityEpoch = Math.floor((nowMs - 40_000) / 1000);
|
const availableActivityEpoch = Math.floor((nowMs - 40_000) / 1000);
|
||||||
const hbTs = new Date(nowMs - 1_000).toISOString();
|
const hbTs = new Date(nowMs - 1_000).toISOString();
|
||||||
await writeFile(join(runDir, 'idle-agent.hb'), `ts=${hbTs}\npid=111\nstatus=ok\n`);
|
await writeFile(join(runDir, 'working-agent.hb'), `ts=${hbTs}\npid=111\nstatus=ok\n`);
|
||||||
await writeFile(join(runDir, 'stuck-agent.hb'), `ts=${hbTs}\npid=222\nstatus=ok\n`);
|
await writeFile(join(runDir, 'available-agent.hb'), `ts=${hbTs}\npid=222\nstatus=ok\n`);
|
||||||
|
|
||||||
const savedIdle = process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
const savedIdle = process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
||||||
const savedStuck = process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD;
|
|
||||||
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '5';
|
process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = '5';
|
||||||
process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD = '30';
|
|
||||||
|
|
||||||
const dateNow = vi.spyOn(Date, 'now').mockReturnValue(nowMs);
|
const dateNow = vi.spyOn(Date, 'now').mockReturnValue(nowMs);
|
||||||
const runner: CommandRunner = async (command, args) => {
|
const runner: CommandRunner = async (command, args) => {
|
||||||
const full = [command, ...args].join(' ');
|
const full = [command, ...args].join(' ');
|
||||||
if (full.includes('list-sessions')) {
|
if (full.includes('list-sessions')) {
|
||||||
return { stdout: 'idle-agent\nstuck-agent\n', stderr: '', exitCode: 0 };
|
return { stdout: 'working-agent\navailable-agent\n', stderr: '', exitCode: 0 };
|
||||||
}
|
}
|
||||||
if (full.includes('=idle-agent:0.0')) {
|
if (full.includes('=working-agent:0.0')) {
|
||||||
return { stdout: `111 pi 0 ${idleActivityEpoch}\n`, stderr: '', exitCode: 0 };
|
return { stdout: `111 pi 0 ${workingActivityEpoch}\n`, stderr: '', exitCode: 0 };
|
||||||
}
|
}
|
||||||
if (full.includes('=stuck-agent:0.0')) {
|
if (full.includes('=available-agent:0.0')) {
|
||||||
return { stdout: `222 pi 0 ${stuckActivityEpoch}\n`, stderr: '', exitCode: 0 };
|
return { stdout: `222 pi 0 ${availableActivityEpoch}\n`, stderr: '', exitCode: 0 };
|
||||||
}
|
}
|
||||||
if (full.includes('systemctl') && full.includes('show')) {
|
if (full.includes('systemctl') && full.includes('show')) {
|
||||||
return {
|
return {
|
||||||
@@ -1623,19 +1617,17 @@ describe('fleet ps — readiness table output', () => {
|
|||||||
dateNow.mockRestore();
|
dateNow.mockRestore();
|
||||||
if (savedIdle === undefined) delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
if (savedIdle === undefined) delete process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD;
|
||||||
else process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = savedIdle;
|
else process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD = savedIdle;
|
||||||
if (savedStuck === undefined) delete process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD;
|
|
||||||
else process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD = savedStuck;
|
|
||||||
await rm(home, { recursive: true, force: true });
|
await rm(home, { recursive: true, force: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
const idleLine = lines.find((line) => line.includes('idle-agent'));
|
const workingLine = lines.find((line) => line.includes('working-agent'));
|
||||||
const stuckLine = lines.find((line) => line.includes('stuck-agent'));
|
const availableLine = lines.find((line) => line.includes('available-agent'));
|
||||||
expect(idleLine).toBeDefined();
|
expect(workingLine).toBeDefined();
|
||||||
expect(idleLine).toContain('1s/idle');
|
expect(workingLine).toContain('1s/working');
|
||||||
expect(idleLine).toMatch(/\bIDLE\b/);
|
expect(availableLine).toBeDefined();
|
||||||
expect(stuckLine).toBeDefined();
|
expect(availableLine).toContain('1s/available');
|
||||||
expect(stuckLine).toContain('1s/stuck');
|
expect(availableLine).not.toMatch(/\bIDLE\b/);
|
||||||
expect(stuckLine).toMatch(/\bSTUCK\b/);
|
expect(availableLine).not.toMatch(/\bSTUCK\b/);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -395,7 +395,6 @@ export function buildAgentTailCommand(agentName: string, lines: number, socketNa
|
|||||||
|
|
||||||
export const HEARTBEAT_INTERVAL_MS = 15_000;
|
export const HEARTBEAT_INTERVAL_MS = 15_000;
|
||||||
export const HEARTBEAT_IDLE_THRESHOLD_SECONDS = 300;
|
export const HEARTBEAT_IDLE_THRESHOLD_SECONDS = 300;
|
||||||
export const HEARTBEAT_STUCK_THRESHOLD_SECONDS = 900;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Heartbeat interval in ms, honoring MOSAIC_HEARTBEAT_INTERVAL (seconds) so the
|
* Heartbeat interval in ms, honoring MOSAIC_HEARTBEAT_INTERVAL (seconds) so the
|
||||||
@@ -407,20 +406,14 @@ export function heartbeatIntervalMs(): number {
|
|||||||
return Number.isFinite(sec) && sec > 0 ? sec * 1000 : HEARTBEAT_INTERVAL_MS;
|
return Number.isFinite(sec) && sec > 0 ? sec * 1000 : HEARTBEAT_INTERVAL_MS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Idle threshold in seconds, honoring MOSAIC_HEARTBEAT_IDLE_THRESHOLD. */
|
/** Activity threshold in seconds, honoring MOSAIC_HEARTBEAT_IDLE_THRESHOLD. */
|
||||||
export function idleThresholdSeconds(): number {
|
export function idleThresholdSeconds(): number {
|
||||||
const sec = Number.parseInt(process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD ?? '', 10);
|
const sec = Number.parseInt(process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD ?? '', 10);
|
||||||
return Number.isFinite(sec) && sec > 0 ? sec : HEARTBEAT_IDLE_THRESHOLD_SECONDS;
|
return Number.isFinite(sec) && sec > 0 ? sec : HEARTBEAT_IDLE_THRESHOLD_SECONDS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Stuck threshold in seconds, honoring MOSAIC_HEARTBEAT_STUCK_THRESHOLD. */
|
|
||||||
export function stuckThresholdSeconds(): number {
|
|
||||||
const sec = Number.parseInt(process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD ?? '', 10);
|
|
||||||
return Number.isFinite(sec) && sec > 0 ? sec : HEARTBEAT_STUCK_THRESHOLD_SECONDS;
|
|
||||||
}
|
|
||||||
export const HEARTBEAT_HEALTHY_MULTIPLIER = 3;
|
export const HEARTBEAT_HEALTHY_MULTIPLIER = 3;
|
||||||
|
|
||||||
export type ReadinessState = 'working' | 'idle' | 'stuck' | 'stale' | 'dead' | 'unknown';
|
export type ReadinessState = 'working' | 'available' | 'stuck' | 'stale' | 'dead' | 'unknown';
|
||||||
|
|
||||||
export interface ReadinessSignals {
|
export interface ReadinessSignals {
|
||||||
paneAlive: boolean;
|
paneAlive: boolean;
|
||||||
@@ -431,7 +424,6 @@ export interface ReadinessSignals {
|
|||||||
|
|
||||||
export interface ReadinessThresholds {
|
export interface ReadinessThresholds {
|
||||||
idleThresholdSeconds: number;
|
idleThresholdSeconds: number;
|
||||||
stuckThresholdSeconds: number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -456,12 +448,8 @@ export function classifyReadiness(
|
|||||||
const idleThreshold = Number.isFinite(thresholds?.idleThresholdSeconds)
|
const idleThreshold = Number.isFinite(thresholds?.idleThresholdSeconds)
|
||||||
? Number(thresholds?.idleThresholdSeconds)
|
? Number(thresholds?.idleThresholdSeconds)
|
||||||
: idleThresholdSeconds();
|
: idleThresholdSeconds();
|
||||||
const stuckThreshold = Number.isFinite(thresholds?.stuckThresholdSeconds)
|
// Follow-up: stuck pending per-agent assignment awareness: assigned task + idle past threshold => stuck.
|
||||||
? Number(thresholds?.stuckThresholdSeconds)
|
if (idleSeconds >= idleThreshold) return 'available';
|
||||||
: stuckThresholdSeconds();
|
|
||||||
|
|
||||||
if (idleSeconds >= stuckThreshold) return 'stuck';
|
|
||||||
if (idleSeconds >= idleThreshold) return 'idle';
|
|
||||||
return 'working';
|
return 'working';
|
||||||
} catch {
|
} catch {
|
||||||
return 'unknown';
|
return 'unknown';
|
||||||
@@ -1089,7 +1077,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
const rows: AgentPsRow[] = [];
|
const rows: AgentPsRow[] = [];
|
||||||
const readinessThresholds = {
|
const readinessThresholds = {
|
||||||
idleThresholdSeconds: idleThresholdSeconds(),
|
idleThresholdSeconds: idleThresholdSeconds(),
|
||||||
stuckThresholdSeconds: stuckThresholdSeconds(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Build the set of roster agent names for quick lookup when filtering socket sessions.
|
// Build the set of roster agent names for quick lookup when filtering socket sessions.
|
||||||
@@ -1264,8 +1251,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
if (!row.managed) flags.push('UNMANAGED');
|
if (!row.managed) flags.push('UNMANAGED');
|
||||||
if (row.driftFlag) flags.push('DRIFT');
|
if (row.driftFlag) flags.push('DRIFT');
|
||||||
if (row.bootEnableWarning) flags.push('BOOT-ENABLE');
|
if (row.bootEnableWarning) flags.push('BOOT-ENABLE');
|
||||||
if (row.readiness === 'idle') flags.push('IDLE');
|
|
||||||
if (row.readiness === 'stuck') flags.push('STUCK');
|
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
[
|
[
|
||||||
|
|||||||
Reference in New Issue
Block a user