fix(fleet): report idle agents as available, reserve stuck for genuine blocks (#653)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful

This commit was merged in pull request #653.
This commit is contained in:
2026-06-24 13:58:22 +00:00
parent 1020cfaf9b
commit 937077f6be
3 changed files with 123 additions and 76 deletions

View File

@@ -395,7 +395,6 @@ export function buildAgentTailCommand(agentName: string, lines: number, socketNa
/** Default heartbeat interval in milliseconds; heartbeatIntervalMs() returns it when the MOSAIC_HEARTBEAT_INTERVAL override is not a finite positive number. */
export const HEARTBEAT_INTERVAL_MS = 15_000;
/** Default idle threshold in seconds; idleThresholdSeconds() returns it when MOSAIC_HEARTBEAT_IDLE_THRESHOLD is unset or not a positive integer. */
export const HEARTBEAT_IDLE_THRESHOLD_SECONDS = 300;
/** Default stuck threshold in seconds; stuckThresholdSeconds() returns it when MOSAIC_HEARTBEAT_STUCK_THRESHOLD is unset or not a positive integer. */
export const HEARTBEAT_STUCK_THRESHOLD_SECONDS = 900;
/**
* Heartbeat interval in ms, honoring MOSAIC_HEARTBEAT_INTERVAL (seconds) so the
@@ -407,20 +406,14 @@ export function heartbeatIntervalMs(): number {
return Number.isFinite(sec) && sec > 0 ? sec * 1000 : HEARTBEAT_INTERVAL_MS;
}
/**
 * Resolves the idle (activity) threshold, in seconds.
 *
 * Reads MOSAIC_HEARTBEAT_IDLE_THRESHOLD and parses it as a base-10 integer.
 * When the variable is unset, does not parse to a finite number, or is not
 * strictly positive, HEARTBEAT_IDLE_THRESHOLD_SECONDS is returned instead.
 *
 * @returns The threshold in seconds.
 */
export function idleThresholdSeconds(): number {
  const rawValue = process.env.MOSAIC_HEARTBEAT_IDLE_THRESHOLD ?? '';
  const parsed = Number.parseInt(rawValue, 10);
  // NaN (unset or unparseable) and non-positive values both fall back to the default.
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return HEARTBEAT_IDLE_THRESHOLD_SECONDS;
  }
  return parsed;
}
/**
 * Resolves the stuck threshold, in seconds.
 *
 * Reads MOSAIC_HEARTBEAT_STUCK_THRESHOLD and parses it as a base-10 integer.
 * When the variable is unset, does not parse to a finite number, or is not
 * strictly positive, HEARTBEAT_STUCK_THRESHOLD_SECONDS is returned instead.
 *
 * @returns The threshold in seconds.
 */
export function stuckThresholdSeconds(): number {
  const rawValue = process.env.MOSAIC_HEARTBEAT_STUCK_THRESHOLD ?? '';
  const parsed = Number.parseInt(rawValue, 10);
  // NaN (unset or unparseable) and non-positive values both fall back to the default.
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return HEARTBEAT_STUCK_THRESHOLD_SECONDS;
  }
  return parsed;
}
// NOTE(review): no use of this multiplier is visible in this excerpt — presumably it scales the heartbeat interval when judging heartbeat health; confirm against the full file.
export const HEARTBEAT_HEALTHY_MULTIPLIER = 3;
// NOTE(review): the two declarations below appear to be the before/after versions of the same
// type as rendered by the diff (the second replaces 'idle' with 'available'); a real source
// file can contain only one of them — confirm which against the current file.
export type ReadinessState = 'working' | 'idle' | 'stuck' | 'stale' | 'dead' | 'unknown';
export type ReadinessState = 'working' | 'available' | 'stuck' | 'stale' | 'dead' | 'unknown';
export interface ReadinessSignals {
paneAlive: boolean;
@@ -431,7 +424,6 @@ export interface ReadinessSignals {
export interface ReadinessThresholds {
// Idle time in seconds at or above which classifyReadiness no longer reports 'working';
// classifyReadiness falls back to idleThresholdSeconds() when this is not a finite number.
idleThresholdSeconds: number;
// Idle time in seconds at or above which classifyReadiness reports 'stuck';
// classifyReadiness falls back to stuckThresholdSeconds() when this is not a finite number.
// NOTE(review): this excerpt interleaves old and new diff lines; this field and the 'stuck'
// comparison look like the removed side of the change — confirm against the current file.
stuckThresholdSeconds: number;
}
/**
@@ -456,12 +448,8 @@ export function classifyReadiness(
const idleThreshold = Number.isFinite(thresholds?.idleThresholdSeconds)
? Number(thresholds?.idleThresholdSeconds)
: idleThresholdSeconds();
const stuckThreshold = Number.isFinite(thresholds?.stuckThresholdSeconds)
? Number(thresholds?.stuckThresholdSeconds)
: stuckThresholdSeconds();
if (idleSeconds >= stuckThreshold) return 'stuck';
if (idleSeconds >= idleThreshold) return 'idle';
// Follow-up: stuck pending per-agent assignment awareness: assigned task + idle past threshold => stuck.
if (idleSeconds >= idleThreshold) return 'available';
return 'working';
} catch {
return 'unknown';
@@ -1089,7 +1077,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
const rows: AgentPsRow[] = [];
const readinessThresholds = {
idleThresholdSeconds: idleThresholdSeconds(),
stuckThresholdSeconds: stuckThresholdSeconds(),
};
// Build the set of roster agent names for quick lookup when filtering socket sessions.
@@ -1264,8 +1251,6 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
if (!row.managed) flags.push('UNMANAGED');
if (row.driftFlag) flags.push('DRIFT');
if (row.bootEnableWarning) flags.push('BOOT-ENABLE');
if (row.readiness === 'idle') flags.push('IDLE');
if (row.readiness === 'stuck') flags.push('STUCK');
console.log(
[