feat: r0 coordinator tooling for orchestrator protocol
Implements the manual coordinator workflow for multi-session agent orchestration. Agents stop after one milestone (confirmed limitation); these tools let the human coordinator check status, generate continuation prompts, and chain sessions together. New: - tools/orchestrator/ — 5 scripts + shared library (_lib.sh) - mission-init.sh: initialize mission with milestones and state files - mission-status.sh: dashboard showing milestones, tasks, sessions - session-status.sh: check if agent is running/stale/dead - continue-prompt.sh: generate paste-ready continuation prompt - session-resume.sh: crash recovery with dirty state detection - guides/ORCHESTRATOR-PROTOCOL.md: agent-facing mission lifecycle guide - templates/docs/: mission manifest, scratchpad, continuation templates - templates/repo/.mosaic/orchestrator/mission.json: state file template Modified: - bin/mosaic: add 'coord' subcommand + resume advisory on launch - AGENTS.md: conditional loading for protocol guide + rule 37 - bin/mosaic-doctor: checks for new coordinator files - session hooks: mission detection on start, cleanup on end Usage: mosaic coord init|mission|status|continue|resume Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
208
tools/orchestrator/session-resume.sh
Executable file
208
tools/orchestrator/session-resume.sh
Executable file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
#
|
||||
# session-resume.sh — Crash recovery for dead orchestrator sessions
|
||||
#
|
||||
# Usage:
|
||||
# session-resume.sh [--project <path>] [--clean-lock]
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "$SCRIPT_DIR/_lib.sh"
|
||||
|
||||
# ─── Parse arguments ─────────────────────────────────────────────────────────
|
||||
|
||||
# Defaults, overridden by parse_args below.
PROJECT="."
CLEAN_LOCK=false

#######################################
# Parse command-line options into the PROJECT and CLEAN_LOCK globals.
# Globals:
#   PROJECT    (written) value given to --project
#   CLEAN_LOCK (written) set to "true" when --clean-lock is present
# Arguments:
#   the script's command-line arguments
# Outputs:
#   usage text on stdout for -h/--help; error messages on stderr
# Returns:
#   exits 0 after printing help; exits 1 on an unknown option or when
#   --project is given without a value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --project)
        # A trailing "--project" leaves $2 unset; under `set -u` that would
        # abort with a bare "unbound variable" error, so report it explicitly.
        if [[ $# -lt 2 ]]; then
          echo "Option --project requires a <path> argument" >&2
          exit 1
        fi
        PROJECT="$2"
        shift 2
        ;;
      --clean-lock)
        CLEAN_LOCK=true
        shift
        ;;
      -h|--help)
        echo "Usage: session-resume.sh [--project <path>] [--clean-lock]"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Check the jq dependency up front: every lock and mission field read below
# goes through jq. NOTE(review): _require_jq is not defined in this script —
# presumably provided by the sourced _lib.sh; confirm what it does when jq is
# missing.
_require_jq
|
||||
|
||||
# ─── Check session lock ─────────────────────────────────────────────────────
|
||||
|
||||
# Try to read the session lock for the project. A non-zero status from
# session_lock_read is taken to mean "no lock"; its stderr is discarded.
has_lock=false
lock_data=""
lock_data="$(session_lock_read "$PROJECT" 2>/dev/null)" && has_lock=true

# No lock: report the most recent session recorded in mission.json (if any)
# and stop here. This path always exits — 0 when a previous session is known,
# 4 when there is no session state at all.
if [[ "$has_lock" != true ]]; then
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    session_count="$(jq '.sessions | length' <<<"$mission")"
    if (( session_count > 0 )); then
      last_idx=$(( session_count - 1 ))
      last_sid="$(jq -r --argjson i "$last_idx" \
        '.sessions[$i].session_id' <<<"$mission")"
      last_reason="$(jq -r --argjson i "$last_idx" \
        '.sessions[$i].ended_reason // "unknown"' <<<"$mission")"
      echo -e "${C_DIM}No session lock found. Last session: $last_sid (ended: $last_reason)${C_RESET}"
      echo "Use 'mosaic coord continue' to generate a continuation prompt."
      exit 0
    fi
  fi
  echo -e "${C_DIM}No session state found.${C_RESET}"
  exit 4
fi

# Print one field of the lock JSON held in $lock_data.
#   $1 - jq filter selecting the field (with its fallback value)
_lock_field() {
  jq -r "$1" <<<"$lock_data"
}

lock_pid="$(_lock_field '.pid // 0')"
lock_sid="$(_lock_field '.session_id // "unknown"')"
lock_rt="$(_lock_field '.runtime // "unknown"')"
lock_start="$(_lock_field '.started_at // ""')"
lock_milestone="$(_lock_field '.milestone_id // ""')"

# A live PID means there is nothing to recover.
if is_pid_alive "$lock_pid"; then
  echo -e "${C_YELLOW}Session $lock_sid is still running (PID $lock_pid).${C_RESET}"
  echo "Use 'mosaic coord status' to check session health."
  exit 0
fi

# The lock exists but its process is gone: print the crash-recovery header.
echo ""
echo -e "${C_RED}${C_BOLD}CRASH RECOVERY — Session $lock_sid ($lock_rt)${C_RESET}"
echo "==========================================="
echo ""

if [[ -n "$lock_start" ]]; then
  echo -e " ${C_CYAN}Session started:${C_RESET} $lock_start"
fi
echo -e " ${C_CYAN}Session died:${C_RESET} PID $lock_pid is not running"
if [[ -n "$lock_milestone" ]]; then
  echo -e " ${C_CYAN}Active milestone:${C_RESET} $lock_milestone"
fi
echo ""
|
||||
|
||||
# ─── Detect dirty state ─────────────────────────────────────────────────────
|
||||
|
||||
echo -e "${C_BOLD}Dirty State:${C_RESET}"

#######################################
# Print a porcelain listing one entry per line, capped at a maximum.
# Arguments:
#   $1 - newline-separated listing (output of `git status --porcelain`)
#   $2 - maximum number of entries to print (default 20)
# Outputs:
#   each printed entry prefixed with a space, followed by
#   " ... and N more" when entries were left out
#######################################
_print_dirty_listing() {
  local listing=$1
  local limit=${2:-20}
  local total=0
  local entry
  # One pass over a here-string rather than `echo | head | while`: with
  # `set -o pipefail` and `set -e`, head exiting early on a large listing can
  # kill the writer with SIGPIPE and abort the whole script.
  while IFS= read -r entry; do
    total=$(( total + 1 ))
    if (( total <= limit )); then
      echo " $entry"
    fi
  done <<<"$listing"
  if (( total > limit )); then
    echo " ... and $(( total - limit )) more"
  fi
}

# Collect uncommitted changes, but only when PROJECT is inside a git work
# tree; a failing `git status` is deliberately treated as "nothing dirty".
dirty_files=""
if git -C "$PROJECT" rev-parse --is-inside-work-tree &>/dev/null; then
  dirty_files="$(git -C "$PROJECT" status --porcelain 2>/dev/null || true)"
fi

if [[ -n "$dirty_files" ]]; then
  echo " Modified files:"
  _print_dirty_listing "$dirty_files"
else
  echo -e " ${C_GREEN}Working tree is clean.${C_RESET}"
fi
|
||||
|
||||
# Check for in-progress tasks. The counts come from count_tasks_md as JSON;
# only the .in_progress field is used here.
task_counts="$(count_tasks_md "$PROJECT")"
inprog_count="$(jq -r '.in_progress // 0' <<<"$task_counts")"
# Under `set -eu`, a null, empty or non-numeric value inside (( )) aborts the
# script (unbound variable / syntax error), so coerce anything that is not a
# plain non-negative integer to 0 before comparing.
if [[ ! "$inprog_count" =~ ^[0-9]+$ ]]; then
  inprog_count=0
fi
if (( inprog_count > 0 )); then
  echo -e " ${C_YELLOW}$inprog_count task(s) still marked in-progress in TASKS.md${C_RESET}"
fi

echo ""
|
||||
|
||||
# ─── Recovery actions ────────────────────────────────────────────────────────
|
||||
|
||||
#######################################
# Print the numbered list of manual recovery steps.
# The git review/commit/discard steps are listed only when there are
# uncommitted changes. Steps are numbered as they are emitted, so the list
# always starts at 1 (with a clean tree the lock/prompt steps used to be
# printed as 4 and 5 with nothing before them).
# Globals:
#   C_BOLD, C_RESET (read) colour codes for the heading
# Arguments:
#   $1 - `git status --porcelain` output; empty means the tree is clean
# Outputs:
#   heading, numbered steps and a trailing blank line on stdout
#######################################
_print_recovery_actions() {
  local dirty=${1:-}
  local -a actions=()
  local action
  local step=0

  if [[ -n "$dirty" ]]; then
    actions+=(
      'Review changes: git diff'
      'If good: git add -A && git commit -m "wip: partial work from crashed session"'
      'If bad: git checkout .'
    )
  fi
  actions+=(
    'Clean lock: mosaic coord resume --clean-lock'
    'Generate prompt: mosaic coord continue'
  )

  echo -e "${C_BOLD}Recovery Actions:${C_RESET}"
  for action in "${actions[@]}"; do
    step=$(( step + 1 ))
    printf ' %d. %s\n' "$step" "$action"
  done
  echo ""
}

_print_recovery_actions "${dirty_files:-}"
|
||||
|
||||
# ─── Clean lock if requested ─────────────────────────────────────────────────
|
||||
|
||||
# Runs only when --clean-lock was given: stamp the dead session as crashed in
# mission.json (when that file exists), then remove the session lock.
if [[ "$CLEAN_LOCK" == true ]]; then
  echo -e "${C_CYAN}Cleaning session lock...${C_RESET}"

  mission_file="$(mission_path "$PROJECT")"
  if [[ "$has_lock" == true && -f "$mission_file" ]]; then
    # jq program: on the session whose session_id equals $sid, set the end
    # timestamp to $ts and the end reason to "crashed".
    crash_filter='(.sessions[] | select(.session_id == $sid)) |= . + {ended_at: $ts, ended_reason: "crashed"}'
    patched="$(jq --arg sid "$lock_sid" --arg ts "$(iso_now)" "$crash_filter" "$mission_file")"
    write_json "$mission_file" "$patched"
    echo " Updated mission.json: session $lock_sid marked as crashed"
  fi

  session_lock_clear "$PROJECT"
  echo " Cleared session.lock"
  echo ""
  echo -e "${C_GREEN}Lock cleared. Generate continuation prompt with: mosaic coord continue${C_RESET}"
fi
|
||||
|
||||
# ─── Generate resume prompt ─────────────────────────────────────────────────
|
||||
|
||||
#######################################
# Print the dirty-state warning section of the resume prompt.
# Arguments:
#   $1 - `git status --porcelain` output; empty means the tree is clean
# Outputs:
#   the warning section followed by a blank line, or nothing when $1 is
#   empty (a clean tree must not be reported as dirty)
#######################################
_print_dirty_warning() {
  [[ -n "${1:-}" ]] || return 0
  cat <<'EOF'
### WARNING: Dirty State Detected
The previous session left uncommitted changes. Before continuing:
1. Run `git diff` to review uncommitted changes
2. Decide: commit (if good) or discard (if broken)
3. Then proceed with the mission

EOF
}

# The prompt is printed only on a plain run; with --clean-lock it is skipped.
if [[ "$CLEAN_LOCK" != true ]]; then
  echo "---"
  echo ""
  echo -e "${C_BOLD}Resume Prompt (paste to new session):${C_RESET}"
  echo ""

  # Initialise every field so the fallbacks below apply when mission.json is
  # absent.
  mission_name=""
  mission_id=""
  quality_gates=""
  project_path=""
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    # `// empty` turns a missing/null field into an empty string; plain
    # `jq -r` would print the literal "null" and defeat ${var:-default}.
    mission_name="$(jq -r '.name // empty' <<<"$mission")"
    mission_id="$(jq -r '.mission_id // empty' <<<"$mission")"
    quality_gates="$(jq -r '.quality_gates // "—"' <<<"$mission")"
    project_path="$(jq -r '.project_path // empty' <<<"$mission")"
  fi

  task_counts="$(count_tasks_md "$PROJECT")"
  tasks_done="$(jq -r '.done // 0' <<<"$task_counts")"
  tasks_total="$(jq -r '.total // 0' <<<"$task_counts")"
  next_task="$(find_next_task "$PROJECT")"

  cat <<EOF
## Crash Recovery Mission

Recovering **${mission_name:-Unknown Mission}** from crashed session ${lock_sid:-unknown}.

EOF

  # Shown only when the dirty-state check found uncommitted changes.
  _print_dirty_warning "${dirty_files:-}"

  cat <<EOF
## Setup

- **Project:** ${project_path:-$PROJECT}
- **State:** docs/TASKS.md (${tasks_done:-0}/${tasks_total:-0} tasks complete)
- **Manifest:** docs/MISSION-MANIFEST.md
- **Scratchpad:** docs/scratchpads/${mission_id:-mission}.md
- **Protocol:** ~/.config/mosaic/guides/ORCHESTRATOR.md
- **Quality gates:** ${quality_gates:-—}

## Resume Point

- **Next task:** ${next_task:-check TASKS.md}

## Instructions

1. Read \`docs/MISSION-MANIFEST.md\` for mission scope
2. Read \`docs/scratchpads/${mission_id:-mission}.md\` for session history
3. Review and resolve any uncommitted changes first
4. Read \`docs/TASKS.md\` for current task state
5. Continue execution from the next pending task
6. You are the SOLE writer of \`docs/TASKS.md\`
EOF
fi
|
||||
Reference in New Issue
Block a user