Files
bootstrap/tools/orchestrator/session-resume.sh
Jason Woltje 5ba531e2d0 feat: r0 coordinator tooling for orchestrator protocol
Implements the manual coordinator workflow for multi-session agent
orchestration. Agents stop after one milestone (confirmed limitation);
these tools let the human coordinator check status, generate continuation
prompts, and chain sessions together.

New:
- tools/orchestrator/ — 5 scripts + shared library (_lib.sh)
  - mission-init.sh: initialize mission with milestones and state files
  - mission-status.sh: dashboard showing milestones, tasks, sessions
  - session-status.sh: check if agent is running/stale/dead
  - continue-prompt.sh: generate paste-ready continuation prompt
  - session-resume.sh: crash recovery with dirty state detection
- guides/ORCHESTRATOR-PROTOCOL.md: agent-facing mission lifecycle guide
- templates/docs/: mission manifest, scratchpad, continuation templates
- templates/repo/.mosaic/orchestrator/mission.json: state file template

Modified:
- bin/mosaic: add 'coord' subcommand + resume advisory on launch
- AGENTS.md: conditional loading for protocol guide + rule 37
- bin/mosaic-doctor: checks for new coordinator files
- session hooks: mission detection on start, cleanup on end

Usage: mosaic coord init|mission|status|continue|resume

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 17:22:50 -06:00

209 lines
7.3 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
#
# session-resume.sh — Crash recovery for dead orchestrator sessions
#
# Inspects the session lock of a project. When the lock names a PID that is no
# longer running, the script reports the git working-tree state and the number
# of in-progress tasks, lists recovery actions, and then either prints a
# paste-ready resume prompt or (with --clean-lock) marks the session as crashed
# in mission.json and clears the lock.
#
# Usage:
# session-resume.sh [--project <path>] [--clean-lock]
#
# Options:
#   --project <path>  Project to inspect (default: ".")
#   --clean-lock      Record the crash in mission.json and clear the lock
#                     instead of printing the resume prompt
#   -h, --help        Print usage and exit
#
# Exit status:
#   0  Report printed, session still running, or no lock but a previous
#      session is recorded in mission.json
#   1  Unknown option
#   4  No lock and no recorded session
# Because of `set -e`, an unhandled failing command also ends the script with
# that command's status.
#
# Resolve the directory holding this script so the shared library is found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Shared helpers and the C_* color variables used below are expected to come
# from _lib.sh (not visible here).
source "$SCRIPT_DIR/_lib.sh"
# ─── Parse arguments ─────────────────────────────────────────────────────────
PROJECT="."
CLEAN_LOCK=false

# Parse command-line options into the PROJECT and CLEAN_LOCK globals.
# Arguments: the script's positional parameters ("$@").
# Exits 0 after printing usage for -h/--help; exits 1 (message on stderr) for
# an unknown option or when --project is given without a value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --project)
        # With `set -u`, reading "$2" when it is absent aborts with an opaque
        # "unbound variable" error; report the missing value explicitly.
        if [[ $# -lt 2 ]]; then
          echo "Option --project requires a <path> argument" >&2
          exit 1
        fi
        PROJECT="$2"
        shift 2
        ;;
      --clean-lock)
        CLEAN_LOCK=true
        shift
        ;;
      -h|--help)
        echo "Usage: session-resume.sh [--project <path>] [--clean-lock]"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
_require_jq

# ─── Check session lock ─────────────────────────────────────────────────────
# A failing lock read (its stderr is discarded) is taken to mean "no lock".
has_lock=true
lock_data="$(session_lock_read "$PROJECT" 2>/dev/null)" || has_lock=false

if [[ "$has_lock" != true ]]; then
  # No lock — check mission.json for last session info
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    session_count="$(jq '.sessions | length' <<<"$mission")"
    if (( session_count > 0 )); then
      # Report the most recently recorded session and point at `continue`.
      last_idx=$(( session_count - 1 ))
      last_sid="$(jq -r ".sessions[$last_idx].session_id" <<<"$mission")"
      last_reason="$(jq -r ".sessions[$last_idx].ended_reason // \"unknown\"" <<<"$mission")"
      echo -e "${C_DIM}No session lock found. Last session: $last_sid (ended: $last_reason)${C_RESET}"
      echo "Use 'mosaic coord continue' to generate a continuation prompt."
      exit 0
    fi
  fi
  echo -e "${C_DIM}No session state found.${C_RESET}"
  exit 4
fi

# A lock exists: extract its fields, substituting defaults for absent keys.
# NOTE(review): a lock without a pid yields 0 here — confirm that is_pid_alive
# treats PID 0 as dead (a plain `kill -0 0` would succeed).
lock_pid="$(jq -r '.pid // 0' <<<"$lock_data")"
lock_sid="$(jq -r '.session_id // "unknown"' <<<"$lock_data")"
lock_rt="$(jq -r '.runtime // "unknown"' <<<"$lock_data")"
lock_start="$(jq -r '.started_at // ""' <<<"$lock_data")"
lock_milestone="$(jq -r '.milestone_id // ""' <<<"$lock_data")"

# Nothing to recover while the owning process is still alive.
if is_pid_alive "$lock_pid"; then
  echo -e "${C_YELLOW}Session $lock_sid is still running (PID $lock_pid).${C_RESET}"
  echo "Use 'mosaic coord status' to check session health."
  exit 0
fi

# Session is dead
echo ""
echo -e "${C_RED}${C_BOLD}CRASH RECOVERY — Session $lock_sid ($lock_rt)${C_RESET}"
echo "==========================================="
echo ""
if [[ -n "$lock_start" ]]; then
  echo -e " ${C_CYAN}Session started:${C_RESET} $lock_start"
fi
echo -e " ${C_CYAN}Session died:${C_RESET} PID $lock_pid is not running"
if [[ -n "$lock_milestone" ]]; then
  echo -e " ${C_CYAN}Active milestone:${C_RESET} $lock_milestone"
fi
echo ""
# ─── Detect dirty state ─────────────────────────────────────────────────────

# Print at most $2 (default 20) lines of the listing in $1, each prefixed with
# one space, followed by a " ... and N more" line when lines were omitted.
# The listing is read from a here-string rather than `echo | head | while`:
# with `set -o pipefail` and `set -e`, `head` exiting early on a very large
# listing could SIGPIPE the writer and abort the whole script.
print_dirty_listing() {
  local listing=$1
  local limit=${2:-20}
  local total=0
  local entry
  while IFS= read -r entry; do
    total=$(( total + 1 ))
    if (( total <= limit )); then
      echo " $entry"
    fi
  done <<<"$listing"
  if (( total > limit )); then
    echo " ... and $(( total - limit )) more"
  fi
}

echo -e "${C_BOLD}Dirty State:${C_RESET}"
dirty_files=""
if git -C "$PROJECT" rev-parse --is-inside-work-tree &>/dev/null; then
  # Best effort: a failing `git status` is treated as "nothing to report".
  dirty_files="$(git -C "$PROJECT" status --porcelain 2>/dev/null || true)"
fi
if [[ -n "$dirty_files" ]]; then
  echo " Modified files:"
  print_dirty_listing "$dirty_files" 20
else
  echo -e " ${C_GREEN}Working tree is clean.${C_RESET}"
fi

# Check for in-progress tasks
# `// 0` and the `:-0` default keep the arithmetic test valid under `set -u`
# when the counts JSON lacks `.in_progress` (jq would print `null`) or is empty.
task_counts="$(count_tasks_md "$PROJECT")"
inprog_count="$(echo "$task_counts" | jq '.in_progress // 0')"
if (( ${inprog_count:-0} > 0 )); then
  echo -e " ${C_YELLOW}$inprog_count task(s) still marked in-progress in TASKS.md${C_RESET}"
fi
echo ""
# ─── Recovery actions ────────────────────────────────────────────────────────

# Print the numbered list of recovery steps.
# Arguments: $1 - dirty-file listing; non-empty adds the three git review steps.
# The lock/prompt steps are numbered after whatever precedes them, so the list
# starts at 1 on a clean tree instead of jumping straight to "4.".
print_recovery_actions() {
  local dirty_listing=${1:-}
  local step=1
  echo -e "${C_BOLD}Recovery Actions:${C_RESET}"
  if [[ -n "$dirty_listing" ]]; then
    echo " 1. Review changes: git diff"
    echo " 2. If good: git add -A && git commit -m \"wip: partial work from crashed session\""
    echo " 3. If bad: git checkout ."
    step=4
  fi
  echo " ${step}. Clean lock: mosaic coord resume --clean-lock"
  echo " $(( step + 1 )). Generate prompt: mosaic coord continue"
  echo ""
}

print_recovery_actions "${dirty_files:-}"
# ─── Clean lock if requested ─────────────────────────────────────────────────
# With --clean-lock the crash is first recorded in mission.json (when that
# file exists), and only then is the session lock removed.
if [[ "$CLEAN_LOCK" == true ]]; then
  echo -e "${C_CYAN}Cleaning session lock...${C_RESET}"

  # Update mission.json with crash info
  mission_file="$(mission_path "$PROJECT")"
  if [[ "$has_lock" == true ]] && [[ -f "$mission_file" ]]; then
    # Stamp the session entry whose session_id matches the lock with an end
    # time and the reason "crashed"; other entries are left untouched.
    patched_mission="$(jq \
      --arg sid "$lock_sid" \
      --arg ts "$(iso_now)" \
      '(.sessions[] | select(.session_id == $sid)) |= . + {
        ended_at: $ts,
        ended_reason: "crashed"
      }' "$mission_file")"
    write_json "$mission_file" "$patched_mission"
    echo " Updated mission.json: session $lock_sid marked as crashed"
  fi

  session_lock_clear "$PROJECT"
  echo " Cleared session.lock"
  echo ""
  echo -e "${C_GREEN}Lock cleared. Generate continuation prompt with: mosaic coord continue${C_RESET}"
fi
# ─── Generate resume prompt ─────────────────────────────────────────────────

# Print the crash-recovery prompt (markdown) to stdout.
# Arguments (an empty value falls back to the placeholder in parentheses):
#   $1 mission name     ("Unknown Mission")
#   $2 session id       ("unknown")
#   $3 project path     (printed as given)
#   $4 tasks done       (printed as given)
#   $5 tasks total      (printed as given)
#   $6 mission id       ("mission")
#   $7 quality gates    ("—")
#   $8 next task        ("check TASKS.md")
#   $9 dirty listing    (non-empty → include the dirty-state warning)
# The warning section is emitted only when $9 is non-empty, so a clean working
# tree no longer produces a prompt claiming uncommitted changes exist.
render_resume_prompt() {
  local name="${1:-Unknown Mission}"
  local sid="${2:-unknown}"
  local project="${3:-}"
  local done_count="${4:-}"
  local total_count="${5:-}"
  local mid="${6:-mission}"
  local gates="${7:-—}"
  local next="${8:-check TASKS.md}"
  local dirty="${9:-}"

  cat <<EOF
## Crash Recovery Mission
Recovering **${name}** from crashed session ${sid}.
EOF
  if [[ -n "$dirty" ]]; then
    # Quoted delimiter: the backticks below are literal markdown.
    cat <<'EOF'
### WARNING: Dirty State Detected
The previous session left uncommitted changes. Before continuing:
1. Run `git diff` to review uncommitted changes
2. Decide: commit (if good) or discard (if broken)
3. Then proceed with the mission
EOF
  fi
  cat <<EOF
## Setup
- **Project:** ${project}
- **State:** docs/TASKS.md (${done_count}/${total_count} tasks complete)
- **Manifest:** docs/MISSION-MANIFEST.md
- **Scratchpad:** docs/scratchpads/${mid}.md
- **Protocol:** ~/.config/mosaic/guides/ORCHESTRATOR.md
- **Quality gates:** ${gates}
## Resume Point
- **Next task:** ${next}
## Instructions
1. Read \`docs/MISSION-MANIFEST.md\` for mission scope
2. Read \`docs/scratchpads/${mid}.md\` for session history
3. Review and resolve any uncommitted changes first
4. Read \`docs/TASKS.md\` for current task state
5. Continue execution from the next pending task
6. You are the SOLE writer of \`docs/TASKS.md\`
EOF
}

# The prompt is skipped when --clean-lock was used; that path already tells
# the user to run `mosaic coord continue`.
if [[ "$CLEAN_LOCK" != true ]]; then
  echo "---"
  echo ""
  echo -e "${C_BOLD}Resume Prompt (paste to new session):${C_RESET}"
  echo ""
  mission_name=""
  mission_id=""
  quality_gates=""
  project_path=""
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    # `// empty` makes jq print nothing instead of the literal "null" for a
    # missing field, so the placeholder defaults can actually take effect.
    mission_name="$(echo "$mission" | jq -r '.name // empty')"
    mission_id="$(echo "$mission" | jq -r '.mission_id // empty')"
    quality_gates="$(echo "$mission" | jq -r '.quality_gates // "—"')"
    project_path="$(echo "$mission" | jq -r '.project_path // empty')"
  fi
  task_counts="$(count_tasks_md "$PROJECT")"
  tasks_done="$(echo "$task_counts" | jq '.done // 0')"
  tasks_total="$(echo "$task_counts" | jq '.total // 0')"
  next_task="$(find_next_task "$PROJECT")"
  render_resume_prompt \
    "$mission_name" \
    "${lock_sid:-unknown}" \
    "${project_path:-$PROJECT}" \
    "$tasks_done" \
    "$tasks_total" \
    "$mission_id" \
    "$quality_gates" \
    "$next_task" \
    "${dirty_files:-}"
fi