Implements the manual coordinator workflow for multi-session agent orchestration. Agents stop after one milestone (a confirmed limitation); these tools let the human coordinator check status, generate continuation prompts, and chain sessions together.

New:
- tools/orchestrator/ — 5 scripts + shared library (_lib.sh)
- mission-init.sh: initialize mission with milestones and state files
- mission-status.sh: dashboard showing milestones, tasks, sessions
- session-status.sh: check if agent is running/stale/dead
- continue-prompt.sh: generate paste-ready continuation prompt
- session-resume.sh: crash recovery with dirty state detection
- guides/ORCHESTRATOR-PROTOCOL.md: agent-facing mission lifecycle guide
- templates/docs/: mission manifest, scratchpad, continuation templates
- templates/repo/.mosaic/orchestrator/mission.json: state file template

Modified:
- bin/mosaic: add 'coord' subcommand + resume advisory on launch
- AGENTS.md: conditional loading for protocol guide + rule 37
- bin/mosaic-doctor: checks for new coordinator files
- session hooks: mission detection on start, cleanup on end

Usage: mosaic coord init|mission|status|continue|resume

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
209 lines
7.3 KiB
Bash
Executable File
209 lines
7.3 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# session-resume.sh — Crash recovery for dead orchestrator sessions
#
# Reads the project's session lock. If the locked session is still alive it
# says so and exits; otherwise it reports the crash, lists uncommitted
# changes and in-progress tasks, and prints a paste-ready resume prompt.
# With --clean-lock it marks the session as crashed in mission.json and
# removes the lock instead of printing the prompt.
#
# Usage:
#   session-resume.sh [--project <path>] [--clean-lock]
#
# Exit codes:
#   0  report printed, or the locked session is still running
#   1  invalid command-line usage
#   4  neither a session lock nor any recorded session was found

set -euo pipefail

# Resolve the directory that holds this script so the shared helper library
# loads regardless of the caller's working directory. `--` keeps a path that
# starts with '-' from being parsed as an option.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck source=/dev/null
source "$SCRIPT_DIR/_lib.sh"

# ─── Parse arguments ─────────────────────────────────────────────────────────

PROJECT="."
CLEAN_LOCK=false

# Parse command-line options into the PROJECT and CLEAN_LOCK globals.
#   --project <path>   project directory to inspect (default: ".")
#   --clean-lock       set CLEAN_LOCK=true
#   -h, --help         print usage and exit 0
# An unknown option, or --project without a non-empty value, prints a message
# to stderr and exits 1.
_resume_parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --project)
        # Under `set -u` a bare trailing --project would otherwise die with
        # an "unbound variable" error on $2; report a usable message instead.
        if (( $# < 2 )) || [[ -z "$2" ]]; then
          echo "Option --project requires a <path> argument" >&2
          exit 1
        fi
        PROJECT="$2"
        shift 2
        ;;
      --clean-lock)
        CLEAN_LOCK=true
        shift
        ;;
      -h|--help)
        echo "Usage: session-resume.sh [--project <path>] [--clean-lock]"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

_resume_parse_args "$@"

_require_jq

# ─── Check session lock ─────────────────────────────────────────────────────

# has_lock records whether session_lock_read succeeded; lock_data holds
# whatever it printed (parsed as JSON below).
lock_data=""
has_lock=false
if lock_data="$(session_lock_read "$PROJECT" 2>/dev/null)"; then
  has_lock=true
fi

if [[ "$has_lock" == true ]]; then
  lock_pid="$(echo "$lock_data" | jq -r '.pid // 0')"
  lock_sid="$(echo "$lock_data" | jq -r '.session_id // "unknown"')"
  lock_rt="$(echo "$lock_data" | jq -r '.runtime // "unknown"')"
  lock_start="$(echo "$lock_data" | jq -r '.started_at // ""')"
  lock_milestone="$(echo "$lock_data" | jq -r '.milestone_id // ""')"

  # Only a positive integer can name a live process. A lock without a usable
  # pid falls back to 0 above.
  # NOTE(review): is_pid_alive is defined in _lib.sh (not visible here); if it
  # probes with `kill -0`, PID 0 would address this script's own process group
  # and look alive, so such locks are treated as dead without asking it.
  if [[ "$lock_pid" =~ ^[1-9][0-9]*$ ]] && is_pid_alive "$lock_pid"; then
    echo -e "${C_YELLOW}Session $lock_sid is still running (PID $lock_pid).${C_RESET}"
    echo "Use 'mosaic coord status' to check session health."
    exit 0
  fi

  # Session is dead
  echo ""
  echo -e "${C_RED}${C_BOLD}CRASH RECOVERY — Session $lock_sid ($lock_rt)${C_RESET}"
  echo "==========================================="
  echo ""

  if [[ -n "$lock_start" ]]; then
    echo -e " ${C_CYAN}Session started:${C_RESET} $lock_start"
  fi
  echo -e " ${C_CYAN}Session died:${C_RESET} PID $lock_pid is not running"
  if [[ -n "$lock_milestone" ]]; then
    echo -e " ${C_CYAN}Active milestone:${C_RESET} $lock_milestone"
  fi
  echo ""

else
  # No lock — check mission.json for last session info
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    session_count="$(echo "$mission" | jq '.sessions | length')"
    if (( session_count > 0 )); then
      last_idx=$(( session_count - 1 ))
      last_sid="$(echo "$mission" | jq -r ".sessions[$last_idx].session_id")"
      last_reason="$(echo "$mission" | jq -r ".sessions[$last_idx].ended_reason // \"unknown\"")"
      echo -e "${C_DIM}No session lock found. Last session: $last_sid (ended: $last_reason)${C_RESET}"
      echo "Use 'mosaic coord continue' to generate a continuation prompt."
      exit 0
    fi
  fi

  # Neither a lock nor a recorded session: nothing to recover.
  echo -e "${C_DIM}No session state found.${C_RESET}"
  exit 4
fi

# ─── Detect dirty state ─────────────────────────────────────────────────────

# Print the lines of a `git status --porcelain` listing, capped at a limit.
#   $1  the listing (newline-separated)
#   $2  maximum number of lines to show (default: 20)
# When the listing is longer than the limit, a "... and N more" line follows.
# Reads from a here-string rather than `echo | head | while`, so a long
# listing cannot kill the script through SIGPIPE under `set -o pipefail`.
_print_dirty_files() {
  local listing="$1"
  local limit="${2:-20}"
  local total=0
  local entry
  while IFS= read -r entry; do
    total=$(( total + 1 ))
    if (( total <= limit )); then
      echo " $entry"
    fi
  done <<< "$listing"
  if (( total > limit )); then
    echo " ... and $(( total - limit )) more"
  fi
}

echo -e "${C_BOLD}Dirty State:${C_RESET}"

# dirty_files stays empty when PROJECT is not a git work tree or is clean.
dirty_files=""
if git -C "$PROJECT" rev-parse --is-inside-work-tree &>/dev/null; then
  # Best effort: a failing `git status` is treated as "nothing to report".
  dirty_files="$(git -C "$PROJECT" status --porcelain 2>/dev/null || true)"
fi

if [[ -n "$dirty_files" ]]; then
  echo " Modified files:"
  _print_dirty_files "$dirty_files" 20
else
  echo -e " ${C_GREEN}Working tree is clean.${C_RESET}"
fi

# Check for in-progress tasks
task_counts="$(count_tasks_md "$PROJECT")"
# `// 0` and the :-0 default keep a missing or null counter from reaching the
# arithmetic test as the word "null" (an unbound variable under `set -u`).
inprog_count="$(echo "$task_counts" | jq -r '.in_progress // 0')"
if (( ${inprog_count:-0} > 0 )); then
  echo -e " ${C_YELLOW}$inprog_count task(s) still marked in-progress in TASKS.md${C_RESET}"
fi

echo ""

# ─── Recovery actions ────────────────────────────────────────────────────────

# Print the numbered recovery checklist.
#   $1  porcelain status listing; the three git review steps are included
#       only when it is non-empty.
# Steps are numbered from 1 in the order printed, so a clean working tree no
# longer yields a list that starts at "4.".
_print_recovery_actions() {
  local dirty="$1"
  local -a steps=()
  local i

  if [[ -n "$dirty" ]]; then
    steps+=("Review changes: git diff")
    steps+=('If good: git add -A && git commit -m "wip: partial work from crashed session"')
    steps+=("If bad: git checkout .")
  fi
  steps+=("Clean lock: mosaic coord resume --clean-lock")
  steps+=("Generate prompt: mosaic coord continue")

  echo -e "${C_BOLD}Recovery Actions:${C_RESET}"
  for i in "${!steps[@]}"; do
    echo " $(( i + 1 )). ${steps[$i]}"
  done
}

_print_recovery_actions "$dirty_files"
echo ""

# ─── Clean lock if requested ─────────────────────────────────────────────────

if [[ "$CLEAN_LOCK" == true ]]; then
  echo -e "${C_CYAN}Cleaning session lock...${C_RESET}"

  # Update mission.json with crash info: stamp the session whose id matches
  # the lock with ended_at / ended_reason.
  mp="$(mission_path "$PROJECT")"
  if [[ -f "$mp" && "$has_lock" == true ]]; then
    # Best effort. A crash can leave mission.json truncated or malformed; a
    # failing jq must not abort the script (set -e) before the lock is
    # removed, or --clean-lock could never clear it.
    if updated="$(jq \
        --arg sid "$lock_sid" \
        --arg ts "$(iso_now)" \
        '(.sessions[] | select(.session_id == $sid)) |= . + {
          ended_at: $ts,
          ended_reason: "crashed"
        }' "$mp" 2>/dev/null)" && [[ -n "$updated" ]]; then
      write_json "$mp" "$updated"
      echo " Updated mission.json: session $lock_sid marked as crashed"
    else
      echo -e " ${C_YELLOW}Warning: could not update mission.json; clearing lock anyway${C_RESET}" >&2
    fi
  fi

  session_lock_clear "$PROJECT"
  echo " Cleared session.lock"
  echo ""
  echo -e "${C_GREEN}Lock cleared. Generate continuation prompt with: mosaic coord continue${C_RESET}"
fi

# ─── Generate resume prompt ─────────────────────────────────────────────────

# Skipped when --clean-lock was given.
if [[ "$CLEAN_LOCK" != true ]]; then
  echo "---"
  echo ""
  echo -e "${C_BOLD}Resume Prompt (paste to new session):${C_RESET}"
  echo ""

  # Mission metadata. `// empty` makes a missing field an empty string rather
  # than the literal word "null", so the ${var:-default} fallbacks in the
  # prompt below actually apply.
  mission_name=""
  mission_id=""
  quality_gates=""
  project_path=""
  if [[ -f "$(mission_path "$PROJECT")" ]]; then
    mission="$(load_mission "$PROJECT")"
    mission_name="$(echo "$mission" | jq -r '.name // empty')"
    mission_id="$(echo "$mission" | jq -r '.mission_id // empty')"
    quality_gates="$(echo "$mission" | jq -r '.quality_gates // "—"')"
    project_path="$(echo "$mission" | jq -r '.project_path // empty')"
  fi

  task_counts="$(count_tasks_md "$PROJECT")"
  tasks_done="$(echo "$task_counts" | jq -r '.done // 0')"
  tasks_total="$(echo "$task_counts" | jq -r '.total // 0')"
  next_task="$(find_next_task "$PROJECT")"

  # Only warn about uncommitted changes when some were actually detected;
  # dirty_files holds the `git status --porcelain` listing gathered earlier.
  if [[ -n "${dirty_files:-}" ]]; then
    state_notice="### WARNING: Dirty State Detected
The previous session left uncommitted changes. Before continuing:
1. Run \`git diff\` to review uncommitted changes
2. Decide: commit (if good) or discard (if broken)
3. Then proceed with the mission"
  else
    state_notice="### Working Tree Clean
No uncommitted changes were detected when this prompt was generated."
  fi

  cat <<EOF
## Crash Recovery Mission

Recovering **${mission_name:-Unknown Mission}** from crashed session ${lock_sid:-unknown}.

${state_notice}

## Setup

- **Project:** ${project_path:-$PROJECT}
- **State:** docs/TASKS.md (${tasks_done}/${tasks_total} tasks complete)
- **Manifest:** docs/MISSION-MANIFEST.md
- **Scratchpad:** docs/scratchpads/${mission_id:-mission}.md
- **Protocol:** ~/.config/mosaic/guides/ORCHESTRATOR.md
- **Quality gates:** ${quality_gates:-—}

## Resume Point

- **Next task:** ${next_task:-check TASKS.md}

## Instructions

1. Read \`docs/MISSION-MANIFEST.md\` for mission scope
2. Read \`docs/scratchpads/${mission_id:-mission}.md\` for session history
3. Review and resolve any uncommitted changes first
4. Read \`docs/TASKS.md\` for current task state
5. Continue execution from the next pending task
6. You are the SOLE writer of \`docs/TASKS.md\`
EOF
fi