#!/usr/bin/env bash
# reflect-stop-hook.sh — Stop hook (agent reflection loop, durable kernel)
#
# At end-of-run, capture the doer's end-state as a structured `reflection.v1`
# sidecar: the mechanical diff risk-floor plus any self-report the agent left
# behind. This is the passive capture half of the design (§10 step 1). It does
# NOT route, score, or gate — it only writes the sidecar; pickup is future work.
#
# FAIL-CLOSED: if REFLECTION_MODE is unset, "off", or any value other than
# "solo"/"orchestrated", this is a strict no-op. Global registration is
# therefore safe; the feature only activates when a launcher/profile
# explicitly sets REFLECTION_MODE=solo|orchestrated.
#
# NON-BLOCKING: Stop hooks are observational. This script NEVER emits a
# `decision` field and ALWAYS exits 0 — it can never fail or stall a session.
#
# Environment contract:
#   REFLECTION_MODE            off|solo|orchestrated (default: off → no-op)
#   REFLECTION_DIR             output dir            (default: <repo>/.mosaic/reflections)
#   REFLECTION_INPUT           self-report JSON      (default: <repo>/.mosaic/reflection-input.json)
#   REFLECTION_TASK_REF        canonical task ref    (default: <repo-name>#<branch>)
#   REFLECTION_AGENT           persona/runtime id    (default: unknown)
#   REFLECTION_RISK_THRESHOLD  review cutoff [0,1]   (default: 0.5)
#
# <repo> is the git toplevel resolved from the payload's `cwd` (falling back
# to $PWD, and to that directory itself when it is not inside a git work tree).
#
# Risk-floor surface table is kept in sync with the authoritative TS
# implementation at packages/macp/src/risk-floor.ts (evaluateRiskFloor).
#
# Exit codes: always 0 (observational hook).

set -euo pipefail

# ---- fail-closed gate -------------------------------------------------------
MODE="${REFLECTION_MODE:-off}"
if [[ "$MODE" != "solo" && "$MODE" != "orchestrated" ]]; then
  exit 0
fi

# Read the Stop payload (best-effort; never required).
# When stdin is an interactive terminal there is no payload to read, and `cat`
# would block until the user sends EOF — violating the never-stall contract —
# so treat that case as an empty payload.
if [[ -t 0 ]]; then
  INPUT=""
else
  INPUT="$(cat || true)"
fi

# Sentinel lock path (global so the EXIT trap can clean it after main returns).
LOCKFILE=""

# EXIT-trap handler: remove the sentinel lock, but only when this run actually
# took it (LOCKFILE stays empty if main bailed out before acquiring the lock,
# so a lock owned by a concurrent run is never deleted from here).
cleanup_lock() {
  if [[ -n "${LOCKFILE:-}" ]]; then
    rm -f -- "$LOCKFILE" 2>/dev/null || true
  fi
}
trap cleanup_lock EXIT

# float_gt A B → exit 0 when A > B (numeric). Operands are handed to awk with
# -v rather than spliced into the program text, so they are never parsed as code.
float_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

# float_ge A B → exit 0 when A >= B (numeric). Same contract as float_gt.
float_ge() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) >= (b + 0)) }'
}

#######################################
# Build and atomically write one `reflection.v1` sidecar for this session.
# Globals:
#   INPUT    (read)    raw Stop-hook payload JSON; may be empty
#   MODE     (read)    reflection mode, recorded in provenance
#   LOCKFILE (written) path of the sentinel lock once acquired
#   REFLECTION_DIR, REFLECTION_INPUT, REFLECTION_TASK_REF, REFLECTION_AGENT,
#   REFLECTION_RISK_THRESHOLD (read) optional overrides
# Outputs:
#   <out_dir>/<session_id>-<timestamp>.reflection.json on success
# Returns:
#   0 on every handled path (missing jq, lock held, unwritable dir, ...);
#   the caller additionally ignores any non-zero status.
#######################################
main() {
  command -v jq >/dev/null 2>&1 || return 0 # no jq → silently no-op

  local session_id payload_cwd repo_dir repo_name branch task_ref agent
  session_id="$(jq -r '.session_id // "unknown"' <<< "${INPUT:-}" 2>/dev/null || echo unknown)"
  # Sanitize: session_id is interpolated into file/lock paths — allow safe
  # filename chars only (defends against ../ or / in the payload).
  session_id="${session_id//[^a-zA-Z0-9_-]/}"
  session_id="${session_id:-unknown}"
  payload_cwd="$(jq -r '.cwd // empty' <<< "${INPUT:-}" 2>/dev/null || true)"

  # Resolve repo root: prefer git toplevel from the payload cwd, else PWD.
  # Outside a git work tree the start directory itself is used.
  local start_dir="${payload_cwd:-$PWD}"
  repo_dir="$(git -C "$start_dir" rev-parse --show-toplevel 2>/dev/null || echo "$start_dir")"
  repo_name="$(basename "$repo_dir")"
  branch="$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD 2>/dev/null || echo detached)"
  task_ref="${REFLECTION_TASK_REF:-${repo_name}#${branch}}"
  agent="${REFLECTION_AGENT:-unknown}"

  # ---- sentinel guard: avoid re-fire loops --------------------------------
  local out_dir lock
  out_dir="${REFLECTION_DIR:-${repo_dir}/.mosaic/reflections}"
  mkdir -p "$out_dir" 2>/dev/null || return 0
  lock="${out_dir}/.${session_id}.lock"
  # With noclobber set, `>` refuses to overwrite an existing file, so the
  # "is it there? / create it" pair collapses into one atomic step and two
  # concurrent runs for the same session cannot both take the lock. The
  # subshell keeps noclobber from leaking into the rest of the script.
  if ! (set -o noclobber && : > "$lock") 2>/dev/null; then
    return 0 # lock already held (or directory not writable) → nothing to do
  fi
  LOCKFILE="$lock"

  # ---- mechanical: changed files ------------------------------------------
  # Union of committed-vs-HEAD~ is out of scope; capture the working surface:
  # staged + unstaged + untracked, best-effort.
  # Exclude .mosaic/ (agent scratch: reflections, locks, self-report input) —
  # it is tooling state, not part of the diff under review.
  # core.quotepath=off stops git from octal-escaping and double-quoting
  # non-ASCII path names, which would otherwise corrupt both the recorded
  # names and the suffix-anchored surface patterns (e.g. `\.md$`).
  local files
  files="$(
    {
      git -c core.quotepath=off -C "$repo_dir" diff --name-only HEAD 2>/dev/null || true
      git -c core.quotepath=off -C "$repo_dir" diff --name-only --staged 2>/dev/null || true
      git -c core.quotepath=off -C "$repo_dir" ls-files --others --exclude-standard 2>/dev/null || true
    } | sed '/^$/d' | grep -v '^\.mosaic/' | sort -u || true
  )"

  # ---- mechanical: risk-floor (inline port of evaluateRiskFloor) ----------
  # The threshold comes from the environment: accept only a plain decimal
  # (optional sign/exponent) and fall back to the default otherwise, so a
  # malformed value can neither break nor silently skew the comparison.
  local threshold="${REFLECTION_RISK_THRESHOLD:-0.5}"
  local numeric_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
  if [[ ! "$threshold" =~ $numeric_re ]]; then
    threshold="0.5"
  fi

  local top_surface="none" top_weight="0.0" tripping=""
  local f surface weight
  local file_count=0
  while IFS= read -r f; do
    if [[ -z "$f" ]]; then
      continue
    fi
    file_count=$((file_count + 1))
    surface="$(classify_surface "$f")"
    weight="$(surface_weight "$surface")"
    if float_gt "$weight" "$top_weight"; then
      # New highest-risk surface: restart the list of tripping files.
      top_weight="$weight"
      top_surface="$surface"
      tripping="$f"
    elif [[ "$surface" == "$top_surface" && "$surface" != "none" ]]; then
      # Same surface as the current maximum (hence the same weight, since
      # weight is a pure function of surface): add to the tripping list.
      tripping="${tripping:+$tripping, }$f"
    fi
  done <<< "$files"

  local needs_review reason
  if float_ge "$top_weight" "$threshold"; then
    needs_review=true
  else
    needs_review=false
  fi
  if [[ "$top_surface" == "none" ]]; then
    if [[ "$file_count" -eq 0 ]]; then
      reason="no files changed"
    else
      reason="no sensitive surface in ${file_count} changed file(s)"
    fi
  else
    reason="${top_surface} surface (weight ${top_weight}) in: ${tripping}"
  fi

  # ---- self-report merge (optional) ---------------------------------------
  local input_file degraded self_json extracted
  input_file="${REFLECTION_INPUT:-${repo_dir}/.mosaic/reflection-input.json}"
  degraded=true
  self_json='{"confidence":null,"most_likely_wrong":null,"known_not_in_diff":null}'
  # Only mark the record as non-degraded when the three fields were really
  # extracted: the file must hold exactly one JSON object. `select` yields no
  # output for non-objects (jq -e then exits non-zero), and -c prints one line
  # per document, so a newline in the result means several documents.
  if [[ -r "$input_file" ]] \
    && extracted="$(jq -ce 'select(type == "object") | {
         confidence: (.confidence // null),
         most_likely_wrong: (.most_likely_wrong // null),
         known_not_in_diff: (.known_not_in_diff // null)
       }' "$input_file" 2>/dev/null)" \
    && [[ -n "$extracted" && "$extracted" != *$'\n'* ]]; then
    self_json="$extracted"
    degraded=false
  fi

  # ---- assemble + atomic write --------------------------------------------
  local ts files_json record tmp final
  ts="$(date -u +%Y-%m-%dT%H:%M:%S.000Z)"
  files_json="$(jq -Rn '[inputs | select(length > 0)]' <<< "$files" 2>/dev/null || true)"

  record="$(jq -n \
    --arg task_ref "$task_ref" \
    --arg agent "$agent" \
    --arg session_id "$session_id" \
    --arg ts "$ts" \
    --arg repo "$repo_name" \
    --argjson needs_review "$needs_review" \
    --argjson score "$top_weight" \
    --arg surface "$top_surface" \
    --arg reason "$reason" \
    --argjson files "$files_json" \
    --argjson self "$self_json" \
    --argjson degraded "$degraded" \
    --arg mode "${MODE:-}" \
    '{
      schema: "reflection.v1",
      task_ref: $task_ref,
      agent: $agent,
      session_id: $session_id,
      timestamp: $ts,
      repo: $repo,
      confidence: $self.confidence,
      most_likely_wrong: $self.most_likely_wrong,
      known_not_in_diff: $self.known_not_in_diff,
      risk: {
        needs_review: $needs_review,
        score: $score,
        surface: $surface,
        reason: $reason
      },
      files_changed: $files,
      provenance: {
        source: "stop-hook",
        reflection_attempt: 1,
        degraded: $degraded,
        reflection_mode: $mode
      }
    }' 2>/dev/null || true)"

  if [[ -z "$record" ]]; then
    return 0
  fi

  # Write to a sibling temp file and rename, so readers never observe a
  # partially written sidecar. Any leftover temp file is removed on failure.
  final="${out_dir}/${session_id}-${ts//[:]/}.reflection.json"
  tmp="${final}.tmp"
  if ! { printf '%s\n' "$record" > "$tmp"; } 2>/dev/null; then
    rm -f -- "$tmp" 2>/dev/null || true
    return 0
  fi
  if ! mv -f -- "$tmp" "$final" 2>/dev/null; then
    rm -f -- "$tmp" 2>/dev/null || true
  fi
  return 0
}

# classify_surface PATH → surface name (highest-risk match wins, mirrors TS)
# Rules are tried from highest to lowest risk and the first hit is printed.
# The auth/data/infra/build patterns are case-insensitive; ui/test/docs are
# case-sensitive. Prints "none" when nothing matches.
classify_surface() {
  local p="$1"
  if grep -qiE 'auth|login|session|token|permission|rbac|credential|secret' <<< "$p"; then
    echo auth
    return
  fi
  if grep -qiE 'migration|prisma|schema|\.sql|entity|repository|seed' <<< "$p"; then
    echo data
    return
  fi
  if grep -qiE 'docker|\.woodpecker|compose|traefik|deploy|helm|k8s|terraform' <<< "$p"; then
    echo infra
    return
  fi
  if grep -qiE 'package\.json|tsconfig|turbo\.json|pnpm-|\.config\.|eslint|vite' <<< "$p"; then
    echo build
    return
  fi
  if grep -qE '\.tsx|\.css|components/|apps/web/' <<< "$p"; then
    echo ui
    return
  fi
  if grep -qE '\.spec\.|\.test\.|__tests__/' <<< "$p"; then
    echo test
    return
  fi
  if grep -qE '\.md$|docs/' <<< "$p"; then
    echo docs
    return
  fi
  echo none
}

# surface_weight SURFACE → numeric weight (mirrors TS SURFACE_RULES)
# Unknown surfaces (including "none") weigh 0.0.
surface_weight() {
  case "$1" in
    auth) echo 1.0 ;;
    data) echo 0.9 ;;
    infra) echo 0.85 ;;
    build) echo 0.6 ;;
    ui) echo 0.4 ;;
    test) echo 0.2 ;;
    docs) echo 0.1 ;;
    *) echo 0.0 ;;
  esac
}

# Observational hook: swallow any failure from main and always exit 0.
main || true
exit 0