#!/usr/bin/env bash
# reflect-board-history.sh — Phase-0 experiment P3 (outcome detectability)
#
# Question: for completed tasks, how often does a machine-detectable
# correct/wrong outcome signal appear within a follow-up window (default 30d)?
# If the base rate is too low, predicted-vs-actual calibration (design §7) has
# nothing to score against, so the kernel should capture caveat-notes only.
#
# Method: consume a board/task export (JSONL, one task object per line) OR fall
# back to scanning the git history of a `data/` task directory. For each task
# that reached a "done"-like state, decide whether a later signal marks it
# correct or wrong (reopen, revert, follow-up "fix"/"regression", explicit
# outcome field). Emit the detectable-outcome base rate. HARNESS + RUBRIC.
#
# Usage:
#   scripts/analysis/reflect-board-history.sh --jsonl FILE [--window-days N] [--json|--md]
#   scripts/analysis/reflect-board-history.sh --data-dir DIR [--window-days N] [--json|--md]
#
# Note: --window-days only bounds the --data-dir scan (git log --since); the
# --jsonl path counts any recorded signal regardless of its age.
#
# JSONL fields used (best-effort): .id .status .completed_at .outcome
# .reopened_at .followups[] (free-form). Missing fields are tolerated.
#
# Requirements: jq (for --jsonl), git (for --data-dir), awk.
#
# PRE-REGISTERED KILL CONDITION:
#   detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop;
#   capture caveat-notes only.
set -euo pipefail JSONL="" DATA_DIR="" WINDOW_DAYS=30 FORMAT="json" while [[ $# -gt 0 ]]; do case "$1" in --jsonl) JSONL="$2"; shift 2 ;; --data-dir) DATA_DIR="$2"; shift 2 ;; --window-days) WINDOW_DAYS="$2"; shift 2 ;; --json) FORMAT="json"; shift ;; --md) FORMAT="md"; shift ;; -h|--help) sed -n '2,32p' "$0"; exit 0 ;; *) echo "unknown arg: $1" >&2; exit 2 ;; esac done KILL_CONDITION='detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop' echo "# pre-registered kill condition: ${KILL_CONDITION}" >&2 done_total=0 detectable=0 if [[ -n "$JSONL" ]]; then command -v jq >/dev/null 2>&1 || { echo "jq required for --jsonl" >&2; exit 3; } [[ -r "$JSONL" ]] || { echo "cannot read $JSONL" >&2; exit 3; } # Count done tasks and those with a machine-detectable outcome signal. done_total="$(jq -rs '[.[] | select((.status // "") | test("done|complete|closed"; "i"))] | length' "$JSONL" 2>/dev/null || echo 0)" detectable="$(jq -rs ' [ .[] | select((.status // "") | test("done|complete|closed"; "i")) | select( (.outcome // null) != null or (.reopened_at // null) != null or ((.followups // []) | length) > 0 ) ] | length' "$JSONL" 2>/dev/null || echo 0)" elif [[ -n "$DATA_DIR" ]]; then command -v git >/dev/null 2>&1 || { echo "git required for --data-dir" >&2; exit 3; } [[ -d "$DATA_DIR" ]] || { echo "no such dir: $DATA_DIR" >&2; exit 3; } # Proxy: a task file later touched by a commit whose subject signals a # correction is a "detectable outcome". 
while IFS= read -r file; do [[ -z "$file" ]] && continue done_total=$((done_total + 1)) if git -C "$DATA_DIR" log --since="${WINDOW_DAYS} days ago" --pretty='%s' -- "$file" 2>/dev/null \ | grep -qiE 'reopen|revert|fix|regression|wrong|incorrect|redo'; then detectable=$((detectable + 1)) fi done < <(find "$DATA_DIR" -type f -name '*.json' 2>/dev/null) else echo "provide --jsonl FILE or --data-dir DIR" >&2 exit 2 fi rate="$(awk "BEGIN{ if ($done_total==0) print \"0.0\"; else printf \"%.1f\", 100*$detectable/$done_total }")" verdict="$(awk "BEGIN{print ($rate < 20.0) ? \"KILL §7 — caveat-notes only\" : \"signal present — proceed\"}")" if [[ "$FORMAT" == "md" ]]; then cat <