stack/scripts/analysis/reflect-board-history.sh

#!/usr/bin/env bash
# reflect-board-history.sh — Phase-0 experiment P3 (outcome detectability)
#
# Question: for completed tasks, how often does a machine-detectable
# correct/wrong outcome signal appear within a follow-up window (default 30d)?
# If the base rate is too low, predicted-vs-actual calibration (design §7) has
# nothing to score against, so the kernel should capture caveat-notes only.
#
# Method: consume a board/task export (JSONL, one task object per line) OR fall
# back to scanning the git history of a `data/` task directory. For each task
# that reached a "done"-like state, decide whether a later signal marks it
# correct or wrong (reopen, revert, follow-up "fix"/"regression", explicit
# outcome field). Emit the detectable-outcome base rate. HARNESS + RUBRIC.
#
# Usage:
#   scripts/analysis/reflect-board-history.sh --jsonl FILE [--window-days N] [--json|--md]
#   scripts/analysis/reflect-board-history.sh --data-dir DIR [--window-days N] [--json|--md]
#
# JSONL fields read (best-effort): .status .outcome .reopened_at .followups
#   (free-form). .id and .completed_at are not consulted, and --window-days is
#   not applied to --jsonl input. Missing fields are tolerated.
#
# Requirements: jq (for --jsonl), git (for --data-dir), awk.
#
# PRE-REGISTERED KILL CONDITION:
#   detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop;
#   capture caveat-notes only.

set -euo pipefail

# Defaults; overwritten by parse_args.
JSONL=""
DATA_DIR=""
WINDOW_DAYS=30
FORMAT="json"

# print_help — print the comment header that directly follows the shebang.
# The header is located by content (contiguous '#' lines after '#!') rather
# than by a fixed line range, so it never spills into the code below it.
print_help() {
  awk '
    /^#!/             { in_header = 1; next }
    in_header && /^#/ { print; next }
    in_header         { exit }
  ' "$0"
}

# require_value FLAG REMAINING — exit 2 with a usage error when FLAG is the
# last argument, i.e. REMAINING (the current "$#") leaves no room for a value.
# Without this check, reading "$2" under `set -u` aborts with an opaque
# "unbound variable" message.
require_value() {
  if (( $2 < 2 )); then
    echo "missing value for $1" >&2
    exit 2
  fi
}

# parse_args ARGS... — populate JSONL, DATA_DIR, WINDOW_DAYS and FORMAT.
# Exits 0 after printing help; exits 2 on an unknown flag, a flag with no
# value, or a --window-days value that is not a non-negative integer.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --jsonl)
        require_value "$1" "$#"
        JSONL="$2"
        shift 2
        ;;
      --data-dir)
        require_value "$1" "$#"
        DATA_DIR="$2"
        shift 2
        ;;
      --window-days)
        require_value "$1" "$#"
        # The value is later handed to `git log --since` and printed with %d,
        # so anything but a plain integer would be misread downstream.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "--window-days expects a non-negative integer, got: $2" >&2
          exit 2
        fi
        WINDOW_DAYS="$2"
        shift 2
        ;;
      --json) FORMAT="json"; shift ;;
      --md) FORMAT="md"; shift ;;
      -h|--help) print_help; exit 0 ;;
      *) echo "unknown arg: $1" >&2; exit 2 ;;
    esac
  done
}

parse_args "$@"

# Pre-registered kill condition, held in one variable so the stderr banner and
# the report quote the same text.
KILL_CONDITION="detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop"
# Announce it on stderr, keeping stdout reserved for the report itself.
printf '# pre-registered kill condition: %s\n' "$KILL_CONDITION" 1>&2

# Counters filled in by count_jsonl / count_data_dir.
done_total=0
detectable=0

# count_jsonl FILE — set done_total and detectable from a JSONL task export.
#
# A task is "done-like" when .status is a string matching
# done|complete|closed (case-insensitive). It has a detectable outcome when
# .outcome is non-null, .reopened_at is set, or .followups is non-empty.
# Non-object lines and non-string statuses are skipped rather than treated as
# errors. WINDOW_DAYS is not applied in this mode.
#
# Exits 3 when jq cannot process FILE: reporting 0/0 instead would silently
# yield a KILL verdict from unreadable input.
count_jsonl() {
  local src=$1 counts
  # Single pass: slurp once, derive both counts, print them as "DONE HITS".
  if ! counts="$(jq -rs '
        [ .[]
          | objects
          | select(.status | strings | test("done|complete|closed"; "i"))
        ] as $done
        | [ $done[]
            | select(
                # Compared against null directly: ".outcome // null" would
                # also discard an explicit false outcome.
                .outcome != null
                or (.reopened_at // null) != null
                or ((.followups // []) | (try length catch 0)) > 0
              )
          ] as $hits
        | "\($done | length) \($hits | length)"
      ' "$src")"; then
    echo "jq could not process $src" >&2
    exit 3
  fi
  read -r done_total detectable <<<"$counts"
}

# count_data_dir DIR WINDOW_DAYS — set done_total and detectable by scanning
# the git history of the *.json files under DIR.
#
# Proxy: every *.json file counts as one done-like task; it has a detectable
# outcome when a commit touching it within the last WINDOW_DAYS days (relative
# to now, via `git log --since`) has a subject that signals a correction.
#
# Exits 3 when DIR is not inside a git repository, since every file would
# otherwise be scored as "no signal".
count_data_dir() {
  local dir=$1 window=$2 file subjects
  if ! git -C "$dir" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    echo "not inside a git repository: $dir" >&2
    exit 3
  fi
  while IFS= read -r -d '' file; do
    done_total=$((done_total + 1))
    # Subjects are captured first instead of piping git into `grep -q`: under
    # pipefail an early grep exit can SIGPIPE git and turn a match into a miss.
    # A failing git log (e.g. a repository without commits) counts as no signal.
    subjects="$(git -C "$dir" log --since="${window} days ago" --pretty='%s' \
      -- "$file" 2>/dev/null)" || subjects=""
    if grep -qiE 'reopen|revert|fix|regression|wrong|incorrect|redo' <<<"$subjects"; then
      detectable=$((detectable + 1))
    fi
  # find runs from inside DIR so the paths are relative to it, which is what
  # `git -C DIR` expects; paths prefixed with DIR would be resolved twice.
  # NUL-delimited to survive unusual file names; .git itself is not scanned.
  done < <(CDPATH= cd -- "$dir" \
    && find . -name .git -prune -o -type f -name '*.json' -print0 2>/dev/null)
}

if [[ -n "$JSONL" ]]; then
  command -v jq >/dev/null 2>&1 || { echo "jq required for --jsonl" >&2; exit 3; }
  [[ -r "$JSONL" ]] || { echo "cannot read $JSONL" >&2; exit 3; }
  count_jsonl "$JSONL"
elif [[ -n "$DATA_DIR" ]]; then
  command -v git >/dev/null 2>&1 || { echo "git required for --data-dir" >&2; exit 3; }
  [[ -d "$DATA_DIR" ]] || { echo "no such dir: $DATA_DIR" >&2; exit 3; }
  count_data_dir "$DATA_DIR" "$WINDOW_DAYS"
else
  echo "provide --jsonl FILE or --data-dir DIR" >&2
  exit 2
fi

# compute_rate DONE DETECTABLE — print 100*DETECTABLE/DONE with one decimal,
# or "0.0" when DONE is zero. Values reach awk through -v instead of being
# spliced into the program text; LC_ALL=C keeps '.' as the decimal separator
# so the number can be embedded in JSON unchanged.
compute_rate() {
  LC_ALL=C awk -v total="$1" -v hits="$2" 'BEGIN {
    if (total + 0 == 0) {
      print "0.0"
    } else {
      printf "%.1f\n", 100 * hits / total
    }
  }'
}

# compute_verdict DONE DETECTABLE — print the verdict for the pre-registered
# kill condition (base rate < 20%). The comparison uses the exact integer
# counts, not the rounded display rate: 100/501 is 19.96%, which prints as
# 20.0 but is still below the threshold. Zero done tasks also yields KILL.
compute_verdict() {
  local total=$1 hits=$2
  if (( total == 0 || 100 * hits < 20 * total )); then
    printf '%s\n' "KILL §7 — caveat-notes only"
  else
    printf '%s\n' "signal present — proceed"
  fi
}

rate="$(compute_rate "$done_total" "$detectable")"
verdict="$(compute_verdict "$done_total" "$detectable")"

# Render the report on stdout: Markdown when FORMAT is "md", JSON otherwise.
case "$FORMAT" in
  md)
    # One argument per output line; the empty argument is the blank line
    # after the heading.
    printf '%s\n' \
      "## P3 — outcome detectability" \
      "" \
      "- done-like tasks: **${done_total}**" \
      "- with machine-detectable outcome (window ${WINDOW_DAYS}d): **${detectable}**" \
      "- base rate: **${rate}%**" \
      "- kill condition: ${KILL_CONDITION}" \
      "- verdict: **${verdict}**"
    ;;
  *)
    # The format string is assembled field by field, then emitted with a
    # single printf; counts and window go through %d, the rest through %s.
    awk -v window="$WINDOW_DAYS" -v total="$done_total" -v hits="$detectable" \
        -v pct="$rate" -v cond="$KILL_CONDITION" -v outcome="$verdict" 'BEGIN{
      layout = "{\n"
      layout = layout "  \"experiment\": \"P3-board-history\",\n"
      layout = layout "  \"window_days\": %d,\n"
      layout = layout "  \"done_tasks\": %d,\n"
      layout = layout "  \"detectable_outcomes\": %d,\n"
      layout = layout "  \"base_rate_pct\": %s,\n"
      layout = layout "  \"kill_condition\": \"%s\",\n"
      layout = layout "  \"verdict\": \"%s\"\n"
      layout = layout "}\n"
      printf layout, window, total, hits, pct, cond, outcome
    }'
    ;;
esac