feat(agent-reflection): durable kernel — reflection.v1 capture + risk-floor + Phase-0 (#545)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful

This commit was merged in pull request #545.
This commit is contained in:
2026-06-16 21:35:40 +00:00
parent c461380a4a
commit b8807e60df
17 changed files with 1498 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env bash
# reflect-calibration.sh — Phase-0 experiment P1 (confidence signal)
#
# Question: does an agent's self-reported confidence discriminate correct from
# incorrect work — especially on the self-rated-HIGH subset, where a closed
# loop would actually trust it? If confidence ≈ chance on the high subset, the
# signal is useless and design §7§8 should not be built.
#
# Method: consume a labelled corpus — JSONL of {confidence: 0..1, correct:
# true|false}. Compute discrimination as ROC AUC over all rows, plus the
# correct-rate (lift) on the high-confidence subset (>= threshold), and compare
# to the pre-registered chance baseline (the overall correct-rate). HARNESS +
# RUBRIC; the labelled corpus is supplied later.
#
# Usage:
# scripts/analysis/reflect-calibration.sh --jsonl FILE [--high 0.8] [--json|--md]
#
# Requirements: jq, awk.
#
# PRE-REGISTERED KILL CONDITION:
# AUC <= 0.60 OR high-subset lift <= +5pp over base rate
# ⇒ confidence is not a usable routing signal; do NOT build §7§8.
set -euo pipefail
JSONL=""
HIGH=0.8
FORMAT="json"
while [[ $# -gt 0 ]]; do
case "$1" in
--jsonl) JSONL="$2"; shift 2 ;;
--high) HIGH="$2"; shift 2 ;;
--json) FORMAT="json"; shift ;;
--md) FORMAT="md"; shift ;;
-h|--help) sed -n '2,27p' "$0"; exit 0 ;;
*) echo "unknown arg: $1" >&2; exit 2 ;;
esac
done
KILL_CONDITION='AUC <= 0.60 OR high-subset lift <= +5pp ⇒ do NOT build §7§8'
echo "# pre-registered kill condition: ${KILL_CONDITION}" >&2
command -v jq >/dev/null 2>&1 || { echo "jq required" >&2; exit 3; }
[[ -r "$JSONL" ]] || { echo "provide a readable --jsonl FILE" >&2; exit 2; }
# Normalise to "<confidence> <0|1>" rows; tolerate bad lines.
ROWS="$(jq -rs '
[ .[] | select((.confidence|type)=="number") |
"\(.confidence) \((.correct==true) | if . then 1 else 0 end)" ]
| .[]' "$JSONL" 2>/dev/null || true)"
if [[ -z "$ROWS" ]]; then
echo '{ "experiment": "P1-calibration", "error": "no usable rows" }'
exit 0
fi
# AUC via the MannWhitney U relation (rank-based); base rate; high-subset lift.
read -r N POS BASE AUC HIGH_N HIGH_CORRECT HIGH_RATE LIFT <<EOF
$(printf '%s\n' "$ROWS" | awk -v high="$HIGH" '
{ c=$1; y=$2; conf[NR]=c; lab[NR]=y; n++;
if (y==1) pos++; else neg++;
if (c>=high) { hn++; if (y==1) hc++ } }
END{
base = (n>0)? pos/n : 0;
# Rank-sum AUC: average ranks (ties → average rank).
# sort indices by confidence
for (i=1;i<=n;i++) idx[i]=i;
for (i=1;i<=n;i++) for (j=i+1;j<=n;j++) if (conf[idx[i]]>conf[idx[j]]) { t=idx[i]; idx[i]=idx[j]; idx[j]=t }
i=1;
while (i<=n) {
j=i; while (j<n && conf[idx[j+1]]==conf[idx[i]]) j++;
avg=(i+j)/2.0;
for (k=i;k<=j;k++) rank[idx[k]]=avg;
i=j+1;
}
rsum=0; for (i=1;i<=n;i++) if (lab[i]==1) rsum+=rank[i];
if (pos>0 && neg>0) auc=(rsum - pos*(pos+1)/2.0)/(pos*neg); else auc=0.5;
hrate=(hn>0)? hc/hn : 0;
lift=hrate-base;
printf "%d %d %.4f %.4f %d %d %.4f %.4f", n, pos, base, auc, hn, hc, hrate, lift
}')
EOF
verdict="$(awk -v auc="$AUC" -v lift="$LIFT" 'BEGIN{
print (auc <= 0.60 || lift <= 0.05) ? "KILL §7§8 — confidence not usable" : "signal present — proceed"
}')"
if [[ "$FORMAT" == "md" ]]; then
cat <<EOF
## P1 — confidence calibration
- rows: **${N}** (positives ${POS}) · base correct-rate **$(awk "BEGIN{printf \"%.1f\", 100*${BASE}}")%**
- ROC AUC: **${AUC}**
- high-confidence subset (>= ${HIGH}): n=${HIGH_N}, correct=${HIGH_CORRECT}, rate=$(awk "BEGIN{printf \"%.1f\", 100*${HIGH_RATE}}")%
- lift over base: **$(awk "BEGIN{printf \"%+.1f\", 100*${LIFT}}")pp**
- kill condition: ${KILL_CONDITION}
- verdict: **${verdict}**
EOF
else
awk -v n="$N" -v pos="$POS" -v base="$BASE" -v auc="$AUC" -v hn="$HIGH_N" \
-v hc="$HIGH_CORRECT" -v hr="$HIGH_RATE" -v lift="$LIFT" -v high="$HIGH" \
-v v="$verdict" -v kc="$KILL_CONDITION" 'BEGIN{
printf "{\n"
printf " \"experiment\": \"P1-calibration\",\n"
printf " \"rows\": %d,\n", n
printf " \"positives\": %d,\n", pos
printf " \"base_rate\": %.4f,\n", base
printf " \"auc\": %.4f,\n", auc
printf " \"high_threshold\": %s,\n", high
printf " \"high_subset\": { \"n\": %d, \"correct\": %d, \"rate\": %.4f },\n", hn, hc, hr
printf " \"lift_over_base\": %.4f,\n", lift
printf " \"kill_condition\": \"%s\",\n", kc
printf " \"verdict\": \"%s\"\n", v
printf "}\n"
}'
fi