feat(framework): P3 — extract Constitution (L0) + gut AGENTS dispatcher (#575)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful

Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #575.
This commit is contained in:
2026-06-21 03:20:32 +00:00
committed by jason.woltje
parent bf24066a49
commit 5118be74cb
14 changed files with 260 additions and 174 deletions

View File

@@ -2,24 +2,27 @@
# verify-sanitized.sh — blocking CI gate: the public framework package must
# contain no operator-specific personal data or private executable defaults.
#
# Two rule classes:
# 1. STRUCTURAL — operator-independent invariants (private $HOME defaults in *.sh).
# 2. DENYLIST — a LABELED, one-time regression guard for the CURRENT operator's
# identity tokens. This is NOT a general PII detector (a future
# operator's name can't be enumerated); the durable control is the
# L0 prose firewall + human review. This gate just stops *this*
# contamination from coming back.
# Two rule classes, with DELIBERATELY DIFFERENT scopes:
# 1. DENYLIST (identity) — a LABELED, one-time regression guard for the CURRENT
# operator's identity tokens. Scanned EVERYWHERE including examples/, because a
# jarvis/jason/private-home regression in a SHIPPED example would break the
# open-source guarantee just as badly as one in a default. NOT a general PII
# detector (a future operator's name can't be enumerated) — the durable control
# is the L0 framework-PR firewall + human review; this just stops re-contamination.
# 2. STRUCTURAL (private $HOME default in *.sh) — scanned everywhere EXCEPT examples/,
# because worked example overlays/personas legitimately show placeholder paths.
#
# Scope: all of the framework package — *.md, *.sh, *.ps1, and the CLI scripts under
# tools/_scripts/ (which are extensionless). Excluded: examples/ (holds
# sanitized, placeholdered worked examples), node_modules/, and this gate file.
# File types: *.md, *.sh, *.ps1, *.json, and the extensionless CLI scripts under
# tools/_scripts/. Excludes node_modules/ and this gate file.
#
# NOTE on scope: private THIRD-PARTY host references (e.g. a maintainer's employer
# Gitea) are intentionally NOT in this denylist — they are functionally entangled in
# host-routing + test fixtures and are tracked as a separate follow-up.
# NOTE: '\bPDA\b' intentionally matches "PDA-friendly" (the contamination removed in P2):
# a hyphen is a non-word character, so \b still matches between "A" and "-" and
# "PDA-foo" is flagged. If a future legitimate doc needs the literal token "PDA" in a
# non-personal sense, reword it or narrow this rule — do not weaken the gate silently.
#
# Self-tests run first: plant known tokens and assert the scan catches them, so a
# broken regex cannot silently no-op the gate.
# NOTE: private THIRD-PARTY host refs (e.g. a maintainer's employer Gitea) are NOT in
# this denylist — they are functionally entangled in host-routing + test fixtures and
# tracked as a separate follow-up.
#
# Usage: verify-sanitized.sh [FRAMEWORK_ROOT]
set -uo pipefail
@@ -28,59 +31,55 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Root of the framework package to scan: the first CLI argument when given and
# non-empty, otherwise the directory three levels above this script's directory.
FRAMEWORK_ROOT="${1:-$(cd "$SCRIPT_DIR/../../.." && pwd)}"
# Path of this gate script relative to the framework root. The file-list functions
# exclude it — presumably so the denylist literals below do not flag the gate itself.
SELF_REL="tools/quality/scripts/verify-sanitized.sh"
# Labeled current-contaminant denylist (extended regex; the scans apply it with -i).
# None of the tokens occurs inside innocuous words such as "comparison" or
# "jsonwebtoken". Only PDA needs \b anchors: matched case-insensitively, a bare "pda"
# would also fire inside e.g. "update". The other tokens match as plain substrings,
# which is why jarvis-brain is caught by 'jarvis'.
DENYLIST='jarvis|jason|woltje|brain\.woltje\.com|/home/jwoltje|\bPDA\b'
# Structural: a private $HOME path used as a shell default (e.g. ${VAR:-$HOME/src/...}).
# Matches ":-" or ":=" followed by $HOME/src/ or ${HOME}/src/.
STRUCTURAL_SH=':[-=]\$\{?HOME\}?/src/'
# Print the in-scope file list for $FRAMEWORK_ROOT, one NUL-terminated path per file.
# In scope: *.md, *.sh, *.ps1 and anything under a tools/_scripts/ directory.
# Out of scope: examples/, node_modules/ and the gate script itself ($SELF_REL).
_scope_files() {
  local -a wanted=(
    '(' -name '*.md' -o -name '*.sh' -o -name '*.ps1'
    -o -path '*/tools/_scripts/*' ')'
  )
  local -a skipped=(
    '!' -path '*/examples/*'
    '!' -path '*/node_modules/*'
    '!' -path "*/$SELF_REL"
  )
  find "$FRAMEWORK_ROOT" -type f "${wanted[@]}" "${skipped[@]}" -print0
}
# Verdict flag; set to 1 below when the denylist scan reports hits.
fail=0
# Enter the framework root; if that fails, report on stderr and exit with status 3.
cd "$FRAMEWORK_ROOT" || { echo "FRAMEWORK_ROOT not found: $FRAMEWORK_ROOT" >&2; exit 3; }
# Denylist scan over the files listed by _scope_files: extended regex (-E),
# case-insensitive (-i), with line numbers (-n), binary files skipped (-I).
# stderr of the xargs/grep stage is discarded and a non-zero status is ignored,
# so captured output is the only signal of a hit.
deny_hits="$(_scope_files | xargs -0 -r grep -nIEi "$DENYLIST" 2>/dev/null || true)"
if [[ -n "$deny_hits" ]]; then
echo "✗ [denylist] operator-identity tokens in shipped files:"
# Remove the first "$FRAMEWORK_ROOT/" on each hit line and prepend a space.
echo "$deny_hits" | sed "s#$FRAMEWORK_ROOT/##; s/^/ /"
fail=1
fi
# Identity scope: every shipped text file below the current directory, examples/
# INCLUDED. Selects *.md, *.sh, *.ps1, *.json and anything under a tools/_scripts/
# directory; leaves out node_modules/ and the gate script itself ($SELF_REL).
# Paths are written to stdout NUL-terminated.
_files_identity() {
  local -a keep=(
    '(' -name '*.md' -o -name '*.sh' -o -name '*.ps1' -o -name '*.json'
    -o -path '*/tools/_scripts/*' ')'
  )
  local -a drop=( '!' -path '*/node_modules/*' '!' -path "./$SELF_REL" )
  find . -type f "${keep[@]}" "${drop[@]}" -print0
}
# Structural scope: shipped scripts below the current directory, examples/ EXCLUDED.
# Selects *.sh and anything under a tools/_scripts/ directory; leaves out examples/,
# node_modules/ and the gate script itself ($SELF_REL). Output is NUL-terminated.
_files_structural() {
  local -a scripts=( '(' -name '*.sh' -o -path '*/tools/_scripts/*' ')' )
  local -a excluded=(
    '!' -path '*/examples/*'
    '!' -path '*/node_modules/*'
    '!' -path "./$SELF_REL"
  )
  find . -type f "${scripts[@]}" "${excluded[@]}" -print0
}
# Structural scan over the files listed by _scope_files (extended regex, line
# numbers, binary files skipped). The second grep keeps only hit lines that
# contain ".sh:" or "/tools/_scripts/", narrowing the result to script files.
# stderr of the xargs/grep stage is discarded and a non-zero status is ignored.
struct_hits="$(_scope_files | xargs -0 -r grep -nIE "$STRUCTURAL_SH" 2>/dev/null \
| grep -E '\.sh:|/tools/_scripts/' || true)"
if [[ -n "$struct_hits" ]]; then
echo "✗ [structural] private \$HOME/src default in a shipped script:"
# Remove the first "$FRAMEWORK_ROOT/" on each hit line and prepend a space.
echo "$struct_hits" | sed "s#$FRAMEWORK_ROOT/##; s/^/ /"
fail=1
fi
# ---- self-test: the gate must catch planted tokens ----
# ---- self-test FIRST: a broken regex must never silently no-op the gate ----
# Self-test: plant known-bad content in a temp directory and check that both
# regexes ($DENYLIST, $STRUCTURAL_SH) match it. Returns 0 when both match,
# 1 when mktemp fails or either regex misses (a message goes to stderr).
_selftest() {
local tmp; tmp="$(mktemp -d)" || return 1
# Identity sample. The second printf overwrites the first, so only the
# "PDA-friendly" variant is actually tested.
printf 'contact jason.woltje at jarvis-brain (PDA note)\n' > "$tmp/planted.md"
printf 'contact jason.woltje at jarvis-brain (PDA-friendly)\n' > "$tmp/planted.md"
# Structural sample: a $HOME/src path used as a parameter default.
printf 'X="${VAR:-$HOME/src/whatever/x.json}"\n' > "$tmp/planted.sh"
local ok=0
grep -qIEi "$DENYLIST" "$tmp/planted.md" || { echo "✗ SELF-TEST: denylist regex broken" >&2; ok=1; }
grep -qIE "$STRUCTURAL_SH" "$tmp/planted.sh" || { echo "✗ SELF-TEST: structural regex broken" >&2; ok=1; }
rm -rf "$tmp"
return $ok
# NOTE(review): everything from here to the closing brace is unreachable as
# written (it follows an unconditional return) and repeats the checks above
# with a different variable name and message — looks like two revisions of the
# body interleaved; confirm which one is intended.
local rc=0
grep -qIEi "$DENYLIST" "$tmp/planted.md" || { echo "✗ SELF-TEST: identity denylist regex broken" >&2; rc=1; }
grep -qIE "$STRUCTURAL_SH" "$tmp/planted.sh" || { echo "✗ SELF-TEST: structural regex broken" >&2; rc=1; }
rm -rf "$tmp"; return $rc
}
# A failed self-test aborts the gate with exit status 2.
_selftest || exit 2
# Verdict flag: flips to 1 when either scan below reports a hit.
fail=0

# _scan_hits LISTER GREP_FLAGS PATTERN
#   LISTER      name of a function that prints NUL-delimited file paths
#   GREP_FLAGS  one bundled grep option word, e.g. -nIEi
#   PATTERN     regex handed to grep via -e
# Prints every matching line as "path:line:text". -H is always passed so the
# path prefix is present even when xargs hands grep exactly one file; grep
# omits the filename for a single operand, which would leave such hits
# unattributed in the report.
# Always returns 0: grep's "no match" status is expected, and stderr of the
# xargs/grep stage is discarded, so captured output is the only hit signal.
_scan_hits() {
  local lister=$1 grep_flags=$2 pattern=$3
  "$lister" | xargs -0 -r grep -H "$grep_flags" -e "$pattern" 2>/dev/null || true
}

# Identity scan: case-insensitive denylist over every file from _files_identity.
deny_hits="$(_scan_hits _files_identity -nIEi "$DENYLIST")"
if [[ -n "$deny_hits" ]]; then
  echo "✗ [denylist] operator-identity tokens in shipped files (examples/ included):"
  # Drop the leading "./" that find prints, then indent each hit by one space.
  printf '%s\n' "$deny_hits" | sed "s#^\./##; s/^/ /"
  fail=1
fi

# Structural scan: private $HOME/src defaults in the files from _files_structural.
struct_hits="$(_scan_hits _files_structural -nIE "$STRUCTURAL_SH")"
if [[ -n "$struct_hits" ]]; then
  echo "✗ [structural] private \$HOME/src default in a shipped script:"
  printf '%s\n' "$struct_hits" | sed "s#^\./##; s/^/ /"
  fail=1
fi
# Final verdict. On failure: a blank line on stdout, the explanation on stderr,
# exit status 1. Otherwise the success banner lines go to stdout.
if [[ "$fail" -ne 0 ]]; then
  printf '\n'
  # Single quotes keep $HOME literal in the message text.
  # shellcheck disable=SC2016
  printf '%s\n' \
    'Sanitization gate FAILED. Public framework files must not contain operator identity' \
    'or private $HOME defaults. Move personal content to init-generated files or examples/.' \
    'or private $HOME defaults. Move personal content to init-generated files or genericize.' >&2
  exit 1
fi
printf '%s\n' \
  '✓ sanitization gate passed (framework *.md/*.sh/*.ps1/_scripts; examples/ excluded)' \
  '✓ sanitization gate passed (identity scan incl. examples/; structural scan excl. examples/)'