Adds tools/quality/scripts/verify-sanitized.sh (two-class, self-tested) wired
blocking in .woodpecker/ci.yml; sanitizes operator identity from the public
framework package so the gate is green.
- purge jarvis/jason/woltje/PDA across 26 files -> generic
- delete jarvis-loop.json overlay; add neutral examples/{personas,overlays}
- relocate maintainer AUDIT to docs/audits/; delete 2 jarvis-brain rule blocks
- neutralize SOUL persona; strip "(Policy: Jason ...)" keeping universal rule
- test fixtures jason.woltje -> ci-bot (both git tests pass)
Deferred (tracked): private third-party host (uscllc) genericization.
Refs #542, closes #571
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
87 lines
3.7 KiB
Bash
Executable File
87 lines
3.7 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# verify-sanitized.sh — blocking CI gate: the public framework package must
|
|
# contain no operator-specific personal data or private executable defaults.
|
|
#
|
|
# Two rule classes:
|
|
# 1. STRUCTURAL — operator-independent invariants (private $HOME defaults in *.sh).
|
|
# 2. DENYLIST — a LABELED, one-time regression guard for the CURRENT operator's
|
|
# identity tokens. This is NOT a general PII detector (a future
|
|
# operator's name can't be enumerated); the durable control is the
|
|
# L0 prose firewall + human review. This gate just stops *this*
|
|
# contamination from coming back.
|
|
#
|
|
# Scope: all of the framework package — *.md, *.sh, *.ps1, and the CLI scripts under
|
|
# tools/_scripts/ (which are extensionless). Excluded: examples/ (holds
|
|
# sanitized, placeholdered worked examples), node_modules/, and this gate file.
|
|
#
|
|
# NOTE on scope: private THIRD-PARTY host references (e.g. a maintainer's employer
|
|
# Gitea) are intentionally NOT in this denylist — they are functionally entangled in
|
|
# host-routing + test fixtures and are tracked as a separate follow-up.
|
|
#
|
|
# Self-test: plant known tokens and assert both regexes match them, so a broken
# regex cannot silently no-op the gate. It runs after the scans but before the
# verdict, and a self-test failure exits 2.
|
|
#
|
|
# Usage: verify-sanitized.sh [FRAMEWORK_ROOT]
|
|
# No `-e`: grep legitimately exits 1 on "no match", and the scans below rely on
# inspecting captured output rather than on exit statuses.
set -uo pipefail

# Print the absolute path of directory $1 on stdout.
# Returns non-zero (and prints nothing) if the directory cannot be entered.
# CDPATH is cleared so a relative argument is resolved against the cwd only.
_resolve_root() {
  (CDPATH='' cd -- "$1" 2>/dev/null && pwd)
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Always canonicalize the root to an absolute path. A relative argument kept
# verbatim would be resolved a second time after the script changes directory
# into the root, the file search would then find nothing, and the gate would
# pass without scanning anything.
FRAMEWORK_ROOT="$(_resolve_root "${1:-$SCRIPT_DIR/../../..}")" || {
  echo "FRAMEWORK_ROOT not found: ${1:-$SCRIPT_DIR/../../..}" >&2
  exit 3
}

# Path of this gate file relative to the framework root (excluded from scans,
# because it necessarily contains the denylist tokens).
SELF_REL="tools/quality/scripts/verify-sanitized.sh"

# Labeled current-contaminant denylist (matched case-insensitively by the scans).
# Only PDA is word-anchored, so words that merely contain "pda" (e.g. "update")
# do not match. The name tokens deliberately match as substrings: jarvis-brain is
# caught by 'jarvis', and jwoltje / brain.woltje.com are also caught by 'woltje'.
DENYLIST='jarvis|jason|woltje|brain\.woltje\.com|/home/jwoltje|\bPDA\b'

# Structural: a private $HOME path used as a shell default, in either the
# ${VAR:-$HOME/src/...} or ${VAR:=${HOME}/src/...} form.
STRUCTURAL_SH=':[-=]\$\{?HOME\}?/src/'
|
|
|
|
#######################################
# Emit the in-scope file list, NUL-delimited, as paths prefixed with
# FRAMEWORK_ROOT.
#
# In scope: regular files named *.md, *.sh, *.ps1, plus anything under a
# tools/_scripts/ directory. Out of scope: everything beneath a directory named
# examples or node_modules, and the gate file itself (SELF_REL).
#
# The search runs relative to the root so the exclusion patterns are matched
# only against paths INSIDE the package. Matching them against the absolute
# path would exclude every file whenever the checkout itself lives under a
# directory called examples/ or node_modules/, and the gate would pass without
# scanning anything.
#
# Globals:   FRAMEWORK_ROOT (read), SELF_REL (read)
# Arguments: none
# Outputs:   NUL-terminated file paths on stdout
# Returns:   non-zero if the root cannot be entered or find reports an error
#            (when the caller has pipefail enabled)
#######################################
_scope_files() {
  local rel
  (
    CDPATH='' cd -- "$FRAMEWORK_ROOT" || exit 1
    # Prune excluded directories instead of filtering their files afterwards,
    # so find never descends into node_modules/ or examples/.
    find . \
      -type d \( -name examples -o -name node_modules \) -prune -o \
      -type f \
      \( -name '*.md' -o -name '*.sh' -o -name '*.ps1' -o -path '*/tools/_scripts/*' \) \
      ! -path "*/$SELF_REL" \
      -print0
  ) | while IFS= read -r -d '' rel; do
    # Re-attach the root so callers keep receiving root-prefixed paths.
    printf '%s\0' "$FRAMEWORK_ROOT/${rel#./}"
  done
}
|
|
|
|
# Set to 1 by either scan below; read when the final verdict is produced.
fail=0
cd "$FRAMEWORK_ROOT" || { echo "FRAMEWORK_ROOT not found: $FRAMEWORK_ROOT" >&2; exit 3; }

# Print the grep hits in $1, one per line, indented by one space and with the
# leading "$FRAMEWORK_ROOT/" removed. The prefix is stripped literally (not as a
# regex), so a root path containing '#', '.', or other metacharacters is safe.
_print_hits() {
  local line
  while IFS= read -r line; do
    printf ' %s\n' "${line#"$FRAMEWORK_ROOT"/}"
  done <<<"$1"
}

# Denylist scan (case-insensitive). -H forces the "file:" prefix even when
# xargs hands grep a single file; without it grep prints only "line:text".
# `|| true` is deliberate: grep exits 1 when nothing matches, which is the
# passing case, and the verdict is taken from the captured output instead.
deny_hits="$(_scope_files | xargs -0 -r grep -nHIEi -e "$DENYLIST" -- 2>/dev/null || true)"
if [[ -n "$deny_hits" ]]; then
  echo "✗ [denylist] operator-identity tokens in shipped files:"
  _print_hits "$deny_hits"
  fail=1
fi

# Structural scan (case-sensitive), narrowed to shell scripts and tools/_scripts/.
# The narrowing filter keys on the "file:" prefix, so -H is required here: a
# lone .sh file passed to grep would otherwise produce no filename and its hit
# would be filtered out.
struct_hits="$(_scope_files | xargs -0 -r grep -nHIE -e "$STRUCTURAL_SH" -- 2>/dev/null \
  | grep -E '\.sh:|/tools/_scripts/' || true)"
if [[ -n "$struct_hits" ]]; then
  echo "✗ [structural] private \$HOME/src default in a shipped script:"
  _print_hits "$struct_hits"
  fail=1
fi
|
|
|
|
# ---- self-test: the gate must catch planted tokens ----
#######################################
# Prove that both gate regexes still match known-bad input.
#
# Each denylist token is planted on its own line and checked separately. If all
# tokens shared one line, a single working alternative would hide a broken one
# (for example a mistyped \bPDA\b), and the gate would silently stop guarding
# that token.
#
# Globals:   DENYLIST (read), STRUCTURAL_SH (read)
# Arguments: none
# Outputs:   one "✗ SELF-TEST" line on stderr per sample that was not caught
# Returns:   0 if every planted sample is caught; 1 otherwise, or if the
#            temporary directory cannot be created
#######################################
_selftest() {
  local tmp sample rc=0
  tmp="$(mktemp -d)" || return 1

  # One sample per denylist alternative, plus a lower-case variant because the
  # denylist scan is case-insensitive.
  local -a deny_samples=(
    'ask jarvis now'
    'contact jason'
    'woltje'
    'brain.woltje.com'
    '/home/jwoltje'
    '(PDA note)'
    'a pda device'
  )
  # Both default-assignment operators and both $HOME spellings.
  # shellcheck disable=SC2016 # the literal, unexpanded text is the fixture
  local -a struct_samples=(
    'X="${VAR:-$HOME/src/whatever/x.json}"'
    'X="${VAR:=${HOME}/src/whatever/x.json}"'
  )

  for sample in "${deny_samples[@]}"; do
    printf '%s\n' "$sample" > "$tmp/planted.md"
    if ! grep -qIEi -e "$DENYLIST" -- "$tmp/planted.md"; then
      echo "✗ SELF-TEST: denylist regex broken (missed: $sample)" >&2
      rc=1
    fi
  done

  for sample in "${struct_samples[@]}"; do
    printf '%s\n' "$sample" > "$tmp/planted.sh"
    if ! grep -qIE -e "$STRUCTURAL_SH" -- "$tmp/planted.sh"; then
      echo "✗ SELF-TEST: structural regex broken (missed: $sample)" >&2
      rc=1
    fi
  done

  rm -rf -- "${tmp:?}"
  return "$rc"
}
|
|
# A failing self-test means the regexes cannot be trusted, so the scan results
# above are meaningless: stop with a distinct status (2) before any verdict.
if ! _selftest; then
  exit 2
fi

# Final verdict: success banner when neither scan raised the failure flag,
# otherwise remediation guidance on stderr and exit status 1.
if [[ "$fail" -eq 0 ]]; then
  echo "✓ sanitization gate passed (framework *.md/*.sh/*.ps1/_scripts; examples/ excluded)"
else
  echo
  echo "Sanitization gate FAILED. Public framework files must not contain operator identity" >&2
  echo "or private \$HOME defaults. Move personal content to init-generated files or examples/." >&2
  exit 1
fi
|