#!/usr/bin/env bash
# verify-sanitized.sh — blocking CI gate: the public framework package must
# contain no operator-specific personal data or private executable defaults.
#
# Two rule classes:
#   1. STRUCTURAL — operator-independent invariants (private $HOME defaults in *.sh).
#   2. DENYLIST   — a LABELED, one-time regression guard for the CURRENT operator's
#                   identity tokens. This is NOT a general PII detector (a future
#                   operator's name can't be enumerated); the durable control is the
#                   L0 prose firewall + human review. This gate just stops *this*
#                   contamination from coming back.
#
# Scope: all of the framework package — *.md, *.sh, *.ps1, and the CLI scripts under
# tools/_scripts/ (which are extensionless). Excluded: examples/ (holds
# sanitized, placeholdered worked examples), node_modules/, and this gate file.
#
# NOTE on scope: private THIRD-PARTY host references (e.g. a maintainer's employer
# Gitea) are intentionally NOT in this denylist — they are functionally entangled in
# host-routing + test fixtures and are tracked as a separate follow-up.
#
# Self-test: known tokens are planted and the regexes must catch them, so a
# broken regex cannot silently no-op the gate.
#
# Usage: verify-sanitized.sh [FRAMEWORK_ROOT]
#
# Exit status:
#   0  gate passed
#   1  denylist or structural hits found
#   2  self-test failed (a regex no longer matches its planted sample)
#   3  FRAMEWORK_ROOT does not exist / cannot be entered

# errexit (-e) is not enabled; the script handles command failures explicitly.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR

# Print the absolute path of directory $1; non-zero status if it cannot be entered.
# CDPATH is cleared so a relative argument is always resolved against the
# caller's working directory and cd prints nothing of its own.
_resolve_root() {
  (CDPATH='' cd -- "$1" 2>/dev/null && pwd)
}

# Resolve the root to an ABSOLUTE path right away. The script later changes
# directory into the root and then walks "$FRAMEWORK_ROOT" again; a relative
# argument left as-is would point at a non-existent nested path after that cd
# and the scan would silently cover nothing.
requested_root="${1:-$SCRIPT_DIR/../../..}"
FRAMEWORK_ROOT="$(_resolve_root "$requested_root")" || {
  echo "FRAMEWORK_ROOT not found: $requested_root" >&2
  exit 3
}

# Path of this gate relative to the framework root; used to exclude it from the scan.
readonly SELF_REL="tools/quality/scripts/verify-sanitized.sh"

# Labeled current-contaminant denylist. Anchored so substrings like "comparison" or
# "jsonwebtoken" do not match. (jarvis-brain is caught by 'jarvis'.)
# Only PDA carries word-boundary anchors: the scan is case-insensitive, so an
# unanchored "pda" would also hit ordinary words such as "update". All other
# alternatives are plain substrings.
readonly DENYLIST='jarvis|jason|woltje|brain\.woltje\.com|/home/jwoltje|\bPDA\b'

# Structural: a private $HOME path used as a shell default (e.g. ${VAR:-$HOME/src/...}).
# Matches ":-" or ":=" followed by $HOME or ${HOME} and then "/src/".
readonly STRUCTURAL_SH=':[-=]\$\{?HOME\}?/src/'

# In-scope file list (NUL-delimited), produced by _scope_files.
# Emit every in-scope file below the CURRENT directory as a NUL-delimited list.
# In scope:     *.md, *.sh, *.ps1 and anything beneath a tools/_scripts/ directory.
# Out of scope: examples/, node_modules/, and this gate script itself (SELF_REL).
# The walk is rooted at "." (the script cds into FRAMEWORK_ROOT first) so the
# exclusion globs are only ever applied to paths INSIDE the package. Walking the
# absolute root instead would let a parent directory that happens to be called
# "examples" or "node_modules" exclude every file, and would break on a relative
# FRAMEWORK_ROOT once the working directory has changed.
_scope_files() {
  find . -type f \
    \( -name '*.md' -o -name '*.sh' -o -name '*.ps1' -o -path '*/tools/_scripts/*' \) \
    -not -path '*/examples/*' \
    -not -path '*/node_modules/*' \
    -not -path "*/$SELF_REL" \
    -print0
}

# Print "path:line:text" records for every denylist hit (case-insensitive).
# -H forces the file name prefix even when xargs hands grep a single file;
# -I skips binary files. grep/xargs statuses are deliberately ignored: status 1
# just means "no hits", and xargs cannot tell that apart from a grep error.
_scan_denylist() {
  _scope_files | xargs -0 -r grep -nHIEi "$DENYLIST" 2>/dev/null || true
}

# Print "path:line:text" records for private $HOME/src defaults, keeping only
# hits in *.sh files or under tools/_scripts/. The second grep filters on the
# file name prefix, which is why -H is mandatory here: without it a lone file
# would be reported as "line:text" and its hit silently discarded.
_scan_structural() {
  _scope_files | xargs -0 -r grep -nHIE "$STRUCTURAL_SH" 2>/dev/null \
    | grep -E '\.sh:|/tools/_scripts/' || true
}

# Pretty-print scan records from stdin: drop the leading "./" that find adds and
# indent each record. Uses parameter expansion rather than interpolating a path
# into a sed expression, so unusual characters in paths cannot break the report.
_print_hits() {
  local record
  while IFS= read -r record; do
    printf ' %s\n' "${record#./}"
  done
}

# ---- self-test: the gate must catch planted tokens ----
# Plants one denylist sample and one structural sample in a scratch directory and
# checks (a) the bare regexes and (b) the complete find | xargs | grep pipeline.
# Returns 0 when everything is detected, 1 otherwise (details on stderr).
_selftest() {
  local tmp ok=0
  tmp="$(mktemp -d)" || return 1
  printf 'contact jason.woltje at jarvis-brain (PDA note)\n' > "$tmp/planted.md"
  printf 'X="${VAR:-$HOME/src/whatever/x.json}"\n' > "$tmp/planted.sh"

  grep -qIEi "$DENYLIST" "$tmp/planted.md" \
    || { echo "✗ SELF-TEST: denylist regex broken" >&2; ok=1; }
  grep -qIE "$STRUCTURAL_SH" "$tmp/planted.sh" \
    || { echo "✗ SELF-TEST: structural regex broken" >&2; ok=1; }

  # Command substitution runs in a subshell, so these cds do not leak out.
  [[ -n "$(cd -- "$tmp" && _scan_denylist)" ]] \
    || { echo "✗ SELF-TEST: denylist scan pipeline broken" >&2; ok=1; }
  [[ -n "$(cd -- "$tmp" && _scan_structural)" ]] \
    || { echo "✗ SELF-TEST: structural scan pipeline broken" >&2; ok=1; }

  rm -rf -- "$tmp"
  return "$ok"
}

fail=0

# Enter the package root; every scan below is relative to it. CDPATH is cleared
# so a relative root is resolved against the current directory only.
if [[ -z "$FRAMEWORK_ROOT" ]] || ! CDPATH='' cd -- "$FRAMEWORK_ROOT"; then
  echo "FRAMEWORK_ROOT not found: $FRAMEWORK_ROOT" >&2
  exit 3
fi

# Self-test before the real scan: a broken regex or pipeline must abort the gate
# (exit 2) instead of producing a misleading "passed".
_selftest || exit 2

deny_hits="$(_scan_denylist)"
if [[ -n "$deny_hits" ]]; then
  echo "✗ [denylist] operator-identity tokens in shipped files:"
  _print_hits <<<"$deny_hits"
  fail=1
fi

struct_hits="$(_scan_structural)"
if [[ -n "$struct_hits" ]]; then
  echo "✗ [structural] private \$HOME/src default in a shipped script:"
  _print_hits <<<"$struct_hits"
  fail=1
fi

if [[ "$fail" -ne 0 ]]; then
  echo
  echo "Sanitization gate FAILED. Public framework files must not contain operator identity" >&2
  echo "or private \$HOME defaults. Move personal content to init-generated files or examples/." >&2
  exit 1
fi

echo "✓ sanitization gate passed (framework *.md/*.sh/*.ps1/_scripts; examples/ excluded)"