Non-blocking items from the #575 dual-engine review: - CONSTITUTION.md: state explicitly there is NO CONSTITUTION.local.md and hard gates are not locally overridable (clarity vs LAYER-MODEL overlay-eligibility) - verify-sanitized.sh: expand identity scan to *.yml/*.yaml/*.toml/*.env/*.service (operator data could hide in shipped configs) — gate green, no new hits - AGENTS.md: clarify the intentional bare-launch stop-if-missing strictness vs the launcher's readOptional tolerance (which keeps pre-upgrade hosts working) Refs #542, closes #576 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
86 lines
4.1 KiB
Bash
Executable File
86 lines
4.1 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# verify-sanitized.sh — blocking CI gate: the public framework package must
|
|
# contain no operator-specific personal data or private executable defaults.
|
|
#
|
|
# Two rule classes, with DELIBERATELY DIFFERENT scopes:
|
|
# 1. DENYLIST (identity) — a LABELED, one-time regression guard for the CURRENT
|
|
# operator's identity tokens. Scanned EVERYWHERE including examples/, because a
|
|
# jarvis/jason/private-home regression in a SHIPPED example would break the
|
|
# open-source guarantee just as badly as one in a default. NOT a general PII
|
|
# detector (a future operator's name can't be enumerated) — the durable control
|
|
# is the L0 framework-PR firewall + human review; this just stops re-contamination.
|
|
# 2. STRUCTURAL (private $HOME default in *.sh) — scanned everywhere EXCEPT examples/,
|
|
# because worked example overlays/personas legitimately show placeholder paths.
|
|
#
|
|
# File types: *.md, *.sh, *.ps1, *.json, *.yml/*.yaml, *.toml, *.env, *.service, and the CLI scripts under
|
|
# tools/_scripts/. Excludes node_modules/ and this gate file.
|
|
#
|
|
# NOTE: '\bPDA\b' intentionally matches "PDA-friendly" (the contamination removed in P2);
|
|
# a hyphen is a non-word character, so \b still matches right after "PDA" and "PDA-foo" matches. If a future
|
|
# legitimate doc needs the literal token "PDA" in a non-personal sense, reword it or
|
|
# narrow this rule — do not weaken the gate silently.
|
|
#
|
|
# NOTE: private THIRD-PARTY host refs (e.g. a maintainer's employer Gitea) are NOT in
|
|
# this denylist — they are functionally entangled in host-routing + test fixtures and
|
|
# tracked as a separate follow-up.
|
|
#
|
|
# Usage: verify-sanitized.sh [FRAMEWORK_ROOT]
|
|
# Unset variables are errors and a pipeline fails if any stage fails. `-e` is
# not enabled: the statuses that matter are checked explicitly at each call site.
set -uo pipefail

# Absolute directory holding this script. CDPATH is cleared and cd's stdout is
# discarded so a CDPATH hit cannot add a second line to the captured path.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"

# Directory to scan: the first argument when given and non-empty, otherwise the
# directory three levels above this script.
FRAMEWORK_ROOT="${1:-$(CDPATH='' cd -- "$SCRIPT_DIR/../../.." >/dev/null && pwd)}"

# Path of this gate relative to FRAMEWORK_ROOT; the file scopes exclude it.
readonly SELF_REL="tools/quality/scripts/verify-sanitized.sh"

# ERE alternation of the operator-identity tokens (see the header notes on PDA).
readonly DENYLIST='jarvis|jason|woltje|brain\.woltje\.com|/home/jwoltje|\bPDA\b'

# ERE for a `:-` / `:=` parameter default that begins with $HOME/src/ or ${HOME}/src/.
readonly STRUCTURAL_SH=':[-=]\$\{?HOME\}?/src/'

# Every later path is relative to the root, so refuse to continue without it.
# The -d test also rejects an empty value before cd ever sees it.
if [[ ! -d "$FRAMEWORK_ROOT" ]] || ! cd -- "$FRAMEWORK_ROOT"; then
  echo "FRAMEWORK_ROOT not found: $FRAMEWORK_ROOT" >&2
  exit 3
fi
|
|
|
|
# Identity scope: every shipped text file, examples/ INCLUDED.
# Writes NUL-terminated paths (relative to the current directory) to stdout.
# node_modules/ and this gate file itself are left out.
_files_identity() {
  local -a name_tests=()
  local ext

  # One "-name '*.ext' -o" pair per scanned extension; the trailing -o joins
  # the list to the tools/_scripts/ path test inside the parentheses below.
  for ext in md sh ps1 json yml yaml toml env service; do
    name_tests+=(-name "*.${ext}" -o)
  done

  find . -type f \
    '(' "${name_tests[@]}" -path '*/tools/_scripts/*' ')' \
    -not -path '*/node_modules/*' -not -path "./$SELF_REL" -print0
}
|
|
# Structural scope: shipped scripts only, examples/ EXCLUDED.
# Writes NUL-terminated paths (relative to the current directory) to stdout.
_files_structural() {
  local -a find_args=(
    . -type f
    '(' -name '*.sh' -o -path '*/tools/_scripts/*' ')'
  )
  local skip

  # Path patterns that never belong to the structural scan.
  for skip in '*/examples/*' '*/node_modules/*' "./$SELF_REL"; do
    find_args+=(-not -path "$skip")
  done

  find "${find_args[@]}" -print0
}
|
|
|
|
# ---- self-test FIRST: a broken regex must never silently no-op the gate ----
#
# Checks both scan patterns against known-bad samples before any real file is read.
# Each identity sample is matched ON ITS OWN, so losing one alternative from
# DENYLIST (for example a mangled '\bPDA\b') is caught even while the other
# tokens still match. The two samples that contain "woltje" are also covered by
# the bare 'woltje' alternative and so cannot detect breakage independently.
#
# Globals: DENYLIST (read), STRUCTURAL_SH (read)
# Outputs: one diagnostic line per failed sample on stderr
# Returns: 0 when every sample matches, 1 otherwise
_selftest() {
  local rc=0
  local sample
  local -a identity_samples=(
    'jarvis-brain'
    'contact jason'
    'woltje'
    'brain.woltje.com'
    '/home/jwoltje'
    '(PDA-friendly)'
  )

  for sample in "${identity_samples[@]}"; do
    # Same flags as the real identity scan; a grep error (status 2) also counts as broken.
    if ! grep -qIEi -- "$DENYLIST" <<<"$sample"; then
      echo "✗ SELF-TEST: identity denylist regex broken (missed: $sample)" >&2
      rc=1
    fi
  done

  # Single-quoted on purpose: the sample must contain a literal $HOME default.
  if ! grep -qIE -- "$STRUCTURAL_SH" <<<'X="${VAR:-$HOME/src/whatever/x.json}"'; then
    echo "✗ SELF-TEST: structural regex broken" >&2
    rc=1
  fi

  return "$rc"
}
|
|
# Run grep over the NUL-delimited path list on stdin and print its matches.
#   $1 - grep option cluster (e.g. -HnIEi)
#   $2 - extended regular expression to search for
# Returns: 0 when every grep run ended with "match" (0) or "no match" (1);
#          non-zero when any run reported an error, so the caller can tell
#          "nothing found" apart from "could not look".
_scan_hits() {
  xargs -0 -r sh -c '
    opts=$1 pattern=$2
    shift 2
    grep "$opts" -e "$pattern" -- "$@"
    [ "$?" -le 1 ]
  ' sh "$1" "$2"
}

# Exit statuses of this section: 0 pass, 1 gate failed (hits found),
# 2 self-test failed, 4 scan could not complete.
_selftest || exit 2

fail=0
scan_error=0

# -H forces the "path:" prefix even when xargs hands grep a single file; without
# it such a hit would be reported with a line number but no file name.
# pipefail makes a find failure count as a scan error too.
deny_hits="$(_files_identity | _scan_hits -HnIEi "$DENYLIST")" || scan_error=1
if [[ -n "$deny_hits" ]]; then
  echo "✗ [denylist] operator-identity tokens in shipped files (examples/ included):"
  printf '%s\n' "$deny_hits" | sed "s#^\./##; s/^/ /"
  fail=1
fi

struct_hits="$(_files_structural | _scan_hits -HnIE "$STRUCTURAL_SH")" || scan_error=1
if [[ -n "$struct_hits" ]]; then
  echo "✗ [structural] private \$HOME/src default in a shipped script:"
  printf '%s\n' "$struct_hits" | sed "s#^\./##; s/^/ /"
  fail=1
fi

if (( scan_error )); then
  echo "✗ sanitization scan hit a find/grep error (see messages above); refusing to report a pass." >&2
fi

if [[ "$fail" -ne 0 ]]; then
  echo
  echo "Sanitization gate FAILED. Public framework files must not contain operator identity" >&2
  echo "or private \$HOME defaults. Move personal content to init-generated files or genericize." >&2
  exit 1
fi

# No hits, but part of the tree was never inspected: fail closed.
if (( scan_error )); then
  exit 4
fi

echo "✓ sanitization gate passed (identity scan incl. examples/; structural scan excl. examples/)"
|