fix(framework/tools): wrapper hardening — TLS validation, cred-path fallback, no-CI fast-exit (#551)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful

This commit was merged in pull request #551.
This commit is contained in:
2026-06-20 10:16:38 +00:00
parent 87f561c1f8
commit 57919c38d8
12 changed files with 434 additions and 9 deletions

View File

@@ -169,6 +169,43 @@ raise SystemExit(1)
PY
}
# Emit an actionable diagnostic to stderr when no tea login resolves for a host.
# Callers that have a working API fallback may ignore the non-zero return of
# get_gitea_login_for_host; this turns the previously SILENT failure into a loud,
# greppable hint (available logins + override + add-login instructions). Printed to
# stderr only, so it never contaminates stdout (the resolved login name) or log
# assertions that capture tea/curl invocations.
print_gitea_login_diagnostic() {
local host="${1:-<unknown>}"
local available
available=$(
command -v tea >/dev/null 2>&1 || { echo "(tea CLI not installed)"; exit 0; }
logins_json=$(tea login list --output json 2>/dev/null) || { echo "(could not query tea login list)"; exit 0; }
TEA_LOGINS_JSON="$logins_json" python3 - <<'PY'
import json, os
from urllib.parse import urlparse
try:
logins = json.loads(os.environ.get("TEA_LOGINS_JSON", "[]"))
except Exception:
logins = []
rows = []
for login in logins if isinstance(logins, list) else []:
name = str(login.get("name") or login.get("Name") or "")
url = str(login.get("url") or login.get("URL") or "")
host = urlparse(url).hostname or "?"
if name:
rows.append(f"{name} (host: {host})")
print("; ".join(rows) if rows else "(none configured)")
PY
)
{
echo "Error: no Gitea tea login matches host '$host'."
echo " Available tea logins: ${available}"
echo " Fix: set GITEA_LOGIN to a login whose URL host is '$host',"
echo " or add one: tea login add --name <name> --url https://$host --token <token>"
} >&2
}
get_gitea_login_for_host() {
local host="${1:-}"
local login
@@ -190,6 +227,7 @@ get_gitea_login_for_host() {
return 0
fi
print_gitea_login_diagnostic "$host"
return 1
}

View File

@@ -53,7 +53,15 @@ if [[ "$PLATFORM" == "github" ]]; then
gh issue comment "$ISSUE_NUMBER" --body "$COMMENT"
echo "Added comment to GitHub issue #$ISSUE_NUMBER"
elif [[ "$PLATFORM" == "gitea" ]]; then
tea issue comment "$ISSUE_NUMBER" "$COMMENT" $(get_gitea_repo_args)
# Build the invocation as an argv array (not unquoted $(get_gitea_repo_args)
# word-splitting) so the comment body — including Markdown backticks, $(...),
# and quotes — is passed verbatim and never re-split or shell-evaluated.
REPO_SLUG=$(get_repo_slug)
GITEA_LOGIN_NAME=$(get_gitea_login) || {
echo "Error: could not resolve a Gitea login for this repo; cannot comment on issue #$ISSUE_NUMBER." >&2
exit 1
}
tea issue comment "$ISSUE_NUMBER" "$COMMENT" --repo "$REPO_SLUG" --login "$GITEA_LOGIN_NAME"
echo "Added comment to Gitea issue #$ISSUE_NUMBER"
else
echo "Error: Unknown platform"

View File

@@ -72,6 +72,11 @@ elif values and all(v == "success" for v in values):
print("success")
elif any(v in {"pending", "running", "queued", "waiting"} for v in values):
print("pending")
elif not values and not state:
# No pipeline/status of any kind reported for this commit. Distinct from
# "unknown" (an ambiguous/unrecognized status that should keep polling):
# this signals a repo/commit that simply has no CI configured.
print("no-status")
else:
print("unknown")
PY
@@ -142,6 +147,21 @@ gitea_get_commit_status_json() {
curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url"
}
gitea_get_default_branch() {
local host="$1"
local repo="$2"
local token="$3"
local url="https://${host}/api/v1/repos/${repo}"
curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url" | python3 -c '
import json, sys
print((json.load(sys.stdin) or {}).get("default_branch", ""))
'
}
github_get_default_branch() {
gh api "repos/${OWNER}/${REPO}" --jq '.default_branch'
}
while [[ $# -gt 0 ]]; do
case "$1" in
-n|--number)
@@ -245,6 +265,51 @@ else
exit 1
fi
# No-CI determination is TWO-TIER (primary: CI history; secondary: empty-poll streak).
#
# PRIMARY — "does this repo run CI at all?" Probed once, up front, from the DEFAULT
# BRANCH's commit status. A repo whose default branch carries CI statuses
# demonstrably runs CI, so an EMPTY status on the PR head means the pipeline simply
# has not registered YET (webhook/queue lag) — NOT that the repo is CI-less. In that
# case we must NEVER fast-green; we keep polling until the pipeline registers or the
# timeout fires (both safe). This closes the webhook-lag false-green: a slow-to-
# register pipeline feeding a merge gate can no longer be mistaken for "no CI".
#
# SECONDARY — the empty-poll streak below applies ONLY to genuinely CI-less repos
# (default branch also has no CI history, e.g. device-imaging class), where burning
# the full timeout would be pure waste. There, NO_CI_MAX empty polls => fast-exit 0.
#
# Probe failure is treated conservatively as REPO_HAS_CI=1 (assume CI present): we
# would rather wait-then-timeout than risk a false-green, per the merge-gate priority.
REPO_HAS_CI=1
detect_repo_ci() {
local def_branch def_status
# Every early exit returns 0: a probe miss must leave the conservative
# REPO_HAS_CI=1 default in place, never abort the caller under `set -e`.
if [[ "$PLATFORM" == "github" ]]; then
def_branch=$(github_get_default_branch 2>/dev/null) || {
echo "[pr-ci-wait] WARN: default-branch probe failed; assuming CI-enabled (will not fast-green on empty status)."; return 0; }
[[ -n "$def_branch" ]] || return 0
def_status=$(github_get_commit_status_json "$OWNER" "$REPO" "$def_branch" 2>/dev/null | extract_state_from_status_json) || return 0
else
def_branch=$(gitea_get_default_branch "$HOST" "$OWNER/$REPO" "$TOKEN" 2>/dev/null) || {
echo "[pr-ci-wait] WARN: default-branch probe failed; assuming CI-enabled (will not fast-green on empty status)."; return 0; }
[[ -n "$def_branch" ]] || return 0
def_status=$(gitea_get_commit_status_json "$HOST" "$OWNER/$REPO" "$TOKEN" "$def_branch" 2>/dev/null | extract_state_from_status_json) || return 0
fi
if [[ "$def_status" == "no-status" || -z "$def_status" ]]; then
REPO_HAS_CI=0
echo "[pr-ci-wait] default branch '${def_branch}' has no CI status history — treating repo as CI-less (empty-poll fast-exit enabled)."
else
REPO_HAS_CI=1
echo "[pr-ci-wait] default branch '${def_branch}' has CI history (state=${def_status}) — repo runs CI; empty status on PR head => awaiting registration, will not fast-green."
fi
}
detect_repo_ci || true
NO_CI_STREAK=0
NO_CI_MAX=3
while true; do
NOW_TS=$(date +%s)
if (( NOW_TS > DEADLINE_TS )); then
@@ -272,11 +337,35 @@ while true; do
echo "Error: CI reported ${STATE} for PR #$PR_NUMBER." >&2
exit 1
;;
no-status)
if [[ "$REPO_HAS_CI" == "1" ]]; then
# PRIMARY tier: repo demonstrably runs CI but this commit's pipeline
# has not registered yet (webhook/queue lag). Do NOT fast-green — keep
# polling until it registers or the timeout fires. Reset the streak so
# a later genuine CI-less misread can't accumulate across this state.
NO_CI_STREAK=0
echo "[pr-ci-wait] empty status on PR head but repo runs CI — awaiting pipeline registration (webhook lag), not fast-greening."
else
# SECONDARY tier: genuinely CI-less repo (default branch has no CI
# history either). Empty polls => fast-exit green after NO_CI_MAX.
NO_CI_STREAK=$((NO_CI_STREAK + 1))
if (( NO_CI_STREAK >= NO_CI_MAX )); then
echo "[INFO] no CI configured for this repo/commit (PR #$PR_NUMBER, ${NO_CI_STREAK} consecutive empty polls, default branch also CI-less); treating as green."
exit 0
fi
fi
sleep "$INTERVAL_SEC"
;;
pending|unknown)
# A pipeline exists but hasn't reached a terminal state (or is
# transiently ambiguous) — keep waiting, and reset the no-CI streak
# since this commit is not in the "no CI at all" condition.
NO_CI_STREAK=0
sleep "$INTERVAL_SEC"
;;
*)
echo "[pr-ci-wait] Unrecognized state '${STATE}', continuing to poll..."
NO_CI_STREAK=0
sleep "$INTERVAL_SEC"
;;
esac

View File

@@ -230,4 +230,81 @@ if grep -q -- 'tea issue close 536 .*--login mosaicstack' "$LOG_FILE"; then
exit 1
fi
# ---------------------------------------------------------------------------
# #560: loud diagnostic + host-derived login for BOTH instances + override-wins
# ---------------------------------------------------------------------------
# Loud diagnostic: a host with no matching tea login must emit an actionable
# error to stderr (the previous behavior was a SILENT failure). The original
# mock defines only usc/evil-usc logins, so mosaicstack resolution fails here.
git -C "$REPO_DIR" remote set-url origin https://git.mosaicstack.dev/mosaicstack/stack.git
diag_stderr=$(run_in_repo bash -c '
source "'"$SCRIPT_DIR"'/detect-platform.sh"
get_gitea_login_for_host git.mosaicstack.dev
' 2>&1 1>/dev/null || true)
if ! grep -q "no Gitea tea login matches host 'git.mosaicstack.dev'" <<<"$diag_stderr"; then
echo "Expected loud diagnostic naming the unresolved host; got: $diag_stderr" >&2
exit 1
fi
if ! grep -q "Available tea logins:" <<<"$diag_stderr"; then
echo "Expected diagnostic to list available tea logins; got: $diag_stderr" >&2
exit 1
fi
# Both-instance host derivation + override-wins, using a mock that DOES define a
# mosaicstack login. Scoped to this section so the API-fallback assertions above
# (which rely on mosaicstack having NO tea login) remain valid.
BIN_DIR2="$WORK_DIR/bin2"
mkdir -p "$BIN_DIR2"
cp "$BIN_DIR/curl" "$BIN_DIR2/curl"
cat > "$BIN_DIR2/tea" <<'SH'
#!/usr/bin/env bash
set -euo pipefail
if [[ "$*" == "login list --output json" ]]; then
cat <<'JSON'
[
{"name":"mosaicstack","url":"https://git.mosaicstack.dev","user":"jason.woltje"},
{"name":"usc","url":"https://git.uscllc.com","user":"jason.woltje"}
]
JSON
exit 0
fi
printf 'tea %s\n' "$*" >> "$MOSAIC_TEST_LOG"
exit 0
SH
chmod +x "$BIN_DIR2/tea"
run_in_repo2() {
(
cd "$REPO_DIR"
PATH="$BIN_DIR2:$PATH" \
MOSAIC_CREDENTIALS_FILE="$CREDENTIALS_FILE" \
MOSAIC_TEST_LOG="$LOG_FILE" \
"$@"
)
}
git -C "$REPO_DIR" remote set-url origin https://git.mosaicstack.dev/mosaicstack/stack.git
mosaic_login=$(run_in_repo2 bash -c 'source "'"$SCRIPT_DIR"'/detect-platform.sh"; get_gitea_login')
if [[ "$mosaic_login" != "mosaicstack" ]]; then
echo "Expected mosaicstack origin to derive login 'mosaicstack'; got '$mosaic_login'" >&2
exit 1
fi
git -C "$REPO_DIR" remote set-url origin https://git.uscllc.com/USC/uconnect.git
usc_login_derived=$(run_in_repo2 bash -c 'source "'"$SCRIPT_DIR"'/detect-platform.sh"; get_gitea_login')
if [[ "$usc_login_derived" != "usc" ]]; then
echo "Expected usc origin to derive login 'usc'; got '$usc_login_derived'" >&2
exit 1
fi
# Explicit GITEA_LOGIN override is honored when it matches the host.
git -C "$REPO_DIR" remote set-url origin https://git.mosaicstack.dev/mosaicstack/stack.git
override_wins=$(run_in_repo2 bash -c 'export GITEA_LOGIN=mosaicstack; source "'"$SCRIPT_DIR"'/detect-platform.sh"; get_gitea_login')
if [[ "$override_wins" != "mosaicstack" ]]; then
echo "Expected valid GITEA_LOGIN override to win on mosaicstack host; got '$override_wins'" >&2
exit 1
fi
git -C "$REPO_DIR" remote set-url origin https://git.uscllc.com/USC/uconnect.git
echo "Gitea login resolution regression harness passed"

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env bash
# Regression harness for issue-create.sh Markdown-body safety (#559).
#
# Guards against reintroduction of eval-based command construction. The wrapper
# builds its tea/gh invocation as an argv array, so a body containing command
# substitution ($(...)), backticks, quotes, and dollar signs MUST reach tea
# verbatim and MUST NOT be shell-evaluated. This test asserts both:
# 1. No command-substitution side effect (an injected `touch SENTINEL` never runs).
# 2. The --description value tea receives is byte-for-byte the original body.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORK_DIR="${MOSAIC_TEST_WORK_DIR:-$PWD/.mosaic-test-work/issue-create-body-safety}"
REPO_DIR="$WORK_DIR/repo"
BIN_DIR="$WORK_DIR/bin"
SENTINEL="$WORK_DIR/INJECTION_SENTINEL"
BODY_FILE="$WORK_DIR/body.txt"
RECEIVED_FILE="$WORK_DIR/received-description.txt"
rm -rf "$WORK_DIR"
mkdir -p "$REPO_DIR" "$BIN_DIR"
git -C "$REPO_DIR" init -q
git -C "$REPO_DIR" remote add origin https://git.mosaicstack.dev/mosaicstack/stack.git
# Hostile Markdown body. The unquoted heredoc expands $SENTINEL (a real path we
# want embedded) but every shell metacharacter we care about is backslash-escaped
# so the TEST shell writes them literally into the file — the bytes the wrapper
# must then preserve.
cat > "$BODY_FILE" <<EOF
# Release notes
Inline code: \`rm -rf /\` must stay literal.
Command sub attempt: \$(touch $SENTINEL)
Backtick cmd attempt: \`touch $SENTINEL\`
Dollars: \$HOME \${PATH} \$5.00 and 100% done
Quotes: "double" and 'single' and \`mixed\`
Trailing pipe-ish: foo | bar && baz ; qux
EOF
BODY="$(cat "$BODY_FILE")"
# Mock tea: resolve a mosaicstack login, then capture the --description verbatim.
cat > "$BIN_DIR/tea" <<'SH'
#!/usr/bin/env bash
set -euo pipefail
if [[ "$*" == "login list --output json" ]]; then
cat <<'JSON'
[
{"name":"mosaicstack","url":"https://git.mosaicstack.dev","user":"jason.woltje"}
]
JSON
exit 0
fi
if [[ "${1:-}" == "issue" && "${2:-}" == "create" ]]; then
desc=""
while [[ $# -gt 0 ]]; do
case "$1" in
--description) desc="$2"; shift 2 ;;
*) shift ;;
esac
done
printf '%s' "$desc" > "$MOSAIC_TEST_RECEIVED"
echo "#1 created"
exit 0
fi
exit 0
SH
chmod +x "$BIN_DIR/tea"
(
cd "$REPO_DIR"
PATH="$BIN_DIR:$PATH" \
MOSAIC_TEST_RECEIVED="$RECEIVED_FILE" \
"$SCRIPT_DIR/issue-create.sh" -t "Body safety test" -b "$BODY"
) >/dev/null
# 1. No command substitution executed anywhere in the pipeline.
if [[ -e "$SENTINEL" ]]; then
echo "FAIL: injected command substitution executed (sentinel file created): $SENTINEL" >&2
exit 1
fi
# 2. tea actually received the body (issue create path taken, not silently dropped).
if [[ ! -f "$RECEIVED_FILE" ]]; then
echo "FAIL: tea issue create was never invoked with a --description" >&2
exit 1
fi
# 3. The description tea received is byte-for-byte the original body.
if [[ "$(cat "$RECEIVED_FILE")" != "$BODY" ]]; then
echo "FAIL: body was not preserved verbatim through issue-create.sh" >&2
echo "--- expected ---" >&2; printf '%s\n' "$BODY" >&2
echo "--- received ---" >&2; cat "$RECEIVED_FILE" >&2
exit 1
fi
echo "issue-create.sh Markdown body-safety regression harness passed"