Compare commits

...

2 Commits

Author SHA1 Message Date
Hermes Agent
9e8a9cfa8d fix(pr-ci-wait): CI-history primary tier — close webhook-lag false-green (#550)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
F-06 follow-up per Mos ruling. The no-CI fast-exit was a pure empty-poll streak
(NO_CI_MAX×interval ≈ 45s), so a slow-to-register pipeline (webhook/queue lag)
looked like 'no CI' and could false-green a merge gate before the pipeline existed.

Two-tier no-CI determination:
- PRIMARY: probe the repo's DEFAULT BRANCH commit status once at startup. If it
  has CI history, the repo runs CI → an empty status on the PR head means the
  pipeline has not REGISTERED yet → never fast-green; poll until it registers or
  timeout (both safe). Closes the webhook-lag false-green.
- SECONDARY: the empty-poll streak fast-exit now applies ONLY to genuinely CI-less
  repos (default branch also has no CI history). Preserves the original no-CI win.
- Probe failure → conservative REPO_HAS_CI=1 (assume CI; wait-then-timeout beats
  false-green). All early returns are explicit 'return 0' + guarded call so the
  probe can never abort under set -e.

Verified: bash -n + shellcheck clean; behavioral harness covers established-repo
(stays 1), CI-less (→0), empty-branch/probe-fail (conservative 1), and the
no-status gate (has-CI never fast-greens, CI-less fast-exits).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Kt2D8TsnDwhtzEAPijsNmR
2026-06-18 14:18:32 -05:00
Hermes Agent
b90aec2024 fix(framework/tools): wrapper hardening — TLS validation, cred-path fallback, no-CI fast-exit (#550)
Some checks failed
ci/woodpecker/push/ci Pipeline was canceled
ci/woodpecker/pr/ci Pipeline was canceled
F-03: validate TLS by default. New _mosaic_tls_opt helper in _lib/credentials.sh
returns -k only for private-network IP literals (trusted LAN) or an explicit
MOSAIC_INSECURE_TLS opt-in; generic mosaic_http/_post/_patch helpers now use
`curl -sS $_tls` instead of `curl -sk`. Woodpecker scripts (_lib.sh,
pipeline-status/list/trigger.sh) talk only to the two public/valid CI hosts, so
`-sk` is changed to `-sS` (straight -k removal, no helper).

F-02: credentials.sh resolves MOSAIC_CREDENTIALS_FILE via a fallback chain —
env first, then ~/.config/mosaic/credentials.json, then the legacy
~/src/jarvis-brain/credentials.json retained as final fallback so the running
fleet keeps working.

F-06: pr-ci-wait.sh distinguishes a genuine no-CI condition (empty state AND no
statuses) as a new `no-status` state and fast-exits 0 after 3 consecutive empty
polls with a clear "no CI configured" message. Repos that DO have pipelines are
unaffected — any pipeline signal resets the streak and pending still waits.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Kt2D8TsnDwhtzEAPijsNmR
2026-06-18 14:02:43 -05:00
6 changed files with 118 additions and 8 deletions

View File

@@ -16,7 +16,12 @@
# After loading, service-specific env vars are exported.
# Run `load_credentials --help` for details.
MOSAIC_CREDENTIALS_FILE="${MOSAIC_CREDENTIALS_FILE:-$HOME/src/jarvis-brain/credentials.json}"
if [[ -z "${MOSAIC_CREDENTIALS_FILE:-}" ]]; then
for _cand in "$HOME/.config/mosaic/credentials.json" "$HOME/src/jarvis-brain/credentials.json"; do
if [[ -f "$_cand" ]]; then MOSAIC_CREDENTIALS_FILE="$_cand"; break; fi
done
: "${MOSAIC_CREDENTIALS_FILE:=$HOME/src/jarvis-brain/credentials.json}"
fi
_mosaic_require_jq() {
if ! command -v jq &>/dev/null; then
@@ -34,6 +39,19 @@ _mosaic_read_cred() {
jq -r "$jq_path // empty" "$MOSAIC_CREDENTIALS_FILE"
}
# Decide curl TLS flag for a target URL: validate public hosts (MITM matters on
# WAN); allow self-signed only for private-network IP literals (trusted LAN) or an
# explicit $MOSAIC_INSECURE_TLS opt-in. Echoes "-k" or "" (empty).
_mosaic_tls_opt() {
local url="$1" host
[[ -n "${MOSAIC_INSECURE_TLS:-}" ]] && { echo "-k"; return; }
host=$(printf '%s' "$url" | sed -E 's#^[a-zA-Z]+://([^/:]+).*#\1#')
if [[ "$host" =~ ^(10\.|127\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.) ]]; then
echo "-k"; return
fi
echo ""
}
# Sync Woodpecker credentials to ~/.woodpecker/<instance>.env
# Only writes when values differ to avoid unnecessary disk writes.
_mosaic_sync_woodpecker_env() {
@@ -261,7 +279,8 @@ mosaic_http() {
local base_url="${4:-}"
local response
response=$(curl -sk -w "\n%{http_code}" -X "$method" \
local _tls; _tls=$(_mosaic_tls_opt "${base_url}${endpoint}")
response=$(curl -sS $_tls -w "\n%{http_code}" -X "$method" \
-H "$auth_header" \
-H "Content-Type: application/json" \
"${base_url}${endpoint}")
@@ -279,7 +298,8 @@ mosaic_http_post() {
local base_url="${4:-}"
local response
response=$(curl -sk -w "\n%{http_code}" -X POST \
local _tls; _tls=$(_mosaic_tls_opt "${base_url}${endpoint}")
response=$(curl -sS $_tls -w "\n%{http_code}" -X POST \
-H "$auth_header" \
-H "Content-Type: application/json" \
-d "$data" \
@@ -297,7 +317,8 @@ mosaic_http_patch() {
local base_url="${4:-}"
local response
response=$(curl -sk -w "\n%{http_code}" -X PATCH \
local _tls; _tls=$(_mosaic_tls_opt "${base_url}${endpoint}")
response=$(curl -sS $_tls -w "\n%{http_code}" -X PATCH \
-H "$auth_header" \
-H "Content-Type: application/json" \
-d "$data" \

View File

@@ -72,6 +72,11 @@ elif values and all(v == "success" for v in values):
print("success")
elif any(v in {"pending", "running", "queued", "waiting"} for v in values):
print("pending")
elif not values and not state:
# No pipeline/status of any kind reported for this commit. Distinct from
# "unknown" (an ambiguous/unrecognized status that should keep polling):
# this signals a repo/commit that simply has no CI configured.
print("no-status")
else:
print("unknown")
PY
@@ -142,6 +147,21 @@ gitea_get_commit_status_json() {
curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url"
}
gitea_get_default_branch() {
local host="$1"
local repo="$2"
local token="$3"
local url="https://${host}/api/v1/repos/${repo}"
curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url" | python3 -c '
import json, sys
print((json.load(sys.stdin) or {}).get("default_branch", ""))
'
}
github_get_default_branch() {
gh api "repos/${OWNER}/${REPO}" --jq '.default_branch'
}
while [[ $# -gt 0 ]]; do
case "$1" in
-n|--number)
@@ -245,6 +265,51 @@ else
exit 1
fi
# No-CI determination is TWO-TIER (primary: CI history; secondary: empty-poll streak).
#
# PRIMARY — "does this repo run CI at all?" Probed once, up front, from the DEFAULT
# BRANCH's commit status. A repo whose default branch carries CI statuses
# demonstrably runs CI, so an EMPTY status on the PR head means the pipeline simply
# has not registered YET (webhook/queue lag) — NOT that the repo is CI-less. In that
# case we must NEVER fast-green; we keep polling until the pipeline registers or the
# timeout fires (both safe). This closes the webhook-lag false-green: a slow-to-
# register pipeline feeding a merge gate can no longer be mistaken for "no CI".
#
# SECONDARY — the empty-poll streak below applies ONLY to genuinely CI-less repos
# (default branch also has no CI history, e.g. device-imaging class), where burning
# the full timeout would be pure waste. There, NO_CI_MAX empty polls => fast-exit 0.
#
# Probe failure is treated conservatively as REPO_HAS_CI=1 (assume CI present): we
# would rather wait-then-timeout than risk a false-green, per the merge-gate priority.
REPO_HAS_CI=1
detect_repo_ci() {
local def_branch def_status
# Every early exit returns 0: a probe miss must leave the conservative
# REPO_HAS_CI=1 default in place, never abort the caller under `set -e`.
if [[ "$PLATFORM" == "github" ]]; then
def_branch=$(github_get_default_branch 2>/dev/null) || {
echo "[pr-ci-wait] WARN: default-branch probe failed; assuming CI-enabled (will not fast-green on empty status)."; return 0; }
[[ -n "$def_branch" ]] || return 0
def_status=$(github_get_commit_status_json "$OWNER" "$REPO" "$def_branch" 2>/dev/null | extract_state_from_status_json) || return 0
else
def_branch=$(gitea_get_default_branch "$HOST" "$OWNER/$REPO" "$TOKEN" 2>/dev/null) || {
echo "[pr-ci-wait] WARN: default-branch probe failed; assuming CI-enabled (will not fast-green on empty status)."; return 0; }
[[ -n "$def_branch" ]] || return 0
def_status=$(gitea_get_commit_status_json "$HOST" "$OWNER/$REPO" "$TOKEN" "$def_branch" 2>/dev/null | extract_state_from_status_json) || return 0
fi
if [[ "$def_status" == "no-status" || -z "$def_status" ]]; then
REPO_HAS_CI=0
echo "[pr-ci-wait] default branch '${def_branch}' has no CI status history — treating repo as CI-less (empty-poll fast-exit enabled)."
else
REPO_HAS_CI=1
echo "[pr-ci-wait] default branch '${def_branch}' has CI history (state=${def_status}) — repo runs CI; empty status on PR head => awaiting registration, will not fast-green."
fi
}
detect_repo_ci || true
NO_CI_STREAK=0
NO_CI_MAX=3
while true; do
NOW_TS=$(date +%s)
if (( NOW_TS > DEADLINE_TS )); then
@@ -272,11 +337,35 @@ while true; do
echo "Error: CI reported ${STATE} for PR #$PR_NUMBER." >&2
exit 1
;;
no-status)
if [[ "$REPO_HAS_CI" == "1" ]]; then
# PRIMARY tier: repo demonstrably runs CI but this commit's pipeline
# has not registered yet (webhook/queue lag). Do NOT fast-green — keep
# polling until it registers or the timeout fires. Reset the streak so
# a later genuine CI-less misread can't accumulate across this state.
NO_CI_STREAK=0
echo "[pr-ci-wait] empty status on PR head but repo runs CI — awaiting pipeline registration (webhook lag), not fast-greening."
else
# SECONDARY tier: genuinely CI-less repo (default branch has no CI
# history either). Empty polls => fast-exit green after NO_CI_MAX.
NO_CI_STREAK=$((NO_CI_STREAK + 1))
if (( NO_CI_STREAK >= NO_CI_MAX )); then
echo "[INFO] no CI configured for this repo/commit (PR #$PR_NUMBER, ${NO_CI_STREAK} consecutive empty polls, default branch also CI-less); treating as green."
exit 0
fi
fi
sleep "$INTERVAL_SEC"
;;
pending|unknown)
# A pipeline exists but hasn't reached a terminal state (or is
# transiently ambiguous) — keep waiting, and reset the no-CI streak
# since this commit is not in the "no CI at all" condition.
NO_CI_STREAK=0
sleep "$INTERVAL_SEC"
;;
*)
echo "[pr-ci-wait] Unrecognized state '${STATE}', continuing to poll..."
NO_CI_STREAK=0
sleep "$INTERVAL_SEC"
;;
esac

View File

@@ -12,7 +12,7 @@ wp_resolve_repo_id() {
local full_name="$1"
local response http_code body repo_id
response=$(curl -sk -w "\n%{http_code}" \
response=$(curl -sS -w "\n%{http_code}" \
-H "Authorization: Bearer $WOODPECKER_TOKEN" \
"${WOODPECKER_URL}/api/repos/lookup/${full_name}")

View File

@@ -48,7 +48,7 @@ fi
# Resolve owner/repo to numeric ID (Woodpecker v3 API)
REPO_ID=$(wp_resolve_repo_id "$REPO") || exit 1
response=$(curl -sk -w "\n%{http_code}" \
response=$(curl -sS -w "\n%{http_code}" \
-H "Authorization: Bearer $WOODPECKER_TOKEN" \
"${WOODPECKER_URL}/api/repos/${REPO_ID}/pipelines?perPage=${LIMIT}")

View File

@@ -50,7 +50,7 @@ REPO_ID=$(wp_resolve_repo_id "$REPO") || exit 1
_wp_fetch() {
local ep="$1"
local resp http_code body
resp=$(curl -sk -w "\n%{http_code}" \
resp=$(curl -sS -w "\n%{http_code}" \
-H "Authorization: Bearer $WOODPECKER_TOKEN" \
"$ep")
http_code=$(echo "$resp" | tail -n1)

View File

@@ -46,7 +46,7 @@ REPO_ID=$(wp_resolve_repo_id "$REPO") || exit 1
echo "Triggering pipeline for $REPO on branch $BRANCH..."
response=$(curl -sk -w "\n%{http_code}" -X POST \
response=$(curl -sS -w "\n%{http_code}" -X POST \
-H "Authorization: Bearer $WOODPECKER_TOKEN" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg b "$BRANCH" '{branch: $b}')" \