Files
stack/packages/mosaic/framework/tools/git/pr-ci-wait.sh
Hermes Agent 9e8a9cfa8d
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
fix(pr-ci-wait): CI-history primary tier — close webhook-lag false-green (#550)
F-06 follow-up per Mos ruling. The no-CI fast-exit was a pure empty-poll streak
(NO_CI_MAX×interval ≈ 45s), so a slow-to-register pipeline (webhook/queue lag)
looked like 'no CI' and could false-green a merge gate before the pipeline existed.

Two-tier no-CI determination:
- PRIMARY: probe the repo's DEFAULT BRANCH commit status once at startup. If it
  has CI history, the repo runs CI → an empty status on the PR head means the
  pipeline has not REGISTERED yet → never fast-green; poll until it registers or
  timeout (both safe). Closes the webhook-lag false-green.
- SECONDARY: the empty-poll streak fast-exit now applies ONLY to genuinely CI-less
  repos (default branch also has no CI history). Preserves the original no-CI win.
- Probe failure → conservative REPO_HAS_CI=1 (assume CI; wait-then-timeout beats
  false-green). All early returns are explicit 'return 0' + guarded call so the
  probe can never abort under set -e.

Verified: bash -n + shellcheck clean; behavioral harness covers established-repo
(stays 1), CI-less (→0), empty-branch/probe-fail (conservative 1), and the
no-status gate (has-CI never fast-greens, CI-less fast-exits).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Kt2D8TsnDwhtzEAPijsNmR
2026-06-18 14:18:32 -05:00

373 lines
13 KiB
Bash
Executable File

#!/bin/bash
# pr-ci-wait.sh - Wait for PR CI status to reach terminal state (GitHub/Gitea)
# Usage: pr-ci-wait.sh -n <pr_number> [-r owner/repo] [--host host] [-t timeout_sec] [-i interval_sec]
#
# Exit codes used by this script:
#   0    CI reported success, or the repo was determined to have no CI
#   1    CI reported failure/error, or a usage/setup error occurred
#   124  the timeout elapsed before CI reached a terminal state
#
# Strict mode: abort on unhandled command failure, on unset variables, and
# when any stage of a pipeline fails.
set -euo pipefail
# Directory containing this script, so the sibling helper is found regardless
# of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Shared helpers. get_remote_host and get_gitea_token come from this file;
# detect_platform and get_repo_info are used below and are presumably defined
# there as well (not visible here — confirm).
source "$SCRIPT_DIR/detect-platform.sh"
# Settings populated by the command-line parser further down.
# PR number to watch (required; validated after parsing).
PR_NUMBER=""
# Maximum total wait, in seconds.
TIMEOUT_SEC=1800
# Delay between status polls, in seconds.
INTERVAL_SEC=15
# Optional owner/repo slug that replaces inference from the git origin.
REPO_OVERRIDE=""
# Optional Gitea host, consulted when resolving the API host.
HOST_OVERRIDE=""
# Print the command-line help text to stdout.
# Callers redirect to stderr themselves when reporting a usage error.
# The synopsis lists every supported option, and the exit codes are documented
# so that callers gating a merge on this script can tell "CI failed" (1) from
# "gave up waiting" (124).
usage() {
  cat <<EOF
Usage: $(basename "$0") -n <pr_number> [-r owner/repo] [--host host] [-t timeout_sec] [-i interval_sec]
Options:
  -n, --number NUMBER      PR number (required)
  -r, --repo OWNER/REPO    Repository slug (default: infer from git origin)
  --host HOST              Gitea host for --repo API calls (or set GITEA_HOST/GITEA_URL)
  -t, --timeout SECONDS    Max wait time in seconds (default: 1800)
  -i, --interval SECONDS   Poll interval in seconds (default: 15)
  -h, --help               Show this help
Exit codes:
  0     CI is green, or the repository has no CI configured
  1     CI reported failure/error, or a usage/setup error occurred
  124   Timed out waiting for CI to reach a terminal state
Examples:
  $(basename "$0") -n 643
  $(basename "$0") -n 643 --repo ddk/ai-bma
  $(basename "$0") -n 643 -t 900 -i 10
EOF
}
# get_remote_host and get_gitea_token are provided by detect-platform.sh
# Reduce a commit-status JSON document (read from stdin) to one verdict word.
#
# Stdin:   the JSON body returned by the commit "status" endpoint.
# Stdout:  exactly one of
#            success | pending | failure | error   top-level "state", when it is
#                                                  one of these four values
#            success | pending | failure           aggregated from "statuses"
#            no-status                             no state and no status entries
#            unknown                               unparseable/malformed/ambiguous
# Returns: 0 in every handled case. Malformed input (invalid JSON, a payload
#          that is not a JSON object, a non-list "statuses", non-string state
#          values) yields "unknown" instead of a Python traceback, so a caller
#          running under `set -e`/`pipefail` is never aborted by odd input.
extract_state_from_status_json() {
  # Capture piped JSON BEFORE invoking `python3 - <<PY`. The heredoc binds
  # stdin to the Python program text, so the program cannot also read the
  # payload from stdin. The payload is handed over via the environment.
  local payload
  payload=$(cat)
  PR_CI_STATUS_JSON="$payload" python3 - <<'PY'
import json
import os


def finish(verdict):
    """Print the verdict and stop successfully."""
    print(verdict)
    raise SystemExit(0)


def norm(raw):
    """Lower-cased text form of a state value; falsy values become ''."""
    # str() keeps an unexpected non-string value from raising on .lower().
    return str(raw).lower() if raw else ""


try:
    payload = json.loads(os.environ.get("PR_CI_STATUS_JSON", ""))
except Exception:
    finish("unknown")

# Only a JSON object can carry "state"/"statuses"; anything else is malformed.
if not isinstance(payload, dict):
    finish("unknown")

state = norm(payload.get("state"))
if state in {"success", "pending", "failure", "error"}:
    finish(state)

statuses = payload.get("statuses") or []
if not isinstance(statuses, list):
    # Malformed shape: stay ambiguous rather than claim "no-status".
    finish("unknown")

values = []
for item in statuses:
    if not isinstance(item, dict):
        continue
    value = norm(item.get("status") or item.get("state"))
    if value:
        values.append(value)

if any(v in {"failure", "error"} for v in values):
    print("failure")
elif values and all(v == "success" for v in values):
    print("success")
elif any(v in {"pending", "running", "queued", "waiting"} for v in values):
    print("pending")
elif not values and not state:
    # No pipeline/status of any kind reported for this commit. Distinct from
    # "unknown" (an ambiguous/unrecognized status that should keep polling):
    # this signals a repo/commit that simply has no CI configured.
    print("no-status")
else:
    print("unknown")
PY
}
# Print one human-readable line per status context found in a commit-status
# JSON document read from stdin.
#
# Stdout:  "[pr-ci-wait] <context>: <state> (<url>)" per entry (URL omitted
#          when absent), or a single explanatory line when the payload cannot
#          be used or lists no contexts.
# Returns: 0 in every handled case. A payload that is not a JSON object is
#          reported as unavailable instead of raising, so a caller under
#          `set -e`/`pipefail` is not aborted while printing a summary.
print_status_summary() {
  # The heredoc occupies python's stdin with the program text, so the payload
  # is captured first and passed through the environment.
  local payload
  payload=$(cat)
  PR_CI_STATUS_JSON="$payload" python3 - <<'PY'
import json
import os

try:
    payload = json.loads(os.environ.get("PR_CI_STATUS_JSON", ""))
except Exception:
    payload = None

# Invalid JSON and non-object documents are both unusable here.
if not isinstance(payload, dict):
    print("[pr-ci-wait] status payload unavailable")
    raise SystemExit(0)

statuses = payload.get("statuses") or []
if not isinstance(statuses, list):
    statuses = []
if not statuses:
    print("[pr-ci-wait] no status contexts reported yet")
    raise SystemExit(0)

for item in statuses:
    if not isinstance(item, dict):
        continue
    name = item.get("context") or item.get("name") or "unknown-context"
    state = item.get("status") or item.get("state") or "unknown-state"
    target = item.get("target_url") or item.get("url") or ""
    if target:
        print(f"[pr-ci-wait] {name}: {state} ({target})")
    else:
        print(f"[pr-ci-wait] {name}: {state}")
PY
}
# Print the head commit SHA of the pull request via the gh CLI.
# Globals: PR_NUMBER, OWNER, REPO (read).
# Returns: the exit status of gh.
github_get_pr_head_sha() {
  local -a query=(
    pr view "$PR_NUMBER"
    --repo "${OWNER}/${REPO}"
    --json headRefOid
    --jq '.headRefOid'
  )
  gh "${query[@]}"
}
# Fetch the status document for a commit (or ref) through `gh api` and print
# the response on stdout.
# Arguments: $1 - repository owner, $2 - repository name, $3 - commit SHA or ref.
# Returns:   the exit status of gh.
github_get_commit_status_json() {
  local endpoint
  printf -v endpoint 'repos/%s/%s/commits/%s/status' "$1" "$2" "$3"
  gh api "$endpoint"
}
# Print the head commit SHA of the pull request as reported by the Gitea API.
# Globals:   PR_NUMBER (read).
# Arguments: $1 - Gitea host, $2 - owner/repo slug, $3 - API token.
# Outputs:   the SHA, or an empty line when the response carries no usable
#            string SHA (missing key, JSON null, non-object payload). An empty
#            result lets the caller report the failure; a JSON null must not be
#            printed as the literal text "None" and mistaken for a real SHA.
gitea_get_pr_head_sha() {
  local host="$1"
  local repo="$2"
  local token="$3"
  local url="https://${host}/api/v1/repos/${repo}/pulls/${PR_NUMBER}"
  curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url" | python3 -c '
import json, sys
data = json.load(sys.stdin)
head = data.get("head") if isinstance(data, dict) else None
sha = head.get("sha") if isinstance(head, dict) else None
print(sha if isinstance(sha, str) else "")
'
}
# Fetch the status document for a commit (or ref) from the Gitea API and print
# the raw response on stdout.
# Arguments: $1 - Gitea host, $2 - owner/repo slug, $3 - API token,
#            $4 - commit SHA or ref.
# Returns:   the exit status of curl (-f makes HTTP errors non-zero).
gitea_get_commit_status_json() {
  local host=$1 repo=$2 token=$3 sha=$4
  local -a headers=(
    -H "User-Agent: curl/8"
    -H "Authorization: token ${token}"
  )
  curl -fsSL "${headers[@]}" "https://${host}/api/v1/repos/${repo}/commits/${sha}/status"
}
# Print the repository default branch name as reported by the Gitea API.
# Arguments: $1 - Gitea host, $2 - owner/repo slug, $3 - API token.
# Outputs:   the branch name, or an empty line when the response carries no
#            usable string value (missing key, JSON null, non-object payload).
#            A JSON null must not be printed as the literal text "None" and
#            then be probed as if it were a branch.
gitea_get_default_branch() {
  local host="$1"
  local repo="$2"
  local token="$3"
  local url="https://${host}/api/v1/repos/${repo}"
  curl -fsSL -H "User-Agent: curl/8" -H "Authorization: token ${token}" "$url" | python3 -c '
import json, sys
data = json.load(sys.stdin)
branch = data.get("default_branch") if isinstance(data, dict) else None
print(branch if isinstance(branch, str) else "")
'
}
# Print the repository default branch name via `gh api`.
# Globals: OWNER, REPO (read).
# Returns: the exit status of gh.
github_get_default_branch() {
  local slug="${OWNER}/${REPO}"
  gh api "repos/${slug}" --jq '.default_branch'
}
# Parse command-line options into the global settings.
# Globals:   PR_NUMBER, REPO_OVERRIDE, HOST_OVERRIDE, TIMEOUT_SEC, INTERVAL_SEC
#            (written).
# Arguments: the script's command-line arguments.
# Exits:     0 after printing help for -h/--help; 1 on an unknown option or on
#            a value-taking option that is the last argument. The latter used
#            to surface only as the shell's "unbound variable" diagnostic under
#            `set -u`; it now gets an explicit message.
parse_cli_args() {
  while (( $# > 0 )); do
    # Every option below except -h/--help consumes the following argument.
    case "$1" in
      -n|--number|-r|--repo|--host|-t|--timeout|-i|--interval)
        if (( $# < 2 )); then
          echo "Error: option '$1' requires a value." >&2
          exit 1
        fi
        ;;
    esac
    case "$1" in
      -n|--number)
        PR_NUMBER="$2"
        shift 2
        ;;
      -r|--repo)
        REPO_OVERRIDE="$2"
        shift 2
        ;;
      --host)
        HOST_OVERRIDE="$2"
        shift 2
        ;;
      -t|--timeout)
        TIMEOUT_SEC="$2"
        shift 2
        ;;
      -i|--interval)
        INTERVAL_SEC="$2"
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}
parse_cli_args "$@"
# All arguments have been consumed by the parser; clear the positional
# parameters so the script continues with none left, as before.
set --
# --- Validate parsed options -------------------------------------------------
# A PR number is mandatory.
if [[ -z "$PR_NUMBER" ]]; then
  echo "Error: PR number is required (-n)." >&2
  usage >&2
  exit 1
fi
# Timeout and interval must be plain non-negative decimal integers.
if ! [[ "$TIMEOUT_SEC" =~ ^[0-9]+$ ]] || ! [[ "$INTERVAL_SEC" =~ ^[0-9]+$ ]]; then
  echo "Error: timeout and interval must be integer seconds." >&2
  exit 1
fi
# Force base-10 interpretation. Shell arithmetic treats a leading 0 as octal,
# so without this "08"/"09" abort the deadline computation below with "value
# too great for base", and "010" would silently mean 8 seconds.
TIMEOUT_SEC=$((10#$TIMEOUT_SEC))
INTERVAL_SEC=$((10#$INTERVAL_SEC))

# --- Resolve platform and repository -----------------------------------------
if [[ -n "$REPO_OVERRIDE" ]]; then
  # Explicit slug: use it as-is, and fall back to gitea when platform
  # detection fails.
  REPO_INFO="$REPO_OVERRIDE"
  PLATFORM=$(detect_platform 2>/dev/null || echo gitea)
else
  # Output is discarded; PLATFORM is read later, so detect_platform is
  # presumably expected to set it as a side effect (helper not visible here).
  detect_platform > /dev/null
  REPO_INFO=$(get_repo_info)
fi
# The slug must be non-empty, must not be an "error:" marker, and must contain
# a slash separating owner from repository.
if [[ -z "$REPO_INFO" || "$REPO_INFO" == error:* || "$REPO_INFO" != */* ]]; then
  echo "Error: Could not determine repository from git origin. Run from a repo or pass --repo owner/repo." >&2
  exit 1
fi
# Owner is everything before the first slash; repo is everything after the last.
OWNER=${REPO_INFO%%/*}
REPO=${REPO_INFO##*/}
# Absolute deadline (epoch seconds) after which the wait gives up.
START_TS=$(date +%s)
DEADLINE_TS=$((START_TS + TIMEOUT_SEC))
# Resolve the PR head commit for the detected platform, along with the host and
# token when talking to Gitea.
#
# The head-SHA lookups are guarded with `|| HEAD_SHA=""`. Under `set -e` a
# failing lookup would otherwise terminate the script on the assignment itself,
# so the explicit "Could not resolve head SHA" error below would never be
# printed for a failed lookup.
if [[ "$PLATFORM" == "github" ]]; then
  if ! command -v gh >/dev/null 2>&1; then
    echo "Error: gh CLI is required for GitHub CI status polling." >&2
    exit 1
  fi
  HEAD_SHA=$(github_get_pr_head_sha) || HEAD_SHA=""
  if [[ -z "$HEAD_SHA" ]]; then
    echo "Error: Could not resolve head SHA for PR #$PR_NUMBER." >&2
    exit 1
  fi
  echo "[pr-ci-wait] Platform=github PR=#${PR_NUMBER} head_sha=${HEAD_SHA}"
elif [[ "$PLATFORM" == "gitea" ]]; then
  # Host precedence: explicit --host, then the repo-override helper, then the
  # host derived from the git origin.
  if [[ -n "$HOST_OVERRIDE" ]]; then
    HOST="$HOST_OVERRIDE"
  elif [[ -n "$REPO_OVERRIDE" ]]; then
    HOST=$(get_gitea_api_host_for_repo_override) || {
      echo "Error: Gitea host is required with --repo. Pass --host or set GITEA_HOST/GITEA_URL." >&2
      exit 1
    }
  else
    HOST=$(get_remote_host) || {
      echo "Error: Could not determine Gitea host from git origin." >&2
      exit 1
    }
  fi
  TOKEN=$(get_gitea_token "$HOST") || {
    echo "Error: Gitea token not found. Set GITEA_TOKEN or configure ~/.git-credentials." >&2
    exit 1
  }
  HEAD_SHA=$(gitea_get_pr_head_sha "$HOST" "$OWNER/$REPO" "$TOKEN") || HEAD_SHA=""
  if [[ -z "$HEAD_SHA" ]]; then
    echo "Error: Could not resolve head SHA for PR #$PR_NUMBER." >&2
    exit 1
  fi
  echo "[pr-ci-wait] Platform=gitea host=${HOST} repo=${OWNER}/${REPO} PR=#${PR_NUMBER} head_sha=${HEAD_SHA}"
else
  echo "Error: Unsupported platform '${PLATFORM}'." >&2
  exit 1
fi
# No-CI determination is TWO-TIER (primary: CI history; secondary: empty-poll streak).
#
# PRIMARY — "does this repo run CI at all?" Probed once, up front, from the DEFAULT
# BRANCH's commit status. A repo whose default branch carries CI statuses
# demonstrably runs CI, so an EMPTY status on the PR head means the pipeline simply
# has not registered YET (webhook/queue lag) — NOT that the repo is CI-less. In that
# case we must NEVER fast-green; we keep polling until the pipeline registers or the
# timeout fires (both safe). This closes the webhook-lag false-green: a slow-to-
# register pipeline feeding a merge gate can no longer be mistaken for "no CI".
#
# SECONDARY — the empty-poll streak applies ONLY to genuinely CI-less repos
# (default branch also has no CI history, e.g. device-imaging class), where burning
# the full timeout would be pure waste. There, NO_CI_MAX empty polls => fast-exit 0.
#
# Probe failure is treated conservatively as REPO_HAS_CI=1 (assume CI present): we
# would rather wait-then-timeout than risk a false-green, per the merge-gate priority.
# Only an explicit "no-status" verdict for the default branch flips the flag to 0;
# an empty or missing verdict is a probe anomaly and stays conservative.
REPO_HAS_CI=1

# Probe the default branch once and set REPO_HAS_CI accordingly.
# Globals: PLATFORM, OWNER, REPO, HOST, TOKEN (read); REPO_HAS_CI (written).
# Outputs: one informational or WARN line on stdout describing the outcome
#          (nothing when the default branch name comes back empty).
# Returns: always 0, so a probe miss can never abort the caller under `set -e`.
detect_repo_ci() {
  local def_branch="" def_status=""
  local branch_warn="[pr-ci-wait] WARN: default-branch probe failed; assuming CI-enabled (will not fast-green on empty status)."
  local status_warn="[pr-ci-wait] WARN: default-branch status probe gave no verdict; assuming CI-enabled (will not fast-green on empty status)."

  if [[ "$PLATFORM" == "github" ]]; then
    def_branch=$(github_get_default_branch 2>/dev/null) || {
      echo "$branch_warn"
      return 0
    }
    [[ -n "$def_branch" ]] || return 0
    def_status=$(github_get_commit_status_json "$OWNER" "$REPO" "$def_branch" 2>/dev/null \
      | extract_state_from_status_json) || {
      echo "$status_warn"
      return 0
    }
  else
    def_branch=$(gitea_get_default_branch "$HOST" "$OWNER/$REPO" "$TOKEN" 2>/dev/null) || {
      echo "$branch_warn"
      return 0
    }
    [[ -n "$def_branch" ]] || return 0
    def_status=$(gitea_get_commit_status_json "$HOST" "$OWNER/$REPO" "$TOKEN" "$def_branch" 2>/dev/null \
      | extract_state_from_status_json) || {
      echo "$status_warn"
      return 0
    }
  fi

  case "$def_status" in
    no-status)
      REPO_HAS_CI=0
      echo "[pr-ci-wait] default branch '${def_branch}' has no CI status history — treating repo as CI-less (empty-poll fast-exit enabled)."
      ;;
    "")
      # The extractor produced no verdict at all: treat as a failed probe.
      REPO_HAS_CI=1
      echo "$status_warn"
      ;;
    *)
      REPO_HAS_CI=1
      echo "[pr-ci-wait] default branch '${def_branch}' has CI history (state=${def_status}) — repo runs CI; empty status on PR head => awaiting registration, will not fast-green."
      ;;
  esac
  return 0
}
detect_repo_ci || true
# Main poll loop. Each iteration fetches the PR head commit status, reduces it
# to a single state word, and then either exits (terminal state, timeout, or
# CI-less fast-exit) or sleeps INTERVAL_SEC and polls again.
#
# NO_CI_STREAK counts consecutive "no-status" polls on a CI-less repo; reaching
# NO_CI_MAX of them ends the wait as green.
#
# NOTE(review): under `set -e` a failing status fetch terminates the script
# instead of being retried on the next poll — confirm that is intended.
NO_CI_STREAK=0
NO_CI_MAX=3
while :; do
  # Give up once the absolute deadline has passed.
  NOW_TS=$(date +%s)
  if (( NOW_TS > DEADLINE_TS )); then
    echo "Error: Timed out waiting for CI status on PR #$PR_NUMBER after ${TIMEOUT_SEC}s." >&2
    exit 124
  fi

  case "$PLATFORM" in
    github)
      STATUS_JSON=$(github_get_commit_status_json "$OWNER" "$REPO" "$HEAD_SHA")
      ;;
    *)
      STATUS_JSON=$(gitea_get_commit_status_json "$HOST" "$OWNER/$REPO" "$TOKEN" "$HEAD_SHA")
      ;;
  esac
  STATE=$(printf '%s' "$STATUS_JSON" | extract_state_from_status_json)
  echo "[pr-ci-wait] state=${STATE} pr=#${PR_NUMBER} sha=${HEAD_SHA}"

  # Terminal states: print the per-context summary and leave.
  if [[ "$STATE" == "success" ]]; then
    printf '%s' "$STATUS_JSON" | print_status_summary
    echo "[pr-ci-wait] CI is green for PR #$PR_NUMBER."
    exit 0
  fi
  if [[ "$STATE" == "failure" || "$STATE" == "error" ]]; then
    printf '%s' "$STATUS_JSON" | print_status_summary
    echo "Error: CI reported ${STATE} for PR #$PR_NUMBER." >&2
    exit 1
  fi

  # Non-terminal states: update the empty-poll streak, then wait below.
  case "$STATE" in
    no-status)
      if [[ "$REPO_HAS_CI" != "1" ]]; then
        # SECONDARY tier: the default branch has no CI history either, so
        # repeated empty polls mean there is nothing to wait for.
        NO_CI_STREAK=$(( NO_CI_STREAK + 1 ))
        if [[ "$NO_CI_STREAK" -ge "$NO_CI_MAX" ]]; then
          echo "[INFO] no CI configured for this repo/commit (PR #$PR_NUMBER, ${NO_CI_STREAK} consecutive empty polls, default branch also CI-less); treating as green."
          exit 0
        fi
      else
        # PRIMARY tier: the repo runs CI, so an empty status only means the
        # pipeline has not registered yet. Never fast-green here; the streak
        # is cleared so it cannot build up across this state.
        NO_CI_STREAK=0
        echo "[pr-ci-wait] empty status on PR head but repo runs CI — awaiting pipeline registration (webhook lag), not fast-greening."
      fi
      ;;
    pending|unknown)
      # A pipeline exists (or the answer is transiently ambiguous): this is
      # not the "no CI at all" condition, so the streak starts over.
      NO_CI_STREAK=0
      ;;
    *)
      echo "[pr-ci-wait] Unrecognized state '${STATE}', continuing to poll..."
      NO_CI_STREAK=0
      ;;
  esac

  # Every path that reaches this point keeps polling after the same delay.
  sleep "$INTERVAL_SEC"
done