chore: git cache cleanup

2026-03-04 18:34:49 +00:00
parent c32cce2a88
commit 27c252600a
2001 changed files with 683185 additions and 0 deletions
--- a/scripts/ci/check-codecov-trigger-parity.sh
+++ b/scripts/ci/check-codecov-trigger-parity.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Workflow files under comparison, plus the handoff note quality-checks.yml must contain verbatim.
+QUALITY_WORKFLOW=".github/workflows/quality-checks.yml"
+CODECOV_WORKFLOW=".github/workflows/codecov-upload.yml"
+EXPECTED_COMMENT='Codecov upload moved to `codecov-upload.yml` (pull_request + workflow_dispatch).'
+
+fail() { # Emit a GitHub Actions error annotation carrying $1, then exit 1.
+  local reason="$1"
+  printf '%s\n' "::error title=Codecov trigger/comment drift::${reason}"
+  exit 1
+}
+# Both workflow files must exist before their contents are inspected.
+if [[ ! -f "$QUALITY_WORKFLOW" ]]; then fail "Missing workflow file: $QUALITY_WORKFLOW"; fi
+if [[ ! -f "$CODECOV_WORKFLOW" ]]; then fail "Missing workflow file: $CODECOV_WORKFLOW"; fi
+
+if ! grep -qE '^on:' "$QUALITY_WORKFLOW"; then fail "quality-checks workflow is missing an 'on:' block"; fi
+if ! grep -qE '^on:' "$CODECOV_WORKFLOW"; then fail "codecov-upload workflow is missing an 'on:' block"; fi
+
+if ! grep -qE '^  pull_request:' "$QUALITY_WORKFLOW"; then fail "quality-checks must run on pull_request"; fi
+# The manual trigger belongs to codecov-upload only, so finding it here is drift.
+! grep -qE '^  workflow_dispatch:' "$QUALITY_WORKFLOW" \
+  || fail "quality-checks unexpectedly includes workflow_dispatch; keep Codecov manual trigger scoped to codecov-upload workflow"
+
+if ! grep -qE '^  pull_request:' "$CODECOV_WORKFLOW"; then fail "codecov-upload must run on pull_request"; fi
+if ! grep -qE '^  workflow_dispatch:' "$CODECOV_WORKFLOW"; then fail "codecov-upload must run on workflow_dispatch"; fi
+# pull_request_target is rejected outright for the upload workflow.
+! grep -qE '^  pull_request_target:' "$CODECOV_WORKFLOW" \
+  || fail "codecov-upload must not use pull_request_target"
+
+# The handoff note is matched as a fixed string, so any rewording fails the check.
+grep -Fq "$EXPECTED_COMMENT" "$QUALITY_WORKFLOW" \
+  || fail "quality-checks Codecov handoff comment is missing or changed; expected: $EXPECTED_COMMENT"
+
+printf '%s\n' "Codecov trigger/comment parity check passed"
--- a/scripts/ci/check-codeql-parity.sh
+++ b/scripts/ci/check-codeql-parity.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Files whose CodeQL settings are compared: CI workflow, VS Code tasks, and the two pre-commit scan scripts.
+CODEQL_WORKFLOW=".github/workflows/codeql.yml"
+TASKS_FILE=".vscode/tasks.json"
+GO_PRECOMMIT_SCRIPT="scripts/pre-commit-hooks/codeql-go-scan.sh"
+JS_PRECOMMIT_SCRIPT="scripts/pre-commit-hooks/codeql-js-scan.sh"
+
+fail() { # Emit a GitHub Actions error annotation carrying $1, then exit 1.
+  local reason="$1"
+  printf '%s\n' "::error title=CodeQL parity drift::${reason}"
+  exit 1
+}
+
+ensure_task_command() {
+  # Succeeds only if the JSON file $1 has a .tasks array holding an entry whose
+  # label equals $2 and whose command equals $3; jq's stdout is discarded.
+  local json_path="$1" wanted_label="$2" wanted_command="$3"
+
+  jq -e \
+    --arg expected_command "$wanted_command" \
+    --arg task_label "$wanted_label" \
+    '.tasks | type == "array" and any(.[]; .label == $task_label and .command == $expected_command)' \
+    "$json_path" >/dev/null
+}
+
+ensure_event_branches() {
+  # Text fallback: succeeds when, inside the top-level on: block of workflow $1,
+  # the first branches: line under event $2 equals $3 after its leading spaces
+  # are removed. Nesting is tracked by indentation, so sibling keys such as
+  # types: do not end the event, and the next column-0 key closes the on: block.
+  local workflow_file="$1"
+  local event_name="$2"
+  local expected_line="$3"
+
+  awk -v event_name="$event_name" -v expected_line="$expected_line" '
+    /^ *(#.*)?$/ { next }                            # skip blank and comment-only lines
+    /^[^ ]/ {                                        # column-0 key: enter or leave on:
+      in_on = ($0 ~ /^on:/)
+      in_event = 0
+      event_indent = -1
+      next
+    }
+    !in_on { next }
+    {
+      match($0, /^ +/)
+      if (event_indent < 0) event_indent = RLENGTH   # first key under on: fixes the event depth
+      if (RLENGTH <= event_indent) {                 # an event key, not a key nested in an event
+        in_event = ($1 == event_name ":")
+        next
+      }
+    }
+    in_event && $1 == "branches:" {
+      line = $0
+      sub(/^ +/, "", line)
+      if (line == expected_line) found = 1
+      in_event = 0                                   # only the first branches: line counts
+    }
+    END { exit found ? 0 : 1 }
+  ' "$workflow_file"
+}
+
+ensure_event_branches_with_yq() {
+  # Semantic check: succeeds when yq can read .on.<$2>.branches from workflow $1
+  # and that list matches the branch names passed as $3..., both sides compared
+  # as de-duplicated, sorted lists of strings. Returns 1 if neither yq call works.
+
+  local workflow_file="$1" event_name="$2"
+  shift 2
+  local wanted_branches=("$@")
+  local branches_expr=".on.${event_name}.branches // []"
+  local wanted_json found_json
+
+  # One branch name per line -> JSON strings -> a single JSON array.
+  wanted_json="$(printf '%s\n' "${wanted_branches[@]}" | jq -R . | jq -s .)"
+
+  # Try the "yq eval" form first, then plain "yq"; stderr from both is dropped.
+  found_json="$(yq eval -o=json "$branches_expr" "$workflow_file" 2>/dev/null)" ||
+    found_json="$(yq -o=json "$branches_expr" "$workflow_file" 2>/dev/null)" ||
+    return 1
+
+  jq -e \
+    --argjson expected "$wanted_json" \
+    'if type != "array" then false else ((map(tostring) | unique | sort) == ($expected | map(tostring) | unique | sort)) end' \
+    <<<"$found_json" >/dev/null
+}
+
+ensure_event_branches_semantic() {
+  # Checks event $2 of workflow $1 against branch names $4...; uses the yq
+  # comparison when yq is installed and, if yq is absent or that comparison does
+  # not succeed, falls back to matching the literal line $3 via ensure_event_branches.
+  local workflow_file="$1" event_name="$2" fallback_line="$3"
+  shift 3
+  local branch_names=("$@")
+
+  if command -v yq >/dev/null 2>&1 &&
+    ensure_event_branches_with_yq "$workflow_file" "$event_name" "${branch_names[@]}"; then
+    return 0
+  fi
+
+  ensure_event_branches "$workflow_file" "$event_name" "$fallback_line"
+}
+
+[[ -f "$CODEQL_WORKFLOW" ]] || fail "Missing workflow file: $CODEQL_WORKFLOW"
+[[ -f "$TASKS_FILE" ]] || fail "Missing tasks file: $TASKS_FILE"
+[[ -f "$GO_PRECOMMIT_SCRIPT" ]] || fail "Missing pre-commit script: $GO_PRECOMMIT_SCRIPT"
+[[ -f "$JS_PRECOMMIT_SCRIPT" ]] || fail "Missing pre-commit script: $JS_PRECOMMIT_SCRIPT"
+
+command -v jq >/dev/null 2>&1 || fail "jq is required for semantic CodeQL parity checks"
+
+ensure_event_branches_semantic \
+  "$CODEQL_WORKFLOW" \
+  "pull_request" \
+  "branches: [main, nightly, development]" \
+  "main" "nightly" "development" || fail "codeql.yml pull_request branches must be [main, nightly, development]"
+ensure_event_branches_semantic \
+  "$CODEQL_WORKFLOW" \
+  "push" \
+  "branches: [main]" \
+  "main" || fail "codeql.yml push branches must be [main]"
+grep -Fq 'queries: security-and-quality' "$CODEQL_WORKFLOW" || fail "codeql.yml must pin init queries to security-and-quality"
+ensure_task_command "$TASKS_FILE" "Security: CodeQL Go Scan (CI-Aligned) [~60s]" "bash scripts/pre-commit-hooks/codeql-go-scan.sh" || fail "Missing or mismatched CI-aligned Go CodeQL task (label+command)"
+ensure_task_command "$TASKS_FILE" "Security: CodeQL JS Scan (CI-Aligned) [~90s]" "bash scripts/pre-commit-hooks/codeql-js-scan.sh" || fail "Missing or mismatched CI-aligned JS CodeQL task (label+command)"
+! grep -Fq 'go-security-extended.qls' "$TASKS_FILE" || fail "tasks.json contains deprecated go-security-extended suite; use CI-aligned scripts"
+! grep -Fq 'javascript-security-extended.qls' "$TASKS_FILE" || fail "tasks.json contains deprecated javascript-security-extended suite; use CI-aligned scripts"
+grep -Fq 'codeql/go-queries:codeql-suites/go-security-and-quality.qls' "$GO_PRECOMMIT_SCRIPT" || fail "Go pre-commit script must use go-security-and-quality suite"
+grep -Fq 'codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls' "$JS_PRECOMMIT_SCRIPT" || fail "JS pre-commit script must use javascript-security-and-quality suite"
+
+echo "CodeQL parity check passed (workflow triggers + suite pinning + local/pre-commit suite alignment)"
--- a/scripts/ci/dry_run_history_rewrite.sh
+++ b/scripts/ci/dry_run_history_rewrite.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# CI wrapper that fails if the repo contains historical objects or commits
+# touching specified paths, or objects larger than the configured strip size.
+
+PATHS="backend/codeql-db,codeql-db,codeql-db-js,codeql-db-go" # default comma-separated banned paths (override with --paths)
+STRIP_SIZE=50 # default size limit in MB (override with --strip-size)
+# Print the command-line help text (options and failure conditions) to stdout.
+usage() {
+  cat <<EOF
+Usage: $0 [--paths 'p1,p2'] [--strip-size N]
+
+Runs a quick, non-destructive check against the repository history and fails
+with a non-zero exit code if any commits or objects are found that touch the
+specified paths or if any historical blobs exceed the --strip-size in MB.
+EOF
+}
+
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --paths)
+      PATHS="$2"; shift 2;;
+    --strip-size)
+      STRIP_SIZE="$2"; shift 2;;
+    --help)
+      usage; exit 0;;
+    *)
+      echo "Unknown option: $1" >&2; usage; exit 1;;
+  esac
+done
+# Split the comma-separated PATHS into paths_list, a single space-separated string.
+IFS=','; set -f # split on commas only; globbing is off while $PATHS expands unquoted
+paths_list="" # NOTE(review): space-joined, so a path containing whitespace is split again wherever this is expanded unquoted
+for p in $PATHS; do
+  paths_list="$paths_list $p"
+done
+set +f; unset IFS # re-enable globbing and return to default word splitting
+
+echo "Checking repository history for banned paths: $paths_list"
+echo "Blobs larger than: ${STRIP_SIZE}M will fail the check"
+
+# History checks run in order; the first one that finds a problem prints its
+# findings and exits 1. If none does, the script reports success and exits 0.
+
+# 1) Check for commits touching paths
+for p in $paths_list; do
+  count=$(git rev-list --all -- "$p" | wc -l | tr -d ' ')
+  if [ "$count" -gt 0 ]; then
+    echo "ERROR: Found $count historical commit(s) touching path: $p"
+    git rev-list --all -- "$p" | nl -ba | sed -n '1,50p'
+    echo "DRY-RUN FAILED: historical commits detected"
+    exit 1
+  else
+    echo "OK: No history touching: $p"
+  fi
+done
+
+# Temp files: register cleanup before creating any of them so none can leak,
+# and turn INT/TERM into exits so an interrupted run stops instead of continuing.
+tmp_objects="" blob_list="" tmp_oids=""
+cleanup() { rm -f -- "$tmp_objects" "$blob_list" "$tmp_oids"; }
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+tmp_objects=$(mktemp)
+blob_list=$(mktemp)
+tmp_oids=$(mktemp)
+
+# 2) Check for blob objects in paths only (ignore tag/commit objects)
+# shellcheck disable=SC2086 # $paths_list is intentionally unquoted to expand into multiple args
+git rev-list --objects --all -- $paths_list > "$tmp_objects"
+blob_count=0
+while IFS= read -r line; do
+  oid=${line%% *} # rev-list prints "<oid> <path>"; keep only the object id
+  # Determine object type and only consider blobs
+  type=$(git cat-file -t "$oid" 2>/dev/null || true)
+  if [ "$type" = "blob" ]; then
+    echo "$line" >> "$blob_list"
+    blob_count=$((blob_count + 1))
+  fi
+done < "$tmp_objects"
+
+if [ "$blob_count" -gt 0 ]; then
+  echo "ERROR: Found $blob_count blob object(s) in specified paths"
+  nl -ba "$blob_list" | sed -n '1,100p'
+  echo "DRY-RUN FAILED: repository blob objects found in banned paths"
+  exit 1
+else
+  echo "OK: No repository blob objects in specified paths"
+fi
+
+# 3) Check for large objects across history
+echo "Scanning for objects larger than ${STRIP_SIZE}M..."
+# --strip-size is given in MB; sizes are compared in bytes.
+size_limit=$((STRIP_SIZE * 1024 * 1024))
+git rev-list --objects --all | awk '{print $1}' > "$tmp_oids"
+# A single batched cat-file call reports every object size (one git process per
+# object is far too slow on a large history). Lines without a numeric size,
+# such as "<oid> missing", are skipped.
+large_objects=$(
+  git cat-file --batch-check='%(objectname) %(objectsize)' < "$tmp_oids" \
+    | awk -v limit="$size_limit" '$2 ~ /^[0-9]+$/ && $2 + 0 >= limit + 0 { print "LARGE OBJECT: " $1 " size=" $2 }'
+)
+if [ -n "$large_objects" ]; then
+  printf '%s\n' "$large_objects"
+  echo "DRY-RUN FAILED: large historical blobs detected"
+  exit 1
+fi
+echo "OK: No large objects detected across history"
+
+echo "DRY-RUN OK: No problems detected"
+exit 0