#!/usr/bin/env bash
set -euo pipefail

# CI wrapper that fails if the repo contains historical objects or commits
# touching specified paths, or objects larger than the configured strip size.
#
# Checks, in order (the script stops at the first one that fails):
#   1) commits in any ref that touch one of the banned paths
#   2) blob objects reachable under the banned paths
#   3) objects of any type whose size is >= STRIP_SIZE MiB
#
# Exit status: 0 when nothing is found, 1 on a failed check or bad arguments,
# 130/143 when interrupted by SIGINT/SIGTERM.
#
# NOTE(review): the usage text and the option-parsing arms were lost when this
# file was flattened; the --paths / --strip-size / --help flags below are
# reconstructed from the PATHS and STRIP_SIZE variables. Confirm them against
# the callers of this script.

PATHS="backend/codeql-db,codeql-db,codeql-db-js,codeql-db-go"
STRIP_SIZE=50

# Scratch directory for intermediate object lists; created in main() and
# removed by cleanup() on every exit path.
TMP_DIR=""

# Print the command-line help to stdout.
usage() {
  cat <<EOF
Usage: $0 [--paths 'p1,p2,...'] [--strip-size N]

Non-destructive check of the repository history. Exits non-zero if any
commit or blob touches one of the given paths, or if any object in the
history is N MiB or larger.

  --paths       Comma-separated list of banned paths
  --strip-size  Size threshold in MiB
  -h, --help    Show this help and exit
EOF
}

# Remove the scratch directory, if one was created.
cleanup() {
  if [ -n "$TMP_DIR" ]; then
    rm -rf -- "$TMP_DIR"
  fi
}

# Parse command-line options into the PATHS and STRIP_SIZE globals.
# Arguments: the script's positional parameters.
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --paths)
        if [ "$#" -lt 2 ]; then
          echo "Missing value for option: $1" >&2
          usage
          exit 1
        fi
        PATHS=$2
        shift 2
        ;;
      --strip-size)
        if [ "$#" -lt 2 ]; then
          echo "Missing value for option: $1" >&2
          usage
          exit 1
        fi
        STRIP_SIZE=$2
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

# 1) Check for commits touching paths.
# Arguments: one banned path per argument.
check_commits_touching_paths() {
  local banned count
  for banned in "$@"; do
    count=$(git rev-list --count --all -- "$banned")
    if [ "$count" -gt 0 ]; then
      echo "ERROR: Found $count historical commit(s) touching path: $banned"
      # sed (not head) so the upstream commands never see SIGPIPE under
      # pipefail; only the first 50 commits are shown.
      git rev-list --all -- "$banned" | nl -ba | sed -n '1,50p'
      echo "DRY-RUN FAILED: historical commits detected"
      exit 1
    else
      echo "OK: No history touching: $banned"
    fi
  done
}

# 2) Check for blob objects in paths only (ignore tag/commit/tree objects).
# Arguments: one banned path per argument.
check_blobs_in_paths() {
  local objects_file="$TMP_DIR/objects"
  local blobs_file="$TMP_DIR/blobs"
  local blob_count

  # Intermediate files (rather than process substitution) keep git failures
  # visible to `set -e` / pipefail, so a broken repo fails the check.
  git rev-list --objects --all -- "$@" > "$objects_file"

  # One cat-file process classifies every object. %(rest) carries the path
  # column of the rev-list line through, so each kept line is "<oid> <path>".
  git cat-file --batch-check='%(objecttype) %(objectname) %(rest)' \
    < "$objects_file" \
    | awk '$1 == "blob" { sub(/^blob /, ""); print }' > "$blobs_file"

  blob_count=$(wc -l < "$blobs_file" | tr -d ' ')
  if [ "$blob_count" -gt 0 ]; then
    echo "ERROR: Found $blob_count blob object(s) in specified paths"
    nl -ba "$blobs_file" | sed -n '1,100p'
    echo "DRY-RUN FAILED: repository blob objects found in banned paths"
    exit 1
  else
    echo "OK: No repository blob objects in specified paths"
  fi
}

# 3) Check for large objects across history.
# Reads STRIP_SIZE (MiB); objects of any type at or above it are reported.
check_large_objects() {
  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  local limit_bytes=$((10#$STRIP_SIZE * 1024 * 1024))
  local oids_file="$TMP_DIR/oids"
  local large_file="$TMP_DIR/large"

  echo "Scanning for objects larger than ${STRIP_SIZE}M..."

  git rev-list --objects --all | awk '{print $1}' > "$oids_file"

  # Missing objects are printed as "<oid> missing" and are skipped by the
  # numeric test on the second column.
  git cat-file --batch-check='%(objectname) %(objectsize)' < "$oids_file" \
    | awk -v limit="$limit_bytes" \
      '$2 ~ /^[0-9]+$/ && $2 + 0 >= limit + 0 {
         print "LARGE OBJECT: " $1 " size=" $2
       }' > "$large_file"

  if [ -s "$large_file" ]; then
    cat "$large_file"
    echo "DRY-RUN FAILED: large historical blobs detected"
    exit 1
  fi
  echo "OK: No large objects detected across history"
}

# Entry point: parse options, validate them, then run the three checks.
main() {
  parse_args "$@"

  if ! [[ "$STRIP_SIZE" =~ ^[0-9]+$ ]]; then
    echo "ERROR: strip size must be a non-negative integer, got: $STRIP_SIZE" >&2
    exit 1
  fi

  # Split the comma-separated list into an array, dropping empty entries.
  local -a banned_paths=()
  local entry
  while IFS= read -r -d ',' entry; do
    if [ -n "$entry" ]; then
      banned_paths+=("$entry")
    fi
  done < <(printf '%s,' "$PATHS")

  # With no pathspec, `git rev-list --objects --all --` lists every object,
  # which would report the whole repository as banned.
  if [ "${#banned_paths[@]}" -eq 0 ]; then
    echo "ERROR: no paths to check were given" >&2
    exit 1
  fi

  TMP_DIR=$(mktemp -d)
  trap cleanup EXIT
  # A bare cleanup handler on INT/TERM would let the script keep running
  # after the signal; exit instead and let the EXIT trap do the cleanup.
  trap 'exit 130' INT
  trap 'exit 143' TERM

  local paths_display
  paths_display=$(printf ' %s' "${banned_paths[@]}")
  echo "Checking repository history for banned paths: $paths_display"
  echo "Blobs larger than: ${STRIP_SIZE}M will fail the check"

  check_commits_touching_paths "${banned_paths[@]}"
  check_blobs_in_paths "${banned_paths[@]}"
  check_large_objects

  echo "DRY-RUN OK: No problems detected"
  exit 0
}

main "$@"