From 3ec6eba23a06ff4c8e8d1eb5d70b8a0c4faacb6a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 9 Dec 2025 14:20:37 +0000 Subject: [PATCH] feat(history-rewrite): enhance object checks in history rewrite scripts to focus on blob types and improve logging --- scripts/ci/dry_run_history_rewrite.sh | 29 ++++++++++++++----- scripts/history-rewrite/clean_history.sh | 12 +++++++- scripts/history-rewrite/preview_removals.sh | 18 +++++++++--- .../tests/clean_history.dryrun.bats | 3 ++ .../tests/validate_after_rewrite.bats | 3 ++ 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/scripts/ci/dry_run_history_rewrite.sh b/scripts/ci/dry_run_history_rewrite.sh index 36af3d28..fba44696 100755 --- a/scripts/ci/dry_run_history_rewrite.sh +++ b/scripts/ci/dry_run_history_rewrite.sh @@ -55,15 +55,30 @@ for p in $paths_list; do fi done -# 2) Check for objects in paths -obj_count=$(git rev-list --objects --all -- $paths_list | wc -l | tr -d ' ') -if [ "$obj_count" -gt 0 ]; then - echo "ERROR: Found $obj_count objects in specified paths" - git rev-list --objects --all -- $paths_list | nl -ba | sed -n '1,100p' - echo "DRY-RUN FAILED: repository objects found in banned paths" +# 2) Check for blob objects in paths only (ignore tag/commit objects) +tmp_objects=$(mktemp) +trap 'rm -f "$tmp_objects"' EXIT INT TERM +git rev-list --objects --all -- $paths_list > "$tmp_objects" +blob_count=0 +blob_list=$(mktemp) +trap 'rm -f "$tmp_objects" "$blob_list"' EXIT INT TERM +while read -r line; do + oid=$(printf '%s' "$line" | awk '{print $1}') + # Determine object type and only consider blobs + type=$(git cat-file -t "$oid" 2>/dev/null || true) + if [ "$type" = "blob" ]; then + echo "$line" >> "$blob_list" + blob_count=$((blob_count + 1)) + fi +done < "$tmp_objects" + +if [ "$blob_count" -gt 0 ]; then + echo "ERROR: Found $blob_count blob object(s) in specified paths" + nl -ba "$blob_list" | sed -n '1,100p' + echo "DRY-RUN FAILED: repository blob objects found in banned paths" exit 1 else - echo "OK: No repository objects in specified paths" + echo "OK: No repository blob objects in specified paths" fi # 3) Check for large objects across history diff --git a/scripts/history-rewrite/clean_history.sh b/scripts/history-rewrite/clean_history.sh index 3d9faba8..3f2754ba 100755 --- a/scripts/history-rewrite/clean_history.sh +++ b/scripts/history-rewrite/clean_history.sh @@ -125,7 +125,17 @@ preview_removals() { echo "=== Preview: objects in paths ===" | tee -a "$logfile" # List objects for the given paths for p in $paths_list; do - git rev-list --objects --all -- "$p" | tee -a "$logfile" | awk '{print $1, $2}' | head -n 50 | tee -a "$logfile" + echo "Path: $p" | tee -a "$logfile" + git rev-list --objects --all -- "$p" | while read -r line; do + oid=$(printf '%s' "$line" | awk '{print $1}') + label=$(printf '%s' "$line" | awk '{print $2}') + type=$(git cat-file -t "$oid" 2>/dev/null || true) + if [ "$type" = "blob" ]; then + echo "$oid $label" | tee -a "$logfile" + else + echo "[${type^^}] $oid $label" | tee -a "$logfile" + fi + done | head -n 50 | tee -a "$logfile" done echo "=== Example large objects (candidate for --strip-size) ===" | tee -a "$logfile" diff --git a/scripts/history-rewrite/preview_removals.sh b/scripts/history-rewrite/preview_removals.sh index c5ab1766..72622320 100755 --- a/scripts/history-rewrite/preview_removals.sh +++ b/scripts/history-rewrite/preview_removals.sh @@ -84,10 +84,20 @@ if [ "$FORMAT" = "json" ]; then done printf '],' else - echo "--- Objects in paths ---" - for p in $paths_list; do - git rev-list --objects --all -- "$p" | nl -ba | sed -n '1,100p' - done + echo "--- Objects in paths (blob objects shown; tags highlighted) ---" + for p in $paths_list; do + echo "Path: $p" + git rev-list --objects --all -- "$p" | while read -r line; do + oid=$(printf '%s' "$line" | awk '{print $1}') + label=$(printf '%s' "$line" | awk '{print $2}') + type=$(git cat-file -t "$oid" 2>/dev/null || true) + if [ "$type" = "blob" ]; then + echo "$oid $label" + else + echo "[${type^^}] $oid $label" + fi + done | nl -ba | sed -n '1,100p' + done fi echo "--- Example large objects larger than ${STRIP_SIZE}M ---" diff --git a/scripts/history-rewrite/tests/clean_history.dryrun.bats b/scripts/history-rewrite/tests/clean_history.dryrun.bats index 37e31c4f..27305bbf 100644 --- a/scripts/history-rewrite/tests/clean_history.dryrun.bats +++ b/scripts/history-rewrite/tests/clean_history.dryrun.bats @@ -4,6 +4,9 @@ setup() { TMPREPO=$(mktemp -d) cd "$TMPREPO" git init -q + # Set local git identity for test commits + git config user.email "test@example.com" + git config user.name "Test Runner" # create a directory that matches the paths to be pruned mkdir -p backend/codeql-db # add a large fake blob file diff --git a/scripts/history-rewrite/tests/validate_after_rewrite.bats b/scripts/history-rewrite/tests/validate_after_rewrite.bats index bf55224e..4993ec4f 100644 --- a/scripts/history-rewrite/tests/validate_after_rewrite.bats +++ b/scripts/history-rewrite/tests/validate_after_rewrite.bats @@ -5,6 +5,9 @@ setup() { TMPREPO=$(mktemp -d) cd "$TMPREPO" git init -q + # Set local git identity for test commits + git config user.email "test@example.com" + git config user.name "Test Runner" echo 'initial' > README.md git add README.md && git commit -m 'init' -q # Make a minimal .venv pre-commit stub