feat(history-rewrite): enhance object checks in history rewrite scripts to focus on blob types and improve logging

This commit is contained in:
GitHub Actions
2025-12-09 14:20:37 +00:00
parent 9adf2735dd
commit 3ec6eba23a
5 changed files with 53 additions and 12 deletions

View File

@@ -55,15 +55,30 @@ for p in $paths_list; do
fi
done
# 2) Check for objects in paths
obj_count=$(git rev-list --objects --all -- $paths_list | wc -l | tr -d ' ')
if [ "$obj_count" -gt 0 ]; then
echo "ERROR: Found $obj_count objects in specified paths"
git rev-list --objects --all -- $paths_list | nl -ba | sed -n '1,100p'
echo "DRY-RUN FAILED: repository objects found in banned paths"
# 2) Check for blob objects in paths only. `git rev-list --objects` also lists
#    commit, tree and annotated-tag objects; only blobs are counted below.
tmp_objects=$(mktemp)
blob_list=$(mktemp)
# One handler removes both scratch files on exit or on INT/TERM.
trap 'rm -f "$tmp_objects" "$blob_list"' EXIT INT TERM
# $paths_list is deliberately unquoted: it is a whitespace-separated list of
# pathspecs, each of which must reach git as its own argument.
git rev-list --objects --all -- $paths_list > "$tmp_objects"
# Resolve all object types with a single `git cat-file` process instead of
# forking awk + git once per object. %(rest) echoes back whatever followed the
# object id on the input line (the path), so each output line has the form
# "<type> <oid> <path>". Keeping only the "blob " lines and stripping that
# prefix leaves "<oid> <path>" entries in $blob_list.
git cat-file --batch-check='%(objecttype) %(objectname) %(rest)' < "$tmp_objects" \
  | sed -n 's/^blob //p' > "$blob_list"
# Some `wc` implementations pad the count with spaces; strip them so the value
# is safe to use in a numeric test.
blob_count=$(wc -l < "$blob_list" | tr -d ' ')
if [ "$blob_count" -gt 0 ]; then
echo "ERROR: Found $blob_count blob object(s) in specified paths"
nl -ba "$blob_list" | sed -n '1,100p'
echo "DRY-RUN FAILED: repository blob objects found in banned paths"
exit 1
else
echo "OK: No repository objects in specified paths"
echo "OK: No repository blob objects in specified paths"
fi
# 3) Check for large objects across history

View File

@@ -125,7 +125,17 @@ preview_removals() {
echo "=== Preview: objects in paths ===" | tee -a "$logfile"
# List objects for the given paths
for p in $paths_list; do
git rev-list --objects --all -- "$p" | tee -a "$logfile" | awk '{print $1, $2}' | head -n 50 | tee -a "$logfile"
# Announce the path whose objects follow, on the console and in the log.
echo "Path: $p" | tee -a "$logfile"
# List the objects reachable under $p. Blobs are printed as "<oid> <path>";
# every other object is prefixed with its upper-cased type in brackets.
# `read -r oid label` puts the first field in oid and the entire remainder in
# label, so paths that contain spaces are kept whole.
git rev-list --objects --all -- "$p" | while read -r oid label; do
  # A failed lookup leaves type empty; it is reported as UNKNOWN below rather
  # than as an empty "[]" tag.
  type=$(git cat-file -t "$oid" 2>/dev/null || true)
  if [ "$type" = "blob" ]; then
    echo "$oid $label"
  else
    [ -n "$type" ] || type=unknown
    echo "[${type^^}] $oid $label"
  fi
# The log is written once, after truncation, so it holds exactly the (at most
# 50) lines shown on the console and no duplicates.
done | head -n 50 | tee -a "$logfile"
done
echo "=== Example large objects (candidate for --strip-size) ===" | tee -a "$logfile"

View File

@@ -84,10 +84,20 @@ if [ "$FORMAT" = "json" ]; then
done
printf '],'
else
echo "--- Objects in paths ---"
for p in $paths_list; do
git rev-list --objects --all -- "$p" | nl -ba | sed -n '1,100p'
done
# Plain-text listing: for each path, print a numbered list of the objects
# reachable under it (only the first 100 lines per path are shown).
echo "--- Objects in paths (blob objects shown; tags highlighted) ---"
for p in $paths_list; do
  echo "Path: $p"
  # `read -r oid label` puts the first field in oid and the entire remainder in
  # label, so paths that contain spaces are kept whole.
  git rev-list --objects --all -- "$p" | while read -r oid label; do
    # A failed lookup leaves type empty; it is reported as UNKNOWN below rather
    # than as an empty "[]" tag.
    type=$(git cat-file -t "$oid" 2>/dev/null || true)
    if [ "$type" = "blob" ]; then
      echo "$oid $label"
    else
      # Non-blob objects are prefixed with their upper-cased type.
      [ -n "$type" ] || type=unknown
      echo "[${type^^}] $oid $label"
    fi
  done | nl -ba | sed -n '1,100p'
done
fi
echo "--- Example large objects larger than ${STRIP_SIZE}M ---"

View File

@@ -4,6 +4,9 @@ setup() {
TMPREPO=$(mktemp -d)
cd "$TMPREPO"
git init -q
# Set local git identity for test commits
git config user.email "test@example.com"
git config user.name "Test Runner"
# create a directory that matches the paths to be pruned
mkdir -p backend/codeql-db
# add a large fake blob file

View File

@@ -5,6 +5,9 @@ setup() {
TMPREPO=$(mktemp -d)
cd "$TMPREPO"
git init -q
# Set local git identity for test commits
git config user.email "test@example.com"
git config user.name "Test Runner"
echo 'initial' > README.md
git add README.md && git commit -m 'init' -q
# Make a minimal .venv pre-commit stub