fix: implement GHCR and Docker Hub prune scripts with summary reporting

This commit is contained in:
GitHub Actions
2026-02-26 03:30:02 +00:00
parent 1913e9d739
commit ac720f95df
4 changed files with 367 additions and 213 deletions

174
scripts/prune-dockerhub.sh Executable file
View File

@@ -0,0 +1,174 @@
#!/usr/bin/env bash
set -euo pipefail

# prune-dockerhub.sh
# Removes stale container image tags from Docker Hub, subject to the
# retention and protection rules configured below.
#
# Every setting is read from the environment; an unset or empty variable
# falls back to the default assigned here:
#   OWNER               owner shown in the start-up log line
#   IMAGE_NAME          repository name
#   KEEP_DAYS           tags updated within this many days are kept
#   KEEP_LAST_N         this many most recently updated tags are always kept
#   DRY_RUN             true|1|yes|y|on (any case) => report only, delete nothing
#   PROTECTED_REGEX     JSON array of regexes; matching tags are never deleted
#   DOCKERHUB_USERNAME  Docker Hub account (empty => cleanup is skipped)
#   DOCKERHUB_TOKEN     Docker Hub password/token (empty => cleanup is skipped)
: "${OWNER:=${GITHUB_REPOSITORY_OWNER:-Wikid82}}"
: "${IMAGE_NAME:=charon}"
: "${KEEP_DAYS:=30}"
: "${KEEP_LAST_N:=30}"
: "${DRY_RUN:=false}"
if [[ -z "${PROTECTED_REGEX:-}" ]]; then
  PROTECTED_REGEX='["^v","^latest$","^main$","^develop$"]'
fi
: "${DOCKERHUB_USERNAME:=}"
: "${DOCKERHUB_TOKEN:=}"

LOG_PREFIX="[prune-dockerhub]"

# Epoch seconds marking the retention boundary. The second spelling of the
# relative date is tried only when the first one is rejected by `date`.
if ! cutoff_ts=$(date -d "$KEEP_DAYS days ago" +%s 2>/dev/null); then
  cutoff_ts=$(date -d "-$KEEP_DAYS days" +%s)
fi

# Collapse the free-form DRY_RUN value into a strict true/false flag.
if [[ "${DRY_RUN,,}" =~ ^(true|1|yes|y|on)$ ]]; then
  dry_run=true
else
  dry_run=false
fi

# Running totals, updated by the cleanup and reported in the summary.
TOTAL_CANDIDATES=0
TOTAL_CANDIDATES_BYTES=0
TOTAL_DELETED=0
TOTAL_DELETED_BYTES=0

printf '%s\n' "$LOG_PREFIX starting with OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run"
printf '%s\n' "$LOG_PREFIX PROTECTED_REGEX=$PROTECTED_REGEX"
# require CMD
# Succeeds silently when `command -v` can resolve CMD; otherwise reports the
# missing command on stderr and terminates the script with status 1.
require() {
  if command -v "$1" >/dev/null 2>&1; then
    return 0
  fi
  echo "$LOG_PREFIX missing required command: $1" >&2
  exit 1
}
# Verify the external tools this script shells out to before doing any work.
for tool in curl jq; do
  require "$tool"
done
# is_protected_tag TAG
# Tests TAG against every regex in PROTECTED_REGEX (a JSON array of strings).
# Globals:   PROTECTED_REGEX (read), LOG_PREFIX (read)
# Arguments: $1 - tag name to test
# Returns:   0 if TAG matches at least one pattern (protected), 1 otherwise.
#            Also returns 0 when PROTECTED_REGEX cannot be decoded: callers use
#            this result to decide what to delete, so an unreadable rule set
#            must fail closed instead of silently unprotecting every tag.
is_protected_tag() {
  local tag="$1"
  local patterns rgx

  # Decode the pattern list up front so a jq failure is observable; inside a
  # process substitution its exit status would be lost.
  if ! patterns=$(jq -r '.[]' <<<"$PROTECTED_REGEX"); then
    echo "$LOG_PREFIX PROTECTED_REGEX is not a valid JSON array; treating tag=$tag as protected" >&2
    return 0
  fi

  while IFS= read -r rgx; do
    [[ -z "$rgx" ]] && continue
    # Right-hand side deliberately unquoted so it is interpreted as a regex.
    if [[ "$tag" =~ $rgx ]]; then
      return 0
    fi
  done <<<"$patterns"
  return 1
}
# human_readable BYTES
# Prints BYTES as a whole number with a binary unit (B, KiB, MiB, GiB, TiB),
# truncating any fractional part. Values of one unit or more are promoted to
# the next unit, so 1024 is printed as "1 KiB".
# Arguments: $1 - non-negative integer byte count (default 0)
# Outputs:   "<value> <unit>" on stdout; "0 B" for empty, zero, negative or
#            non-numeric input.
human_readable() {
  local bytes=${1:-0}
  # Reject anything that is not a plain decimal number before doing
  # arithmetic on it (this also covers empty and negative input).
  if [[ ! "$bytes" =~ ^[0-9]+$ ]]; then
    echo "0 B"
    return
  fi
  # Force base 10 so a value with leading zeros is not parsed as octal.
  bytes=$((10#$bytes))
  if (( bytes <= 0 )); then
    echo "0 B"
    return
  fi
  local unit=(B KiB MiB GiB TiB)
  local i=0
  local value=$bytes
  # ">=" so that exactly 1024 of a unit rolls over to the next one; stop at
  # the last entry of the unit table.
  while (( value >= 1024 && i < 4 )); do
    value=$((value / 1024))
    i=$((i + 1))
  done
  printf "%s %s" "${value}" "${unit[$i]}"
}
# action_delete_dockerhub
# Lists every tag of ${DOCKERHUB_USERNAME}/${IMAGE_NAME} on Docker Hub and
# deletes those that are not kept by any of these rules, checked in order:
#   1. among the KEEP_LAST_N most recently updated tags,
#   2. protected according to is_protected_tag,
#   3. last updated at or after cutoff_ts (or carrying a timestamp that cannot
#      be parsed, in which case the tag is kept rather than guessed at).
# With dry_run=true candidates are only reported.
#
# Globals read:    LOG_PREFIX IMAGE_NAME DOCKERHUB_USERNAME DOCKERHUB_TOKEN
#                  KEEP_LAST_N cutoff_ts dry_run
# Globals written: TOTAL_CANDIDATES TOTAL_CANDIDATES_BYTES
#                  TOTAL_DELETED TOTAL_DELETED_BYTES
# Returns: 0. Login and listing failures are logged and end the cleanup early
#          without deleting anything, so the caller can still emit its summary.
action_delete_dockerhub() {
  echo "$LOG_PREFIX -> Docker Hub cleanup for ${DOCKERHUB_USERNAME:-<unset>}/$IMAGE_NAME (dry-run=$dry_run)"

  if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then
    echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup"
    return
  fi

  local hub_token login_payload page page_size all resp results_count total
  local keep_tags tag tag_name last_updated last_ts bytes http_code
  local repo_url="https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}"

  # Let jq build the login body so quotes/backslashes in the credentials are
  # escaped correctly. They are handed over through the environment and the
  # body is piped to curl on stdin, keeping the secret out of any argv.
  login_payload=$(DOCKERHUB_USERNAME="$DOCKERHUB_USERNAME" DOCKERHUB_TOKEN="$DOCKERHUB_TOKEN" \
    jq -n '{username: env.DOCKERHUB_USERNAME, password: env.DOCKERHUB_TOKEN}') || login_payload=""

  # "|| hub_token=..." keeps set -e/pipefail from killing the script on a
  # network or parse error; the check below reports the failure instead.
  hub_token=$(printf '%s' "$login_payload" |
    curl -sS -X POST -H "Content-Type: application/json" --data-binary @- \
      https://hub.docker.com/v2/users/login/ |
    jq -r '.token') || hub_token=""

  if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then
    echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup"
    return
  fi

  # Fetch every page first so the KEEP_LAST_N rule is applied globally.
  page=1
  page_size=100
  all='[]'
  while :; do
    if ! resp=$(curl -sS -H "Authorization: JWT $hub_token" \
      "${repo_url}/tags?page_size=$page_size&page=$page"); then
      echo "$LOG_PREFIX Docker Hub: failed to list tags (page=$page); aborting Docker Hub cleanup" >&2
      return 0
    fi
    if ! results_count=$(jq -r '.results | length' <<<"$resp" 2>/dev/null); then
      echo "$LOG_PREFIX Docker Hub: unexpected response while listing tags (page=$page); aborting Docker Hub cleanup" >&2
      return 0
    fi
    # A page without results (including error objects) ends the listing.
    if [[ -z "$results_count" || "$results_count" == "0" ]]; then
      break
    fi
    all=$(jq -s '.[0] + .[1].results' <(echo "$all") <(echo "$resp"))
    # Plain assignment: unlike ((page++)) its exit status never trips set -e.
    page=$((page + 1))
  done

  total=$(jq -r 'length' <<<"$all")
  if [[ -z "$total" || "$total" == "0" ]]; then
    echo "$LOG_PREFIX Docker Hub: no tags found"
    return
  fi
  echo "$LOG_PREFIX Docker Hub: fetched $total tags total"

  # Space-separated names of the KEEP_LAST_N most recently updated tags.
  keep_tags=$(jq -r --argjson n "${KEEP_LAST_N:-0}" '
    (sort_by(.last_updated) | reverse) as $s
    | ($s[0:$n] | map(.name)) | join(" ")
  ' <<<"$all")

  # Walk the tags oldest first so the deletion order is predictable.
  while IFS= read -r tag; do
    tag_name=$(jq -r '.name' <<<"$tag")
    last_updated=$(jq -r '.last_updated' <<<"$tag")

    if [[ -n "$keep_tags" && " $keep_tags " == *" $tag_name "* ]]; then
      echo "$LOG_PREFIX keep (last_n): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    if is_protected_tag "$tag_name"; then
      echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    # A timestamp that cannot be parsed must not be read as "very old":
    # keep the tag rather than deleting it on bad data.
    if ! last_ts=$(date -d "$last_updated" +%s 2>/dev/null); then
      echo "$LOG_PREFIX keep (unparseable date): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    if (( last_ts >= cutoff_ts )); then
      echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated"

    # Best-effort size: sum of the per-image sizes, 0 when unavailable.
    bytes=$(jq -r '.images | map(.size) | add // 0' <<<"$tag" 2>/dev/null || echo 0)
    [[ "$bytes" =~ ^[0-9]+$ ]] || bytes=0

    TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1))
    TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes))

    if $dry_run; then
      echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)"
    else
      echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)"
      # Capture the HTTP status so that only confirmed deletions are counted;
      # a failed request is logged and the loop moves on to the next tag.
      http_code=$(curl -sS -o /dev/null -w '%{http_code}' -X DELETE \
        -H "Authorization: JWT $hub_token" \
        "${repo_url}/tags/${tag_name}/") || http_code="000"
      if [[ "$http_code" == 2* ]]; then
        TOTAL_DELETED=$((TOTAL_DELETED + 1))
        TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes))
      else
        echo "$LOG_PREFIX failed to delete Docker Hub tag=$tag_name (http=$http_code)" >&2
      fi
    fi
  done < <(jq -c 'sort_by(.last_updated) | .[]' <<<"$all")
}
# Main: run the Docker Hub cleanup, print the totals in raw and
# human-readable form, then persist them as KEY=VALUE lines in
# prune-summary-dockerhub.env (relative to the current directory).
action_delete_dockerhub

printf '%s\n' "$LOG_PREFIX SUMMARY: total_candidates=${TOTAL_CANDIDATES} total_candidates_bytes=${TOTAL_CANDIDATES_BYTES} total_deleted=${TOTAL_DELETED} total_deleted_bytes=${TOTAL_DELETED_BYTES}"
printf '%s\n' "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable "${TOTAL_CANDIDATES_BYTES}") deleted=${TOTAL_DELETED} deleted_size=$(human_readable "${TOTAL_DELETED_BYTES}")"

# One truncating redirect for the whole group replaces any previous file.
{
  printf 'TOTAL_CANDIDATES=%s\n' "${TOTAL_CANDIDATES}"
  printf 'TOTAL_CANDIDATES_BYTES=%s\n' "${TOTAL_CANDIDATES_BYTES}"
  printf 'TOTAL_DELETED=%s\n' "${TOTAL_DELETED}"
  printf 'TOTAL_DELETED_BYTES=%s\n' "${TOTAL_DELETED_BYTES}"
} > prune-summary-dockerhub.env

printf '%s\n' "$LOG_PREFIX done"

View File

@@ -1,10 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
echo "[prune] SCRIPT VERSION: GH_API_VARIANT"
# prune-container-images.sh
# Deletes old images from GHCR and Docker Hub according to retention and protection rules.
# prune-ghcr.sh
# Deletes old container images from GitHub Container Registry (GHCR)
# according to retention and protection rules.
REGISTRIES=${REGISTRIES:-ghcr}
OWNER=${OWNER:-${GITHUB_REPOSITORY_OWNER:-Wikid82}}
IMAGE_NAME=${IMAGE_NAME:-charon}
@@ -14,33 +13,29 @@ KEEP_LAST_N=${KEEP_LAST_N:-30}
DRY_RUN=${DRY_RUN:-false}
PROTECTED_REGEX=${PROTECTED_REGEX:-'["^v","^latest$","^main$","^develop$"]'}
# Extra knobs (optional)
PRUNE_UNTAGGED=${PRUNE_UNTAGGED:-true}
PRUNE_SBOM_TAGS=${PRUNE_SBOM_TAGS:-true}
LOG_PREFIX="[prune]"
LOG_PREFIX="[prune-ghcr]"
now_ts=$(date +%s)
cutoff_ts=$(date -d "$KEEP_DAYS days ago" +%s 2>/dev/null || date -d "-$KEEP_DAYS days" +%s)
# Normalize DRY_RUN to true/false reliably
dry_run=false
case "${DRY_RUN,,}" in
true|1|yes|y|on) dry_run=true ;;
*) dry_run=false ;;
esac
# Totals
TOTAL_CANDIDATES=0
TOTAL_CANDIDATES_BYTES=0
TOTAL_DELETED=0
TOTAL_DELETED_BYTES=0
echo "$LOG_PREFIX starting with REGISTRIES=$REGISTRIES OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run"
echo "$LOG_PREFIX starting with OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run"
echo "$LOG_PREFIX PROTECTED_REGEX=$PROTECTED_REGEX PRUNE_UNTAGGED=$PRUNE_UNTAGGED PRUNE_SBOM_TAGS=$PRUNE_SBOM_TAGS"
require() {
command -v "$1" >/dev/null 2>&1 || { echo "$LOG_PREFIX missing required command: $1"; exit 1; }
command -v "$1" >/dev/null 2>&1 || { echo "$LOG_PREFIX missing required command: $1" >&2; exit 1; }
}
require curl
require jq
@@ -57,8 +52,6 @@ is_protected_tag() {
return 1
}
# Some repos generate tons of tags like sha-xxxx, pr-123-xxxx, *.sbom.
# We treat SBOM-only tags as deletable (optional).
tag_is_sbom() {
local tag="$1"
[[ "$tag" == *.sbom ]]
@@ -80,9 +73,9 @@ human_readable() {
printf "%s %s" "${value}" "${unit[$i]}"
}
# --- GHCR ---
# All echo/log statements go to stderr so stdout remains pure JSON
ghcr_list_all_versions_json() {
local namespace_type="$1" # orgs or users
local namespace_type="$1"
local page=1
local per_page=100
local all='[]'
@@ -90,7 +83,6 @@ ghcr_list_all_versions_json() {
while :; do
local url="https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions?per_page=$per_page&page=$page"
# Use GitHub's recommended headers
local resp
resp=$(curl -sS \
-H "Authorization: Bearer $GITHUB_TOKEN" \
@@ -98,29 +90,26 @@ ghcr_list_all_versions_json() {
-H "X-GitHub-Api-Version: 2022-11-28" \
"$url" || true)
# ✅ NEW: ensure we got JSON
if ! echo "$resp" | jq -e . >/dev/null 2>&1; then
echo "$LOG_PREFIX GHCR returned non-JSON for url=$url"
echo "$LOG_PREFIX GHCR response (first 200 chars): $(echo "$resp" | head -c 200 | tr '\n' ' ')"
echo "$LOG_PREFIX GHCR returned non-JSON for url=$url" >&2
echo "$LOG_PREFIX GHCR response (first 200 chars): $(echo "$resp" | head -c 200 | tr '\n' ' ')" >&2
echo "[]"
return 0
fi
# Handle JSON error messages
if echo "$resp" | jq -e 'has("message")' >/dev/null 2>&1; then
local msg
msg=$(echo "$resp" | jq -r '.message')
if [[ "$msg" == "Not Found" ]]; then
echo "$LOG_PREFIX GHCR ${namespace_type} endpoint returned Not Found"
echo "$LOG_PREFIX GHCR ${namespace_type} endpoint returned Not Found" >&2
echo "[]"
return 0
fi
echo "$LOG_PREFIX GHCR API error: $msg"
# also print documentation_url if present (helpful)
echo "$LOG_PREFIX GHCR API error: $msg" >&2
doc=$(echo "$resp" | jq -r '.documentation_url // empty')
[[ -n "$doc" ]] && echo "$LOG_PREFIX GHCR docs: $doc"
[[ -n "$doc" ]] && echo "$LOG_PREFIX GHCR docs: $doc" >&2
echo "[]"
return 0
fi
@@ -146,7 +135,6 @@ action_delete_ghcr() {
return
fi
# Try orgs first, then users
local all
local namespace_type="orgs"
all=$(ghcr_list_all_versions_json "$namespace_type")
@@ -164,12 +152,6 @@ action_delete_ghcr() {
echo "$LOG_PREFIX GHCR: fetched $total versions total"
# Normalize a working list:
# - id
# - created_at
# - created_ts
# - tags array
# - tags_csv
local normalized
normalized=$(echo "$all" | jq -c '
map({
@@ -181,8 +163,6 @@ action_delete_ghcr() {
})
')
# Compute the globally newest KEEP_LAST_N ids to always keep
# (If KEEP_LAST_N is 0 or empty, keep none by this rule)
local keep_ids
keep_ids=$(echo "$normalized" | jq -r --argjson n "${KEEP_LAST_N:-0}" '
(sort_by(.created_ts) | reverse) as $s
@@ -193,21 +173,20 @@ action_delete_ghcr() {
echo "$LOG_PREFIX GHCR: keeping newest KEEP_LAST_N ids: $KEEP_LAST_N"
fi
# Iterate versions sorted oldest->newest so deletions are predictable
local ver protected all_sbom candidate_bytes
while IFS= read -r ver; do
local id created created_ts tags_csv
all_sbom=false
id=$(echo "$ver" | jq -r '.id')
created=$(echo "$ver" | jq -r '.created_at')
created_ts=$(echo "$ver" | jq -r '.created_ts')
tags_csv=$(echo "$ver" | jq -r '.tags_csv')
# KEEP_LAST_N rule (global)
if [[ -n "$keep_ids" && " $keep_ids " == *" $id "* ]]; then
echo "$LOG_PREFIX keep (last_n): id=$id tags=$tags_csv created=$created"
continue
fi
# Protected tags rule
protected=false
if [[ -n "$tags_csv" ]]; then
while IFS= read -r t; do
@@ -223,8 +202,6 @@ action_delete_ghcr() {
continue
fi
# Optional: treat SBOM-only versions/tags as deletable
# If every tag is *.sbom and PRUNE_SBOM_TAGS=true, we allow pruning regardless of “tag protected” rules.
if [[ "${PRUNE_SBOM_TAGS,,}" == "true" && -n "$tags_csv" ]]; then
all_sbom=true
while IFS= read -r t; do
@@ -234,46 +211,40 @@ action_delete_ghcr() {
break
fi
done < <(echo "$tags_csv" | tr ',' '\n')
if $all_sbom; then
# allow fallthrough; do not "keep" just because tags are recent
:
fi
fi
# Age rule
if (( created_ts >= cutoff_ts )); then
echo "$LOG_PREFIX keep (recent): id=$id tags=$tags_csv created=$created"
continue
fi
# Optional: prune untagged versions (common GHCR bloat)
if [[ "${PRUNE_UNTAGGED,,}" == "true" ]]; then
# tags_csv can be empty for untagged
if [[ -z "$tags_csv" ]]; then
echo "$LOG_PREFIX candidate (untagged): id=$id tags=<none> created=$created"
else
echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created"
fi
# If all tags are SBOM tags and PRUNE_SBOM_TAGS is enabled, skip the age check
if [[ "${all_sbom:-false}" == "true" ]]; then
echo "$LOG_PREFIX candidate (sbom-only): id=$id tags=$tags_csv created=$created"
else
# If not pruning untagged, skip them
if [[ -z "$tags_csv" ]]; then
echo "$LOG_PREFIX keep (untagged disabled): id=$id created=$created"
if (( created_ts >= cutoff_ts )); then
echo "$LOG_PREFIX keep (recent): id=$id tags=$tags_csv created=$created"
continue
fi
echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created"
if [[ "${PRUNE_UNTAGGED,,}" == "true" ]]; then
if [[ -z "$tags_csv" ]]; then
echo "$LOG_PREFIX candidate (untagged): id=$id tags=<none> created=$created"
else
echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created"
fi
else
if [[ -z "$tags_csv" ]]; then
echo "$LOG_PREFIX keep (untagged disabled): id=$id created=$created"
continue
fi
echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created"
fi
fi
# Candidate bookkeeping
TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1))
# Best-effort size estimation: GHCR registry auth is messy; don't block prune on it.
candidate_bytes=0
if $dry_run; then
echo "$LOG_PREFIX DRY RUN: would delete GHCR version id=$id (approx ${candidate_bytes} bytes)"
else
echo "$LOG_PREFIX deleting GHCR version id=$id"
# Use GitHub API delete
curl -sS -X DELETE -H "Authorization: Bearer $GITHUB_TOKEN" \
"https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions/$id" >/dev/null || true
TOTAL_DELETED=$((TOTAL_DELETED + 1))
@@ -282,116 +253,16 @@ action_delete_ghcr() {
done < <(echo "$normalized" | jq -c 'sort_by(.created_ts) | .[]')
}
# --- Docker Hub ---
action_delete_dockerhub() {
echo "$LOG_PREFIX -> Docker Hub cleanup for ${DOCKERHUB_USERNAME:-<unset>}/$IMAGE_NAME (dry-run=$dry_run)"
# Main
action_delete_ghcr
if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then
echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup"
return
fi
hub_token=$(curl -sS -X POST -H "Content-Type: application/json" \
-d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
https://hub.docker.com/v2/users/login/ | jq -r '.token')
if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then
echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup"
return
fi
# Fetch all pages first so KEEP_LAST_N can be global
page=1
page_size=100
all='[]'
while :; do
resp=$(curl -sS -H "Authorization: JWT $hub_token" \
"https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags?page_size=$page_size&page=$page")
results_count=$(echo "$resp" | jq -r '.results | length')
if [[ -z "$results_count" || "$results_count" == "0" ]]; then
break
fi
all=$(jq -s '.[0] + .[1].results' <(echo "$all") <(echo "$resp"))
((page++))
done
total=$(echo "$all" | jq -r 'length')
if [[ -z "$total" || "$total" == "0" ]]; then
echo "$LOG_PREFIX Docker Hub: no tags found"
return
fi
echo "$LOG_PREFIX Docker Hub: fetched $total tags total"
keep_tags=$(echo "$all" | jq -r --argjson n "${KEEP_LAST_N:-0}" '
(sort_by(.last_updated) | reverse) as $s
| ($s[0:$n] | map(.name)) | join(" ")
')
while IFS= read -r tag; do
tag_name=$(echo "$tag" | jq -r '.name')
last_updated=$(echo "$tag" | jq -r '.last_updated')
last_ts=$(date -d "$last_updated" +%s 2>/dev/null || 0)
if [[ -n "$keep_tags" && " $keep_tags " == *" $tag_name "* ]]; then
echo "$LOG_PREFIX keep (last_n): tag=$tag_name last_updated=$last_updated"
continue
fi
protected=false
if is_protected_tag "$tag_name"; then
protected=true
fi
if $protected; then
echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated"
continue
fi
if (( last_ts >= cutoff_ts )); then
echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated"
continue
fi
echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated"
bytes=$(echo "$tag" | jq -r '.images | map(.size) | add // 0' 2>/dev/null || echo 0)
TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1))
TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes))
if $dry_run; then
echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)"
else
echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)"
curl -sS -X DELETE -H "Authorization: JWT $hub_token" \
"https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags/${tag_name}/" >/dev/null || true
TOTAL_DELETED=$((TOTAL_DELETED + 1))
TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes))
fi
done < <(echo "$all" | jq -c 'sort_by(.last_updated) | .[]')
}
# Main: iterate requested registries
IFS=',' read -ra regs <<< "$REGISTRIES"
for r in "${regs[@]}"; do
case "$r" in
ghcr) action_delete_ghcr ;;
dockerhub) action_delete_dockerhub ;;
*) echo "$LOG_PREFIX unknown registry: $r" ;;
esac
done
# Summary
echo "$LOG_PREFIX SUMMARY: total_candidates=${TOTAL_CANDIDATES} total_candidates_bytes=${TOTAL_CANDIDATES_BYTES} total_deleted=${TOTAL_DELETED} total_deleted_bytes=${TOTAL_DELETED_BYTES}"
echo "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable "${TOTAL_CANDIDATES_BYTES}") deleted=${TOTAL_DELETED} deleted_size=$(human_readable "${TOTAL_DELETED_BYTES}")"
# Export summary for workflow parsing
: > prune-summary.env
echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary.env
echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary.env
echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary.env
echo "TOTAL_DELETED_BYTES=${TOTAL_DELETED_BYTES}" >> prune-summary.env
: > prune-summary-ghcr.env
echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary-ghcr.env
echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary-ghcr.env
echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary-ghcr.env
echo "TOTAL_DELETED_BYTES=${TOTAL_DELETED_BYTES}" >> prune-summary-ghcr.env
echo "$LOG_PREFIX done"