fix: update protected regex pattern for container pruning scripts and enhance logging details

This commit is contained in:
GitHub Actions
2026-02-25 17:35:47 +00:00
parent ad31bacc1c
commit 1af04987e0
2 changed files with 325 additions and 205 deletions

View File

@@ -36,7 +36,7 @@ jobs:
KEEP_DAYS: ${{ github.event.inputs.keep_days || '30' }}
KEEP_LAST_N: ${{ github.event.inputs.keep_last_n || '30' }}
DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
PROTECTED_REGEX: '["^v","^latest$","^main$","^develop$"]'
PROTECTED_REGEX: '["^v?[0-9]+\\.[0-9]+\\.[0-9]+$","^latest$","^main$","^develop$"]'
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

View File

@@ -3,236 +3,70 @@ set -euo pipefail
# prune-container-images.sh
# Deletes old images from GHCR and Docker Hub according to retention and protection rules.
# Defaults: dry-run (no deletes). Accepts env vars for configuration.
# Required env vars (workflow will set these):
# - REGISTRIES (comma-separated: ghcr,dockerhub)
# - OWNER (github repository owner)
# - IMAGE_NAME (charon)
# - KEEP_DAYS (default 30)
# - PROTECTED_REGEX (JSON array of regex strings)
# - DRY_RUN (true/false)
# - KEEP_LAST_N (optional, default 30)
# - DOCKERHUB_USERNAME/DOCKERHUB_TOKEN (for Docker Hub)
# - GITHUB_TOKEN (for GHCR API)
# --- Configuration: each setting can be overridden from the environment ---
: "${REGISTRIES:=ghcr}"
: "${OWNER:=${GITHUB_REPOSITORY_OWNER:-Wikid82}}"
: "${IMAGE_NAME:=charon}"
: "${KEEP_DAYS:=30}"
: "${KEEP_LAST_N:=30}"
: "${DRY_RUN:=false}"
PROTECTED_REGEX=${PROTECTED_REGEX:-'["^v","^latest$","^main$","^develop$"]'}

# Optional extra knobs
: "${PRUNE_UNTAGGED:=true}"
: "${PRUNE_SBOM_TAGS:=true}"

LOG_PREFIX="[prune]"

# Epoch seconds: the current time, and the cutoff KEEP_DAYS days in the past.
# The second `date` spelling is tried only when the first one is rejected.
now_ts=$(date +%s)
cutoff_ts=$(date -d "$KEEP_DAYS days ago" +%s 2>/dev/null || date -d "-$KEEP_DAYS days" +%s)

# Collapse the accepted DRY_RUN spellings (case-insensitive) into a strict
# true/false flag; anything unrecognised means false.
if [[ "${DRY_RUN,,}" =~ ^(true|1|yes|y|on)$ ]]; then
  dry_run=true
else
  dry_run=false
fi

# Running totals, updated by the per-registry cleanup functions
TOTAL_CANDIDATES=0
TOTAL_CANDIDATES_BYTES=0
TOTAL_DELETED=0
TOTAL_DELETED_BYTES=0
echo "$LOG_PREFIX starting with REGISTRIES=$REGISTRIES KEEP_DAYS=$KEEP_DAYS DRY_RUN=$DRY_RUN"
echo "$LOG_PREFIX starting with REGISTRIES=$REGISTRIES OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run"
echo "$LOG_PREFIX PROTECTED_REGEX=$PROTECTED_REGEX PRUNE_UNTAGGED=$PRUNE_UNTAGGED PRUNE_SBOM_TAGS=$PRUNE_SBOM_TAGS"
action_delete_ghcr() {
echo "$LOG_PREFIX -> GHCR cleanup for $OWNER/$IMAGE_NAME (dry-run=$DRY_RUN)"
# Abort the script unless the given command can be found.
# Arguments: $1 - name of the command to look up
# Outputs:   an error line on stderr when the command is missing
# Exits:     with status 1 when the command is missing
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    # Diagnostics belong on stderr so they never mix with captured output.
    echo "$LOG_PREFIX missing required command: $1" >&2
    exit 1
  fi
}
require curl
require jq
page=1
per_page=100
namespace_type="orgs"
while :; do
url="https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions?per_page=$per_page&page=$page"
resp=$(curl -sS -H "Authorization: Bearer $GITHUB_TOKEN" "$url")
# Handle API errors gracefully and try users/organizations as needed
if echo "$resp" | jq -e '.message' >/dev/null 2>&1; then
msg=$(echo "$resp" | jq -r '.message')
if [[ "$msg" == "Not Found" && "$namespace_type" == "orgs" ]]; then
echo "$LOG_PREFIX GHCR org lookup returned Not Found; switching to users endpoint"
namespace_type="users"
page=1
continue
fi
if echo "$msg" | grep -q "read:packages"; then
echo "$LOG_PREFIX GHCR API error: $msg. Ensure token has 'read:packages' scope or use Actions GITHUB_TOKEN with package permissions."
return
fi
is_protected_tag() {
local tag="$1"
local rgx
while IFS= read -r rgx; do
[[ -z "$rgx" ]] && continue
if [[ "$tag" =~ $rgx ]]; then
return 0
fi
ids=$(echo "$resp" | jq -r '.[].id' 2>/dev/null)
if [[ -z "$ids" ]]; then
break
fi
# For each version, capture id, created_at, tags
echo "$resp" | jq -c '.[]' | while read -r ver; do
id=$(echo "$ver" | jq -r '.id')
created=$(echo "$ver" | jq -r '.created_at')
tags=$(echo "$ver" | jq -r '.metadata.container.tags // [] | join(",")')
created_ts=$(date -d "$created" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$created" +%s 2>/dev/null || 0)
# skip protected tags
protected=false
for rgx in $(echo "$PROTECTED_REGEX" | jq -r '.[]'); do
for tag in $(echo "$tags" | sed 's/,/ /g'); do
if [[ "$tag" =~ $rgx ]]; then
protected=true
fi
done
done
if $protected; then
echo "$LOG_PREFIX keep (protected): id=$id tags=$tags created=$created"
continue
fi
# skip if not older than cutoff
if (( created_ts >= cutoff_ts )); then
echo "$LOG_PREFIX keep (recent): id=$id tags=$tags created=$created"
continue
fi
echo "$LOG_PREFIX candidate: id=$id tags=$tags created=$created"
# Try to estimate size for GHCR by fetching manifest (best-effort)
candidate_bytes=0
for tag in $(echo "$tags" | sed 's/,/ /g'); do
if [[ -n "$tag" && "$tag" != "null" ]]; then
manifest_url="https://ghcr.io/v2/${OWNER}/${IMAGE_NAME}/manifests/${tag}"
manifest=$(curl -sS -H "Accept: application/vnd.docker.distribution.manifest.v2+json" -H "Authorization: Bearer $GITHUB_TOKEN" "$manifest_url" || true)
if [[ -n "$manifest" ]]; then
bytes=$(echo "$manifest" | jq -r '.layers // [] | map(.size) | add // 0')
if [[ "$bytes" != "null" ]] && (( bytes > 0 )) 2>/dev/null; then
candidate_bytes=$((candidate_bytes + bytes))
fi
fi
fi
done
TOTAL_CANDIDATES=$((TOTAL_CANDIDATES+1))
TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + candidate_bytes))
if [[ "$DRY_RUN" == "true" ]]; then
echo "$LOG_PREFIX DRY RUN: would delete GHCR version id=$id (approx ${candidate_bytes} bytes)"
else
echo "$LOG_PREFIX deleting GHCR version id=$id (approx ${candidate_bytes} bytes)"
curl -sS -X DELETE -H "Authorization: Bearer $GITHUB_TOKEN" \
"https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions/$id"
TOTAL_DELETED=$((TOTAL_DELETED+1))
TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + candidate_bytes))
fi
done
((page++))
done
done < <(echo "$PROTECTED_REGEX" | jq -r '.[]')
return 1
}
action_delete_dockerhub() {
echo "$LOG_PREFIX -> Docker Hub cleanup for $DOCKERHUB_USERNAME/$IMAGE_NAME (dry-run=$DRY_RUN)"
if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then
echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup"
return
fi
# Login to Docker Hub to get token (v2)
hub_token=$(curl -sS -X POST -H "Content-Type: application/json" \
-d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
https://hub.docker.com/v2/users/login/ | jq -r '.token')
if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then
echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup"
return
fi
page=1
page_size=100
while :; do
resp=$(curl -sS -H "Authorization: JWT $hub_token" \
"https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags?page_size=$page_size&page=$page")
results_count=$(echo "$resp" | jq -r '.results | length')
if [[ "$results_count" == "0" || -z "$results_count" ]]; then
break
fi
echo "$resp" | jq -c '.results[]' | while read -r tag; do
tag_name=$(echo "$tag" | jq -r '.name')
last_updated=$(echo "$tag" | jq -r '.last_updated')
last_ts=$(date -d "$last_updated" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%S%z" "$last_updated" +%s 2>/dev/null || 0)
# Check protected patterns
protected=false
for rgx in $(echo "$PROTECTED_REGEX" | jq -r '.[]'); do
if [[ "$tag_name" =~ $rgx ]]; then
protected=true
break
fi
done
if $protected; then
echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated"
continue
fi
if (( last_ts >= cutoff_ts )); then
echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated"
continue
fi
echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated"
# Estimate size from Docker Hub tag JSON (images[].size or full_size)
bytes=0
bytes=$(echo "$tag" | jq -r '.images | map(.size) | add // empty') || true
if [[ -z "$bytes" || "$bytes" == "null" ]]; then
bytes=$(echo "$tag" | jq -r '.full_size // empty' 2>/dev/null || true)
fi
bytes=${bytes:-0}
TOTAL_CANDIDATES=$((TOTAL_CANDIDATES+1))
TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes))
if [[ "$DRY_RUN" == "true" ]]; then
echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)"
else
echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)"
curl -sS -X DELETE -H "Authorization: JWT $hub_token" \
"https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags/${tag_name}/"
TOTAL_DELETED=$((TOTAL_DELETED+1))
TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes))
fi
done
((page++))
done
# Some repos generate tons of tags like sha-xxxx, pr-123-xxxx, *.sbom.
# SBOM-only tags are treated as deletable (optional).
#
# Arguments: $1 - tag name
# Returns:   0 when the tag ends in ".sbom", 1 otherwise
tag_is_sbom() {
  case "$1" in
    *.sbom) return 0 ;;
    *) return 1 ;;
  esac
}
# Main: iterate requested registries
IFS=',' read -ra regs <<< "$REGISTRIES"
for r in "${regs[@]}"; do
case "$r" in
ghcr)
action_delete_ghcr
;;
dockerhub)
action_delete_dockerhub
;;
*)
echo "$LOG_PREFIX unknown registry: $r"
;;
esac
done
# Summary
human_readable() {
local bytes=$1
if (( bytes == 0 )); then
local bytes=${1:-0}
if [[ -z "$bytes" ]] || (( bytes <= 0 )); then
echo "0 B"
return
fi
@@ -246,10 +80,296 @@ human_readable() {
printf "%s %s" "${value}" "${unit[$i]}"
}
# --- GHCR ---
#######################################
# List every version of the container package via the GitHub Packages API,
# following pagination, and print the result as a single JSON array.
# Globals:   OWNER, IMAGE_NAME, GITHUB_TOKEN, LOG_PREFIX (read)
# Arguments: $1 - namespace type: "orgs" or "users"
# Outputs:   stdout - JSON array of versions ("[]" when the API reports an error)
#            stderr - diagnostics
# Returns:   0; API errors are reported as an empty array, not as a failure
#######################################
ghcr_list_all_versions_json() {
  local namespace_type="$1" # orgs or users
  local page=1
  local per_page=100
  local all='[]'
  local url resp msg count

  while :; do
    url="https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions?per_page=$per_page&page=$page"
    # Best effort: a transport failure leaves resp empty, which ends the loop below.
    resp=$(curl -sS -H "Authorization: Bearer $GITHUB_TOKEN" "$url" || true)

    # An object carrying a "message" field is treated as an API error.
    if echo "$resp" | jq -e '.message' >/dev/null 2>&1; then
      msg=$(echo "$resp" | jq -r '.message')
      # Log to stderr: callers capture stdout and parse it as JSON, so any log
      # line printed there would corrupt the result.
      if [[ "$msg" == "Not Found" ]]; then
        echo "$LOG_PREFIX GHCR ${namespace_type} endpoint returned Not Found" >&2
      else
        echo "$LOG_PREFIX GHCR API error: $msg" >&2
      fi
      echo "[]"
      return 0
    fi

    # An empty page (or empty response) marks the end of the listing.
    count=$(echo "$resp" | jq -r 'length')
    if [[ -z "$count" || "$count" == "0" ]]; then
      break
    fi

    # Concatenate this page onto the accumulated array.
    all=$(jq -s 'add' <(echo "$all") <(echo "$resp"))
    page=$((page + 1))
  done

  echo "$all"
}
#######################################
# Prune old versions of the GHCR container package.
# A version is kept when it is among the newest KEEP_LAST_N versions, when any
# of its tags is protected, or when it was created at/after the cutoff.
# Untagged versions are also kept unless PRUNE_UNTAGGED is "true". Everything
# else is a deletion candidate and is deleted unless dry_run is true.
# Globals:   OWNER, IMAGE_NAME, GITHUB_TOKEN, KEEP_LAST_N, PRUNE_UNTAGGED,
#            PRUNE_SBOM_TAGS, cutoff_ts, dry_run, LOG_PREFIX (read)
#            TOTAL_CANDIDATES, TOTAL_DELETED (updated)
# Arguments: none
# Outputs:   progress lines on stdout, delete failures on stderr
#######################################
action_delete_ghcr() {
  echo "$LOG_PREFIX -> GHCR cleanup for $OWNER/$IMAGE_NAME (dry-run=$dry_run)"

  if [[ -z "${GITHUB_TOKEN:-}" ]]; then
    echo "$LOG_PREFIX GITHUB_TOKEN not set; skipping GHCR cleanup"
    return
  fi

  local all total normalized keep_ids ver t
  local id created created_ts tags_csv
  local protected all_sbom candidate_bytes
  local namespace_type="orgs"

  # Try orgs first, then users. A listing that jq cannot parse is treated as
  # empty instead of aborting the whole script under `set -e`.
  all=$(ghcr_list_all_versions_json "$namespace_type")
  total=$(jq -r 'length' <<<"$all" 2>/dev/null) || total=0
  if [[ "$total" == "0" ]]; then
    namespace_type="users"
    all=$(ghcr_list_all_versions_json "$namespace_type")
    total=$(jq -r 'length' <<<"$all" 2>/dev/null) || total=0
  fi

  if [[ -z "$total" || "$total" == "0" ]]; then
    echo "$LOG_PREFIX GHCR: no versions found (or insufficient access)."
    return
  fi

  echo "$LOG_PREFIX GHCR: fetched $total versions total"

  # Normalize a working list:
  # - id
  # - created_at
  # - created_ts (epoch seconds parsed from created_at)
  # - tags array
  # - tags_csv
  normalized=$(echo "$all" | jq -c '
    map({
      id: .id,
      created_at: .created_at,
      tags: (.metadata.container.tags // []),
      tags_csv: ((.metadata.container.tags // []) | join(",")),
      created_ts: (.created_at | fromdateiso8601)
    })
  ')

  # Compute the globally newest KEEP_LAST_N ids to always keep
  # (If KEEP_LAST_N is 0 or empty, keep none by this rule)
  keep_ids=$(echo "$normalized" | jq -r --argjson n "${KEEP_LAST_N:-0}" '
    (sort_by(.created_ts) | reverse) as $s
    | ($s[0:$n] | map(.id)) | join(" ")
  ')

  if [[ -n "$keep_ids" ]]; then
    echo "$LOG_PREFIX GHCR: keeping newest KEEP_LAST_N ids: $KEEP_LAST_N"
  fi

  # Iterate versions sorted oldest->newest so deletions are predictable
  while IFS= read -r ver; do
    id=$(echo "$ver" | jq -r '.id')
    created=$(echo "$ver" | jq -r '.created_at')
    created_ts=$(echo "$ver" | jq -r '.created_ts')
    tags_csv=$(echo "$ver" | jq -r '.tags_csv')

    # KEEP_LAST_N rule (global): space-padded match so id 12 never matches 123
    if [[ -n "$keep_ids" && " $keep_ids " == *" $id "* ]]; then
      echo "$LOG_PREFIX keep (last_n): id=$id tags=$tags_csv created=$created"
      continue
    fi

    # Protected tags rule: one protected tag protects the whole version
    protected=false
    if [[ -n "$tags_csv" ]]; then
      while IFS= read -r t; do
        [[ -z "$t" ]] && continue
        if is_protected_tag "$t"; then
          protected=true
          break
        fi
      done < <(echo "$tags_csv" | tr ',' '\n')
    fi
    if $protected; then
      echo "$LOG_PREFIX keep (protected): id=$id tags=$tags_csv created=$created"
      continue
    fi

    # Optional: detect versions whose every tag is *.sbom.
    # NOTE(review): all_sbom is computed but nothing below reads it, so
    # PRUNE_SBOM_TAGS currently does not change which versions are pruned.
    # Confirm the intended rule before wiring it into the decisions below.
    if [[ "${PRUNE_SBOM_TAGS,,}" == "true" && -n "$tags_csv" ]]; then
      all_sbom=true
      while IFS= read -r t; do
        [[ -z "$t" ]] && continue
        if ! tag_is_sbom "$t"; then
          all_sbom=false
          break
        fi
      done < <(echo "$tags_csv" | tr ',' '\n')
      if $all_sbom; then
        # allow fallthrough; do not "keep" just because tags are recent
        :
      fi
    fi

    # Age rule
    if (( created_ts >= cutoff_ts )); then
      echo "$LOG_PREFIX keep (recent): id=$id tags=$tags_csv created=$created"
      continue
    fi

    # Untagged versions (common GHCR bloat) are pruned only when enabled
    if [[ -z "$tags_csv" ]]; then
      if [[ "${PRUNE_UNTAGGED,,}" != "true" ]]; then
        echo "$LOG_PREFIX keep (untagged disabled): id=$id created=$created"
        continue
      fi
      echo "$LOG_PREFIX candidate (untagged): id=$id tags=<none> created=$created"
    else
      echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created"
    fi

    # Candidate bookkeeping
    TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1))

    # Best-effort size estimation: GHCR registry auth is messy; don't block prune on it.
    candidate_bytes=0

    if $dry_run; then
      echo "$LOG_PREFIX DRY RUN: would delete GHCR version id=$id (approx ${candidate_bytes} bytes)"
    else
      echo "$LOG_PREFIX deleting GHCR version id=$id"
      # Use GitHub API delete. -f makes curl fail on HTTP errors, so a version
      # is counted as deleted only when the request actually succeeded.
      if curl -fsS -X DELETE -H "Authorization: Bearer $GITHUB_TOKEN" \
        "https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions/$id" >/dev/null; then
        TOTAL_DELETED=$((TOTAL_DELETED + 1))
      else
        echo "$LOG_PREFIX failed to delete GHCR version id=$id" >&2
      fi
    fi
  done < <(echo "$normalized" | jq -c 'sort_by(.created_ts) | .[]')
}
# --- Docker Hub ---
#######################################
# Prune old tags of the Docker Hub repository.
# A tag is kept when it is among the KEEP_LAST_N most recently updated tags,
# when it is protected, or when it was updated at/after the cutoff. Everything
# else is a deletion candidate and is deleted unless dry_run is true.
# Globals:   DOCKERHUB_USERNAME, DOCKERHUB_TOKEN, IMAGE_NAME, KEEP_LAST_N,
#            cutoff_ts, dry_run, LOG_PREFIX (read)
#            TOTAL_CANDIDATES, TOTAL_CANDIDATES_BYTES, TOTAL_DELETED,
#            TOTAL_DELETED_BYTES (updated)
# Arguments: none
# Outputs:   progress lines on stdout, delete failures on stderr
#######################################
action_delete_dockerhub() {
  echo "$LOG_PREFIX -> Docker Hub cleanup for ${DOCKERHUB_USERNAME:-<unset>}/$IMAGE_NAME (dry-run=$dry_run)"

  if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then
    echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup"
    return
  fi

  local login_payload hub_token
  local page page_size all resp results_count total keep_tags
  local tag tag_name last_updated last_ts bytes

  # Build the login body with jq so quotes or backslashes in the credentials
  # are JSON-escaped instead of producing a malformed request.
  login_payload=$(jq -cn --arg u "$DOCKERHUB_USERNAME" --arg p "$DOCKERHUB_TOKEN" \
    '{username: $u, password: $p}')
  hub_token=$(curl -sS -X POST -H "Content-Type: application/json" \
    -d "$login_payload" \
    https://hub.docker.com/v2/users/login/ | jq -r '.token')

  if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then
    echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup"
    return
  fi

  # Fetch all pages first so KEEP_LAST_N can be global
  page=1
  page_size=100
  all='[]'
  while :; do
    resp=$(curl -sS -H "Authorization: JWT $hub_token" \
      "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags?page_size=$page_size&page=$page")

    # A page without results marks the end of the listing.
    results_count=$(echo "$resp" | jq -r '.results | length')
    if [[ -z "$results_count" || "$results_count" == "0" ]]; then
      break
    fi

    all=$(jq -s '.[0] + .[1].results' <(echo "$all") <(echo "$resp"))
    page=$((page + 1))
  done

  total=$(echo "$all" | jq -r 'length')
  if [[ -z "$total" || "$total" == "0" ]]; then
    echo "$LOG_PREFIX Docker Hub: no tags found"
    return
  fi

  echo "$LOG_PREFIX Docker Hub: fetched $total tags total"

  # Names of the KEEP_LAST_N most recently updated tags, space-separated
  keep_tags=$(echo "$all" | jq -r --argjson n "${KEEP_LAST_N:-0}" '
    (sort_by(.last_updated) | reverse) as $s
    | ($s[0:$n] | map(.name)) | join(" ")
  ')

  # Walk tags oldest -> newest
  while IFS= read -r tag; do
    tag_name=$(echo "$tag" | jq -r '.name')
    last_updated=$(echo "$tag" | jq -r '.last_updated')
    # Fall back to 0 when the timestamp cannot be parsed. `|| echo 0` is needed
    # here: a bare `|| 0` would try to run a command named "0" and fail.
    last_ts=$(date -d "$last_updated" +%s 2>/dev/null || echo 0)

    if [[ -n "$keep_tags" && " $keep_tags " == *" $tag_name "* ]]; then
      echo "$LOG_PREFIX keep (last_n): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    if is_protected_tag "$tag_name"; then
      echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    if (( last_ts >= cutoff_ts )); then
      echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated"
      continue
    fi

    echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated"

    # Size estimate: sum of images[].size, or 0 when it cannot be computed
    bytes=$(echo "$tag" | jq -r '.images | map(.size) | add // 0' 2>/dev/null || echo 0)
    TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1))
    TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes))

    if $dry_run; then
      echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)"
    else
      echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)"
      # -f makes curl fail on HTTP errors, so a tag is counted as deleted only
      # when the request actually succeeded.
      if curl -fsS -X DELETE -H "Authorization: JWT $hub_token" \
        "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags/${tag_name}/" >/dev/null; then
        TOTAL_DELETED=$((TOTAL_DELETED + 1))
        TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes))
      else
        echo "$LOG_PREFIX failed to delete Docker Hub tag=$tag_name" >&2
      fi
    fi
  done < <(echo "$all" | jq -c 'sort_by(.last_updated) | .[]')
}
# Main: iterate requested registries.
# REGISTRIES is a comma-separated list; whitespace around an entry is ignored
# so "ghcr, dockerhub" selects both registries, and empty entries are skipped.
IFS=',' read -ra regs <<< "$REGISTRIES"
for r in "${regs[@]}"; do
  # Strip leading, then trailing, whitespace from the entry.
  r="${r#"${r%%[![:space:]]*}"}"
  r="${r%"${r##*[![:space:]]}"}"
  if [[ -z "$r" ]]; then
    continue
  fi

  case "$r" in
    ghcr) action_delete_ghcr ;;
    dockerhub) action_delete_dockerhub ;;
    *) echo "$LOG_PREFIX unknown registry: $r" ;;
  esac
done
# Summary
echo "$LOG_PREFIX SUMMARY: total_candidates=${TOTAL_CANDIDATES} total_candidates_bytes=${TOTAL_CANDIDATES_BYTES} total_deleted=${TOTAL_DELETED} total_deleted_bytes=${TOTAL_DELETED_BYTES}"
echo "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable ${TOTAL_CANDIDATES_BYTES}) deleted=${TOTAL_DELETED} deleted_size=$(human_readable ${TOTAL_DELETED_BYTES})"
echo "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable "${TOTAL_CANDIDATES_BYTES}") deleted=${TOTAL_DELETED} deleted_size=$(human_readable "${TOTAL_DELETED_BYTES}")"
# Export summary for workflow parsing
: > prune-summary.env
echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary.env
echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary.env
echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary.env