From ac720f95df26cdd2d2d6ace5b0bcbee222a7780e Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 26 Feb 2026 03:30:02 +0000 Subject: [PATCH] fix: implement GHCR and Docker Hub prune scripts with summary reporting --- .github/workflows/container-prune.yml | 192 ++++++++++++---- .../WORKFLOW_REVIEW_2026-01-26.md | 3 +- scripts/prune-dockerhub.sh | 174 +++++++++++++++ ...rune-container-images.sh => prune-ghcr.sh} | 211 ++++-------------- 4 files changed, 367 insertions(+), 213 deletions(-) create mode 100755 scripts/prune-dockerhub.sh rename scripts/{prune-container-images.sh => prune-ghcr.sh} (50%) diff --git a/.github/workflows/container-prune.yml b/.github/workflows/container-prune.yml index 861774da..64fa4a28 100644 --- a/.github/workflows/container-prune.yml +++ b/.github/workflows/container-prune.yml @@ -6,10 +6,6 @@ on: - cron: '0 3 * * 0' # Weekly: Sundays at 03:00 UTC workflow_dispatch: inputs: - registries: - description: 'Comma-separated registries to prune (ghcr,dockerhub)' - required: false - default: 'ghcr,dockerhub' keep_days: description: 'Number of days to retain images (unprotected)' required: false @@ -28,47 +24,38 @@ permissions: contents: read jobs: - prune: + prune-ghcr: runs-on: ubuntu-latest env: OWNER: ${{ github.repository_owner }} IMAGE_NAME: charon - REGISTRIES: ${{ github.event.inputs.registries || 'ghcr,dockerhub' }} KEEP_DAYS: ${{ github.event.inputs.keep_days || '30' }} KEEP_LAST_N: ${{ github.event.inputs.keep_last_n || '30' }} - DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }} + DRY_RUN: ${{ github.event_name == 'pull_request' && 'true' || github.event.inputs.dry_run || 'false' }} PROTECTED_REGEX: '["^v?[0-9]+\\.[0-9]+\\.[0-9]+$","^latest$","^main$","^develop$"]' + PRUNE_UNTAGGED: 'true' + PRUNE_SBOM_TAGS: 'true' steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Install tools run: | - sudo apt-get update && sudo apt-get install -y jq curl gh + sudo apt-get update && sudo apt-get install -y jq curl - - name: Show prune script being executed - run: | - echo "===== SCRIPT PATH =====" - pwd - ls -la scripts - echo "===== FIRST 20 LINES =====" - head -n 20 scripts/prune-container-images.sh - - - name: Run container prune + - name: Run GHCR prune env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} run: | - chmod +x scripts/prune-container-images.sh - ./scripts/prune-container-images.sh 2>&1 | tee prune-${{ github.run_id }}.log + chmod +x scripts/prune-ghcr.sh + ./scripts/prune-ghcr.sh 2>&1 | tee prune-ghcr-${{ github.run_id }}.log - - name: Summarize prune results (space reclaimed) - if: ${{ always() }} + - name: Summarize GHCR results + if: always() run: | set -euo pipefail - SUMMARY_FILE=prune-summary.env - LOG_FILE=prune-${{ github.run_id }}.log + SUMMARY_FILE=prune-summary-ghcr.env + LOG_FILE=prune-ghcr-${{ github.run_id }}.log human() { local bytes=${1:-0} @@ -76,7 +63,7 @@ jobs: echo "0 B" return fi - awk -v b="$bytes" 'function human(x){ split("B KiB MiB GiB TiB",u," "); i=0; while(x>1024){x/=1024;i++} printf "%0.2f %s", x, u[i+1]} END{human(b)}' + awk -v b="$bytes" 'BEGIN { split("B KiB MiB GiB TiB",u," "); i=0; x=b; while(x>1024){x/=1024;i++} printf "%0.2f %s", x, u[i+1] }' } if [ -f "$SUMMARY_FILE" ]; then @@ -86,34 +73,155 @@ jobs: TOTAL_DELETED_BYTES=$(grep -E '^TOTAL_DELETED_BYTES=' "$SUMMARY_FILE" | cut -d= -f2 || echo 0) { - echo "## Container prune summary" + echo "## GHCR prune summary" echo "- candidates: ${TOTAL_CANDIDATES} (≈ $(human "${TOTAL_CANDIDATES_BYTES}"))" echo "- deleted: ${TOTAL_DELETED} (≈ $(human "${TOTAL_DELETED_BYTES}"))" } >> "$GITHUB_STEP_SUMMARY" - - printf 'PRUNE_SUMMARY: candidates=%s candidates_bytes=%s deleted=%s deleted_bytes=%s\n' \ - "${TOTAL_CANDIDATES}" "${TOTAL_CANDIDATES_BYTES}" "${TOTAL_DELETED}" "${TOTAL_DELETED_BYTES}" - echo "Deleted approximately: $(human "${TOTAL_DELETED_BYTES}")" - echo "space_saved=$(human "${TOTAL_DELETED_BYTES}")" >> "$GITHUB_OUTPUT" else deleted_bytes=$(grep -oE '\( *approx +[0-9]+ bytes\)' "$LOG_FILE" | sed -E 's/.*approx +([0-9]+) bytes.*/\1/' | awk '{s+=$1} END {print s+0}' || true) deleted_count=$(grep -cE 'deleting |DRY RUN: would delete' "$LOG_FILE" || true) { - echo "## Container prune summary" + echo "## GHCR prune summary" echo "- deleted (approx): ${deleted_count} (≈ $(human "${deleted_bytes}"))" } >> "$GITHUB_STEP_SUMMARY" - - printf 'PRUNE_SUMMARY: deleted_approx=%s deleted_bytes=%s\n' "${deleted_count}" "${deleted_bytes}" - echo "Deleted approximately: $(human "${deleted_bytes}")" - echo "space_saved=$(human "${deleted_bytes}")" >> "$GITHUB_OUTPUT" fi - - name: Upload prune artifacts - if: ${{ always() }} + - name: Upload GHCR prune artifacts + if: always() uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 with: - name: prune-log-${{ github.run_id }} + name: prune-ghcr-log-${{ github.run_id }} path: | - prune-${{ github.run_id }}.log - prune-summary.env + prune-ghcr-${{ github.run_id }}.log + prune-summary-ghcr.env + + prune-dockerhub: + runs-on: ubuntu-latest + env: + OWNER: ${{ github.repository_owner }} + IMAGE_NAME: charon + KEEP_DAYS: ${{ github.event.inputs.keep_days || '30' }} + KEEP_LAST_N: ${{ github.event.inputs.keep_last_n || '30' }} + DRY_RUN: ${{ github.event_name == 'pull_request' && 'true' || github.event.inputs.dry_run || 'false' }} + PROTECTED_REGEX: '["^v?[0-9]+\\.[0-9]+\\.[0-9]+$","^latest$","^main$","^develop$"]' + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Install tools + run: | + sudo apt-get update && sudo apt-get install -y jq curl + + - name: Run Docker Hub prune + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + run: | + chmod +x scripts/prune-dockerhub.sh + ./scripts/prune-dockerhub.sh 2>&1 | tee prune-dockerhub-${{ github.run_id }}.log + + - name: Summarize Docker Hub results + if: always() + run: | + set -euo pipefail + SUMMARY_FILE=prune-summary-dockerhub.env + LOG_FILE=prune-dockerhub-${{ github.run_id }}.log + + human() { + local bytes=${1:-0} + if [ -z "$bytes" ] || [ "$bytes" -eq 0 ]; then + echo "0 B" + return + fi + awk -v b="$bytes" 'BEGIN { split("B KiB MiB GiB TiB",u," "); i=0; x=b; while(x>1024){x/=1024;i++} printf "%0.2f %s", x, u[i+1] }' + } + + if [ -f "$SUMMARY_FILE" ]; then + TOTAL_CANDIDATES=$(grep -E '^TOTAL_CANDIDATES=' "$SUMMARY_FILE" | cut -d= -f2 || echo 0) + TOTAL_CANDIDATES_BYTES=$(grep -E '^TOTAL_CANDIDATES_BYTES=' "$SUMMARY_FILE" | cut -d= -f2 || echo 0) + TOTAL_DELETED=$(grep -E '^TOTAL_DELETED=' "$SUMMARY_FILE" | cut -d= -f2 || echo 0) + TOTAL_DELETED_BYTES=$(grep -E '^TOTAL_DELETED_BYTES=' "$SUMMARY_FILE" | cut -d= -f2 || echo 0) + + { + echo "## Docker Hub prune summary" + echo "- candidates: ${TOTAL_CANDIDATES} (≈ $(human "${TOTAL_CANDIDATES_BYTES}"))" + echo "- deleted: ${TOTAL_DELETED} (≈ $(human "${TOTAL_DELETED_BYTES}"))" + } >> "$GITHUB_STEP_SUMMARY" + else + deleted_bytes=$(grep -oE '\( *approx +[0-9]+ bytes\)' "$LOG_FILE" | sed -E 's/.*approx +([0-9]+) bytes.*/\1/' | awk '{s+=$1} END {print s+0}' || true) + deleted_count=$(grep -cE 'deleting |DRY RUN: would delete' "$LOG_FILE" || true) + + { + echo "## Docker Hub prune summary" + echo "- deleted (approx): ${deleted_count} (≈ $(human "${deleted_bytes}"))" + } >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Upload Docker Hub prune artifacts + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: prune-dockerhub-log-${{ github.run_id }} + path: | + prune-dockerhub-${{ github.run_id }}.log + prune-summary-dockerhub.env + + summarize: + runs-on: ubuntu-latest + needs: [prune-ghcr, prune-dockerhub] + if: always() + steps: + - name: Download all artifacts + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + pattern: prune-*-log-${{ github.run_id }} + merge-multiple: true + + - name: Combined summary + run: | + set -euo pipefail + + human() { + local bytes=${1:-0} + if [ -z "$bytes" ] || [ "$bytes" -eq 0 ]; then + echo "0 B" + return + fi + awk -v b="$bytes" 'BEGIN { split("B KiB MiB GiB TiB",u," "); i=0; x=b; while(x>1024){x/=1024;i++} printf "%0.2f %s", x, u[i+1] }' + } + + GHCR_CANDIDATES=0 GHCR_CANDIDATES_BYTES=0 GHCR_DELETED=0 GHCR_DELETED_BYTES=0 + if [ -f prune-summary-ghcr.env ]; then + GHCR_CANDIDATES=$(grep -E '^TOTAL_CANDIDATES=' prune-summary-ghcr.env | cut -d= -f2 || echo 0) + GHCR_CANDIDATES_BYTES=$(grep -E '^TOTAL_CANDIDATES_BYTES=' prune-summary-ghcr.env | cut -d= -f2 || echo 0) + GHCR_DELETED=$(grep -E '^TOTAL_DELETED=' prune-summary-ghcr.env | cut -d= -f2 || echo 0) + GHCR_DELETED_BYTES=$(grep -E '^TOTAL_DELETED_BYTES=' prune-summary-ghcr.env | cut -d= -f2 || echo 0) + fi + + HUB_CANDIDATES=0 HUB_CANDIDATES_BYTES=0 HUB_DELETED=0 HUB_DELETED_BYTES=0 + if [ -f prune-summary-dockerhub.env ]; then + HUB_CANDIDATES=$(grep -E '^TOTAL_CANDIDATES=' prune-summary-dockerhub.env | cut -d= -f2 || echo 0) + HUB_CANDIDATES_BYTES=$(grep -E '^TOTAL_CANDIDATES_BYTES=' prune-summary-dockerhub.env | cut -d= -f2 || echo 0) + HUB_DELETED=$(grep -E '^TOTAL_DELETED=' prune-summary-dockerhub.env | cut -d= -f2 || echo 0) + HUB_DELETED_BYTES=$(grep -E '^TOTAL_DELETED_BYTES=' prune-summary-dockerhub.env | cut -d= -f2 || echo 0) + fi + + TOTAL_CANDIDATES=$((GHCR_CANDIDATES + HUB_CANDIDATES)) + TOTAL_CANDIDATES_BYTES=$((GHCR_CANDIDATES_BYTES + HUB_CANDIDATES_BYTES)) + TOTAL_DELETED=$((GHCR_DELETED + HUB_DELETED)) + TOTAL_DELETED_BYTES=$((GHCR_DELETED_BYTES + HUB_DELETED_BYTES)) + + { + echo "## Combined container prune summary" + echo "" + echo "| Registry | Candidates | Deleted | Space Reclaimed |" + echo "|----------|------------|---------|-----------------|" + echo "| GHCR | ${GHCR_CANDIDATES} | ${GHCR_DELETED} | $(human "${GHCR_DELETED_BYTES}") |" + echo "| Docker Hub | ${HUB_CANDIDATES} | ${HUB_DELETED} | $(human "${HUB_DELETED_BYTES}") |" + echo "| **Total** | **${TOTAL_CANDIDATES}** | **${TOTAL_DELETED}** | **$(human "${TOTAL_DELETED_BYTES}")** |" + } >> "$GITHUB_STEP_SUMMARY" + + printf 'PRUNE_SUMMARY: candidates=%s candidates_bytes=%s deleted=%s deleted_bytes=%s\n' \ + "${TOTAL_CANDIDATES}" "${TOTAL_CANDIDATES_BYTES}" "${TOTAL_DELETED}" "${TOTAL_DELETED_BYTES}" + echo "Total space reclaimed: $(human "${TOTAL_DELETED_BYTES}")" diff --git a/docs/implementation/WORKFLOW_REVIEW_2026-01-26.md b/docs/implementation/WORKFLOW_REVIEW_2026-01-26.md index c82ca778..e9099914 100644 --- a/docs/implementation/WORKFLOW_REVIEW_2026-01-26.md +++ b/docs/implementation/WORKFLOW_REVIEW_2026-01-26.md @@ -159,7 +159,8 @@ A new scheduled workflow and helper script were added to safely prune old contai - **Files added**: - `.github/workflows/container-prune.yml` (weekly schedule, manual dispatch) - - `scripts/prune-container-images.sh` (dry-run by default; supports GHCR and Docker Hub) + - `scripts/prune-ghcr.sh` (GHCR cleanup) + - `scripts/prune-dockerhub.sh` (Docker Hub cleanup) - **Behavior**: - Default: **dry-run=true** (no destructive changes). diff --git a/scripts/prune-dockerhub.sh b/scripts/prune-dockerhub.sh new file mode 100755 index 00000000..f59fe341 --- /dev/null +++ b/scripts/prune-dockerhub.sh @@ -0,0 +1,174 @@ +#!/usr/bin/env bash +set -euo pipefail +# prune-dockerhub.sh +# Deletes old container images from Docker Hub according to retention and protection rules. + +OWNER=${OWNER:-${GITHUB_REPOSITORY_OWNER:-Wikid82}} +IMAGE_NAME=${IMAGE_NAME:-charon} + +KEEP_DAYS=${KEEP_DAYS:-30} +KEEP_LAST_N=${KEEP_LAST_N:-30} + +DRY_RUN=${DRY_RUN:-false} +PROTECTED_REGEX=${PROTECTED_REGEX:-'["^v","^latest$","^main$","^develop$"]'} + +DOCKERHUB_USERNAME=${DOCKERHUB_USERNAME:-} +DOCKERHUB_TOKEN=${DOCKERHUB_TOKEN:-} + +LOG_PREFIX="[prune-dockerhub]" + +cutoff_ts=$(date -d "$KEEP_DAYS days ago" +%s 2>/dev/null || date -d "-$KEEP_DAYS days" +%s) + +dry_run=false +case "${DRY_RUN,,}" in + true|1|yes|y|on) dry_run=true ;; + *) dry_run=false ;; +esac + +TOTAL_CANDIDATES=0 +TOTAL_CANDIDATES_BYTES=0 +TOTAL_DELETED=0 +TOTAL_DELETED_BYTES=0 + +echo "$LOG_PREFIX starting with OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run" +echo "$LOG_PREFIX PROTECTED_REGEX=$PROTECTED_REGEX" + +require() { + command -v "$1" >/dev/null 2>&1 || { echo "$LOG_PREFIX missing required command: $1" >&2; exit 1; } +} +require curl +require jq + +is_protected_tag() { + local tag="$1" + local rgx + while IFS= read -r rgx; do + [[ -z "$rgx" ]] && continue + if [[ "$tag" =~ $rgx ]]; then + return 0 + fi + done < <(echo "$PROTECTED_REGEX" | jq -r '.[]') + return 1 +} + +human_readable() { + local bytes=${1:-0} + if [[ -z "$bytes" ]] || (( bytes <= 0 )); then + echo "0 B" + return + fi + local unit=(B KiB MiB GiB TiB) + local i=0 + local value=$bytes + while (( value > 1024 )) && (( i < 4 )); do + value=$((value / 1024)) + i=$((i + 1)) + done + printf "%s %s" "${value}" "${unit[$i]}" +} + +action_delete_dockerhub() { + echo "$LOG_PREFIX -> Docker Hub cleanup for ${DOCKERHUB_USERNAME:-}/$IMAGE_NAME (dry-run=$dry_run)" + + if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then + echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup" + return + fi + + local hub_token page page_size all resp results_count total + local keep_tags tag tag_name last_updated last_ts protected bytes + + hub_token=$(printf '{"username":"%s","password":"%s"}' "$DOCKERHUB_USERNAME" "$DOCKERHUB_TOKEN" | \ + curl -sS -X POST -H "Content-Type: application/json" --data-binary @- \ + https://hub.docker.com/v2/users/login/ | jq -r '.token') + + if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then + echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup" + return + fi + + page=1 + page_size=100 + all='[]' + while :; do + resp=$(curl -sS -H "Authorization: JWT $hub_token" \ + "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags?page_size=$page_size&page=$page") + + results_count=$(echo "$resp" | jq -r '.results | length') + if [[ -z "$results_count" || "$results_count" == "0" ]]; then + break + fi + + all=$(jq -s '.[0] + .[1].results' <(echo "$all") <(echo "$resp")) + ((page++)) + done + + total=$(echo "$all" | jq -r 'length') + if [[ -z "$total" || "$total" == "0" ]]; then + echo "$LOG_PREFIX Docker Hub: no tags found" + return + fi + + echo "$LOG_PREFIX Docker Hub: fetched $total tags total" + + keep_tags=$(echo "$all" | jq -r --argjson n "${KEEP_LAST_N:-0}" ' + (sort_by(.last_updated) | reverse) as $s + | ($s[0:$n] | map(.name)) | join(" ") + ') + + while IFS= read -r tag; do + tag_name=$(echo "$tag" | jq -r '.name') + last_updated=$(echo "$tag" | jq -r '.last_updated') + last_ts=$(date -d "$last_updated" +%s 2>/dev/null || echo 0) + + if [[ -n "$keep_tags" && " $keep_tags " == *" $tag_name "* ]]; then + echo "$LOG_PREFIX keep (last_n): tag=$tag_name last_updated=$last_updated" + continue + fi + + protected=false + if is_protected_tag "$tag_name"; then + protected=true + fi + if $protected; then + echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated" + continue + fi + + if (( last_ts >= cutoff_ts )); then + echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated" + continue + fi + + echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated" + + bytes=$(echo "$tag" | jq -r '.images | map(.size) | add // 0' 2>/dev/null || echo 0) + TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1)) + TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes)) + + if $dry_run; then + echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)" + else + echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)" + curl -sS -X DELETE -H "Authorization: JWT $hub_token" \ + "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags/${tag_name}/" >/dev/null || true + TOTAL_DELETED=$((TOTAL_DELETED + 1)) + TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes)) + fi + + done < <(echo "$all" | jq -c 'sort_by(.last_updated) | .[]') +} + +# Main +action_delete_dockerhub + +echo "$LOG_PREFIX SUMMARY: total_candidates=${TOTAL_CANDIDATES} total_candidates_bytes=${TOTAL_CANDIDATES_BYTES} total_deleted=${TOTAL_DELETED} total_deleted_bytes=${TOTAL_DELETED_BYTES}" +echo "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable "${TOTAL_CANDIDATES_BYTES}") deleted=${TOTAL_DELETED} deleted_size=$(human_readable "${TOTAL_DELETED_BYTES}")" + +: > prune-summary-dockerhub.env +echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary-dockerhub.env +echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary-dockerhub.env +echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary-dockerhub.env +echo "TOTAL_DELETED_BYTES=${TOTAL_DELETED_BYTES}" >> prune-summary-dockerhub.env + +echo "$LOG_PREFIX done" diff --git a/scripts/prune-container-images.sh b/scripts/prune-ghcr.sh similarity index 50% rename from scripts/prune-container-images.sh rename to scripts/prune-ghcr.sh index 18edf625..8900fbd8 100755 --- a/scripts/prune-container-images.sh +++ b/scripts/prune-ghcr.sh @@ -1,10 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -echo "[prune] SCRIPT VERSION: GH_API_VARIANT" -# prune-container-images.sh -# Deletes old images from GHCR and Docker Hub according to retention and protection rules. +# prune-ghcr.sh +# Deletes old container images from GitHub Container Registry (GHCR) +# according to retention and protection rules. -REGISTRIES=${REGISTRIES:-ghcr} OWNER=${OWNER:-${GITHUB_REPOSITORY_OWNER:-Wikid82}} IMAGE_NAME=${IMAGE_NAME:-charon} @@ -14,33 +13,29 @@ KEEP_LAST_N=${KEEP_LAST_N:-30} DRY_RUN=${DRY_RUN:-false} PROTECTED_REGEX=${PROTECTED_REGEX:-'["^v","^latest$","^main$","^develop$"]'} -# Extra knobs (optional) PRUNE_UNTAGGED=${PRUNE_UNTAGGED:-true} PRUNE_SBOM_TAGS=${PRUNE_SBOM_TAGS:-true} -LOG_PREFIX="[prune]" +LOG_PREFIX="[prune-ghcr]" -now_ts=$(date +%s) cutoff_ts=$(date -d "$KEEP_DAYS days ago" +%s 2>/dev/null || date -d "-$KEEP_DAYS days" +%s) -# Normalize DRY_RUN to true/false reliably dry_run=false case "${DRY_RUN,,}" in true|1|yes|y|on) dry_run=true ;; *) dry_run=false ;; esac -# Totals TOTAL_CANDIDATES=0 TOTAL_CANDIDATES_BYTES=0 TOTAL_DELETED=0 TOTAL_DELETED_BYTES=0 -echo "$LOG_PREFIX starting with REGISTRIES=$REGISTRIES OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run" +echo "$LOG_PREFIX starting with OWNER=$OWNER IMAGE_NAME=$IMAGE_NAME KEEP_DAYS=$KEEP_DAYS KEEP_LAST_N=$KEEP_LAST_N DRY_RUN=$dry_run" echo "$LOG_PREFIX PROTECTED_REGEX=$PROTECTED_REGEX PRUNE_UNTAGGED=$PRUNE_UNTAGGED PRUNE_SBOM_TAGS=$PRUNE_SBOM_TAGS" require() { - command -v "$1" >/dev/null 2>&1 || { echo "$LOG_PREFIX missing required command: $1"; exit 1; } + command -v "$1" >/dev/null 2>&1 || { echo "$LOG_PREFIX missing required command: $1" >&2; exit 1; } } require curl require jq @@ -57,8 +52,6 @@ is_protected_tag() { return 1 } -# Some repos generate tons of tags like sha-xxxx, pr-123-xxxx, *.sbom. -# We treat SBOM-only tags as deletable (optional). tag_is_sbom() { local tag="$1" [[ "$tag" == *.sbom ]] @@ -80,9 +73,9 @@ human_readable() { printf "%s %s" "${value}" "${unit[$i]}" } -# --- GHCR --- +# All echo/log statements go to stderr so stdout remains pure JSON ghcr_list_all_versions_json() { - local namespace_type="$1" # orgs or users + local namespace_type="$1" local page=1 local per_page=100 local all='[]' @@ -90,7 +83,6 @@ ghcr_list_all_versions_json() { while :; do local url="https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions?per_page=$per_page&page=$page" - # Use GitHub’s recommended headers local resp resp=$(curl -sS \ -H "Authorization: Bearer $GITHUB_TOKEN" \ @@ -98,29 +90,26 @@ ghcr_list_all_versions_json() { -H "X-GitHub-Api-Version: 2022-11-28" \ "$url" || true) - # ✅ NEW: ensure we got JSON if ! echo "$resp" | jq -e . >/dev/null 2>&1; then - echo "$LOG_PREFIX GHCR returned non-JSON for url=$url" - echo "$LOG_PREFIX GHCR response (first 200 chars): $(echo "$resp" | head -c 200 | tr '\n' ' ')" + echo "$LOG_PREFIX GHCR returned non-JSON for url=$url" >&2 + echo "$LOG_PREFIX GHCR response (first 200 chars): $(echo "$resp" | head -c 200 | tr '\n' ' ')" >&2 echo "[]" return 0 fi - # Handle JSON error messages if echo "$resp" | jq -e 'has("message")' >/dev/null 2>&1; then local msg msg=$(echo "$resp" | jq -r '.message') if [[ "$msg" == "Not Found" ]]; then - echo "$LOG_PREFIX GHCR ${namespace_type} endpoint returned Not Found" + echo "$LOG_PREFIX GHCR ${namespace_type} endpoint returned Not Found" >&2 echo "[]" return 0 fi - echo "$LOG_PREFIX GHCR API error: $msg" - # also print documentation_url if present (helpful) + echo "$LOG_PREFIX GHCR API error: $msg" >&2 doc=$(echo "$resp" | jq -r '.documentation_url // empty') - [[ -n "$doc" ]] && echo "$LOG_PREFIX GHCR docs: $doc" + [[ -n "$doc" ]] && echo "$LOG_PREFIX GHCR docs: $doc" >&2 echo "[]" return 0 fi @@ -146,7 +135,6 @@ action_delete_ghcr() { return fi - # Try orgs first, then users local all local namespace_type="orgs" all=$(ghcr_list_all_versions_json "$namespace_type") @@ -164,12 +152,6 @@ action_delete_ghcr() { echo "$LOG_PREFIX GHCR: fetched $total versions total" - # Normalize a working list: - # - id - # - created_at - # - created_ts - # - tags array - # - tags_csv local normalized normalized=$(echo "$all" | jq -c ' map({ @@ -181,8 +163,6 @@ action_delete_ghcr() { }) ') - # Compute the globally newest KEEP_LAST_N ids to always keep - # (If KEEP_LAST_N is 0 or empty, keep none by this rule) local keep_ids keep_ids=$(echo "$normalized" | jq -r --argjson n "${KEEP_LAST_N:-0}" ' (sort_by(.created_ts) | reverse) as $s @@ -193,21 +173,20 @@ action_delete_ghcr() { echo "$LOG_PREFIX GHCR: keeping newest KEEP_LAST_N ids: $KEEP_LAST_N" fi - # Iterate versions sorted oldest->newest so deletions are predictable + local ver protected all_sbom candidate_bytes while IFS= read -r ver; do local id created created_ts tags_csv + all_sbom=false id=$(echo "$ver" | jq -r '.id') created=$(echo "$ver" | jq -r '.created_at') created_ts=$(echo "$ver" | jq -r '.created_ts') tags_csv=$(echo "$ver" | jq -r '.tags_csv') - # KEEP_LAST_N rule (global) if [[ -n "$keep_ids" && " $keep_ids " == *" $id "* ]]; then echo "$LOG_PREFIX keep (last_n): id=$id tags=$tags_csv created=$created" continue fi - # Protected tags rule protected=false if [[ -n "$tags_csv" ]]; then while IFS= read -r t; do @@ -223,8 +202,6 @@ action_delete_ghcr() { continue fi - # Optional: treat SBOM-only versions/tags as deletable - # If every tag is *.sbom and PRUNE_SBOM_TAGS=true, we allow pruning regardless of “tag protected” rules. if [[ "${PRUNE_SBOM_TAGS,,}" == "true" && -n "$tags_csv" ]]; then all_sbom=true while IFS= read -r t; do @@ -234,46 +211,40 @@ action_delete_ghcr() { break fi done < <(echo "$tags_csv" | tr ',' '\n') - if $all_sbom; then - # allow fallthrough; do not "keep" just because tags are recent - : - fi fi - # Age rule - if (( created_ts >= cutoff_ts )); then - echo "$LOG_PREFIX keep (recent): id=$id tags=$tags_csv created=$created" - continue - fi - - # Optional: prune untagged versions (common GHCR bloat) - if [[ "${PRUNE_UNTAGGED,,}" == "true" ]]; then - # tags_csv can be empty for untagged - if [[ -z "$tags_csv" ]]; then - echo "$LOG_PREFIX candidate (untagged): id=$id tags= created=$created" - else - echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created" - fi + # If all tags are SBOM tags and PRUNE_SBOM_TAGS is enabled, skip the age check + if [[ "${all_sbom:-false}" == "true" ]]; then + echo "$LOG_PREFIX candidate (sbom-only): id=$id tags=$tags_csv created=$created" else - # If not pruning untagged, skip them - if [[ -z "$tags_csv" ]]; then - echo "$LOG_PREFIX keep (untagged disabled): id=$id created=$created" + if (( created_ts >= cutoff_ts )); then + echo "$LOG_PREFIX keep (recent): id=$id tags=$tags_csv created=$created" continue fi - echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created" + + if [[ "${PRUNE_UNTAGGED,,}" == "true" ]]; then + if [[ -z "$tags_csv" ]]; then + echo "$LOG_PREFIX candidate (untagged): id=$id tags= created=$created" + else + echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created" + fi + else + if [[ -z "$tags_csv" ]]; then + echo "$LOG_PREFIX keep (untagged disabled): id=$id created=$created" + continue + fi + echo "$LOG_PREFIX candidate: id=$id tags=$tags_csv created=$created" + fi fi - # Candidate bookkeeping TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1)) - # Best-effort size estimation: GHCR registry auth is messy; don’t block prune on it. candidate_bytes=0 if $dry_run; then echo "$LOG_PREFIX DRY RUN: would delete GHCR version id=$id (approx ${candidate_bytes} bytes)" else echo "$LOG_PREFIX deleting GHCR version id=$id" - # Use GitHub API delete curl -sS -X DELETE -H "Authorization: Bearer $GITHUB_TOKEN" \ "https://api.github.com/${namespace_type}/${OWNER}/packages/container/${IMAGE_NAME}/versions/$id" >/dev/null || true TOTAL_DELETED=$((TOTAL_DELETED + 1)) @@ -282,116 +253,16 @@ action_delete_ghcr() { done < <(echo "$normalized" | jq -c 'sort_by(.created_ts) | .[]') } -# --- Docker Hub --- -action_delete_dockerhub() { - echo "$LOG_PREFIX -> Docker Hub cleanup for ${DOCKERHUB_USERNAME:-}/$IMAGE_NAME (dry-run=$dry_run)" +# Main +action_delete_ghcr - if [[ -z "${DOCKERHUB_USERNAME:-}" || -z "${DOCKERHUB_TOKEN:-}" ]]; then - echo "$LOG_PREFIX Docker Hub credentials not set; skipping Docker Hub cleanup" - return - fi - - hub_token=$(curl -sS -X POST -H "Content-Type: application/json" \ - -d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \ - https://hub.docker.com/v2/users/login/ | jq -r '.token') - - if [[ -z "$hub_token" || "$hub_token" == "null" ]]; then - echo "$LOG_PREFIX Failed to obtain Docker Hub token; aborting Docker Hub cleanup" - return - fi - - # Fetch all pages first so KEEP_LAST_N can be global - page=1 - page_size=100 - all='[]' - while :; do - resp=$(curl -sS -H "Authorization: JWT $hub_token" \ - "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags?page_size=$page_size&page=$page") - - results_count=$(echo "$resp" | jq -r '.results | length') - if [[ -z "$results_count" || "$results_count" == "0" ]]; then - break - fi - - all=$(jq -s '.[0] + .[1].results' <(echo "$all") <(echo "$resp")) - ((page++)) - done - - total=$(echo "$all" | jq -r 'length') - if [[ -z "$total" || "$total" == "0" ]]; then - echo "$LOG_PREFIX Docker Hub: no tags found" - return - fi - - echo "$LOG_PREFIX Docker Hub: fetched $total tags total" - - keep_tags=$(echo "$all" | jq -r --argjson n "${KEEP_LAST_N:-0}" ' - (sort_by(.last_updated) | reverse) as $s - | ($s[0:$n] | map(.name)) | join(" ") - ') - - while IFS= read -r tag; do - tag_name=$(echo "$tag" | jq -r '.name') - last_updated=$(echo "$tag" | jq -r '.last_updated') - last_ts=$(date -d "$last_updated" +%s 2>/dev/null || 0) - - if [[ -n "$keep_tags" && " $keep_tags " == *" $tag_name "* ]]; then - echo "$LOG_PREFIX keep (last_n): tag=$tag_name last_updated=$last_updated" - continue - fi - - protected=false - if is_protected_tag "$tag_name"; then - protected=true - fi - if $protected; then - echo "$LOG_PREFIX keep (protected): tag=$tag_name last_updated=$last_updated" - continue - fi - - if (( last_ts >= cutoff_ts )); then - echo "$LOG_PREFIX keep (recent): tag=$tag_name last_updated=$last_updated" - continue - fi - - echo "$LOG_PREFIX candidate: tag=$tag_name last_updated=$last_updated" - - bytes=$(echo "$tag" | jq -r '.images | map(.size) | add // 0' 2>/dev/null || echo 0) - TOTAL_CANDIDATES=$((TOTAL_CANDIDATES + 1)) - TOTAL_CANDIDATES_BYTES=$((TOTAL_CANDIDATES_BYTES + bytes)) - - if $dry_run; then - echo "$LOG_PREFIX DRY RUN: would delete Docker Hub tag=$tag_name (approx ${bytes} bytes)" - else - echo "$LOG_PREFIX deleting Docker Hub tag=$tag_name (approx ${bytes} bytes)" - curl -sS -X DELETE -H "Authorization: JWT $hub_token" \ - "https://hub.docker.com/v2/repositories/${DOCKERHUB_USERNAME}/${IMAGE_NAME}/tags/${tag_name}/" >/dev/null || true - TOTAL_DELETED=$((TOTAL_DELETED + 1)) - TOTAL_DELETED_BYTES=$((TOTAL_DELETED_BYTES + bytes)) - fi - - done < <(echo "$all" | jq -c 'sort_by(.last_updated) | .[]') -} - -# Main: iterate requested registries -IFS=',' read -ra regs <<< "$REGISTRIES" -for r in "${regs[@]}"; do - case "$r" in - ghcr) action_delete_ghcr ;; - dockerhub) action_delete_dockerhub ;; - *) echo "$LOG_PREFIX unknown registry: $r" ;; - esac -done - -# Summary echo "$LOG_PREFIX SUMMARY: total_candidates=${TOTAL_CANDIDATES} total_candidates_bytes=${TOTAL_CANDIDATES_BYTES} total_deleted=${TOTAL_DELETED} total_deleted_bytes=${TOTAL_DELETED_BYTES}" echo "$LOG_PREFIX SUMMARY_HUMAN: candidates=${TOTAL_CANDIDATES} candidates_size=$(human_readable "${TOTAL_CANDIDATES_BYTES}") deleted=${TOTAL_DELETED} deleted_size=$(human_readable "${TOTAL_DELETED_BYTES}")" -# Export summary for workflow parsing -: > prune-summary.env -echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary.env -echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary.env -echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary.env -echo "TOTAL_DELETED_BYTES=${TOTAL_DELETED_BYTES}" >> prune-summary.env +: > prune-summary-ghcr.env +echo "TOTAL_CANDIDATES=${TOTAL_CANDIDATES}" >> prune-summary-ghcr.env +echo "TOTAL_CANDIDATES_BYTES=${TOTAL_CANDIDATES_BYTES}" >> prune-summary-ghcr.env +echo "TOTAL_DELETED=${TOTAL_DELETED}" >> prune-summary-ghcr.env +echo "TOTAL_DELETED_BYTES=${TOTAL_DELETED_BYTES}" >> prune-summary-ghcr.env echo "$LOG_PREFIX done"