diff --git a/.github/PULL_REQUEST_TEMPLATE/history-rewrite.md b/.github/PULL_REQUEST_TEMPLATE/history-rewrite.md new file mode 100644 index 00000000..b16ce637 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/history-rewrite.md @@ -0,0 +1,23 @@ + + +## Summary +- Provide a short summary of why the history rewrite is needed. + +## Checklist - required for history rewrite PRs +- [ ] I have created a **local** backup branch: `backup/history-YYYYMMDD-HHMMSS` and verified it contains all refs. +- [ ] I have run a dry-run locally: `scripts/history-rewrite/preview_removals.sh --paths 'backend/codeql-db,codeql-db,codeql-db-js,codeql-db-go' --strip-size 50` and attached the output or pasted it below. +- [ ] I have verified that the `data/backups` tarball is present and that tests show the rewrite will not remove unrelated artifacts. +- [ ] I have coordinated with repo maintainers for a rewrite window and notified the owners of other active forks/tokens that may be affected. +- [ ] I have run the CI dry-run job and ensured it completes without blocking findings. +- [ ] This PR only contains the history-rewrite helpers; no destructive rewrite is included in this PR. +- [ ] I will not run the destructive `--force` step without explicit approval from maintainers and a scheduled maintenance window. + +## Attachments +Attach the `preview_removals` output and `data/backups/history_cleanup-*.log` content. + +## Approach +Describe the paths to be removed, strip size, and whether additional blob stripping is required. + +## Notes for maintainers +- The workflow `.github/workflows/dry-run-history-rewrite.yml` will run automatically on PR updates. +- Please follow the checklist and only approve after offline confirmation. 
diff --git a/.github/workflows/dry-run-history-rewrite.yml b/.github/workflows/dry-run-history-rewrite.yml
new file mode 100644
index 00000000..7f0655f4
--- /dev/null
+++ b/.github/workflows/dry-run-history-rewrite.yml
@@ -0,0 +1,36 @@
+# Non-destructive check: fails when banned paths or oversized objects exist in repository history.
+name: History Rewrite Dry-Run
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  schedule:
+    - cron: '0 2 * * *' # daily at 02:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  preview-history:
+    name: Dry-run preview for history rewrite
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # Full history is required: the check script walks every ref with `git rev-list --all`.
+          fetch-depth: 0
+
+      - name: Debug git info
+        run: |
+          git --version
+          git rev-parse --is-shallow-repository || true
+          git status --porcelain
+
+      - name: Make CI script executable
+        run: chmod +x scripts/ci/dry_run_history_rewrite.sh
+
+      - name: Run dry-run history check
+        run: |
+          scripts/ci/dry_run_history_rewrite.sh --paths 'backend/codeql-db,codeql-db,codeql-db-js,codeql-db-go' --strip-size 50
diff --git a/.github/workflows/pr-checklist.yml b/.github/workflows/pr-checklist.yml
new file mode 100644
index 00000000..c5f7876a
--- /dev/null
+++ b/.github/workflows/pr-checklist.yml
@@ -0,0 +1,47 @@
+name: PR Checklist Validation (History Rewrite)
+
+on:
+  pull_request:
+    types: [opened, edited, synchronize]
+
+# Least privilege: the job only reads the PR title/body through the REST API (no checkout needed).
+permissions:
+  pull-requests: read
+
+jobs:
+  validate:
+    name: Validate history-rewrite checklist
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate PR checklist
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Fetch the PR fresh so workflow re-runs see the current body, not the stale event payload.
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number,
+            });
+            const title = pr.title ?? '';
+            const body = pr.body ?? '';
+
+            // Only history-rewrite PRs carry this checklist; ordinary PRs must not be blocked by it.
+            if (!/history[- ]rewrite/i.test(`${title}\n${body}`)) {
+              core.info('Not a history-rewrite PR; skipping checklist validation');
+              return;
+            }
+
+            // The PR template wraps the flag in Markdown backticks, so strip them before matching.
+            const normalized = body.replaceAll('`', '').toLowerCase();
+            const required = [
+              'preview_removals.sh',
+              'data/backups',
+              'I will not run the destructive --force',
+            ];
+            const missing = required.filter((item) => !normalized.includes(item.toLowerCase()));
+            if (missing.length > 0) {
+              core.setFailed(`Missing required checklist item(s): ${missing.join('; ')}`);
+              return;
+            }
+            core.info('PR checklist looks good');
diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md
index 731366e2..fe11fb59 100644
--- a/docs/plans/current_spec.md
+++ b/docs/plans/current_spec.md
@@ -540,3 +540,178 @@ Implement real CrowdSec Hub preset sync + apply on backend (using cscli or direc
 - Existing curated presets remain but are marked as bundled; UI should continue to show them even if hub unreachable.
 - Stub endpoint `POST /admin/crowdsec/presets/pull/apply` is replaced by separate `pull` and `apply`; frontend must switch to new API paths before backend removal to avoid 404.
 - Backward compatibility: keep returning 501 from old endpoint until frontend merged; remove once new routes live and tested.
+
+---
+
+**Automated CI Dry-Run & PR Checklist Plan**
+-------------------------------------------
+Objective: Add a CI dry-run workflow and a PR checklist enforcement job and template to reduce release/merge regressions. The dry-run will validate build/test/lint/packaging steps without publishing or writing secrets. The PR checklist job ensures contributors used the PR template.
+ +Files to add / edit +- Add: `.github/workflows/dry-run.yml` — main dry-run workflow triggered on PR and schedule (weekly) +- Add: `.github/workflows/pr-checklist.yml` — PR body validation workflow to ensure PR checklist compliance +- Add: `.github/PULL_REQUEST_TEMPLATE/pr_template.md` — PR template with developer checklist +- Add: `scripts/ci/dry_run_build.sh` — wrapper script used by dry-run to orchestrate checks +- Add: `scripts/ci/dry_run_goreleaser.sh` — goreleaser snapshot dry-run runner +- Add: `scripts/ci/check_pr_checklist.sh` — standalone PR-checklist script (for local/CI usage) +- Edit: `.pre-commit-config.yaml` — recommend add `tsc --noEmit` and `golangci-lint` local hooks (if not present as mandatory checks) +- Review: `.gitignore`, `.gitattributes` — ensure `scripts/ci` temp artifacts are ignored and LFS/gitattribute rules still appropriate + +Suggested job names & responsibilities +- `dry-run-backend` (backend build: `go build`, `go test` with coverage, `go vet`) +- `dry-run-frontend` (frontend build & tests, `npm ci`, `npm run build`, `npm run test:ci`) +- `dry-run-docker` (docker build only, `docker build`, no push; multi-platform on branches only) +- `dry-run-goreleaser` (goreleaser release in dry-run mode; `--snapshot` or `--skip-publish`) +- `dry-run-security` (Trivy fs scan of built binary and optional static scans; non-publish SARIF upload disabled for fork PRs) +- `validate-pr-checklist` (validate PR body contains required checklist items) + +Workflow triggers +- `pull_request` (types: opened, edited, synchronize, reopened) +- `workflow_dispatch` (manual run) +- `schedule` (cron — e.g., `0 3 * * 0` weekly; optional daily lightweight run for repo health exists already via `repo-health.yml`) + +Permissions and secrets considerations +- Default minimal perms: `contents: read` for checkout; use `checks: write` to set check statuses if necessary. 
+- Do not use `packages: write` or grant `GITHUB_TOKEN` write permissions on PRs (unless gated to branch-origin PRs). Dry-run avoids publishing and won't require `packages: write`. +- Conditionally run `goreleaser` or any publish checks only when the PR originates from a branch in the main repo (forks cannot access secrets): `if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}`. +- `CHARON_TOKEN` or custom PAT should not be used in PR runs from forks to avoid secret leakage. + +YAML job snippet samples +1) Example `dry-run-backend` job +```yaml +jobs: + dry-run-backend: + name: Backend Dry-Run + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v4 + with: { go-version: '1.25' } + - name: Run repo health check + run: bash scripts/repo_health_check.sh + - name: Run backend tests + run: bash scripts/go-test-coverage.sh +``` + +2) Example `dry-run-frontend` job +```yaml + dry-run-frontend: + name: Frontend Dry-Run + runs-on: ubuntu-latest + needs: dry-run-backend + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: { node-version: '24' } + - name: Install + Test + working-directory: frontend + run: | + npm ci + bash ../scripts/frontend-test-coverage.sh 2>&1 | tee test-output.txt +``` + +3) Example `dry-run-docker` job +```yaml + dry-run-docker: + name: Build Docker Image (No Push) + runs-on: ubuntu-latest + needs: dry-run-frontend + steps: + - uses: actions/checkout@v4 + - name: Build Docker + run: docker build --platform linux/amd64 -t charon:pr-${{ github.sha }} . 
+``` + +4) Example `dry-run-goreleaser` job protected from fork PR secrets exposure +```yaml + dry-run-goreleaser: + name: GoReleaser Dry-Run + runs-on: ubuntu-latest + needs: dry-run-docker + if: ${{ github.event_name == 'workflow_dispatch' || github.repository == github.event.pull_request.head.repo.full_name }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v4 + with: { go-version: '1.25' } + - name: Run GoReleaser (dry-run) + uses: goreleaser/goreleaser-action@v6 + with: { args: 'release --snapshot --clean' } +``` + +5) PR checklist validation job (using `github-script` or local script) +```yaml +jobs: + validate-pr-checklist: + name: Validate PR Checklist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Validate checklist + uses: actions/github-script@v7 + with: + script: | + const pr = await github.rest.pulls.get({owner: context.repo.owner, repo: context.repo.repo, pull_number: context.issue.number}); + const body = (pr.data && pr.data.body) || ''; + const required = [ 'Tests (backend & frontend)', 'All tests pass in CI', 'Lint and type checks pass', 'Changelog updated' ]; + for (const r of required) { + if (!body.toLowerCase().includes(r.toLowerCase())) { core.setFailed('Missing checklist item: '+r); return; } + } +``` + +Expected behavior and gating strategy +- The dry-run workflow runs on PRs and fails the PR if build/test/goreleaser dry-run steps fail. +- For PRs from forks, goreleaser/publish and other actions requiring secrets must be gated and skipped. +- Maintain only lightweight jobs for PRs (run code checks, not artifact uploads). For scheduled runs, run heavier checks. + +Edge cases & mitigation +- Fork PRs: secrets are not available. Skip secret-dependent steps and run only build/test/lint steps. +- Large builds or timeouts: Break checks into small jobs with `needs` to fail early, set timeouts on the longest-running jobs, and take precautions around cache usage. 
+- Flaky tests: mark flaky steps non-blocking but create an issue/annotation when flaky tests flake. + +CI Scheduler recommendation +- Keep the existing `repo-health.yml` schedule (daily). Add `dry-run.yml` with weekly schedule (`0 3 * * 0`) to validate the entire build pipeline weekly. + +PR Template (simply create `.github/PULL_REQUEST_TEMPLATE/pr_template.md`) — basic checklist +```markdown +## Summary + +## Checklist +- [ ] Tests (backend & frontend) validated locally and CI +- [ ] All tests pass in CI (Quality Checks) +- [ ] Lint and type checks pass (pre-commit hooks run locally) +- [ ] Changelog updated (if relevant) +- [ ] Docs updated (if user-facing change) +- [ ] No sensitive info or secrets in this PR +``` + +`.gitattributes`, `.gitignore`, and pre-commit checks review +- `.gitattributes`: ensure `*.db`, `*.sqlite`, `codeql-db/**` are LFS/binary as appropriate (file exists): verify no changes required; add new file patterns for scripts/ci artifacts if necessary. +- `.gitignore`: add `/.tmp-ci` or other ephemeral artifact patterns for CI scripts to avoid noise. +- `.pre-commit-config.yaml`: add a local `tsc` or `npx tsc --noEmit` hook if not present; keep `check-lfs-large-files` and `block-codeql-db-commits` enforced. Consider adding a new local `ci-dry-run` hook for pre-push gating (manual stage) but mainly rely on the CI workflows. + +Testing plan & acceptance criteria +- Run `dry-run.yml` via `workflow_dispatch` for a test branch (internal) and validate the dry-run jobs pass. +- Open a sample PR with an incomplete checklist and confirm `pr-checklist` fails with actionable message. +- Verify `goreleaser` dry-run step will not run for forked PRs and is only executed for branches in the same repository. +- Acceptance: job completes successfully under normal code status; PR blocking on failures for `main` + `feature/beta-release`. 
+ +Next steps for maintainers +1) Create the initial `dry-run.yml` & `pr-checklist.yml` workflows and a test PR to check behavior. +2) Add `scripts/ci/dry_run_*` scripts and link them from the new workflows. +3) Add the PR template file in `.github/PULL_REQUEST_TEMPLATE` and update `CONTRIBUTING.md` to require PR checklist checks. +4) Test behavior for fork PRs and set branch protection rules to require these checks on relevant branches. +5) Iterate in a small number of PRs to tune the threshold and gating behavior. + +Status: Implemented (history-rewrite dry-run and checklist only) + +Files added and wired into CI: +- `.github/workflows/dry-run-history-rewrite.yml` — runs a non-destructive history/large-file check on PRs and on a daily schedule. +- `.github/PULL_REQUEST_TEMPLATE/history-rewrite.md` — PR checklist for history rewrite PRs. +- `scripts/ci/dry_run_history_rewrite.sh` — CI wrapper that fails when banned paths or large historical objects are found. +- `.github/workflows/pr-checklist.yml` — validates that the PR body contains the required checklist items for history-rewrite PRs. + +Validation steps performed locally and via CI (dry-run): +- `scripts/ci/dry_run_history_rewrite.sh` returns non-zero when repo history contains objects or commits touching `backend/codeql-db` or other listed paths. +- The workflow uses `actions/checkout@v4` with `fetch-depth: 0` to ensure history is available for the check. +- The PR template ensures contributors attach dry-run output and backup logs prior to destructive cleanups. + +Next considerations: +- Add a `validate-pr-checklist` workflow to enforce general PR checklist items for all PRs if desired (future improvement). 
diff --git a/docs/plans/history_rewrite.md b/docs/plans/history_rewrite.md index 0bebdcb3..8be45e5e 100644 --- a/docs/plans/history_rewrite.md +++ b/docs/plans/history_rewrite.md @@ -76,3 +76,8 @@ Post rewrite maintenance Communication & Approval ------------------------ Open a PR with dry-run logs and `preview_removals` output, tag maintainers for approval before `--force` is used. + +CI automation +------------- +- A CI dry-run workflow `.github/workflows/dry-run-history-rewrite.yml` runs a non-destructive check that fails CI when banned history entries or large objects are found. It is triggered on PRs and a daily schedule. +- A PR checklist template `.github/PULL_REQUEST_TEMPLATE/history-rewrite.md` and a checklist validator `.github/workflows/pr-checklist.yml` ensure contributors attach the preview output and backups before seeking approval. diff --git a/scripts/ci/dry_run_history_rewrite.sh b/scripts/ci/dry_run_history_rewrite.sh new file mode 100755 index 00000000..36af3d28 --- /dev/null +++ b/scripts/ci/dry_run_history_rewrite.sh @@ -0,0 +1,97 @@ +#!/bin/sh +set -eu + +# CI wrapper that fails if the repo contains historical objects or commits +# touching specified paths, or objects larger than the configured strip size. 
+ +PATHS="backend/codeql-db,codeql-db,codeql-db-js,codeql-db-go" +STRIP_SIZE=50 + +usage() { + cat <&2; usage; exit 1;; + esac +done + +IFS=','; set -f +paths_list="" +for p in $PATHS; do + paths_list="$paths_list $p" +done +set +f; unset IFS + +echo "Checking repository history for banned paths: $paths_list" +echo "Blobs larger than: ${STRIP_SIZE}M will fail the check" + +failed=0 + +# 1) Check for commits touching paths +for p in $paths_list; do + count=$(git rev-list --all -- "$p" | wc -l | tr -d ' ') + if [ "$count" -gt 0 ]; then + echo "ERROR: Found $count historical commit(s) touching path: $p" + git rev-list --all -- "$p" | nl -ba | sed -n '1,50p' + echo "DRY-RUN FAILED: historical commits detected" + exit 1 + else + echo "OK: No history touching: $p" + fi +done + +# 2) Check for objects in paths +obj_count=$(git rev-list --objects --all -- $paths_list | wc -l | tr -d ' ') +if [ "$obj_count" -gt 0 ]; then + echo "ERROR: Found $obj_count objects in specified paths" + git rev-list --objects --all -- $paths_list | nl -ba | sed -n '1,100p' + echo "DRY-RUN FAILED: repository objects found in banned paths" + exit 1 +else + echo "OK: No repository objects in specified paths" +fi + +# 3) Check for large objects across history +echo "Scanning for objects larger than ${STRIP_SIZE}M..." 
+large_found=0 +# Write all object oids to a temp file to avoid a subshell problem +tmp_oids="$(mktemp)" +trap 'rm -f "$tmp_oids"' EXIT INT TERM +git rev-list --objects --all | awk '{print $1}' > "$tmp_oids" +while read -r oid; do + size=$(git cat-file -s "$oid" 2>/dev/null || echo 0) + if [ -n "$size" ] && [ "$size" -ge $((STRIP_SIZE * 1024 * 1024)) ]; then + echo "LARGE OBJECT: $oid size=$size" + large_found=1 + failed=1 + fi +done < "$tmp_oids" +if [ "$large_found" -eq 0 ]; then + echo "OK: No large objects detected across history" +else + echo "DRY-RUN FAILED: large historical blobs detected" + exit 1 +fi + +if [ "$failed" -ne 0 ]; then + echo "DRY-RUN FAILED: Repository history contains blocked entries" + exit 1 +fi + +echo "DRY-RUN OK: No problems detected" +exit 0