diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
new file mode 100644
index 00000000..5cac4244
--- /dev/null
+++ b/.github/workflows/nightly-build.yml
@@ -0,0 +1,265 @@
+# Nightly pipeline: builds and pushes the multi-arch `nightly` image to GHCR,
+# smoke-tests it, builds snapshot binaries with GoReleaser, and scans the
+# image and its SBOM with Grype and Trivy.
+name: Nightly Build & Package
+on:
+  push:
+    branches:
+      - nightly
+  schedule:
+    # Daily at 09:00 UTC (4am EST / 5am EDT)
+    - cron: '0 9 * * *'
+  workflow_dispatch:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-nightly:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      id-token: write
+    outputs:
+      version: ${{ steps.meta.outputs.version }}
+      tags: ${{ steps.meta.outputs.tags }}
+      digest: ${{ steps.build.outputs.digest }}
+      # Lower-cased "<registry>/<owner>/<repo>" for downstream jobs.
+      image: ${{ steps.vars.outputs.image }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # Scheduled runs start from the default branch, not the branch
+          # this workflow packages, so check out `nightly` explicitly.
+          ref: nightly
+          fetch-depth: 0
+
+      - name: Resolve image name and source commit
+        id: vars
+        run: |
+          # Image references must be lower-case, but github.repository
+          # keeps the owner/repo capitalisation.
+          image="${REGISTRY}/${IMAGE_NAME}"
+          echo "image=${image,,}" >> "$GITHUB_OUTPUT"
+          # github.sha is the triggering commit, which differs from the
+          # checked-out one on scheduled runs; record what is being built.
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.1
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1
+        with:
+          # Derive type=sha from the checked-out commit, not the event.
+          context: git
+          images: ${{ steps.vars.outputs.image }}
+          tags: |
+            type=raw,value=nightly
+            type=raw,value=nightly-{{date 'YYYY-MM-DD'}}
+            type=sha,prefix=nightly-,format=short
+          labels: |
+            org.opencontainers.image.title=Charon Nightly
+            org.opencontainers.image.description=Nightly build of Charon
+
+      - name: Build and push Docker image
+        id: build
+        uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            VERSION=nightly-${{ steps.vars.outputs.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          provenance: true
+          sbom: true
+
+      - name: Generate SBOM
+        uses: anchore/sbom-action@99c98a8d93295c87a56f582070a01cd96fc2db1d # v0.21.1
+        with:
+          # Pin by digest so the SBOM describes this run's image even if
+          # another run re-points the mutable `nightly` tag.
+          image: ${{ steps.vars.outputs.image }}@${{ steps.build.outputs.digest }}
+          format: cyclonedx-json
+          output-file: sbom-nightly.json
+
+      - name: Upload SBOM artifact
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: sbom-nightly
+          path: sbom-nightly.json
+          retention-days: 30
+
+  test-nightly-image:
+    needs: build-and-push-nightly
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: read
+    env:
+      # The exact image pushed by this run: lower-cased name plus digest.
+      IMAGE_REF: ${{ needs.build-and-push-nightly.outputs.image }}@${{ needs.build-and-push-nightly.outputs.digest }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Pull nightly image
+        run: docker pull "${IMAGE_REF}"
+
+      - name: Run container smoke test
+        run: |
+          docker run --name charon-nightly -d \
+            -p 8080:8080 \
+            "${IMAGE_REF}"
+
+          # Poll the health endpoint (roughly 30s at most) instead of a
+          # fixed sleep, and dump container state and logs on failure.
+          # /api/v1/health is the endpoint CONTRIBUTING.md documents.
+          if ! curl --fail --silent --show-error \
+            --retry 15 --retry-delay 2 --retry-all-errors \
+            http://localhost:8080/api/v1/health; then
+            docker ps -a
+            docker logs charon-nightly || true
+            exit 1
+          fi
+
+          # Check container is still running
+          docker ps | grep charon-nightly
+
+          # Cleanup
+          docker stop charon-nightly
+          docker rm charon-nightly
+
+  build-nightly-release:
+    needs: test-nightly-image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # Build binaries from `nightly` on scheduled runs too.
+          ref: nightly
+          fetch-depth: 0
+
+      - name: Set up Go
+        uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
+        with:
+          go-version: '1.23'
+
+      - name: Set up Node.js
+        uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
+        with:
+          node-version: '20'
+
+      - name: Set up Zig (for cross-compilation)
+        uses: goto-bus-stop/setup-zig@abea47f85e598557f500fa1fd2ab7464fcb39406 # v2.2.1
+        with:
+          version: '0.11.0'
+
+      - name: Build frontend
+        working-directory: ./frontend
+        run: |
+          npm ci
+          npm run build
+
+      - name: Run GoReleaser (snapshot mode)
+        uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0
+        with:
+          distribution: goreleaser
+          version: '~> v2'
+          args: release --snapshot --skip=publish --clean
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Upload nightly binaries
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: nightly-binaries
+          path: dist/*
+          retention-days: 30
+
+  verify-nightly-supply-chain:
+    needs: build-and-push-nightly
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: read
+      security-events: write
+    env:
+      # The exact image pushed by this run: lower-cased name plus digest.
+      IMAGE_REF: ${{ needs.build-and-push-nightly.outputs.image }}@${{ needs.build-and-push-nightly.outputs.digest }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      # Authenticate so Trivy can pull the image when the package is private.
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Download SBOM
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: sbom-nightly
+
+      - name: Scan with Grype
+        uses: anchore/scan-action@64a33b277ea7a1215a3c142735a1091341939ff5 # v4.1.2
+        with:
+          sbom: sbom-nightly.json
+          fail-build: false
+          severity-cutoff: high
+
+      - name: Scan with Trivy
+        uses: aquasecurity/trivy-action@915b19bbe73b92a6cf82a1bc12b087c9a19a5fe2 # 0.28.0
+        with:
+          image-ref: ${{ env.IMAGE_REF }}
+          format: 'sarif'
+          output: 'trivy-nightly.sarif'
+
+      - name: Upload Trivy results
+        uses: github/codeql-action/upload-sarif@1f1223ea5cb211a8eeff76efc05e03f79c7fc6b1 # v3.28.2
+        with:
+          sarif_file: 'trivy-nightly.sarif'
+          category: 'trivy-nightly'
+
+      - name: Check for critical CVEs
+        run: |
+          if grep -q "CRITICAL" trivy-nightly.sarif; then
+            echo "❌ Critical vulnerabilities found in nightly build"
+            exit 1
+          fi
+          echo "✅ No critical vulnerabilities found"
diff --git a/.github/workflows/propagate-changes.yml b/.github/workflows/propagate-changes.yml index db93f8b8..67588a0c 100644 --- a/.github/workflows/propagate-changes.yml +++ b/.github/workflows/propagate-changes.yml @@ -146,9 +146,10 @@ jobs: if (currentBranch === 'main') { // Main -> Development - await createPR('main', 'development', 'nightly'); + await createPR('main', 'development'); } else if (currentBranch === 'development') { // Development -> Nightly + await createPR('development', 'nightly'); } else if (currentBranch === 'nightly') { // Nightly -> Feature branches const branches = await github.paginate(github.rest.repos.listBranches, { diff --git a/.github/workflows/supply-chain-verify.yml b/.github/workflows/supply-chain-verify.yml index ea4fb087..87f1cb2f 100644 --- a/.github/workflows/supply-chain-verify.yml +++ b/.github/workflows/supply-chain-verify.yml @@ -76,6 +76,8 @@ jobs: TAG="latest" elif [[ "${{ github.event.workflow_run.head_branch }}" == "development" ]]; then TAG="dev" + elif [[ "${{ github.event.workflow_run.head_branch }}" == "nightly" ]]; then + TAG="nightly" elif [[ "${{ github.event.workflow_run.head_branch }}" == "feature/beta-release" ]]; then TAG="beta" elif [[ "${{ github.event.workflow_run.event }}" == "pull_request" ]]; then diff --git a/.markdownlintignore b/.markdownlintignore new file mode 100644 index 00000000..e39f9a4c --- /dev/null +++ b/.markdownlintignore @@ -0,0 +1,10 @@ +# Ignore auto-generated or legacy documentation +docs/reports/ +docs/implementation/ +docs/issues/ +docs/plans/archive/ +backend/ +CODEQL_*.md +COVERAGE_*.md +SECURITY_REMEDIATION_COMPLETE.md +ISSUE_*.md diff --git a/CODEQL_EMAIL_INJECTION_REMEDIATION_COMPLETE.md b/CODEQL_EMAIL_INJECTION_REMEDIATION_COMPLETE.md index 747051b2..450dd9a6 100644 --- 
a/CODEQL_EMAIL_INJECTION_REMEDIATION_COMPLETE.md +++ b/CODEQL_EMAIL_INJECTION_REMEDIATION_COMPLETE.md @@ -12,12 +12,14 @@ Successfully remediated the CodeQL `go/email-injection` finding by implementing ### 1. Helper Functions Added to `backend/internal/services/mail_service.go` #### `encodeSubject(subject string) (string, error)` + - Trims whitespace from subject lines - Rejects any CR/LF characters to prevent header injection - Uses MIME Q-encoding (RFC 2047) for UTF-8 subject lines - Returns encoded subject suitable for email headers #### `toHeaderUndisclosedRecipients() string` + - Returns constant `"undisclosed-recipients:;"` for RFC 5322 To: header - Prevents request-derived email addresses from appearing in message headers - Eliminates the CodeQL-detected taint flow from user input to SMTP message @@ -25,19 +27,21 @@ Successfully remediated the CodeQL `go/email-injection` finding by implementing ### 2. Modified `buildEmail()` Function **Key Security Changes:** + - Changed `To:` header to use `toHeaderUndisclosedRecipients()` instead of request-derived recipient address - Recipient validation still performed for SMTP envelope (RCPT TO command) - Subject encoding enforced through `encodeSubject()` helper - Updated security documentation comments **Critical Implementation Detail:** + - SMTP envelope recipients (`toEnvelope` in `smtp.SendMail`) remain correct for delivery - Only RFC 5322 message headers changed - Separation of envelope routing from message headers eliminates injection risk ### 3. Enhanced Test Coverage -#### New Tests in `backend/internal/services/mail_service_test.go`: +#### New Tests in `backend/internal/services/mail_service_test.go` 1. 
**`TestMailService_BuildEmail_UndisclosedRecipients`** - Verifies `To:` header contains `undisclosed-recipients:;` @@ -48,7 +52,7 @@ Successfully remediated the CodeQL `go/email-injection` finding by implementing - Tests HTML template auto-escaping for special characters in `appName` - Verifies XSS protection in invite emails -#### Updated Tests in `backend/internal/api/handlers/user_handler_test.go`: +#### Updated Tests in `backend/internal/api/handlers/user_handler_test.go` 1. **`TestUserHandler_PreviewInviteURL_Success_Unconfigured`** - Updated to verify `base_url` and `preview_url` are empty when `app.public_url` not configured @@ -62,18 +66,21 @@ Successfully remediated the CodeQL `go/email-injection` finding by implementing ## Verification Results ### Test Results ✅ + ```bash cd /projects/Charon/backend/internal/services go test -v -run "TestMail" . ``` **Result**: All mail service tests PASS + - Total mail service tests: 28 tests - New security tests: 2 added - Updated tests: 1 modified - Coverage: 81.1% of statements (services package) ### CodeQL Scan Results ✅ + ```bash codeql database analyze codeql-db-go \ --format=sarif-latest \ @@ -81,6 +88,7 @@ codeql database analyze codeql-db-go \ ``` **Result**: + - Total findings: 0 - `go/email-injection` findings: 0 (RESOLVED) - Previous finding location: `backend/internal/services/mail_service.go:285` @@ -89,12 +97,14 @@ codeql database analyze codeql-db-go \ ## Security Impact ### Before Remediation + - Request-derived email addresses flowed into RFC 5322 message headers - CodeQL identified potential for content spoofing (CWE-640) - Malicious recipient addresses could theoretically manipulate headers - Risk: Low (existing CRLF rejection mitigated most attacks, but CodeQL flagged it) ### After Remediation + - **Zero request-derived data** in message headers - `To:` header uses RFC-compliant constant: `undisclosed-recipients:;` - SMTP envelope routing unchanged (still uses validated recipient) @@ -106,6 +116,7 @@ 
codeql database analyze codeql-db-go \ 4. Dot-stuffing (existing) ### Additional Protections + - Host header injection prevented in invite URL generation - HTML template auto-escaping verified - Comprehensive test coverage for injection scenarios diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f897e8d4..9b16a9ea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,11 +47,13 @@ curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/insta ``` Ensure `$GOPATH/bin` is in your `PATH`: + ```bash export PATH="$PATH:$(go env GOPATH)/bin" ``` Verify installation: + ```bash golangci-lint --version # Should output: golangci-lint has version 1.xx.x ... @@ -98,12 +100,35 @@ npm run dev # Start frontend dev server ### Branching Strategy -- **main** - Production-ready code -- **development** - Main development branch (default) +- **main** - Production-ready code (stable releases) +- **nightly** - Pre-release testing branch (automated daily builds at 09:00 UTC) +- **development** - Main development branch (default for contributions) - **feature/** - Feature branches (e.g., `feature/add-ssl-support`) - **bugfix/** - Bug fix branches (e.g., `bugfix/fix-import-crash`) - **hotfix/** - Urgent production fixes +### Branch Flow + +The project uses a three-tier branching model: + +``` +development → nightly → main + (unstable) (testing) (stable) +``` + +**Flow details:** + +1. **development → nightly**: Automated daily merge at 02:00 UTC +2. **nightly → main**: Manual PR after validation and testing +3. 
**Contributors always branch from `development`** + +**Why nightly?** + +- Provides a testing ground for features before production +- Automated daily builds catch integration issues +- Users can test pre-release features via `nightly` Docker tag +- Maintainers validate stability before merging to `main` + ### Creating a Feature Branch Always branch from `development`: @@ -114,6 +139,8 @@ git pull upstream development git checkout -b feature/your-feature-name ``` +**Note:** Never branch from `nightly` or `main`. The `nightly` branch is managed by automation and receives daily merges from `development`. + ### Commit Message Guidelines Follow the [Conventional Commits](https://www.conventionalcommits.org/) specification: @@ -222,6 +249,49 @@ export function ProxyHostForm({ host, onSubmit, onCancel }: ProxyHostFormProps) ## Testing Guidelines +### Testing Against Nightly Builds + +Before submitting a PR, test your changes against the latest nightly build: + +**Pull latest nightly:** + +```bash +docker pull ghcr.io/wikid82/charon:nightly +``` + +**Run your local changes against nightly:** + +```bash +# Start nightly container +docker run -d --name charon-nightly \ + -p 8080:8080 \ + ghcr.io/wikid82/charon:nightly + +# Test your feature/fix +curl http://localhost:8080/api/v1/health + +# Clean up +docker stop charon-nightly && docker rm charon-nightly +``` + +**Integration testing:** + +If your changes affect existing features, verify compatibility: + +1. Deploy nightly build in test environment +2. Run your modified frontend/backend against it +3. Verify no regressions in existing functionality +4. Document any breaking changes in your PR + +**Reporting nightly issues:** + +If you find bugs in nightly builds: + +1. Check if the issue exists in `development` branch +2. Open an issue tagged with `nightly` label +3. Include nightly build date or commit SHA +4. 
Provide reproduction steps + ### Backend Tests Write tests for all new functionality: @@ -298,6 +368,7 @@ Charon uses [Agent Skills](https://agentskills.io) for AI-discoverable developme ### What is a Skill? A skill is a combination of: + - **YAML Frontmatter**: Metadata following the [agentskills.io specification](https://agentskills.io/specification) - **Markdown Documentation**: Usage instructions, examples, and troubleshooting - **Execution Script**: Shell script that performs the actual task @@ -305,6 +376,7 @@ A skill is a combination of: ### When to Create a Skill Create a new skill when you have a: + - **Repeatable task** that developers run frequently - **Complex workflow** that benefits from documentation - **CI/CD operation** that should be AI-discoverable @@ -317,6 +389,7 @@ Create a new skill when you have a: #### 1. Plan Your Skill Before creating, define: + - **Name**: Use `{category}-{feature}-{variant}` format (e.g., `test-backend-coverage`) - **Category**: test, integration-test, security, qa, build, utility, docker - **Purpose**: One clear sentence describing what it does @@ -407,6 +480,7 @@ command example **Last Updated**: YYYY-MM-DD **Maintained by**: Charon Project **Source**: Original implementation or script path + ``` #### 4. 
Create the Execution Script @@ -511,24 +585,28 @@ All skills must pass validation: ### Best Practices **Documentation:** + - Keep SKILL.md under 500 lines - Include real-world examples - Document all prerequisites clearly - Add troubleshooting section for common issues **Scripts:** + - Always use helper functions for logging and error handling - Validate environment before execution - Make scripts idempotent when possible - Clean up resources on exit (use trap) **Testing:** + - Test skill in clean environment - Verify all exit codes - Check output format consistency - Test error scenarios **Metadata:** + - Set accurate `execution_time` (short < 1min, medium 1-5min, long > 5min) - Use `ci_cd_safe: false` for interactive or risky operations - Mark `idempotent: true` only if truly safe to run repeatedly @@ -539,17 +617,20 @@ All skills must pass validation: Charon provides helper scripts for common operations: **Logging** (`_logging_helpers.sh`): + - `log_info`, `log_success`, `log_warning`, `log_error`, `log_debug` - `log_step` for section headers - `log_command` to log before executing **Error Handling** (`_error_handling_helpers.sh`): + - `error_exit` to print error and exit - `check_command_exists`, `check_file_exists`, `check_dir_exists` - `run_with_retry` for network operations - `trap_error` for automatic error trapping **Environment** (`_environment_helpers.sh`): + - `validate_go_environment`, `validate_python_environment`, `validate_node_environment` - `validate_docker_environment` - `set_default_env` for environment variables diff --git a/COVERAGE_ANALYSIS.md b/COVERAGE_ANALYSIS.md index 7db093a3..da238a1a 100644 --- a/COVERAGE_ANALYSIS.md +++ b/COVERAGE_ANALYSIS.md @@ -1,4 +1,5 @@ # Coverage Analysis Report + **Date**: January 12, 2026 **Current Coverage**: 83.2% **Target Coverage**: 85.0% @@ -81,12 +82,14 @@ internal/api/handlers/manual_challenge_handler.go: ## DNS Challenge Feature Status -### ✅ WELL-TESTED Components: +### ✅ WELL-TESTED Components + - 
`pkg/dnsprovider/custom/manual_provider.go`: **91.1%** ✓ - `internal/services/dns_provider_service.go`: **81-100%** per function ✓ - `internal/services/manual_challenge_service.go`: **75-100%** per function ✓ -### ⚠️ NEEDS WORK Components: +### ⚠️ NEEDS WORK Components + - `pkg/dnsprovider/registry.go`: **0%** ❌ - `internal/api/handlers/manual_challenge_handler.go`: **35-66%** on key endpoints ⚠️ @@ -95,11 +98,13 @@ internal/api/handlers/manual_challenge_handler.go: ## Path to 85% Coverage ### Option A: Test pkg/dnsprovider/registry.go (RECOMMENDED) + **Effort**: 30-45 minutes **Impact**: ~0.5-1.0% coverage gain (129 lines) **Risk**: Low (pure logic, no external dependencies) **Test Strategy**: + ```go // Test plan for registry_test.go 1. TestNewRegistry() - constructor @@ -115,11 +120,13 @@ internal/api/handlers/manual_challenge_handler.go: ``` ### Option B: Improve manual_challenge_handler.go + **Effort**: 45-60 minutes **Impact**: ~0.8-1.2% coverage gain **Risk**: Medium (HTTP testing, state management) **Test Strategy**: + ```go // Add tests for: 1. VerifyChallenge - error paths (invalid IDs, DNS failures) @@ -130,11 +137,13 @@ internal/api/handlers/manual_challenge_handler.go: ``` ### Option C: Quick Wins (Sanitize + Init Files) + **Effort**: 20-30 minutes **Impact**: ~0.3-0.5% coverage gain **Risk**: Very Low (simple utility functions) **Test Strategy**: + ```go // Test sanitize.go functions 1. 
Test XSS prevention @@ -152,16 +161,19 @@ internal/api/handlers/manual_challenge_handler.go: ## Recommended Action Plan (45-60 min) **Phase 1** (20 min): Test `pkg/dnsprovider/registry.go` + - Create `pkg/dnsprovider/registry_test.go` - Test all 10 functions - Expected gain: +0.8% **Phase 2** (25 min): Test sanitization files + - Expand `internal/api/handlers/sanitize_test.go` - Create `internal/util/sanitize_test.go` - Expected gain: +0.4% **Phase 3** (15 min): Verify and adjust + - Run coverage again - Check if we hit 85% - If not, add 2-3 tests to `manual_challenge_handler.go` @@ -176,12 +188,14 @@ internal/api/handlers/manual_challenge_handler.go: **Pragmatic Option**: Set threshold to **83.0%** **Rationale**: + 1. Main entry point (`cmd/api/main.go`) is at 26% (hard to test) 2. Seed script (`cmd/seed/main.go`) is at 19% (not production code) 3. Middleware init functions are low-value test targets 4. **Core business logic is well-tested** (DNS providers, services, handlers) **Files Intentionally Untested** (acceptable): + - `cmd/api/main.go` - integration test territory - `cmd/seed/main.go` - utility script - `internal/server/server.go` - wired in integration tests @@ -219,6 +233,7 @@ internal/api/handlers/manual_challenge_handler.go: **We CAN reach 85% with targeted testing in < 1 hour.** **Recommendation**: + 1. **Immediate**: Test `pkg/dnsprovider/registry.go` (+0.8%) 2. **Quick Win**: Test sanitization utilities (+0.4%) 3. **If Needed**: Add 3-5 tests to `manual_challenge_handler.go` (+0.3-0.5%) @@ -232,6 +247,7 @@ internal/api/handlers/manual_challenge_handler.go: Choose one path: **Path A** (Testing): + ```bash # 1. 
Create registry_test.go touch pkg/dnsprovider/registry_test.go @@ -245,6 +261,7 @@ go tool cover -func=coverage.out | tail -1 ``` **Path B** (Threshold Adjustment): + ```bash # Update CI configuration to 83% # Focus on Patch Coverage (100% for new changes) diff --git a/README.md b/README.md index dc15b884..43b6ecff 100644 --- a/README.md +++ b/README.md @@ -42,17 +42,23 @@ You want your apps accessible online. You don't want to become a networking expe ## 🐕 Cerberus Security Suite ### 🕵️‍♂️ **CrowdSec Integration** - - Protects your applications from attacks using behavior-based detection and automated remediation. + +- Protects your applications from attacks using behavior-based detection and automated remediation. + ### 🔐 **Access Control Lists (ACLs)** - - Define fine-grained access rules for your applications, controlling who can access what and under which conditions. + +- Define fine-grained access rules for your applications, controlling who can access what and under which conditions. + ### 🧱 **Web Application Firewall (WAF)** - - Protects your applications from common web vulnerabilities such as SQL injection, XSS, and more using Coraza. + +- Protects your applications from common web vulnerabilities such as SQL injection, XSS, and more using Coraza. + ### ⏱️ **Rate Limiting** - - Protect your applications from abuse by limiting the number of requests a user or IP can make within a certain timeframe. + +- Protect your applications from abuse by limiting the number of requests a user or IP can make within a certain timeframe. --- - ## ✨ Top 10 Features ### 🎯 **Point & Click Management** @@ -136,6 +142,19 @@ services: ``` +**Using Nightly Builds:** + +To test the latest nightly build (automated daily at 09:00 UTC): + +```yaml +services: + charon: + image: ghcr.io/wikid82/charon:nightly + # ... rest of configuration +``` + +> **Note:** Nightly builds are for testing and may contain experimental features. Use `latest` for production. 
+ Then run: ```bash @@ -144,6 +163,8 @@ docker-compose up -d ### Docker Run (One-Liner) +**Stable Release:** + ```bash docker run -d \ --name charon \ @@ -158,6 +179,24 @@ docker run -d \ ghcr.io/wikid82/charon:latest ``` +**Nightly Build (Testing):** + +```bash +docker run -d \ + --name charon \ + -p 80:80 \ + -p 443:443 \ + -p 443:443/udp \ + -p 8080:8080 \ + -v ./charon-data:/app/data \ + -v /var/run/docker.sock:/var/run/docker.sock:ro \ + -e CHARON_ENV=production \ + -e CHARON_ENCRYPTION_KEY=your-32-byte-base64-key-here \ + ghcr.io/wikid82/charon:nightly +``` + +> **Note:** Nightly builds include the latest development features and are rebuilt daily at 09:00 UTC. Use for testing only. + ### What Just Happened? 1. Charon downloaded and started diff --git a/SECURITY.md b/SECURITY.md index efdc058d..6866bc06 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,6 +16,7 @@ We take security seriously. If you discover a security vulnerability in Charon, ### Where to Report **Preferred Method**: GitHub Security Advisory (Private) + 1. Go to 2. Fill out the advisory form with: - Vulnerability description @@ -25,6 +26,7 @@ We take security seriously. 
If you discover a security vulnerability in Charon, - Suggested fix (if applicable) **Alternative Method**: Email + - Send to: `security@charon.dev` (if configured) - Use PGP encryption (key available below, if applicable) - Include same information as GitHub advisory @@ -100,6 +102,7 @@ Charon implements industry-leading **5-layer defense-in-depth** SSRF protection #### Learn More For complete technical details, see: + - [SSRF Protection Guide](docs/security/ssrf-protection.md) - [Manual Test Plan](docs/issues/ssrf-manual-test-plan.md) - [QA Audit Report](docs/reports/qa_ssrf_remediation_report.md) @@ -207,6 +210,7 @@ cosign verify \ ``` Successful verification output confirms: + - The image was built by GitHub Actions - The build came from the official Charon repository - The image has not been tampered with since signing @@ -321,6 +325,7 @@ Charon implements multiple layers of automated security scanning: **Trigger Timing**: Runs automatically after `docker-build.yml` completes successfully via `workflow_run` trigger. **Branch Coverage**: Triggers on **ALL branches** where docker-build completes, including: + - `main` (default branch) - `development` - `feature/*` branches (including `feature/beta-release`) @@ -329,6 +334,7 @@ Charon implements multiple layers of automated security scanning: **Why No Branch Filter**: GitHub Actions has a platform limitation where `branches` filters in `workflow_run` triggers only match the default branch. To ensure comprehensive supply chain verification across all branches and PRs, we intentionally omit the branch filter. The workflow file must exist on the branch to execute, preventing untrusted code execution. **Verification Steps**: + 1. SBOM completeness verification 2. Vulnerability scanning with Grype 3. Results uploaded as workflow artifacts @@ -336,6 +342,7 @@ Charon implements multiple layers of automated security scanning: 5. 
For releases: Cosign signature verification and SLSA provenance validation **Additional Triggers**: + - Runs on all published releases - Scheduled weekly on Mondays at 00:00 UTC - Can be triggered manually via `workflow_dispatch` @@ -373,6 +380,7 @@ The security scanning workflows use a coordinated orchestration pattern: 4. **Weekly Maintenance**: `security-weekly-rebuild.yml` provides ongoing monitoring This pattern ensures: + - Images are built before verification attempts to scan them - No race conditions between build and verification - Comprehensive coverage across all branches and PRs diff --git a/SECURITY_REMEDIATION_COMPLETE.md b/SECURITY_REMEDIATION_COMPLETE.md index 6296dbca..22c86e4a 100644 --- a/SECURITY_REMEDIATION_COMPLETE.md +++ b/SECURITY_REMEDIATION_COMPLETE.md @@ -9,6 +9,7 @@ ## Executive Summary Successfully implemented conservative security remediation following the Supervisor's tiered approach: + - **Fix first, suppress only when demonstrably safe** - **Zero functional code changes** (surgical annotations only) - **All existing tests passing** @@ -21,12 +22,14 @@ Successfully implemented conservative security remediation following the Supervi ### Implementation Status: COMPLETE **Files Modified:** + 1. `internal/services/notification_service.go:305` 2. `internal/utils/url_testing.go:168` **Action Taken:** Added comprehensive CodeQL suppression annotations **Annotation Format:** + ```go // codeql[go/request-forgery] Safe: URL validated by security.ValidateExternalURL() which: // 1. 
Validates URL format and scheme (HTTPS required in production) @@ -37,6 +40,7 @@ Successfully implemented conservative security remediation following the Supervi ``` **Rationale:** Both findings occur after comprehensive SSRF protection via `security.ValidateExternalURL()`: + - DNS resolution with IP validation - RFC 1918 private IP blocking - Connection-time revalidation (TOCTOU protection) @@ -50,6 +54,7 @@ Successfully implemented conservative security remediation following the Supervi ### Implementation Status: COMPLETE **Files Audited:** + 1. `internal/api/handlers/backup_handler.go:75` - ✅ Already sanitized 2. `internal/api/handlers/crowdsec_handler.go:711` - ✅ Already sanitized 3. `internal/api/handlers/crowdsec_handler.go:717` (4 occurrences) - ✅ System-generated paths @@ -58,11 +63,13 @@ Successfully implemented conservative security remediation following the Supervi 6. `internal/api/handlers/crowdsec_handler.go:819` - ✅ Already sanitized **Findings:** + - **ALL 10 log injection sites were already protected** via `util.SanitizeForLog()` - **No code changes required** - only added CodeQL annotations documenting existing protection - `util.SanitizeForLog()` removes control characters (0x00-0x1F, 0x7F) including CRLF **Annotation Format (User Input):** + ```go // codeql[go/log-injection] Safe: User input sanitized via util.SanitizeForLog() // which removes control characters (0x00-0x1F, 0x7F) including CRLF @@ -70,12 +77,14 @@ logger.WithField("slug", util.SanitizeForLog(slug)).Warn("message") ``` **Annotation Format (System-Generated):** + ```go // codeql[go/log-injection] Safe: archive_path is system-generated file path logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") ``` **Security Analysis:** + - `backup_handler.go:75` - User filename sanitized via `util.SanitizeForLog(filepath.Base(filename))` - `crowdsec_handler.go:711` - Slug sanitized via `util.SanitizeForLog(slug)` - `crowdsec_handler.go:717` (4x) - All values are 
system-generated (cache keys, file paths from Hub responses) @@ -88,6 +97,7 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") ### Implementation Status: COMPLETE **Files Modified:** + 1. `internal/services/mail_service.go:222` (buildEmail function) 2. `internal/services/mail_service.go:332` (sendSSL w.Write call) 3. `internal/services/mail_service.go:383` (sendSTARTTLS w.Write call) @@ -95,6 +105,7 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") **Action Taken:** Added comprehensive security documentation **WITHOUT CodeQL suppression** **Documentation Format:** + ```go // Security Note: Email injection protection implemented via: // - Headers sanitized by sanitizeEmailHeader() removing control chars (0x00-0x1F, 0x7F) @@ -104,6 +115,7 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") ``` **Rationale:** Per Supervisor directive: + - Email injection protection is complex and multi-layered - Keep CodeQL warnings as "architectural guardrails" - Multiple validation layers exist (`sanitizeEmailHeader`, `sanitizeEmailBody`, RFC validation) @@ -114,21 +126,25 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") ## Changes Summary by File ### 1. internal/services/notification_service.go + - **Line ~305:** Added SSRF suppression annotation (6 lines of documentation) - **Functional changes:** None - **Behavior changes:** None ### 2. internal/utils/url_testing.go + - **Line ~168:** Added SSRF suppression annotation (6 lines of documentation) - **Functional changes:** None - **Behavior changes:** None ### 3. internal/api/handlers/backup_handler.go + - **Line ~75:** Added log injection annotation (already sanitized) - **Functional changes:** None - **Behavior changes:** None ### 4. 
internal/api/handlers/crowdsec_handler.go + - **Line ~711:** Added log injection annotation (already sanitized) - **Line ~717:** Added log injection annotation (system-generated paths) - **Line ~721:** Added log injection annotation (system-generated paths) @@ -138,6 +154,7 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") - **Behavior changes:** None ### 5. internal/services/mail_service.go + - **Line ~222:** Enhanced buildEmail documentation with security notes - **Line ~332:** Added security documentation for sendSSL w.Write - **Line ~383:** Added security documentation for sendSTARTTLS w.Write @@ -149,11 +166,14 @@ logger.WithField("archive_path", res.Meta.ArchivePath).Error("message") ## CodeQL Behavior ### Local Scans (Current) + CodeQL suppressions (`codeql[rule-id]` comments) **do NOT suppress findings** during local scans. Output shows all 15 findings still detected - **THIS IS EXPECTED AND CORRECT**. ### GitHub Code Scanning (After Upload) + When SARIF files are uploaded to GitHub: + - **SSRF (2 findings):** Will be suppressed ✅ - **Log Injection (10 findings):** Will be suppressed ✅ - **Email Injection (3 findings):** Will remain visible ⚠️ (intentional architectural guardrail) @@ -163,6 +183,7 @@ When SARIF files are uploaded to GitHub: ## Validation Results ### ✅ Tests Passing + ``` Backend Tests: PASS Coverage: 85.35% (≥85% required) @@ -170,12 +191,14 @@ All existing tests passing with zero failures ``` ### ✅ Code Integrity + - Zero functional changes - Zero behavior modifications - Only added documentation and annotations - Surgical edits to exact flagged lines ### ✅ Security Posture + - All SSRF protections documented and validated - All log injection sanitization confirmed and annotated - Email injection protection documented (warnings intentionally kept) @@ -199,6 +222,7 @@ All existing tests passing with zero failures ## Next Steps 1. 
**Commit Changes:** + ```bash git add -A git commit -m "security: Conservative remediation for CodeQL findings diff --git a/VERSION.md b/VERSION.md index baada463..d20f5a8d 100644 --- a/VERSION.md +++ b/VERSION.md @@ -55,6 +55,9 @@ Example: `0.1.0-alpha`, `1.0.0-beta.1`, `2.0.0-rc.2` ### Available Tags - **`latest`**: Latest stable release (main branch) +- **`nightly`**: Latest nightly build (nightly branch, rebuilt daily at 09:00 UTC) +- **`nightly-YYYY-MM-DD`**: Date-specific nightly build +- **`nightly-{sha}`**: Commit-specific nightly build - **`development`**: Latest development build (development branch) - **`v1.2.3`**: Specific version tag - **`1.2`**: Latest patch for minor version @@ -71,13 +74,83 @@ docker pull ghcr.io/wikid82/charon:latest # Use specific version docker pull ghcr.io/wikid82/charon:v1.0.0 -# Use development builds +# Use latest nightly build (automated daily at 09:00 UTC) +docker pull ghcr.io/wikid82/charon:nightly + +# Use date-specific nightly build +docker pull ghcr.io/wikid82/charon:nightly-2026-01-13 + +# Use commit-specific nightly build +docker pull ghcr.io/wikid82/charon:nightly-abc123 + +# Use development builds (unstable, every commit) docker pull ghcr.io/wikid82/charon:development -# Use specific commit +# Use specific commit from main docker pull ghcr.io/wikid82/charon:main-abc123 ``` +### Nightly Builds + +Nightly builds provide a testing ground for features before they reach `main`: + +- **Automated**: Built daily at 09:00 UTC from the `nightly` branch +- **Source**: Auto-merged from `development` branch +- **Purpose**: Pre-release testing and validation +- **Stability**: More stable than `development`, less stable than `latest` + +**When to use nightly:** + +- Testing new features before stable release +- Validating bug fixes +- Contributing to pre-release testing +- Running in staging environments + +**When to avoid nightly:** + +- Production environments (use `latest` instead) +- Critical infrastructure +- When maximum
stability is required + +## Nightly Versioning Format + +### Version Precedence + +Charon uses the following version hierarchy: + +1. **Stable releases**: `v1.2.3` (highest precedence) +2. **Nightly builds**: `nightly-YYYY-MM-DD` or `nightly-{sha}` +3. **Development builds**: `development` or `development-{sha}` (lowest precedence) + +### Nightly Version Tags + +Nightly builds use multiple tag formats: + +- **`nightly`**: Always points to the latest nightly build (floating tag) +- **`nightly-YYYY-MM-DD`**: Date-specific build (e.g., `nightly-2026-01-13`) +- **`nightly-{sha}`**: Commit-specific build (e.g., `nightly-abc1234`) + +**Tag characteristics:** + +| Tag Format | Immutable | Use Case | +|----------------------|-----------|---------------------------------| +| `nightly` | No | Latest nightly features | +| `nightly-2026-01-13` | Yes | Reproducible date-based testing | +| `nightly-abc1234` | Yes | Exact commit testing | + +**Version in API responses:** + +Nightly builds report their version in the health endpoint: + +```json +{ + "version": "nightly-2026-01-13", + "git_commit": "abc1234567890def", + "build_date": "2026-01-13T02:00:00Z", + "branch": "nightly" +} +``` + ## Version Information ### Runtime Version Endpoint @@ -153,6 +226,10 @@ git commit -m "fix: correct proxy timeout handling" ## CI Tag-based Releases (recommended) -- CI derives the release `Version` from the Git tag (e.g., `v1.2.3`) and embeds this value into the backend binary via Go ldflags; frontend reads the version from the backend's API. This avoids automatic commits to `main`. -- The `.version` file is optional. If present, use the `scripts/check-version-match-tag.sh` script or the included pre-commit hook to validate that `.version` matches the latest Git tag. -- CI will still generate changelogs automatically using the release-drafter workflow and create GitHub Releases when tags are pushed. 
+- CI derives the release `Version` from the Git tag (e.g., `v1.2.3`) and embeds this value into the + backend binary via Go ldflags; frontend reads the version from the backend's API. This avoids + automatic commits to `main`. +- The `.version` file is optional. If present, use the `scripts/check-version-match-tag.sh` script + or the included pre-commit hook to validate that `.version` matches the latest Git tag. +- CI will still generate changelogs automatically using the release-drafter workflow and create + GitHub Releases when tags are pushed. diff --git a/backend/internal/migrations/README.md b/backend/internal/migrations/README.md index 69f6fc92..a7cb9175 100644 --- a/backend/internal/migrations/README.md +++ b/backend/internal/migrations/README.md @@ -16,6 +16,7 @@ Charon uses GORM's AutoMigrate feature for database schema management. Migration **Purpose**: Added encryption key rotation support for DNS provider credentials. **Changes**: + - Added `KeyVersion` field to `DNSProvider` model - Type: `int` - GORM tags: `gorm:"default:1;index"` @@ -23,28 +24,33 @@ Charon uses GORM's AutoMigrate feature for database schema management. 
Migration - Purpose: Tracks which encryption key version was used for credentials **Backward Compatibility**: + - Existing records will automatically get `key_version = 1` (GORM default) - No data migration required - The field is indexed for efficient queries during key rotation operations - Compatible with both basic encryption and rotation service **Migration Execution**: + ```go // Automatically handled by GORM AutoMigrate in routes.go: db.AutoMigrate(&models.DNSProvider{}) ``` **Related Files**: + - `backend/internal/models/dns_provider.go` - Model definition - `backend/internal/crypto/rotation_service.go` - Key rotation logic - `backend/internal/services/dns_provider_service.go` - Service implementation **Testing**: + - All existing tests pass with the new field - Test database initialization updated to use shared cache mode - No breaking changes to existing functionality **Security Notes**: + - The `KeyVersion` field is essential for secure key rotation - It allows re-encrypting credentials with new keys while maintaining access to old data - The rotation service can decrypt using any registered key version @@ -57,6 +63,7 @@ db.AutoMigrate(&models.DNSProvider{}) ### Adding New Fields 1. **Always include GORM tags**: + ```go FieldName string `json:"field_name" gorm:"default:value;index"` ``` @@ -82,17 +89,21 @@ db.AutoMigrate(&models.DNSProvider{}) **Problem**: Tests fail with "no such table: table_name" errors **Solutions**: + 1. Ensure AutoMigrate is called in test setup: + ```go db.AutoMigrate(&models.YourModel{}) ``` 2. For parallel tests, use shared cache mode: + ```go db, _ := gorm.Open(sqlite.Open(":memory:?cache=shared&mode=memory&_mutex=full"), &gorm.Config{}) ``` 3. 
Verify table exists after migration: + ```go if !db.Migrator().HasTable(&models.YourModel{}) { t.Fatal("failed to create table") @@ -104,6 +115,7 @@ db.AutoMigrate(&models.DNSProvider{}) **Problem**: Foreign key constraints fail during migration **Solution**: Migrate parent tables before child tables: + ```go db.AutoMigrate( &models.Parent{}, @@ -116,6 +128,7 @@ db.AutoMigrate( **Problem**: Tests interfere with each other's database access **Solution**: Configure connection pooling for SQLite: + ```go sqlDB, _ := db.DB() sqlDB.SetMaxOpenConns(1) diff --git a/docs/AGENT_SKILLS_MIGRATION.md b/docs/AGENT_SKILLS_MIGRATION.md index 36394a41..b7721fb2 100644 --- a/docs/AGENT_SKILLS_MIGRATION.md +++ b/docs/AGENT_SKILLS_MIGRATION.md @@ -12,6 +12,7 @@ Charon has migrated from legacy shell scripts in `/scripts` to a standardized [Agent Skills](https://agentskills.io) format stored in `.github/skills/`. This migration provides AI-discoverable, self-documenting tasks that work seamlessly with GitHub Copilot and other AI assistants. **Key Benefits:** + - ✅ **AI Discoverability**: Skills are automatically discovered by GitHub Copilot - ✅ **Self-Documenting**: Each skill includes complete usage documentation - ✅ **Standardized Format**: Follows agentskills.io specification @@ -32,6 +33,7 @@ scripts/trivy-scan.sh ``` **Problems with legacy scripts:** + - ❌ No standardized metadata - ❌ Not AI-discoverable - ❌ Inconsistent documentation @@ -48,6 +50,7 @@ scripts/trivy-scan.sh ``` **Benefits of Agent Skills:** + - ✅ Standardized YAML metadata (name, version, tags, requirements) - ✅ AI-discoverable by GitHub Copilot and other tools - ✅ Comprehensive documentation in each SKILL.md file @@ -72,6 +75,7 @@ scripts/trivy-scan.sh ### Scripts Migrated: 19 of 24 **Migrated Scripts:** + 1. `go-test-coverage.sh` → `test-backend-coverage` 2. `frontend-test-coverage.sh` → `test-frontend-coverage` 3. `integration-test.sh` → `integration-test-all` @@ -91,6 +95,7 @@ scripts/trivy-scan.sh 17. 
Docker cleanup → `docker-prune` **Scripts NOT Migrated (by design):** + - `debug_db.py` - Interactive debugging tool - `debug_rate_limit.sh` - Interactive debugging tool - `gopls_collect.sh` - IDE-specific tooling @@ -142,6 +147,7 @@ scripts/trivy-scan.sh ### Flat Structure Rationale We chose a **flat directory structure** (no subcategories) for maximum AI discoverability: + - ✅ Simpler skill discovery (no directory traversal) - ✅ Easier reference in tasks and workflows - ✅ Category implicit in naming (`test-*`, `integration-*`, etc.) @@ -179,6 +185,7 @@ All tasks in `.vscode/tasks.json` now use the skill runner. ### GitHub Copilot Ask GitHub Copilot naturally: + - "Run backend tests with coverage" - "Start the development environment" - "Run security scans on the project" @@ -271,15 +278,18 @@ Brief description ``` ## Examples + Practical examples with explanations ## Error Handling + Common errors and solutions --- **Last Updated**: 2025-12-20 **Maintained by**: Charon Project + ``` ### Metadata Fields @@ -427,6 +437,7 @@ Error: Skill execution script is not executable: .github/skills/test-backend-cov ``` **Solution**: Make the script executable: + ```bash chmod +x .github/skills/test-backend-coverage-scripts/run.sh ``` @@ -438,6 +449,7 @@ chmod +x .github/skills/test-backend-coverage-scripts/run.sh ``` **Solution**: This is informational. The script still works, but you should migrate to skills: + ```bash # Instead of: scripts/go-test-coverage.sh diff --git a/docs/SUPPLY_CHAIN_SECURITY_FIXES.md b/docs/SUPPLY_CHAIN_SECURITY_FIXES.md index 1427c968..92cd697b 100644 --- a/docs/SUPPLY_CHAIN_SECURITY_FIXES.md +++ b/docs/SUPPLY_CHAIN_SECURITY_FIXES.md @@ -11,16 +11,19 @@ All critical and high-priority security issues in the supply chain security impl ## Critical Fixes (4/4 Complete) ### 1. 
✅ Fixed Semantic SBOM Diff + **File:** `.github/skills/security-verify-sbom-scripts/run.sh` **Lines:** 132-180 **Issue:** SBOM comparison only checked package names, missing version changes **Fix:** + - Changed from comparing package names to `name@version` tuples - Added structured comparison using `jq -r '.packages[] | "\(.name)@\(.versionInfo // .version // \"unknown\")"` - Implemented version change detection for existing packages - Shows version transitions: `pkg1: 1.0.0 → 1.1.0` **Testing:** + ```bash ✅ PASS: Correctly detects added packages ✅ PASS: Correctly detects removed packages @@ -29,25 +32,30 @@ All critical and high-priority security issues in the supply chain security impl ``` ### 2. ✅ Fixed Docker Validation in Cosign Script + **File:** `.github/skills/security-sign-cosign-scripts/run.sh` **Line:** 95 **Issue:** Called undefined `validate_docker_environment` function **Fix:** + - Replaced with direct Docker check using `command -v docker` - Added Docker daemon running check with `docker info` - Provides clear error messages for missing Docker or stopped daemon **Testing:** + ```bash ✅ Syntax validation passed ✅ Error handling logic verified ``` ### 3. ✅ Fixed Cosign Checksum Verification + **File:** `.github/skills/security-sign-cosign-scripts/run.sh` **Line:** 101 **Issue:** Placeholder checksum instead of actual Cosign v2.4.1 binary hash **Fix:** + - Added actual SHA256 checksum for Cosign v2.4.1 Linux binary - Included verification command in error message: `echo 'CHECKSUM...' | sha256sum -c` - Enhanced installation instructions with checksum verification step @@ -55,16 +63,19 @@ All critical and high-priority security issues in the supply chain security impl **Security Impact:** Binary integrity verification now functional ### 4. 
✅ Fixed Docker Image Detection Regex + **File:** `.github/skills/security-slsa-provenance-scripts/run.sh` **Line:** 169 **Issue:** Regex caused false positives with file paths containing colons **Fix:** + - Simplified detection logic with multiple negative checks - Excludes: `./file`, `/path/to/file`, `http://url` - Includes: `ghcr.io/user/repo:tag`, `charon:local`, `registry.io:5000/app:v1` - Added file existence check first: `[[ ! -f "${TARGET}" ]]` **Testing:** + ```bash Testing Docker image detection regex (v3 - simplified)... @@ -87,16 +98,19 @@ Results: 11 passed, 0 failed ## High Priority Fixes (4/4 Complete) ### 5. ✅ Added SBOM Schema Validation + **File:** `.github/skills/security-verify-sbom-scripts/run.sh` **Lines:** 94-116 **Issue:** No validation of SBOM structure before processing **Fix:** + - Validates SPDX format with `jq -e '.spdxVersion'` - Checks for required fields: `packages`, `name`, `documentNamespace` - Logs SPDX version on success - Fails fast with clear error messages if schema is invalid **Testing:** + ```bash ✅ spdxVersion field present ✅ packages array present @@ -105,10 +119,12 @@ Results: 11 passed, 0 failed ``` ### 6. ✅ Fixed Workflow Continue-on-Error + **File:** `.github/workflows/supply-chain-verify.yml` **Lines:** 56, 75, 117, 147 **Issue:** Critical steps marked with `continue-on-error: true` **Fix:** + - Removed `continue-on-error` from "Verify SBOM Completeness" - Removed `continue-on-error` from "Scan for Vulnerabilities" - Removed `continue-on-error` from "Verify SLSA Provenance" @@ -118,32 +134,38 @@ Results: 11 passed, 0 failed **Impact:** Critical failures now properly block the workflow ### 7. 
✅ Made VS Code Task Dynamic + **File:** `.vscode/tasks.json` **Lines:** 376-377 **Issue:** Hardcoded `charon:local` image name **Fix:** + - Replaced hardcoded image with input variable: `${input:dockerImage}` - Added `inputs` section with `dockerImage` prompt - Default value: `charon:local` - Allows users to specify any image at runtime **Usage:** + ```bash # Task now prompts: "Docker image name or tag to verify" # User can input: charon:local, ghcr.io/user/charon:v1.0.0, etc. ``` ### 8. ✅ Fixed Variance Calculation + **File:** `.github/skills/security-verify-sbom-scripts/run.sh` **Line:** 119 **Issue:** Integer-only bash arithmetic caused overflow and inaccurate percentages **Fix:** + - Replaced bash integer math with `awk` for float arithmetic - Formula: `awk -v delta="${DELTA}" -v baseline="${BASELINE_COUNT}" 'BEGIN {printf "%.2f", (delta / baseline) * 100}'` - Updated threshold comparison to handle float values with `awk` - Results now show accurate percentages like `0.00%`, `5.25%`, etc. 
**Testing:** + ```bash Test 5: Testing variance calculation Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% @@ -153,6 +175,7 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% ## Validation Results ### Script Syntax Validation + ```bash ✅ SBOM script syntax valid ✅ Cosign script syntax valid @@ -160,6 +183,7 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% ``` ### Functional Testing + - ✅ SBOM semantic diff correctly detects version changes - ✅ Docker validation works with proper error messages - ✅ Image detection regex avoids all false positives @@ -168,6 +192,7 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% - ✅ VS Code task accepts dynamic input ### Workflow Integration + - ✅ Critical steps no longer marked as continue-on-error - ✅ Optional steps (artifact signature verification) still have continue-on-error - ✅ All syntax checks passed @@ -195,6 +220,7 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% ## Security Impact ### Before Fixes + - ❌ Version changes in packages went undetected - ❌ Invalid SBOMs could be processed silently - ❌ Docker validation failures were unclear @@ -203,6 +229,7 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% - ❌ Cosign binary integrity couldn't be verified ### After Fixes + - ✅ All package changes (add/remove/version) are detected - ✅ Invalid SBOMs fail fast with clear messages - ✅ Docker validation provides actionable error messages @@ -213,12 +240,14 @@ Baseline: 3, Current: 3, Delta: 0, Variance: 0.00% ## Next Steps ### Recommended + 1. Test the fixes in a full CI/CD pipeline run 2. Update documentation to reflect new SBOM diff capabilities 3. Consider adding version change threshold alerts 4. Monitor Rekor availability for keyless signing ### Optional Enhancements + 1. Add JSON schema validation for SBOM (beyond basic field checks) 2. Implement SBOM diff HTML report generation 3. 
Add metrics collection for variance trends diff --git a/docs/SUPPLY_CHAIN_VULNERABILITY_GUIDE.md b/docs/SUPPLY_CHAIN_VULNERABILITY_GUIDE.md index 578e00a7..e46dd6e4 100644 --- a/docs/SUPPLY_CHAIN_VULNERABILITY_GUIDE.md +++ b/docs/SUPPLY_CHAIN_VULNERABILITY_GUIDE.md @@ -20,6 +20,7 @@ At the top of the comment, you'll see a summary table: ``` **Priority:** + - 🔴 **Critical**: Fix immediately before merging - 🟠 **High**: Fix before next release - 🟡 **Medium**: Schedule for upcoming sprint @@ -42,6 +43,7 @@ Expand the collapsible sections to see specific vulnerabilities: #### Reading "No fix available" This means: + - The vulnerability is acknowledged but unpatched - Consider: - Using an alternative package @@ -58,6 +60,7 @@ This means: - Note if it says "No fix available" 2. **Update dependencies:** + ```bash # For Go modules go get package-name@v1.25.5 @@ -71,12 +74,14 @@ This means: ``` 3. **Test locally:** + ```bash make test-all docker build -t charon:local . ``` 4. **Push updates:** + ```bash git add go.mod go.sum # or package.json, Dockerfile, etc. git commit -m "fix: update package-name to v1.25.5 (CVE-2025-12345)" @@ -141,6 +146,7 @@ The Software Bill of Materials (SBOM) could not be validated. ``` **Action:** + 1. Click the workflow run link 2. Check logs for error details 3. Fix the underlying issue (usually build-related) @@ -159,6 +165,7 @@ For detailed analysis: 5. 
Extract and open `vuln-scan.json` **Use cases:** + - Import to security dashboards - Generate compliance reports - Track trends over time @@ -234,11 +241,13 @@ jq -r '.matches[] | [.vulnerability.id, .artifact.name, .artifact.version, .vuln ### Comment Not Appearing **Possible causes:** + - Docker build workflow hasn't completed - PR is from a fork (security restriction) - Workflow permissions issue **Solution:** + ```bash # Manually trigger workflow gh workflow run supply-chain-verify.yml @@ -249,6 +258,7 @@ gh workflow run supply-chain-verify.yml **Symptom:** Known-fixed vulnerability still shown **Solution:** + ```bash # Clear Grype cache grype db delete @@ -262,8 +272,8 @@ grype db update **If you believe a vulnerability is incorrectly reported:** 1. Verify package version: `go list -m all | grep package-name` -2. Check Grype database: https://github.com/anchore/grype-db -3. Report issue: https://github.com/anchore/grype/issues +2. Check Grype database: <https://github.com/anchore/grype-db> +3. Report issue: <https://github.com/anchore/grype/issues> 4. Document in PR why accepted (with evidence) ### Too Many Vulnerabilities @@ -272,13 +282,17 @@ grype db update 1. Start with Critical only 2. Update base images first (biggest impact): + ```dockerfile FROM alpine:3.19 # Update to latest patch version ``` + 3. Batch dependency updates: + ```bash go get -u ./... # Update all Go dependencies ``` + 4.
Consider using a vulnerability triage tool ## Integration with Other Tools @@ -286,6 +300,7 @@ grype db update ### GitHub Security Tab Vulnerabilities also appear in: + - **Security** → **Dependabot alerts** - **Security** → **Code scanning** @@ -324,21 +339,22 @@ Add to `.pre-commit-config.yaml`: ### Resources -- **Grype Documentation**: https://github.com/anchore/grype -- **CVE Database**: https://cve.mitre.org/ -- **NVD**: https://nvd.nist.gov/ -- **Go Security**: https://go.dev/security/ -- **Alpine Security**: https://alpinelinux.org/security/ +- **Grype Documentation**: <https://github.com/anchore/grype> +- **CVE Database**: <https://cve.mitre.org/> +- **NVD**: <https://nvd.nist.gov/> +- **Go Security**: <https://go.dev/security/> +- **Alpine Security**: <https://alpinelinux.org/security/> ### Support Channels - **Project Issues**: Create issue on this repository -- **Security Team**: security@yourcompany.com -- **Grype Support**: https://github.com/anchore/grype/discussions +- **Security Team**: <security@yourcompany.com> +- **Grype Support**: <https://github.com/anchore/grype/discussions> ### Escalation For urgent security issues: + 1. Do not merge PR 2. Contact security team immediately 3. Create private security advisory diff --git a/docs/api.md b/docs/api.md index a38cbece..b9245921 100644 --- a/docs/api.md +++ b/docs/api.md @@ -143,6 +143,7 @@ Webhook URLs configured in security settings are validated to prevent Server-Sid - Link-local addresses **Error Response**: + ```json { "error": "Invalid webhook URL: URL resolves to a private IP address (blocked for security)" @@ -150,6 +151,7 @@ Webhook URLs configured in security settings are validated to prevent Server-Sid ``` **Example Valid URL**: + ```json { "webhook_url": "https://webhook.example.com/receive" @@ -1312,6 +1314,7 @@ Webhook URLs are validated to prevent SSRF attacks.
Blocked destinations: - Link-local addresses **Error Response**: + ```json { "error": "Invalid webhook URL: URL resolves to a private IP address (blocked for security)" diff --git a/docs/api/DNS_DETECTION_API.md b/docs/api/DNS_DETECTION_API.md index aedec09c..847c62a4 100644 --- a/docs/api/DNS_DETECTION_API.md +++ b/docs/api/DNS_DETECTION_API.md @@ -23,6 +23,7 @@ Analyzes a domain's nameservers and identifies the DNS provider. **Endpoint:** `POST /api/v1/dns-providers/detect` **Request Body:** + ```json { "domain": "example.com" @@ -30,6 +31,7 @@ Analyzes a domain's nameservers and identifies the DNS provider. ``` **Response (Success - Provider Detected):** + ```json { "domain": "example.com", @@ -58,6 +60,7 @@ Analyzes a domain's nameservers and identifies the DNS provider. ``` **Response (Provider Not Detected):** + ```json { "domain": "custom-provider.com", @@ -71,6 +74,7 @@ Analyzes a domain's nameservers and identifies the DNS provider. ``` **Response (DNS Lookup Error):** + ```json { "domain": "nonexistent.tld", @@ -82,6 +86,7 @@ Analyzes a domain's nameservers and identifies the DNS provider. ``` **Confidence Levels:** + - `high`: ≥80% of nameservers matched known patterns - `medium`: 50-79% matched - `low`: 1-49% matched @@ -96,6 +101,7 @@ Returns the list of all built-in nameserver patterns used for detection. **Endpoint:** `GET /api/v1/dns-providers/detection-patterns` **Response:** + ```json { "patterns": [ @@ -288,6 +294,7 @@ The wildcard prefix (`*.`) is automatically removed before DNS lookup, so the re ## Caching Detection results are cached for **1 hour** to: + - Reduce DNS lookup overhead - Improve response times - Minimize external DNS queries @@ -295,6 +302,7 @@ Detection results are cached for **1 hour** to: Failed lookups (DNS errors) are cached for **5 minutes** only. 
**Cache Characteristics:** + - Cache hits: <1ms response time - Cache misses: 100-200ms (typical DNS lookup) - Thread-safe implementation @@ -307,6 +315,7 @@ Failed lookups (DNS errors) are cached for **5 minutes** only. ### Client Errors (4xx) **400 Bad Request:** + ```json { "error": "domain is required" @@ -314,6 +323,7 @@ Failed lookups (DNS errors) are cached for **5 minutes** only. ``` **401 Unauthorized:** + ```json { "error": "invalid or missing token" @@ -323,6 +333,7 @@ Failed lookups (DNS errors) are cached for **5 minutes** only. ### Server Errors (5xx) **500 Internal Server Error:** + ```json { "error": "Failed to detect DNS provider" @@ -334,6 +345,7 @@ Failed lookups (DNS errors) are cached for **5 minutes** only. ## Rate Limiting The API uses built-in rate limiting through: + - **DNS Lookup Timeout:** 10 seconds maximum per request - **Caching:** Reduces repeated lookups for same domain - **Authentication:** Required for all endpoints @@ -411,6 +423,7 @@ Always allow users to manually override auto-detection: If a provider isn't detected but should be: 1. **Check Nameservers Manually:** + ```bash dig NS example.com +short # or @@ -426,6 +439,7 @@ If a provider isn't detected but should be: ### DNS Lookup Failures Common causes: + - Domain doesn't exist - Nameserver temporarily unavailable - Firewall blocking DNS queries @@ -460,6 +474,7 @@ Planned improvements (not yet implemented): ## Support For issues or questions: + - Check logs for detailed error messages - Verify authentication tokens are valid - Ensure domains are properly formatted diff --git a/docs/crowdsec-auto-start-quickref.md b/docs/crowdsec-auto-start-quickref.md index d12ad496..7c64ffee 100644 --- a/docs/crowdsec-auto-start-quickref.md +++ b/docs/crowdsec-auto-start-quickref.md @@ -47,18 +47,22 @@ docker exec charon cscli lapi status ## ⚠️ Troubleshooting (3 Steps) ### 1. Check Logs + ```bash docker logs charon 2>&1 | grep "CrowdSec reconciliation" ``` ### 2. 
Check Mode + ```bash docker exec charon sqlite3 /app/data/charon.db \ "SELECT crowdsec_mode FROM security_configs LIMIT 1;" ``` + **Expected:** `local` ### 3. Manual Start + ```bash curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ``` diff --git a/docs/development/plugin-development.md b/docs/development/plugin-development.md index a2de26e3..2033e561 100644 --- a/docs/development/plugin-development.md +++ b/docs/development/plugin-development.md @@ -429,17 +429,20 @@ CGO_ENABLED=1 go build -buildmode=plugin -o myprovider.so main.go ### Build Requirements 1. **CGO must be enabled:** + ```bash export CGO_ENABLED=1 ``` 2. **Go version must match Charon:** + ```bash go version # Must match Charon's build Go version ``` 3. **Architecture must match:** + ```bash # For cross-compilation GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -buildmode=plugin @@ -455,23 +458,23 @@ OUTPUT = $(PLUGIN_NAME).so INSTALL_DIR = /etc/charon/plugins build: - CGO_ENABLED=1 go build -buildmode=plugin -o $(OUTPUT) main.go + CGO_ENABLED=1 go build -buildmode=plugin -o $(OUTPUT) main.go clean: - rm -f $(OUTPUT) + rm -f $(OUTPUT) install: build - install -m 755 $(OUTPUT) $(INSTALL_DIR)/ + install -m 755 $(OUTPUT) $(INSTALL_DIR)/ test: - go test -v ./... + go test -v ./... lint: - golangci-lint run + golangci-lint run signature: - @echo "SHA-256 Signature:" - @sha256sum $(OUTPUT) + @echo "SHA-256 Signature:" + @sha256sum $(OUTPUT) ``` ### Build Script @@ -772,6 +775,7 @@ var Plugin dnsprovider.ProviderPlugin = &MyProvider{} ### Distribution 1. **GitHub Releases:** + ```bash # Tag release git tag -a v1.0.0 -m "Release v1.0.0" @@ -784,6 +788,7 @@ var Plugin dnsprovider.ProviderPlugin = &MyProvider{} ``` 2. 
**Signature File:** + ```bash sha256sum *.so > SHA256SUMS gpg --sign SHA256SUMS @@ -811,9 +816,9 @@ var Plugin dnsprovider.ProviderPlugin = &MyProvider{} ### Community -- **GitHub Discussions:** https://github.com/Wikid82/charon/discussions -- **Plugin Registry:** https://github.com/Wikid82/charon-plugins -- **Issue Tracker:** https://github.com/Wikid82/charon/issues +- **GitHub Discussions:** <https://github.com/Wikid82/charon/discussions> +- **Plugin Registry:** <https://github.com/Wikid82/charon-plugins> +- **Issue Tracker:** <https://github.com/Wikid82/charon/issues> ## See Also diff --git a/docs/features.md b/docs/features.md index 1b6276b8..97f0020d 100644 --- a/docs/features.md +++ b/docs/features.md @@ -339,6 +339,7 @@ Your backend application must be configured to trust proxy headers. Most framewo 6. Click **"Apply Changes"** **Bulk Apply also supports:** + - Applying or removing security header profiles across multiple hosts - Enabling/disabling Forward Auth, WAF, or Access Lists in bulk - Updating SSL certificate assignments for multiple hosts at once @@ -761,21 +762,25 @@ Your uptime history will be preserved.
### Key Features **Failure Debouncing**: Requires **2 consecutive failures** before marking a host as "down" + - Prevents false alarms from transient network hiccups - Container restarts don't trigger unnecessary alerts - Single TCP timeouts are logged but don't change status **Automatic Retries**: Up to 2 retry attempts per check with 2-second delay + - Handles slow networks and warm-up periods - 10-second timeout per attempt (increased from 5s) - Total check time: up to 22 seconds for marginal hosts **Concurrent Processing**: All host checks run in parallel + - Fast overall check times even with many hosts - No single slow host blocks others - Synchronized completion prevents race conditions **Status Consistency**: Checks complete before UI reads database + - Eliminates stale status during page refreshes - No race conditions between checks and API calls - Reliable status display across rapid refreshes @@ -789,6 +794,7 @@ Charon uses a **two-level check system** with enhanced reliability: **What it does:** Tests if the backend host/container is reachable via TCP connection with automatic retry on failure. **How it works:** + - Groups monitors by their backend IP address (e.g., `172.20.0.11`) - Attempts TCP connection to the actual backend port (e.g., port `5690` for Wizarr) - **First failure**: Increments failure counter, status unchanged, waits 2s and retries @@ -798,6 +804,7 @@ Charon uses a **two-level check system** with enhanced reliability: - If successful → Proceeds to Level 2 checks **Why it matters:** + - Avoids redundant HTTP checks when an entire backend container is stopped or unreachable - Prevents false "down" alerts from single network hiccups - Handles slow container startups gracefully @@ -810,6 +817,7 @@ This ensures correct connectivity checks for services on non-standard ports. **What it does:** Verifies the specific service is responding correctly via HTTP request. 
**How it works:** + - Only runs if Level 1 passes - Performs HTTP GET to the public URL (e.g., `https://wizarr.hatfieldhosted.com`) - Accepts these as "up": 2xx (success), 3xx (redirect), 401 (auth required), 403 (forbidden) @@ -823,6 +831,7 @@ This ensures correct connectivity checks for services on non-standard ports. ### When Things Go Wrong **Scenario 1: Backend container stopped** + - Level 1: TCP connection fails (attempt 1) ❌ - Level 1: TCP connection fails (attempt 2) ❌ - Failure count: 2 → Host marked "down" @@ -830,17 +839,20 @@ This ensures correct connectivity checks for services on non-standard ports. - Status: "down" with message "Host unreachable" **Scenario 2: Transient network issue** + - Level 1: TCP connection fails (attempt 1) ❌ - Failure count: 1 (threshold not met) - Status: Remains "up" - Next check: Success ✅ → Failure count reset to 0 **Scenario 3: Service crashed but container running** + - Level 1: TCP connection succeeds ✅ - Level 2: HTTP request fails or returns 500 ❌ - Status: "down" with specific HTTP error **Scenario 4: Everything working** + - Level 1: TCP connection succeeds ✅ - Level 2: HTTP request succeeds ✅ - Status: "up" with latency measurement @@ -851,11 +863,13 @@ This ensures correct connectivity checks for services on non-standard ports. **Issue**: Host shows "down" but service is accessible **Common causes**: + 1. **Timeout too short**: Increase from 10s if network is slow 2. **Container warmup**: Service takes >10s to respond during startup 3. **Firewall blocking**: Ensure Charon container can reach proxy host ports **Check logs**: + ```bash docker logs charon 2>&1 | grep "Host TCP check completed" docker logs charon 2>&1 | grep "Retrying TCP check" @@ -869,6 +883,7 @@ docker logs charon 2>&1 | grep "failure_count" **Per-Host**: Edit any proxy host and toggle "Enable Uptime Monitoring" **Bulk Operations**: + 1. Select multiple hosts (checkboxes) 2. Click "Bulk Apply" 3. 
Toggle "Uptime Monitoring" section @@ -1040,6 +1055,7 @@ Uses WebSocket technology to stream logs with zero delay. **Template Styles:** **Minimal Template** — Clean, simple text notifications: + ```json { "content": "{{.Title}}: {{.Message}}" @@ -1047,6 +1063,7 @@ Uses WebSocket technology to stream logs with zero delay. ``` **Detailed Template** — Rich formatting with all event details: + ```json { "embeds": [{ @@ -1063,6 +1080,7 @@ Uses WebSocket technology to stream logs with zero delay. ``` **Custom Template** — Design your own structure with template variables: + - `{{.Title}}` — Event title (e.g., "SSL Certificate Renewed") - `{{.Message}}` — Event details - `{{.EventType}}` — Event classification (ssl_renewal, uptime_down, waf_block) @@ -1104,6 +1122,7 @@ Uses WebSocket technology to stream logs with zero delay. **Minimum Log Level** (Legacy Setting): For backward compatibility, you can still configure minimum log level for security event notifications: + - Only notify for warnings and errors (ignore info/debug) - Applies to Cerberus security events only - Accessible via Cerberus Dashboard → "Notification Settings" diff --git a/docs/features/audit-logging.md b/docs/features/audit-logging.md index 768b5020..ea05ab94 100644 --- a/docs/features/audit-logging.md +++ b/docs/features/audit-logging.md @@ -150,6 +150,7 @@ The details modal displays: The details field contains a JSON object with event-specific information: **Create Event Example:** + ```json { "name": "Cloudflare Production", @@ -159,6 +160,7 @@ The details field contains a JSON object with event-specific information: ``` **Update Event Example:** + ```json { "changed_fields": ["credentials", "is_default"], @@ -172,6 +174,7 @@ The details field contains a JSON object with event-specific information: ``` **Test Event Example:** + ```json { "test_result": "success", @@ -180,6 +183,7 @@ The details field contains a JSON object with event-specific information: ``` **Decrypt Event Example:** + ```json { 
"purpose": "certificate_issuance", @@ -230,12 +234,14 @@ Export audit logs for external analysis, compliance reporting, or archival: ### Scenario 1: New DNS Provider Setup **Timeline:** + 1. User `admin@example.com` logs in from `192.168.1.100` 2. Navigates to DNS Providers page 3. Clicks "Add DNS Provider" 4. Fills in Cloudflare credentials and clicks Save **Audit Log Entries:** + ``` 2026-01-03 14:23:45 | user:5 | dns_provider_create | dns_provider | {"name":"Cloudflare Prod","type":"cloudflare","is_default":true} ``` @@ -243,10 +249,12 @@ Export audit logs for external analysis, compliance reporting, or archival: ### Scenario 2: Credential Testing **Timeline:** + 1. User tests existing provider credentials 2. API validation succeeds **Audit Log Entries:** + ``` 2026-01-03 14:25:12 | user:5 | credential_test | dns_provider | {"test_result":"success","response_time_ms":342} ``` @@ -254,12 +262,14 @@ Export audit logs for external analysis, compliance reporting, or archival: ### Scenario 3: Certificate Issuance **Timeline:** + 1. Caddy detects new host requires SSL certificate 2. Caddy decrypts DNS provider credentials 3. ACME DNS-01 challenge completes successfully 4. Certificate issued **Audit Log Entries:** + ``` 2026-01-03 14:30:00 | system | credential_decrypt | dns_provider | {"purpose":"certificate_issuance","success":true} 2026-01-03 14:30:45 | system | certificate_issued | certificate | {"domain":"app.example.com","provider":"cloudflare","result":"success"} @@ -268,10 +278,12 @@ Export audit logs for external analysis, compliance reporting, or archival: ### Scenario 4: Provider Update **Timeline:** + 1. User updates default provider setting 2. 
API saves changes **Audit Log Entries:** + ``` 2026-01-03 15:00:22 | user:5 | dns_provider_update | dns_provider | {"changed_fields":["is_default"],"old_values":{"is_default":false},"new_values":{"is_default":true}} ``` @@ -279,10 +291,12 @@ Export audit logs for external analysis, compliance reporting, or archival: ### Scenario 5: Provider Deletion **Timeline:** + 1. User deletes unused DNS provider 2. Credentials are securely wiped **Audit Log Entries:** + ``` 2026-01-03 16:45:33 | user:5 | dns_provider_delete | dns_provider | {"name":"Old Provider","type":"route53","had_credentials":true} ``` @@ -342,6 +356,7 @@ Audit logging is designed for minimal performance impact: - **Automatic Cleanup**: Old logs are periodically deleted to prevent database bloat **Typical Impact:** + - API request latency: +0.1ms (sending to channel) - Database writes: Batched in background, no user-facing impact - Storage: ~500 bytes per event, ~18 MB per year at 100 events/day @@ -371,11 +386,13 @@ If audit log pages load slowly: Retrieve audit logs with pagination and filtering. **Endpoint:** + ```http GET /api/v1/audit-logs ``` **Query Parameters:** + - `page` (int, default: 1): Page number - `limit` (int, default: 50, max: 100): Results per page - `actor` (string): Filter by actor (user ID or "system") @@ -386,12 +403,14 @@ GET /api/v1/audit-logs - `end_date` (RFC3339): End of date range **Example Request:** + ```bash curl -X GET "https://charon.example.com/api/v1/audit-logs?page=1&limit=50&event_category=dns_provider&start_date=2026-01-01T00:00:00Z" \ -H "Authorization: Bearer YOUR_TOKEN" ``` **Response:** + ```json { "audit_logs": [ @@ -423,20 +442,24 @@ curl -X GET "https://charon.example.com/api/v1/audit-logs?page=1&limit=50&event_ Retrieve complete details for a specific audit event.
**Endpoint:** + ```http GET /api/v1/audit-logs/:uuid ``` **Parameters:** + - `uuid` (string, required): Event UUID **Example Request:** + ```bash curl -X GET "https://charon.example.com/api/v1/audit-logs/550e8400-e29b-41d4-a716-446655440000" \ -H "Authorization: Bearer YOUR_TOKEN" ``` **Response:** + ```json { "id": 1, @@ -458,24 +481,29 @@ curl -X GET "https://charon.example.com/api/v1/audit-logs/550e8400-e29b-41d4-a71 Retrieve all audit events for a specific DNS provider. **Endpoint:** + ```http GET /api/v1/dns-providers/:id/audit-logs ``` **Parameters:** + - `id` (int, required): DNS provider ID **Query Parameters:** + - `page` (int, default: 1): Page number - `limit` (int, default: 50, max: 100): Results per page **Example Request:** + ```bash curl -X GET "https://charon.example.com/api/v1/dns-providers/3/audit-logs?page=1&limit=50" \ -H "Authorization: Bearer YOUR_TOKEN" ``` **Response:** + ```json { "audit_logs": [ @@ -543,11 +571,13 @@ Authorization: Bearer YOUR_API_TOKEN Configure how long audit logs are retained before automatic deletion: **Environment Variable:** + ```bash AUDIT_LOG_RETENTION_DAYS=90 # Default: 90 days ``` **Docker Compose:** + ```yaml services: charon: @@ -560,6 +590,7 @@ services: Configure the size of the audit log channel buffer (advanced): **Environment Variable:** + ```bash AUDIT_LOG_CHANNEL_SIZE=1000 # Default: 1000 events ``` diff --git a/docs/features/custom-plugins.md b/docs/features/custom-plugins.md index 6dc7bbf3..4099050d 100644 --- a/docs/features/custom-plugins.md +++ b/docs/features/custom-plugins.md @@ -354,9 +354,9 @@ sudo chmod -R o-w /etc/charon/plugins ### Getting Help -- **GitHub Discussions:** https://github.com/Wikid82/charon/discussions -- **Issue Tracker:** https://github.com/Wikid82/charon/issues -- **Documentation:** https://docs.charon.example.com +- **GitHub Discussions:** <https://github.com/Wikid82/charon/discussions> +- **Issue Tracker:** <https://github.com/Wikid82/charon/issues> +- **Documentation:** <https://docs.charon.example.com> ### Reporting Issues diff --git a/docs/features/dns-auto-detection.md
b/docs/features/dns-auto-detection.md index 33786fc8..cdee51fd 100644 --- a/docs/features/dns-auto-detection.md +++ b/docs/features/dns-auto-detection.md @@ -14,6 +14,7 @@ DNS Provider Auto-Detection is an intelligent feature that automatically identif ### When Detection Occurs Auto-detection runs automatically when you: + - Enter a wildcard domain (`*.example.com`) in the proxy host creation form - The domain requires DNS-01 challenge validation for Let's Encrypt SSL certificates @@ -55,6 +56,7 @@ When creating a new proxy host with a wildcard domain: 4. If a match is found, the provider is automatically selected **Visual Indicator**: A detection status badge appears next to the DNS Provider dropdown showing: + - ✓ Provider detected - ⚠ No provider detected - ℹ Multiple nameservers found @@ -166,6 +168,7 @@ The system recognizes the following DNS providers by their nameserver patterns: ### Provider-Specific Examples #### Cloudflare + ``` Nameservers: ns1.cloudflare.com @@ -175,6 +178,7 @@ Detected: cloudflare (High confidence) ``` #### AWS Route 53 + ``` Nameservers: ns-1234.awsdns-12.com @@ -184,6 +188,7 @@ Detected: route53 (High confidence) ``` #### Google Cloud DNS + ``` Nameservers: ns-cloud-a1.googledomains.com @@ -193,6 +198,7 @@ Detected: googleclouddns (High confidence) ``` #### DigitalOcean + ``` Nameservers: ns1.digitalocean.com @@ -240,6 +246,7 @@ For custom or internal nameservers: 4. 
Configure appropriate API credentials in the DNS Provider settings Example: + ``` Domain: *.corp.internal Nameservers: ns1.corp.internal, ns2.corp.internal @@ -255,11 +262,13 @@ Manual selection required: Select compatible provider or configure custom **Symptom**: Error message "Failed to detect DNS provider" or "Domain not found" **Causes**: + - Domain doesn't exist yet - Domain not propagated to public DNS - DNS resolution blocked by firewall **Solutions**: + - Verify domain exists and is registered - Wait for DNS propagation (up to 48 hours) - Check network connectivity and DNS resolution @@ -270,11 +279,13 @@ Manual selection required: Select compatible provider or configure custom **Symptom**: System detects incorrect provider type **Causes**: + - Domain using DNS proxy/forwarding service - Recent nameserver change not yet propagated - Multiple providers in nameserver list **Solutions**: + - Wait for DNS propagation (up to 24 hours) - Manually override provider selection - Verify nameservers at your domain registrar @@ -285,11 +296,13 @@ Manual selection required: Select compatible provider or configure custom **Symptom**: Detection shows multiple provider types **Causes**: + - Nameservers from different providers (unusual) - DNS migration in progress - Misconfigured nameservers **Solutions**: + - Check nameserver configuration at your registrar - Complete DNS migration to single provider - Manually select the primary/correct provider @@ -300,12 +313,14 @@ Manual selection required: Select compatible provider or configure custom **Symptom**: Provider detected but no matching provider configured in system **Example**: + ``` Detected Provider Type: cloudflare Error: No DNS provider of type 'cloudflare' is configured ``` **Solutions**: + 1. Navigate to **Settings** → **DNS Providers** 2. Click **Add DNS Provider** 3. 
Select the detected provider type (e.g., Cloudflare) @@ -326,6 +341,7 @@ Error: No DNS provider of type 'cloudflare' is configured **This is expected behavior**. Custom DNS servers don't match public provider patterns. **Solutions**: + 1. Manually select a provider that uses a compatible API 2. If using BIND, PowerDNS, or other custom DNS: - Configure acme.sh or certbot direct integration @@ -342,6 +358,7 @@ Error: No DNS provider of type 'cloudflare' is configured **Cause**: Results cached for 1 hour **Solutions**: + - Wait up to 1 hour for cache to expire - Use **Detect Provider** button for manual detection (bypasses cache) - DNS propagation may also take additional time (separate from caching) @@ -375,6 +392,7 @@ Authorization: Bearer YOUR_API_TOKEN ``` **Parameters**: + - `domain` (required): Full domain name including wildcard (e.g., `*.example.com`) #### Response: Success @@ -395,6 +413,7 @@ Authorization: Bearer YOUR_API_TOKEN ``` **Response Fields**: + - `status`: `"detected"` or `"not_detected"` - `provider_type`: Detected provider type (string) or `null` - `confidence`: `"high"`, `"medium"`, `"low"`, or `"none"` @@ -430,6 +449,7 @@ Authorization: Bearer YOUR_API_TOKEN ``` **HTTP Status Codes**: + - `200 OK`: Detection completed successfully - `400 Bad Request`: Invalid domain format - `401 Unauthorized`: Missing or invalid API token diff --git a/docs/features/dns-autodetection.md b/docs/features/dns-autodetection.md index e770152d..725ee62f 100644 --- a/docs/features/dns-autodetection.md +++ b/docs/features/dns-autodetection.md @@ -11,12 +11,14 @@ DNS Provider Auto-Detection is an intelligent system that automatically identifies which DNS provider manages your domain's nameservers. When configuring wildcard SSL certificates in Charon, you no longer need to manually select your DNS provider—Charon detects it for you in less than a second. 
**Who Benefits:** + - **Managed Service Providers (MSPs):** Managing multiple customer domains across different DNS providers - **System Administrators:** Setting up wildcard certificates for multiple domains - **DevOps Teams:** Automating certificate provisioning workflows - **Small Businesses:** Simplifying SSL certificate setup without technical expertise **Key Benefits:** + - ⚡ **Instant Detection:** Identifies your DNS provider in 100-200ms - 🎯 **High Accuracy:** Supports 10+ major DNS providers with confidence scoring - ⏱️ **Time Savings:** Reduces setup time from 5-10 minutes to under 30 seconds @@ -37,6 +39,7 @@ DNS auto-detection uses a simple but powerful process: 6. **Manual Override:** You can always override the auto-detected provider **Technical Details:** + - Uses standard DNS NS (nameserver) record lookups - Matches nameserver hostnames against built-in pattern database - Case-insensitive pattern matching for reliability @@ -108,6 +111,7 @@ Charon has built-in detection for these major DNS providers: 4. 
**Review Detection Result** **High Confidence Example:** + ``` ✓ Cloudflare detected (High confidence) @@ -119,6 +123,7 @@ Charon has built-in detection for these major DNS providers: ``` **Medium/Low Confidence Example:** + ``` ⚠ DigitalOcean detected (Medium confidence) @@ -132,6 +137,7 @@ Charon has built-in detection for these major DNS providers: ``` **No Detection Example:** + ``` ✗ DNS provider not detected @@ -154,6 +160,7 @@ Charon has built-in detection for these major DNS providers: - Click **Save** **Tips:** + - Detection works best with production domains already using their final nameservers - If detection fails, check that your domain's DNS is properly configured - Manual selection is always available as a fallback @@ -223,9 +230,11 @@ curl -X POST https://your-charon-instance/api/v1/dns-providers/detect \ ``` **Request Parameters:** + - `domain` (string, required): Domain to detect (with or without wildcard `*`) **Response Fields:** + - `domain` (string): The base domain that was checked - `detected` (boolean): Whether a provider was successfully identified - `provider_type` (string): Type identifier for the detected provider @@ -267,6 +276,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ ``` **Response Format:** + - `patterns` (object): Map of nameserver patterns to provider type identifiers - Pattern keys are substring matches (case-insensitive) - Provider type values match Charon's DNS provider types @@ -280,6 +290,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ **Scenario:** MSP managing 50+ customer domains across multiple DNS providers **Before Auto-Detection:** + - Manually research DNS provider for each customer domain - Look up nameservers using external tools (`dig`, `nslookup`) - Risk of selecting wrong provider → certificate issuance fails @@ -287,6 +298,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ - Total time for 50 domains: 4-8 hours 
**With Auto-Detection:** + - Enter customer's wildcard domain - Provider detected automatically in <200ms - One-click to use detected provider @@ -302,6 +314,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ **Scenario:** Service provider managing customers using different DNS providers **Customer Portfolio:** + - `*.customer1.com` → Cloudflare (High confidence) - `*.customer2.com` → Route53 (High confidence) - `*.customer3.com` → DigitalOcean (High confidence) @@ -309,6 +322,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ - `*.customer5.com` → Namecheap (Medium confidence - verify) **Benefits:** + - No need to remember which customer uses which provider - Automatic correct provider suggestion - Confidence levels flag domains needing verification @@ -321,6 +335,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ **Scenario:** Company with domains split across multiple DNS providers **Infrastructure:** + - Production domains (`*.prod.company.com`) → Cloudflare - Development domains (`*.dev.company.com`) → DigitalOcean - Legacy domains (`*.legacy.company.com`) → Namecheap @@ -329,6 +344,7 @@ curl https://your-charon-instance/api/v1/dns-providers/detection-patterns \ **Challenge:** Developers frequently set up new wildcard proxies and forget which DNS provider manages each environment. **Solution:** Auto-detection eliminates guesswork: + - Developers enter domain - Correct provider automatically detected - Zero configuration errors @@ -369,6 +385,7 @@ fi ``` **Benefits:** + - Fully automated provisioning - Self-documenting configuration - Confidence checks prevent misconfiguration @@ -407,6 +424,7 @@ fi **Solutions:** **Check Domain's Nameservers:** + ```bash # Linux/Mac dig NS example.com +short @@ -416,12 +434,14 @@ nslookup -type=NS example.com ``` Expected output: + ``` ns1.cloudflare.com. ns2.cloudflare.com. 
``` **Verify Nameserver Propagation:** + ```bash # Check multiple DNS servers dig @8.8.8.8 NS example.com +short @@ -429,16 +449,19 @@ dig @1.1.1.1 NS example.com +short ``` **Wait for DNS Propagation:** + - Initial DNS setup: Up to 48 hours - DNS changes: Up to 24 hours - Check again after propagation completes **Use Manual Provider Selection:** + - Click **Select Manually** button - Choose provider from dropdown - Detection is optional—manual selection always works **Check Network Connectivity:** + ```bash # Test DNS connectivity dig cloudflare.com +short @@ -470,6 +493,7 @@ dig cloudflare.com +short **Solutions:** **Verify Current Nameservers:** + ```bash dig NS example.com +short ``` @@ -477,16 +501,19 @@ dig NS example.com +short Compare with detected nameservers in Charon's result. **Clear Charon's Detection Cache:** + - Cache expires automatically after 1 hour - Wait 60 minutes and try detection again - Or restart Charon to clear in-memory cache **Check DNS Provider Account:** + - Log into your DNS provider's control panel - Verify the nameservers listed there - Compare with Charon's detection result **Use Manual Override:** + - If detection is consistently wrong - Click **Select Manually** - Choose correct provider @@ -499,6 +526,7 @@ Compare with detected nameservers in Charon's result. **Symptom:** "DigitalOcean detected (Medium confidence)" or "Low confidence" **What This Means:** + - Nameserver pattern match is partial or ambiguous - Provider type identified, but match isn't strong - Manual verification recommended before proceeding @@ -555,16 +583,19 @@ the typical pattern. Please verify this is correct. 
**Symptom:** Detection hangs or takes more than 5 seconds **Possible Causes:** + - DNS server not responding - Network latency or packet loss - Domain's authoritative DNS servers offline **Built-in Protections:** + - Detection timeout: 10 seconds maximum - After timeout, detection fails gracefully - Error message: "DNS lookup timeout" **Solutions:** + - Wait for timeout (max 10 seconds) - Check network connectivity - Verify domain's DNS is operational @@ -577,6 +608,7 @@ the typical pattern. Please verify this is correct. **Symptom:** Detection shows old provider after DNS migration **Explanation:** + - Successful detections cached for 1 hour - Improves performance for repeated requests - May show outdated results during cache window @@ -584,15 +616,18 @@ the typical pattern. Please verify this is correct. **Solutions:** **Wait for Cache Expiration:** + - Cache automatically expires after 1 hour - Try detection again after 60 minutes **Restart Charon:** + - Cache is in-memory (not persistent) - Restarting clears all cached detections - Only necessary if you need immediate refresh **Use Manual Selection:** + - Override cached detection - Select correct provider manually - Detection cache doesn't affect manual selection @@ -629,6 +664,7 @@ Content-Type: application/json | `domain` | string | Yes | Domain to detect (with or without `*.` wildcard) | **Valid Domain Formats:** + - `example.com` → base domain - `*.example.com` → wildcard (auto-stripped to base domain) - `subdomain.example.com` → uses `example.com` for detection @@ -669,6 +705,7 @@ Content-Type: application/json | `error` | string | Error message (only present if detection failed) | **Confidence Scoring:** + - **High (≥80%):** Most nameservers match pattern, strong confidence - **Medium (50-79%):** Some nameservers match, partial confidence - **Low (1-49%):** Few nameservers match, weak confidence @@ -677,6 +714,7 @@ Content-Type: application/json **Error Responses:** **400 Bad Request** - Invalid domain + 
```json { "error": "domain is required" @@ -684,6 +722,7 @@ Content-Type: application/json ``` **401 Unauthorized** - Missing or invalid token + ```json { "error": "Unauthorized" @@ -691,6 +730,7 @@ Content-Type: application/json ``` **500 Internal Server Error** - Detection failure + ```json { "domain": "example.com", @@ -702,12 +742,14 @@ Content-Type: application/json ``` **Status Codes:** + - `200 OK` - Detection completed (success or failure) - `400 Bad Request` - Invalid request parameters - `401 Unauthorized` - Authentication required or failed - `500 Internal Server Error` - Unexpected server error **Rate Limiting:** + - Detection results cached for 1 hour - Repeated requests for same domain return cached result - No explicit rate limit (DNS timeout provides natural throttling) @@ -827,11 +869,13 @@ Authorization: Bearer YOUR_TOKEN ``` **Response Format:** + - `patterns` (object): Map of nameserver patterns to provider types - **Keys:** Substring pattern to match in nameserver hostname (case-insensitive) - **Values:** Provider type identifier used in Charon **Pattern Matching:** + - Case-insensitive substring matching - If any nameserver contains pattern, it's a match - Multiple patterns can match the same provider (e.g., Google Cloud DNS) @@ -840,6 +884,7 @@ Authorization: Bearer YOUR_TOKEN **Error Responses:** **401 Unauthorized** - Missing or invalid token + ```json { "error": "Unauthorized" @@ -847,6 +892,7 @@ Authorization: Bearer YOUR_TOKEN ``` **Status Codes:** + - `200 OK` - Patterns returned successfully - `401 Unauthorized` - Authentication required or failed @@ -859,6 +905,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ``` **Use Cases:** + - Building custom detection tools - Debugging detection issues - Understanding which providers are supported @@ -871,6 +918,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### Detection Speed **Typical Performance:** + - **First Detection:** 
100-200ms (includes DNS lookup) - **Cached Detection:** <1ms (from in-memory cache) - **DNS Timeout:** 10 seconds maximum (prevents hanging) @@ -886,6 +934,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ | Network latency | Varies | Between Charon and DNS servers | **Performance Optimization:** + - Results cached for 1 hour - Reduces repeated DNS lookups - Cache hit rate typically 60-80%+ for active domains @@ -907,22 +956,26 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Cache Key:** Base domain (e.g., `example.com`) **Cache Invalidation:** + - Automatic expiration after TTL - No manual invalidation API - Restart Charon to clear all cached entries **Cache Hit Scenarios:** + - Same domain detected multiple times - Multiple wildcard proxies for same domain - Repeated API calls within 1-hour window **Cache Miss Scenarios:** + - First detection for a domain - Cache entry expired (>1 hour old) - Domain's DNS recently changed - Charon restarted **Performance Impact:** + - Cache hit: <1ms response time - Cache miss: 100-200ms response time (DNS lookup required) - Cache reduces DNS query load by ~80% @@ -941,6 +994,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ | DNS timeout | 10 seconds | Per-request maximum | **Recommendations for High-Volume Usage:** + - Deploy Charon with adequate memory (cache can grow) - Consider DNS server location/latency - Monitor cache hit rate for optimization @@ -953,12 +1007,14 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### Authentication & Authorization **Endpoint Security:** + - All detection endpoints require authentication - Bearer token must be provided in `Authorization` header - Same permission model as DNS provider management - Unauthorized requests return `401 Unauthorized` **Permission Requirements:** + - User must have access to DNS provider features - No special permissions required for detection - 
Detection doesn't expose sensitive credentials @@ -969,11 +1025,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### Data Privacy **What Charon Collects:** + - ✅ Domain name (from user input) - ✅ Nameserver hostnames (from DNS lookup) - ✅ Detection result (cached for 1 hour) **What Charon Does NOT Collect:** + - ❌ DNS credentials or API keys - ❌ Certificate private keys - ❌ User browsing history @@ -981,12 +1039,14 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ - ❌ Personal identifiable information (beyond domain ownership) **Data Storage:** + - Detection results cached in-memory only - No persistent storage of detection data - Cache cleared on restart - No logging of detected domains (unless debug logging enabled) **Third-Party Access:** + - No data sent to third-party services - DNS lookups go directly to configured DNS resolvers - No analytics or telemetry for detection feature @@ -996,6 +1056,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### DNS Query Security **Query Characteristics:** + - Standard DNS NS (nameserver) record lookups - Uses system DNS resolver by default - Respects DNS timeout (10 seconds) @@ -1003,12 +1064,14 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ - Read-only DNS operations **Security Measures:** + - DNS timeout prevents hanging on unresponsive servers - No user-controlled DNS servers (uses system config) - Input validation on domain names - Error handling for malformed responses **Network Security:** + - DNS queries over UDP/TCP port 53 - No TLS/HTTPS for DNS (standard DNS protocol) - Consider using DNS-over-HTTPS (DoH) in system resolver for privacy @@ -1029,6 +1092,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ | Credential exposure | Detection doesn't access credentials | None | **Security Best Practices:** + - Use trusted, secure DNS resolvers (e.g., 1.1.1.1, 8.8.8.8) - Enable 
DNSSEC validation if possible - Monitor detection error rates for anomalies @@ -1043,11 +1107,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** Currently supports 10 major DNS providers **Impact:** + - Custom DNS providers won't be auto-detected - Niche/regional providers not in pattern database - Self-hosted DNS servers not recognized **Workaround:** + - Use manual provider selection - Request provider pattern addition via GitHub issue - Contribute pattern via pull request @@ -1061,15 +1127,18 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** Some hosting providers use shared nameserver pools **Impact:** + - Nameserver patterns may be ambiguous - Detection may suggest incorrect provider - Confidence scoring may be lower **Example:** + - Some resellers use white-labeled nameservers - Shared hosting platforms with generic nameserver names **Workaround:** + - Verify detection result against your account - Use manual selection if detection is incorrect - Report ambiguous patterns for improvement @@ -1081,11 +1150,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** DNS changes take up to 48 hours to propagate globally **Impact:** + - Detection may show old/outdated provider - Recent migrations not immediately reflected - Newly registered domains may fail detection **Workaround:** + - Wait for DNS propagation to complete - Check nameservers with `dig` or `nslookup` - Use manual selection during migration period @@ -1098,11 +1169,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** Requires DNS connectivity to function **Impact:** + - Offline/airgapped environments cannot use auto-detection - Network issues cause detection failures - DNS server outages prevent detection **Workaround:** + - Use manual provider selection in offline environments - Ensure DNS connectivity for auto-detection - Detection 
failure doesn't block manual configuration @@ -1114,11 +1187,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** Results cached for 1 hour **Impact:** + - Recent DNS changes not immediately reflected - Cache may show outdated information - No manual cache invalidation **Workaround:** + - Wait 60 minutes for cache expiration - Restart Charon to clear cache immediately - Use manual selection to override cached result @@ -1132,11 +1207,13 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ **Limitation:** Only one domain detected at a time **Impact:** + - Cannot detect multiple domains in one request - API requires separate call per domain - Bulk operations require iteration **Workaround:** + - Implement client-side batching - Leverage cache for repeated domains - Use async/parallel API calls @@ -1150,17 +1227,20 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### 1. Verify Detection Results **Always Review Before Proceeding:** + - ✅ Check detected provider name matches your expectation - ✅ Review nameserver list for accuracy - ✅ Verify confidence level is acceptable - ✅ Compare with your DNS account if uncertain **Why:** + - Detection is not 100% accurate - DNS configuration can be complex - Wrong provider = certificate issuance failure **Example Review Checklist:** + ``` ✓ Provider name: "Cloudflare" ← Correct? ✓ Nameservers: ns1.cloudflare.com ← Recognized? @@ -1183,6 +1263,7 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ | Development | Low/Any | Manual verify | **Why:** + - Production certificate failures are costly - High confidence = strong, unambiguous match - Medium/Low = requires human verification @@ -1192,12 +1273,14 @@ curl https://charon.example.com/api/v1/dns-providers/detection-patterns \ ### 3. 
Keep Manual Override Available **Always Provide Manual Selection:** + - Don't remove "Select Manually" button - Auto-detection is a convenience, not requirement - Users may know better than detection algorithm - Edge cases always exist **UI Pattern:** + ``` ✓ Cloudflare detected (High confidence) [✓ Use Cloudflare] [Select Manually] ← Keep both options! @@ -1224,6 +1307,7 @@ curl -X POST https://charon-dev.internal/api/v1/dns-providers/detect \ ``` **Benefits:** + - Identify detection issues early - Verify your DNS setup is detectable - Test integration before production use @@ -1233,18 +1317,21 @@ curl -X POST https://charon-dev.internal/api/v1/dns-providers/detect \ ### 5. Monitor Detection Success Rates **Track Metrics:** + - Detection success rate (detected vs. not detected) - Confidence distribution (high/medium/low/none) - Manual override rate (users choosing manual selection) - Detection errors (timeouts, failures) **Use Metrics to:** + - Identify common providers not in database - Detect DNS configuration issues - Improve pattern database - Optimize cache hit rate **Example Monitoring:** + ``` Detection Stats (Last 7 Days): - Total detections: 1,234 @@ -1261,6 +1348,7 @@ Detection Stats (Last 7 Days): **Help Improve Detection:** When detection fails or is incorrect: + 1. ✅ Note the domain (if not sensitive) 2. ✅ Check actual nameservers: `dig NS domain.com +short` 3. ✅ Note expected provider @@ -1268,6 +1356,7 @@ When detection fails or is incorrect: 5. 
✅ Report via GitHub issue **Example GitHub Issue:** + ```markdown **Title:** Detection fails for Linode DNS @@ -1283,6 +1372,7 @@ Add pattern: "linode.com" → "linode" ``` **Benefits:** + - Helps other users with same provider - Improves detection accuracy - Expands supported provider list @@ -1298,16 +1388,19 @@ Add pattern: "linode.com" → "linode" - ✅ After 1 hour: Fresh DNS lookup **Considerations:** + - Don't rely on immediate updates after DNS changes - Wait 60 minutes or restart Charon after migration - Cache improves performance—embrace it! **When Cache Matters:** + - DNS provider migration in progress - Testing detection repeatedly - Debugging detection issues **Cache Doesn't Affect:** + - Manual provider selection - Certificate issuance - Existing proxy host configurations @@ -1319,42 +1412,49 @@ Add pattern: "linode.com" → "linode" ### Planned Features **1. Custom Nameserver Pattern Definitions** + - Allow users to add custom provider patterns - Define patterns via Web UI or configuration file - Support for internal/private DNS providers - Pattern validation and testing tools **2. Detection History and Statistics** + - View past detection results - Success/failure rates per provider - Confidence distribution charts - Most common providers in your environment **3. Support for Additional DNS Providers** + - Add more regional providers - Support for niche/specialized DNS services - Community-contributed pattern library - Automatic pattern updates **4. Detection Caching Configuration** + - Configurable cache TTL (currently fixed at 1 hour) - Per-provider cache settings - Manual cache invalidation API - Cache statistics dashboard **5. Batch Domain Detection** + - Detect multiple domains in one API call - Bulk import with auto-detection - CSV upload with detection report - Parallel detection processing **6. 
Enhanced Confidence Scoring** + - Machine learning-based scoring - Historical accuracy feedback - Provider-specific confidence thresholds - Confidence explanation details **7. Detection Webhooks** + - Notify external systems of detection results - Integrate with automation workflows - Detection event logging @@ -1365,6 +1465,7 @@ Add pattern: "linode.com" → "linode" ### Community Contributions **We Welcome:** + - 🌟 New provider pattern additions - 🐛 Bug reports for incorrect detections - 💡 Feature requests and ideas @@ -1374,6 +1475,7 @@ Add pattern: "linode.com" → "linode" **How to Contribute:** **Add a Provider Pattern:** + ```bash # 1. Fork repository # 2. Edit: backend/internal/services/dns_detection_service.go @@ -1391,11 +1493,13 @@ var BuiltInNameservers = map[string]string{ ``` **Report Detection Issues:** -- GitHub Issues: https://github.com/Wikid82/Charon/issues + +- GitHub Issues: <https://github.com/Wikid82/Charon/issues> - Label: `enhancement`, `dns-detection` - Provide: Domain example, nameservers, expected provider **Share Use Cases:** + - How are you using auto-detection? - What workflows does it enable? - What features would be helpful?
@@ -1405,16 +1509,18 @@ var BuiltInNameservers = map[string]string{ ### Feedback Welcome **Help Us Improve:** + - Share your experience with auto-detection - Report detection accuracy issues - Suggest new provider patterns - Request feature enhancements **Contact:** -- GitHub Issues: https://github.com/Wikid82/Charon/issues -- GitHub Discussions: https://github.com/Wikid82/Charon/discussions -- Documentation: https://docs.charon.example.com -- Community: https://community.charon.example.com + +- GitHub Issues: <https://github.com/Wikid82/Charon/issues> +- GitHub Discussions: <https://github.com/Wikid82/Charon/discussions> +- Documentation: <https://docs.charon.example.com> +- Community: <https://community.charon.example.com> --- @@ -1433,6 +1539,7 @@ var BuiltInNameservers = map[string]string{ ### Version 1.0.0 (January 2026) **Initial Release** + - ✨ DNS provider auto-detection for 10+ major providers - 🚀 Web UI integration with ProxyHost form - 🔌 RESTful API endpoints (`/detect`, `/detection-patterns`) @@ -1445,6 +1552,7 @@ var BuiltInNameservers = map[string]string{ - ♿ Accessibility: ARIA labels, keyboard navigation **Supported Providers:** + - Cloudflare - Amazon Route 53 - DigitalOcean @@ -1457,6 +1565,7 @@ var BuiltInNameservers = map[string]string{ - DNSimple **Technical Details:** + - Pattern-based nameserver matching - Automatic wildcard domain normalization - Thread-safe cache implementation @@ -1503,17 +1612,20 @@ A: Typically 100-200ms for first detection, <1ms for cached results. 
## Support **Questions or Issues?** -- 📖 Documentation: https://docs.charon.example.com -- 🐛 GitHub Issues: https://github.com/Wikid82/Charon/issues -- 💬 GitHub Discussions: https://github.com/Wikid82/Charon/discussions -- 👥 Community Forum: https://community.charon.example.com + +- 📖 Documentation: <https://docs.charon.example.com> +- 🐛 GitHub Issues: <https://github.com/Wikid82/Charon/issues> +- 💬 GitHub Discussions: <https://github.com/Wikid82/Charon/discussions> +- 👥 Community Forum: <https://community.charon.example.com> **Feature Requests:** + - Submit via GitHub Issues with label `enhancement` - Describe your use case and desired functionality - Include examples and expected behavior **Bug Reports:** + - Submit via GitHub Issues with label `bug` - Include: Domain (if not sensitive), nameservers, expected vs. actual result - Attach detection API response if available diff --git a/docs/features/key-rotation.md b/docs/features/key-rotation.md index 87e0c5db..112fc5d1 100644 --- a/docs/features/key-rotation.md +++ b/docs/features/key-rotation.md @@ -133,6 +133,7 @@ Every encrypted credential stores its **key version** alongside the ciphertext. - **Rotation tracking**: Verify rotation completed successfully **Example**: + - Before rotation: All 15 DNS providers have `key_version = 1` - After rotation: All 15 DNS providers have `key_version = 2` @@ -153,11 +154,13 @@ CHARON_ENCRYPTION_KEY_V2="OlderK1234567890OlderK1234567890OlderK1==" ``` **Key Format Requirements**: + - **Length**: 32 bytes (before base64 encoding) - **Encoding**: Base64-encoded - **Generation**: Use cryptographically secure random number generator **Generate a new key**: + ```bash # Using OpenSSL openssl rand -base64 32 @@ -182,6 +185,7 @@ node -e "console.log(require('crypto').randomBytes(32).toString('base64'))" ### Permission Requirements **Admin Role Required**: Only users with `role = "admin"` can: + - View encryption status - Trigger key rotation - Validate key configuration @@ -220,6 +224,7 @@ The Encryption Management page includes: **What it shows**: The active key version in use. 
**Possible values**: + - `Version 1` — Initial key (default state) - `Version 2` — After first rotation - `Version 3+` — After subsequent rotations @@ -241,6 +246,7 @@ The Encryption Management page includes: **Example**: `3 Providers` — Three providers still use legacy keys. **What to check**: + - Should be **0** immediately after successful rotation - If non-zero after rotation, check audit logs for errors @@ -249,6 +255,7 @@ The Encryption Management page includes: **What it shows**: Whether `CHARON_ENCRYPTION_KEY_NEXT` is configured. **Possible values**: + - ✅ **Configured** — Ready for rotation - ❌ **Not Configured** — Cannot rotate (next key not set) @@ -284,6 +291,7 @@ Before rotating keys, ensure: **Action**: Configure `CHARON_ENCRYPTION_KEY_NEXT` environment variable. **Docker Compose Example**: + ```yaml services: charon: @@ -293,6 +301,7 @@ services: ``` **Docker CLI Example**: + ```bash docker run -d \ -e CHARON_ENCRYPTION_KEY="ABcdEF1234567890ABcdEF1234567890ABCDEFGH=" \ @@ -301,6 +310,7 @@ docker run -d \ ``` **Kubernetes Example**: + ```yaml apiVersion: v1 kind: Secret @@ -332,12 +342,14 @@ kubectl rollout restart deployment/charon **What happens**: Charon loads both current and next keys into memory. **Verification**: + ```bash # Check logs for successful startup docker logs charon 2>&1 | grep "encryption" ``` Expected output: + ``` {"level":"info","msg":"Encryption keys loaded: current + next configured"} ``` @@ -347,6 +359,7 @@ Expected output: **Action**: Click **"Validate Configuration"** button in the Encryption Management UI. **Alternative (API)**: + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ -H "Authorization: Bearer " @@ -355,6 +368,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ **What happens**: Charon tests round-trip encryption with all configured keys (current, next, legacy). 
**Success response**: + ```json { "status": "valid", @@ -370,17 +384,20 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ **Action**: Click **"Rotate Encryption Key"** button in the Encryption Management UI. **Confirmation dialog**: + - Review the warning: "This will re-encrypt all DNS provider credentials with the new key. This operation cannot be undone." - Check **"I understand"** checkbox - Click **"Start Rotation"** **Alternative (API)**: + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ -H "Authorization: Bearer " ``` **What happens**: + 1. Charon fetches all DNS providers from the database 2. For each provider: - Decrypts credentials with current key @@ -390,6 +407,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ 3. Returns detailed rotation result **Success response**: + ```json { "total_providers": 15, @@ -410,17 +428,20 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ **Action**: Refresh the Encryption Management page. **What to check**: + - ✅ **Current Key Version**: Should now show `Version 2` - ✅ **Providers Updated**: Should show `15 Providers` (your total count) - ✅ **Providers Outdated**: Should show `0 Providers` **Alternative (API)**: + ```bash curl https://your-charon-instance/api/v1/admin/encryption/status \ -H "Authorization: Bearer " ``` **Expected response**: + ```json { "current_version": 2, @@ -439,12 +460,14 @@ curl https://your-charon-instance/api/v1/admin/encryption/status \ **Action**: Update environment variables to make the new key permanent. 
**Before**: + ```bash CHARON_ENCRYPTION_KEY="ABcdEF1234567890ABcdEF1234567890ABCDEFGH=" # Old key CHARON_ENCRYPTION_KEY_NEXT="XyZaBcDeF1234567890XyZaBcDeF1234567890XY=" # New key ``` **After**: + ```bash CHARON_ENCRYPTION_KEY="XyZaBcDeF1234567890XyZaBcDeF1234567890XY=" # New key (promoted) CHARON_ENCRYPTION_KEY_V1="ABcdEF1234567890ABcdEF1234567890ABCDEFGH=" # Old key (kept as legacy) @@ -464,11 +487,13 @@ docker-compose restart charon **What happens**: Charon now uses the new key for future encryptions and keeps the old key for fallback. **Verification**: + ```bash docker logs charon 2>&1 | grep "encryption" ``` Expected output: + ``` {"level":"info","msg":"Encryption keys loaded: current + 1 legacy keys"} ``` @@ -484,16 +509,19 @@ Expected output: ### Monitoring Rotation Progress **During rotation**: + - The UI shows a loading overlay with "Rotating..." message - The rotation button is disabled - You'll see a progress toast notification **After rotation**: + - Success toast appears with provider count and duration - Status cards update immediately - Audit log entry is created **If rotation takes longer than expected**: + - Check the backend logs: `docker logs charon -f` - Look for errors like "Failed to decrypt provider X credentials" - See [Troubleshooting](#troubleshooting) section @@ -505,6 +533,7 @@ Expected output: ### Why Validate? Validation tests that all configured keys work correctly **before** triggering rotation. This prevents: + - ❌ Broken keys being used for rotation - ❌ Credentials becoming inaccessible - ❌ Failed rotations due to corrupted keys @@ -512,6 +541,7 @@ Validation tests that all configured keys work correctly **before** triggering r ### When to Validate Run validation: + - ✅ **Before** every key rotation - ✅ **After** changing environment variables - ✅ **After** restoring from backup @@ -520,11 +550,13 @@ Run validation: ### How to Validate **Via UI**: + 1. Go to **Security** → **Encryption Management** 2. 
Click **"Validate Configuration"** button 3. Wait for validation to complete (usually < 1 second) **Via API**: + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ -H "Authorization: Bearer " @@ -554,6 +586,7 @@ Charon performs round-trip encryption for each configured key: **UI**: Green success toast: "Key configuration is valid and ready for rotation" **API Response**: + ```json { "status": "valid", @@ -572,6 +605,7 @@ Charon performs round-trip encryption for each configured key: **UI**: Red error toast: "Key configuration validation failed. Check errors below." **API Response**: + ```json { "status": "invalid", @@ -587,6 +621,7 @@ Charon performs round-trip encryption for each configured key: ``` **Common errors**: + - `"decryption failed"` — Key is corrupted or not base64-encoded correctly - `"key too short"` — Key is not 32 bytes after base64 decoding - `"invalid base64"` — Key contains invalid base64 characters @@ -596,6 +631,7 @@ Charon performs round-trip encryption for each configured key: **Error**: `"next_key: decryption failed"` **Fix**: + 1. Regenerate the next key: `openssl rand -base64 32` 2. Update `CHARON_ENCRYPTION_KEY_NEXT` environment variable 3. Restart Charon @@ -604,6 +640,7 @@ Charon performs round-trip encryption for each configured key: **Error**: `"key too short"` **Fix**: + 1. Ensure you're generating 32 bytes: `openssl rand -base64 32` (not `openssl rand 32`) 2. Verify base64 encoding is correct 3. Update environment variable @@ -612,6 +649,7 @@ Charon performs round-trip encryption for each configured key: **Error**: `"invalid base64"` **Fix**: + 1. Check for extra whitespace or newlines in the key 2. Ensure the key is properly quoted in docker-compose.yml 3. Re-copy the key carefully @@ -625,11 +663,13 @@ Charon performs round-trip encryption for each configured key: ### Accessing Audit History **Via UI**: + 1. Go to **Security** → **Encryption Management** 2. 
Scroll to the **Rotation History** section at the bottom 3. View paginated list of rotation events **Via API**: + ```bash curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit=20" \ -H "Authorization: Bearer " @@ -646,6 +686,7 @@ Charon logs the following encryption-related audit events: **When**: Immediately when rotation is triggered **Details**: + ```json { "timestamp": "2026-01-04T10:00:00Z", @@ -666,6 +707,7 @@ Charon logs the following encryption-related audit events: **When**: After all providers are successfully re-encrypted **Details**: + ```json { "timestamp": "2026-01-04T10:00:02Z", @@ -688,6 +730,7 @@ Charon logs the following encryption-related audit events: **When**: If rotation encounters critical errors **Details**: + ```json { "timestamp": "2026-01-04T10:05:00Z", @@ -709,6 +752,7 @@ Charon logs the following encryption-related audit events: **When**: After successful validation **Details**: + ```json { "timestamp": "2026-01-04T09:55:00Z", @@ -728,6 +772,7 @@ Charon logs the following encryption-related audit events: **When**: If validation detects issues **Details**: + ```json { "timestamp": "2026-01-04T09:50:00Z", @@ -742,6 +787,7 @@ Charon logs the following encryption-related audit events: ### Filtering History **By page**: + ```bash curl "https://your-charon-instance/api/v1/admin/encryption/history?page=2&limit=10" ``` @@ -751,6 +797,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=2&limit= ### Exporting History **Via API** (JSON): + ```bash curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit=1000" \ -H "Authorization: Bearer " \ @@ -775,16 +822,19 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= ### Key Retention Policies **Legacy Key Retention**: + - ✅ Keep legacy keys for **at least 30 days** after rotation - ✅ Extend to **90 days** for high-risk environments - ✅ Never delete legacy keys immediately after rotation **Why**: + - 
Allows rollback if issues are discovered - Supports disaster recovery from old backups - Provides time to verify rotation success **After Retention Period**: + 1. Verify no issues occurred during retention window 2. Remove legacy key from environment variables 3. Restart Charon to apply changes @@ -793,24 +843,30 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= ### Backup Procedures **Before Every Rotation**: + 1. **Backup the database**: + ```bash docker exec charon_db pg_dump -U charon charon_db > backup_before_rotation_$(date +%Y%m%d).sql ``` 2. **Backup environment variables**: + ```bash cp docker-compose.yml docker-compose.yml.backup_$(date +%Y%m%d) ``` 3. **Test backup restoration**: + ```bash # Restore database docker exec -i charon_db psql -U charon charon_db < backup_before_rotation_20260104.sql ``` **After Rotation**: + 1. **Backup the new state**: + ```bash docker exec charon_db pg_dump -U charon charon_db > backup_after_rotation_$(date +%Y%m%d).sql ``` @@ -823,6 +879,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= ### Testing in Staging First **Before rotating production keys**: + 1. ✅ Deploy exact production configuration to staging 2. ✅ Perform full rotation in staging 3. ✅ Verify all DNS providers still work @@ -832,6 +889,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= 7. ✅ Apply same procedure to production **Staging checklist**: + - [ ] Same Charon version as production - [ ] Same number of DNS providers - [ ] Same encryption key length and format @@ -847,17 +905,21 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Scenario**: Rotation just completed but providers are failing. **Steps**: + 1. **Restore database from pre-rotation backup**: + ```bash docker exec -i charon_db psql -U charon charon_db < backup_before_rotation_20260104.sql ``` 2. 
**Revert environment variables**: + ```bash cp docker-compose.yml.backup_20260104 docker-compose.yml ``` 3. **Restart Charon**: + ```bash docker-compose restart charon ``` @@ -872,7 +934,9 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Scenario**: Issues discovered hours or days after rotation. **Steps**: + 1. **Keep new key as legacy**: + ```bash CHARON_ENCRYPTION_KEY="" # Revert to old key CHARON_ENCRYPTION_KEY_V2="" # Keep new key as legacy @@ -893,24 +957,28 @@ If rotation fails or issues are discovered, follow this rollback procedure: ### Security Considerations **Key Storage**: + - ❌ **NEVER** commit keys to version control - ✅ Use environment variables or secrets manager - ✅ Restrict access to key values (need-to-know basis) - ✅ Audit access to secrets manager **Key Generation**: + - ✅ Always use cryptographically secure RNG (`openssl`, `secrets`, `crypto`) - ❌ Never use predictable sources (`date`, `rand()`, keyboard mashing) - ✅ Generate keys on secure, trusted systems - ✅ Never reuse keys across environments (prod vs staging) **Key Transmission**: + - ✅ Use encrypted channels (SSH, TLS) to transmit keys - ❌ Never send keys via email, Slack, or unencrypted chat - ✅ Use secrets managers with RBAC (e.g., Vault, AWS Secrets Manager) - ✅ Rotate keys immediately if transmission is compromised **Access Control**: + - ✅ Limit key rotation to admin users only - ✅ Require MFA for admin accounts - ✅ Audit all key-related operations @@ -927,11 +995,13 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Symptom**: "Rotate Encryption Key" button is grayed out. **Possible causes**: + 1. ❌ Next key not configured 2. ❌ Not logged in as admin 3. ❌ Rotation already in progress **Solution**: + 1. Check **Next Key Status** — should show "Configured" 2. Verify you're logged in as admin (check user menu) 3. 
Wait for in-progress rotation to complete @@ -942,12 +1012,15 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Symptom**: Toast shows "Warning: 3 providers failed to rotate." **Possible causes**: + 1. ❌ Corrupted credentials in database 2. ❌ Missing key versions 3. ❌ Database transaction errors **Solution**: + 1. **Check audit logs** for specific errors: + ```bash curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1" \ -H "Authorization: Bearer " @@ -972,12 +1045,15 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Symptom**: After promoting next key, Charon won't start or credentials fail. **Error log**: + ``` {"level":"fatal","msg":"CHARON_ENCRYPTION_KEY not set"} ``` **Solution**: + 1. **Check environment variables**: + ```bash docker exec charon env | grep CHARON_ENCRYPTION ``` @@ -988,6 +1064,7 @@ If rotation fails or issues are discovered, follow this rollback procedure: - Verify base64 encoding is correct 3. **Restart with corrected config**: + ```bash docker-compose down docker-compose up -d @@ -998,20 +1075,24 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Symptom**: Status shows "Providers Outdated: 15" even after rotation. **Possible causes**: + 1. ❌ Rotation didn't complete successfully 2. ❌ Database rollback occurred 3. ❌ Frontend cache showing stale data **Solution**: + 1. **Refresh the page** (hard refresh: Ctrl+Shift+R) 2. **Check API directly**: + ```bash curl https://your-charon-instance/api/v1/admin/encryption/status \ -H "Authorization: Bearer " ``` 3. **Verify database state**: + ```sql SELECT key_version, COUNT(*) FROM dns_providers GROUP BY key_version; ``` @@ -1025,17 +1106,20 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Error**: `"v1: decryption failed"` **Possible causes**: + 1. ❌ Key was changed accidentally 2. ❌ Key is corrupted 3. ❌ Wrong key assigned to V1 **Solution**: + 1. 
**Identify the correct key**: - Check your key rotation history - Review backup files - Consult secrets manager logs 2. **Update environment variable**: + ```bash CHARON_ENCRYPTION_KEY_V1="" ``` @@ -1052,27 +1136,33 @@ If rotation fails or issues are discovered, follow this rollback procedure: **Symptom**: Rotation running for > 5 minutes with many providers. **Expected duration**: + - 1-10 providers: < 5 seconds - 10-50 providers: < 30 seconds - 50-100 providers: < 2 minutes **Possible causes**: + 1. ❌ Database performance issues 2. ❌ Database locks or contention 3. ❌ Network issues (if database is remote) **Solution**: + 1. **Check backend logs**: + ```bash docker logs charon -f | grep "rotation" ``` 2. **Look for slow queries**: + ```bash docker logs charon | grep "slow query" ``` 3. **Check database health**: + ```bash docker exec charon_db pg_stat_activity ``` @@ -1086,11 +1176,13 @@ If rotation fails or issues are discovered, follow this rollback procedure: If you encounter issues not covered here: 1. **Check the logs**: + ```bash docker logs charon -f ``` 2. **Enable debug logging** (if needed): + ```yaml environment: - LOG_LEVEL=debug @@ -1123,12 +1215,14 @@ All encryption management endpoints require **admin authentication**. **Authentication**: Required (admin only) **Request**: + ```bash curl https://your-charon-instance/api/v1/admin/encryption/status \ -H "Authorization: Bearer " ``` **Success Response** (HTTP 200): + ```json { "current_version": 2, @@ -1143,6 +1237,7 @@ curl https://your-charon-instance/api/v1/admin/encryption/status \ ``` **Response Fields**: + - `current_version` (int): Active key version (1, 2, 3, etc.) 
- `next_key_configured` (bool): Whether `CHARON_ENCRYPTION_KEY_NEXT` is set - `legacy_key_count` (int): Number of legacy keys (V1-V10) configured @@ -1151,6 +1246,7 @@ curl https://your-charon-instance/api/v1/admin/encryption/status \ - `providers_on_older_versions` (int): Count needing rotation **Error Responses**: + - **401 Unauthorized**: Missing or invalid token - **403 Forbidden**: Non-admin user - **500 Internal Server Error**: Database or encryption service error @@ -1166,10 +1262,12 @@ curl https://your-charon-instance/api/v1/admin/encryption/status \ **Authentication**: Required (admin only) **Prerequisites**: + - `CHARON_ENCRYPTION_KEY_NEXT` must be configured - Application must be restarted to load next key **Request**: + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ -H "Authorization: Bearer " \ @@ -1177,6 +1275,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ ``` **Success Response** (HTTP 200): + ```json { "total_providers": 15, @@ -1191,6 +1290,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ ``` **Partial Success Response** (HTTP 200): + ```json { "total_providers": 15, @@ -1205,6 +1305,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ ``` **Response Fields**: + - `total_providers` (int): Total DNS providers in database - `success_count` (int): Providers successfully re-encrypted - `failure_count` (int): Providers that failed re-encryption @@ -1215,17 +1316,21 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ - `new_key_version` (int): New key version after rotation **Error Responses**: + - **400 Bad Request**: `CHARON_ENCRYPTION_KEY_NEXT` not configured + ```json { "error": "Next key not configured. Set CHARON_ENCRYPTION_KEY_NEXT and restart." 
} ``` + - **401 Unauthorized**: Missing or invalid token - **403 Forbidden**: Non-admin user - **500 Internal Server Error**: Critical failure during rotation **Audit Events Created**: + - `encryption_key_rotation_started` — When rotation begins - `encryption_key_rotation_completed` — When rotation succeeds - `encryption_key_rotation_failed` — When rotation fails @@ -1241,6 +1346,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ **Authentication**: Required (admin only) **Request**: + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ -H "Authorization: Bearer " \ @@ -1248,6 +1354,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ ``` **Success Response** (HTTP 200): + ```json { "status": "valid", @@ -1264,6 +1371,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ ``` **Failure Response** (HTTP 400): + ```json { "status": "invalid", @@ -1279,6 +1387,7 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ ``` **Response Fields**: + - `status` (string): `"valid"` or `"invalid"` - `keys_tested` (int): Total keys tested - `message` (string): Human-readable summary @@ -1286,11 +1395,13 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ - `errors` (array): List of validation errors (if any) **Error Responses**: + - **401 Unauthorized**: Missing or invalid token - **403 Forbidden**: Non-admin user - **500 Internal Server Error**: Validation service error **Audit Events Created**: + - `encryption_key_validation_success` — When validation passes - `encryption_key_validation_failed` — When validation fails @@ -1305,16 +1416,19 @@ curl -X POST https://your-charon-instance/api/v1/admin/encryption/validate \ **Authentication**: Required (admin only) **Query Parameters**: + - `page` (int, optional): Page number (default: 1) - `limit` (int, optional): Results per page (default: 20, max: 100) 
**Request**: + ```bash curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit=20" \ -H "Authorization: Bearer " ``` **Success Response** (HTTP 200): + ```json { "events": [ @@ -1355,6 +1469,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= ``` **Response Fields**: + - `events` (array): List of audit log entries - `id` (int): Audit log entry ID - `timestamp` (string): ISO 8601 timestamp @@ -1369,6 +1484,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= - `total_pages` (int): Total pages available **Error Responses**: + - **400 Bad Request**: Invalid page or limit parameter - **401 Unauthorized**: Missing or invalid token - **403 Forbidden**: Non-admin user @@ -1381,6 +1497,7 @@ curl "https://your-charon-instance/api/v1/admin/encryption/history?page=1&limit= All encryption management endpoints use **Bearer token authentication**. **Obtaining a token**: + ```bash # Login to get token curl -X POST https://your-charon-instance/api/v1/auth/login \ @@ -1402,6 +1519,7 @@ curl -X POST https://your-charon-instance/api/v1/auth/login \ ``` **Using the token**: + ```bash curl https://your-charon-instance/api/v1/admin/encryption/status \ -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIs..." @@ -1436,6 +1554,7 @@ Encryption management endpoints are not rate-limited by default, but general API ## Summary Encryption key rotation is a critical security practice that Charon makes easy with: + - ✅ **Zero-downtime rotation** — Services remain available throughout the process - ✅ **Multi-key support** — Current + next + legacy keys coexist seamlessly - ✅ **Admin-friendly UI** — No command-line expertise required @@ -1444,6 +1563,7 @@ Encryption key rotation is a critical security practice that Charon makes easy w - ✅ **Validation tools** — Test keys before using them **Next Steps**: + 1. Review your organization's key rotation policy 2. Schedule your first rotation (test in staging first!) 3. 
Set a recurring reminder for future rotations diff --git a/docs/features/multi-credential.md b/docs/features/multi-credential.md index effaab68..c6333e2d 100644 --- a/docs/features/multi-credential.md +++ b/docs/features/multi-credential.md @@ -9,12 +9,14 @@ Multi-Credential per Provider is an advanced feature that allows you to configur ### Why Use Multi-Credentials? **Security Benefits:** + - **Zone-level Isolation**: Compromise of one credential doesn't expose all your domains - **Least Privilege Principle**: Each credential can have minimal permissions for only the zones it manages - **Independent Rotation**: Rotate credentials for specific zones without affecting others - **Audit Trail**: Track which credentials were used for certificate operations **Business Use Cases:** + - **Managed Service Providers (MSPs)**: Use separate customer-specific credentials for each client's domains - **Large Enterprises**: Isolate credentials by department, environment, or geographic region - **Multi-Tenant Platforms**: Provide credential isolation between tenants @@ -63,6 +65,7 @@ Multi-Credential Mode: ### Decision Criteria **Use Multi-Credentials When:** + - You manage domains for multiple customers or tenants - You need credential isolation for security or compliance - Different teams or departments manage different zones @@ -71,6 +74,7 @@ Multi-Credential Mode: - You need independent credential rotation schedules **Use Single Credential When:** + - You manage a small number of domains under one organization - All domains have the same security requirements - Simpler management is preferred over isolation @@ -108,6 +112,7 @@ Multi-Credential Mode: - A confirmation dialog will appear 3. **Review Migration Impact** + ``` ⚠️ IMPORTANT: This action will: - Convert your existing provider credential into a "catch-all" credential @@ -181,6 +186,7 @@ Click the **Add Credential** button in the credential management interface. 
The zone filter determines which domains this credential will be used for: **Option 1: Exact Domain Match** + ``` Zone Filter: example.com Matches: example.com, www.example.com, api.example.com @@ -188,6 +194,7 @@ Does NOT Match: subdomain.example.com.au, example.org ``` **Option 2: Wildcard Match** + ``` Zone Filter: *.customer-a.com Matches: shop.customer-a.com, api.customer-a.com, *.customer-a.com @@ -195,6 +202,7 @@ Does NOT Match: customer-a.com (root), customer-b.com ``` **Option 3: Multiple Zones (Comma-Separated)** + ``` Zone Filter: example.com,api.example.org,*.dev.example.net Matches: @@ -204,6 +212,7 @@ Matches: ``` **Option 4: Catch-All (Empty Filter)** + ``` Zone Filter: (leave empty) Matches: Any domain not matched by other credentials @@ -245,6 +254,7 @@ Use Case: Fallback credential for miscellaneous domains #### Validation Rules When saving a credential, Charon validates: + - ✅ Zone filter syntax is correct - ✅ No duplicate exact matches across credentials - ⚠️ Warning if multiple wildcard patterns could overlap @@ -258,6 +268,7 @@ When saving a credential, Charon validates: 4. Click **Save Changes** **⚠️ Important Notes:** + - Changing zone filters may affect which credential is used for existing proxy hosts - Charon will re-evaluate credential matching for all proxy hosts after the change - Consider testing in a non-production environment first if making significant changes @@ -309,18 +320,21 @@ When Charon needs to issue or renew a certificate for a domain, it selects a cre Credentials are evaluated in this order: **1. Exact Match (Highest Priority)** + ``` Zone Filter: example.com Domain: www.example.com → Zone: example.com → ✅ MATCH ``` **2. Wildcard Match** + ``` Zone Filter: *.customer-a.com Domain: shop.customer-a.com → Zone: customer-a.com → ✅ MATCH (after exact check fails) ``` **3. 
Catch-All (Lowest Priority)** + ``` Zone Filter: (empty) Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildcard matches) @@ -331,6 +345,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca #### Example 1: MSP with Multiple Customers **Configured Credentials:** + ``` 1. Name: "Customer A Production" Zone Filter: *.customer-a.com @@ -346,6 +361,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca ``` **Domain Matching:** + - `shop.customer-a.com` → Credential 1 ("Customer A Production") - `api.customer-b.com` → Credential 2 ("Customer B Production") - `example.com` → Credential 3 ("Catch-all") @@ -354,6 +370,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca #### Example 2: Environment Separation **Configured Credentials:** + ``` 1. Name: "Production" Zone Filter: example.com @@ -369,6 +386,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca ``` **Domain Matching:** + - `www.example.com` → Credential 1 ("Production") - `api.example.com` → Credential 1 ("Production") - `app.staging.example.com` → Credential 2 ("Staging") @@ -378,6 +396,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca #### Example 3: Geographic Separation **Configured Credentials:** + ``` 1. 
Name: "US Zones" Zone Filter: *.us.example.com @@ -393,6 +412,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca ``` **Domain Matching:** + - `shop.us.example.com` → Credential 1 ("US Zones") - `api.eu.example.com` → Credential 2 ("EU Zones") - `portal.apac.example.com` → Credential 3 ("APAC Zones") @@ -405,6 +425,7 @@ Domain: anything.com → Zone: anything.com → ✅ MATCH (if no exact or wildca If multiple credentials could match the same zone, Charon uses **first match** based on priority order: **Example:** + ``` Credential A: Zone Filter: example.com (Exact) Credential B: Zone Filter: *.example.com (Wildcard) @@ -424,10 +445,12 @@ Match Process: #### Issue: Domain doesn't match any credential **Symptoms:** + - Certificate issuance fails with "No matching credential for zone" - Error message: `No credential found for provider 'cloudflare' and zone 'example.com'` **Solutions:** + 1. **Add a catch-all credential**: Create a credential with an empty zone filter 2. **Add specific credential**: Create a credential with zone filter matching your domain 3. **Check zone extraction**: Ensure Charon is correctly extracting the zone from your domain @@ -435,10 +458,12 @@ Match Process: #### Issue: Wrong credential is being used **Symptoms:** + - Expected credential "Production" but "Catch-all" is being used - Certificate issued but not with the intended credential **Solutions:** + 1. **Check zone filter syntax**: Verify your zone filters are correctly configured 2. **Check priority order**: Exact matches override wildcards; ensure your exact match is configured 3. **Review audit logs**: Check which credential was actually selected and why @@ -446,10 +471,12 @@ Match Process: #### Issue: Zone filter validation error **Symptoms:** + - Error: "Invalid zone filter format" - Error: "Zone filter 'example.com' conflicts with existing credential" **Solutions:** + 1. **Check syntax**: Ensure no spaces, only commas separating patterns 2. 
**Check for duplicates**: Ensure no two credentials have the exact same zone filter pattern 3. **Review wildcard syntax**: Wildcards must be `*.domain.com`, not `*domain.com` @@ -492,8 +519,10 @@ When you create a proxy host with multi-credential mode enabled: ### Viewing Which Credential Was Used **Method 1: Proxy Host Details** + 1. Open the proxy host from the dashboard 2. In the **SSL/TLS** section, look for: + ``` Certificate: Active (Expires: 2026-04-01) Credential Used: Customer A Production (Cloudflare) @@ -501,9 +530,11 @@ When you create a proxy host with multi-credential mode enabled: ``` **Method 2: Audit Logs** + 1. Go to **Settings** → **Audit Logs** 2. Filter by: `Action: certificate_issued` or `Action: certificate_renewed` 3. View log entry: + ```json { "timestamp": "2026-01-02T14:30:00Z", @@ -518,6 +549,7 @@ When you create a proxy host with multi-credential mode enabled: ``` **Method 3: Credential Statistics** + 1. Go to **Settings** → **DNS Providers** → **Manage Credentials** 2. Each credential shows: - **Usage Count**: Number of domains using this credential @@ -529,12 +561,14 @@ When you create a proxy host with multi-credential mode enabled: #### Issue: Certificate issuance fails with "No matching credential" **Error Message:** + ``` Failed to issue certificate for shop.customer-a.com: No credential found for provider 'cloudflare' and zone 'customer-a.com' ``` **Solution:** + 1. Check DNS provider has multi-credential enabled 2. Verify a credential exists with zone filter matching `customer-a.com` 3. Add a credential with zone filter: `*.customer-a.com` or use catch-all @@ -542,12 +576,14 @@ No credential found for provider 'cloudflare' and zone 'customer-a.com' #### Issue: Certificate issuance fails with "API authentication failed" **Error Message:** + ``` Failed to issue certificate for shop.customer-a.com: Cloudflare API returned 403: Invalid credentials ``` **Solution:** + 1. 
Test the credential being used: **Manage Credentials** → **Test** 2. Verify API token/key is still valid in your DNS provider dashboard 3. Check API token has correct permissions (`Zone:DNS:Edit`) @@ -556,10 +592,12 @@ Cloudflare API returned 403: Invalid credentials #### Issue: Wrong credential is being used **Symptoms:** + - Certificate issued successfully but with unexpected credential - Audit logs show different credential than expected **Solution:** + 1. Review zone filter configuration for all credentials 2. Check priority order (exact > wildcard > catch-all) 3. Ensure your expected credential has the most specific zone filter @@ -572,6 +610,7 @@ Cloudflare API returned 403: Invalid credentials **Recommended Naming Patterns:** **By Customer/Tenant:** + ``` - "Customer A - Production" - "Customer B - Staging" @@ -579,6 +618,7 @@ Cloudflare API returned 403: Invalid credentials ``` **By Environment:** + ``` - "Production Zones" - "Staging Zones" @@ -586,6 +626,7 @@ Cloudflare API returned 403: Invalid credentials ``` **By Department:** + ``` - "Marketing - example.com" - "Engineering - api.example.com" @@ -593,6 +634,7 @@ Cloudflare API returned 403: Invalid credentials ``` **By Geography:** + ``` - "US East Zones" - "EU West Zones" @@ -606,6 +648,7 @@ Cloudflare API returned 403: Invalid credentials **Use Case:** Small number of high-value domains **Example:** + ``` Credential: "example.com Primary" Zone Filter: example.com @@ -615,11 +658,13 @@ Zone Filter: api.example.org ``` **Pros:** + - Maximum specificity - Easy to understand - Clear audit trail **Cons:** + - Requires one credential per domain - Not scalable for many domains @@ -628,6 +673,7 @@ Zone Filter: api.example.org **Use Case:** Logical grouping of subdomains **Example:** + ``` Credential: "Customer Zones" Zone Filter: *.customers.example.com @@ -637,11 +683,13 @@ Zone Filter: *.internal.example.com ``` **Pros:** + - Scalable for many subdomains - Clear organizational boundaries - Reduces 
credential count **Cons:** + - Broader scope than exact match - Requires careful namespace planning @@ -650,6 +698,7 @@ Zone Filter: *.internal.example.com **Use Case:** Most common for production deployments **Example:** + ``` 1. Exact matches for critical domains: - "Production Root" → example.com @@ -664,11 +713,13 @@ Zone Filter: *.internal.example.com ``` **Pros:** + - Balance of specificity and scalability - Flexible and maintainable - Handles edge cases **Cons:** + - More credentials to manage - Requires understanding of priority order @@ -718,6 +769,7 @@ Zone Filter: *.internal.example.com ### Viewing Credential Usage Statistics **Dashboard View:** + 1. Navigate to **Settings** → **DNS Providers** 2. For each provider with multi-credential enabled, click **View Statistics** 3. Dashboard shows: @@ -727,6 +779,7 @@ Zone Filter: *.internal.example.com - Top credentials by usage **Per-Credential View:** + 1. Go to **Settings** → **DNS Providers** → **Manage Credentials** 2. Each credential displays: @@ -756,6 +809,7 @@ Zone Filter: *.internal.example.com **Success Rate**: Percentage of successful operations (Success / (Success + Failure) × 100%) **⚠️ Low Success Rate Alert:** + - If success rate drops below 90%, investigate immediately - Common causes: expired API token, insufficient permissions, DNS provider API issues - Click **Test Credential** to diagnose @@ -769,6 +823,7 @@ Zone Filter: *.internal.example.com **Last Failure**: Timestamp of the most recent failed operation **Use Cases:** + - **Identify Unused Credentials**: If "Last Used" is > 90 days ago, consider removing - **Credential Rotation**: Track when credentials were last active - **Incident Response**: Correlate failures with outages or credential changes @@ -776,6 +831,7 @@ Zone Filter: *.internal.example.com ### Audit Trail for Credential Operations **Viewing Audit Logs:** + 1. Go to **Settings** → **Audit Logs** 2. 
Filter by: - **Action Type**: `credential_created`, `credential_updated`, `credential_deleted`, `certificate_issued`, `certificate_renewed` @@ -783,6 +839,7 @@ Zone Filter: *.internal.example.com - **User**: Filter by who performed the action **Log Entry Example:** + ```json { "timestamp": "2026-01-04T15:30:00Z", @@ -802,6 +859,7 @@ Zone Filter: *.internal.example.com ``` **Exported Logs:** + - Export to CSV or JSON for external analysis - Integrate with SIEM (Security Information and Event Management) systems - Use for compliance reporting and security audits @@ -813,11 +871,13 @@ Zone Filter: *.internal.example.com #### Issue 1: No matching credential for domain **Symptoms:** + - Certificate issuance fails - Error: `No credential found for provider 'cloudflare' and zone 'example.com'` - Proxy host shows certificate status: "Failed" **Root Causes:** + 1. No credential configured for the DNS zone 2. Zone filter doesn't match the domain's zone 3. Multi-credential mode not enabled @@ -825,11 +885,13 @@ Zone Filter: *.internal.example.com **Solutions:** **Step 1: Verify Multi-Credential Mode is Enabled** + ``` Settings → DNS Providers → Check "Multi-Credential Mode: Enabled" ``` **Step 2: Check Existing Credentials** + ``` Settings → DNS Providers → Manage Credentials Review zone filters for all credentials @@ -838,18 +900,21 @@ Review zone filters for all credentials **Step 3: Add Missing Credential or Catch-All** **Option A: Add Specific Credential** + ``` Credential Name: example.com Production Zone Filter: example.com ``` **Option B: Add Catch-All** + ``` Credential Name: Catch-All Zone Filter: (leave empty) ``` **Step 4: Retry Certificate Issuance** + ``` Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate ``` @@ -857,11 +922,13 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate #### Issue 2: Certificate issuance fails with API error **Symptoms:** + - Certificate issuance fails - Error: `Cloudflare API returned 403: Invalid credentials` or 
similar - Credential test fails **Root Causes:** + 1. API token/key expired or revoked 2. Insufficient API permissions 3. DNS provider account suspended @@ -870,30 +937,36 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate **Solutions:** **Step 1: Test the Credential** + ``` Settings → DNS Providers → Manage Credentials → Click "Test" next to credential ``` **Step 2: Check API Token Validity** + - Log in to your DNS provider dashboard (e.g., Cloudflare) - Navigate to API Tokens - Verify token is active and not expired - Check token permissions: `Zone:DNS:Edit` permission required **Step 3: Regenerate API Token** + - Generate new API token at DNS provider - Update credential in Charon: + ``` Settings → DNS Providers → Manage Credentials → Edit → Update API credentials → Test → Save ``` **Step 4: Check API Rate Limits** + - Review DNS provider's rate limit documentation - Check if you've exceeded API quotas - Wait for rate limit to reset (typically hourly) - Consider spreading certificate operations over time **Step 5: Retry Certificate Issuance** + ``` Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate ``` @@ -901,11 +974,13 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate #### Issue 3: Zone filter validation error **Symptoms:** + - Cannot save credential - Error: `Invalid zone filter format: 'example..com'` - Error: `Zone filter 'example.com' conflicts with existing credential` **Root Causes:** + 1. Syntax error in zone filter (typo, invalid characters) 2. Duplicate zone filter across multiple credentials 3. Conflicting exact and wildcard patterns @@ -915,6 +990,7 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate **Step 1: Check Syntax** **Valid Formats:** + ``` ✅ example.com ✅ *.customer-a.com @@ -924,6 +1000,7 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate ``` **Invalid Formats:** + ``` ❌ example..com (double dot) ❌ example.com. 
(trailing dot) @@ -933,25 +1010,30 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate ``` **Step 2: Check for Duplicates** + - Review all credentials for the provider - Ensure no two credentials have the exact same zone filter pattern - If duplicate found, edit one credential to have a different zone filter **Step 3: Resolve Conflicts** + - If you have both `example.com` and `*.example.com`, this is allowed but may cause confusion - Ensure you understand priority order: exact match takes precedence **Step 4: Save Again** + - After fixing syntax/duplicates, click **Save Credential** #### Issue 4: Wrong credential is being used **Symptoms:** + - Certificate issued successfully but audit logs show unexpected credential - Credential statistics don't match expectations - Security/compliance concern about which credential was used **Root Causes:** + 1. Zone filter misconfiguration (too broad or too narrow) 2. Misunderstanding of zone matching priority 3. Overlapping patterns causing unexpected matches @@ -959,6 +1041,7 @@ Proxy Hosts → Select proxy host → SSL/TLS → Renew Certificate **Solutions:** **Step 1: Review Zone Matching Logic** + ``` Priority Order: 1. 
Exact match: example.com @@ -967,11 +1050,13 @@ Priority Order: ``` **Step 2: Check Zone Extraction** + - For domain `shop.customer-a.com`, zone is `customer-a.com` - For domain `www.example.com`, zone is `example.com` - For domain `api.sub.example.com`, zone is `example.com` (not `sub.example.com`) **Step 3: Review All Credential Zone Filters** + ``` Settings → DNS Providers → Manage Credentials List all zone filters and check for overlaps: @@ -986,11 +1071,13 @@ For www.example.com: ``` **Step 4: Adjust Zone Filters** + - Make zone filters more specific to avoid unwanted matches - Remove or narrow catch-all if it's too broad - Use exact matches for critical domains **Step 5: Test Zone Matching** + - Some Charon versions may include a zone matching test utility - Go to **Settings** → **DNS Providers** → **Test Zone Matching** - Enter a domain and see which credential would be selected @@ -998,11 +1085,13 @@ For www.example.com: #### Issue 5: Credential test succeeds but certificate issuance fails **Symptoms:** + - Credential test passes: ✅ "Credential validated successfully" - Certificate issuance fails with DNS-related error - Error: `DNS propagation timeout` or `TXT record not found` **Root Causes:** + 1. API permissions sufficient for test but not for DNS-01 challenge 2. DNS propagation delay 3. 
Credential has access to different zones than expected @@ -1013,40 +1102,51 @@ For www.example.com: **Step 1: Check API Permissions** **Cloudflare:** + - Required: `Zone:DNS:Edit` permission - Test permission alone may only check `Zone:DNS:Read` **Route53:** + - Required: `route53:ChangeResourceRecordSets`, `route53:GetChange` - Test permission alone may only check `route53:ListHostedZones` **Step 2: Verify Zone Access** + - Ensure credential has access to the specific zone - Check DNS provider dashboard for zone visibility - For Route53, ensure IAM policy includes the correct hosted zone ID **Step 3: Check DNS Propagation** + - DNS-01 challenge requires TXT record to propagate - Default timeout: 60 seconds - Increase timeout in Charon settings if DNS provider is slow: + ``` Settings → Advanced → DNS Propagation Timeout: 120 seconds ``` **Step 4: Manual DNS Test** + - After certificate issuance fails, check if TXT record was created: + ```bash dig TXT _acme-challenge.shop.customer-a.com nslookup -type=TXT _acme-challenge.shop.customer-a.com ``` + - If record exists, issue is with propagation delay - If record doesn't exist, issue is with API permissions or credential **Step 5: Review Let's Encrypt Logs** + - View detailed certificate issuance logs: + ``` Settings → Logs → Filter by: "certificate_issuance" ``` + - Look for specific error messages from Let's Encrypt or DNS provider ### Getting Help @@ -1072,6 +1172,7 @@ curl -H "Authorization: Bearer YOUR_API_TOKEN" \ ``` **Getting an API Token:** + 1. Go to **Settings** → **API Tokens** 2. Click **Generate Token** 3. 
Copy token (shown only once) @@ -1085,6 +1186,7 @@ curl -H "Authorization: Bearer YOUR_API_TOKEN" \ **Description:** List all credentials for a DNS provider **Request:** + ```bash curl -X GET \ https://your-charon-instance/api/v1/dns-providers/1/credentials \ @@ -1092,6 +1194,7 @@ curl -X GET \ ``` **Response:** + ```json { "credentials": [ @@ -1133,6 +1236,7 @@ curl -X GET \ **Description:** Get details of a specific credential **Request:** + ```bash curl -X GET \ https://your-charon-instance/api/v1/dns-providers/1/credentials/42 \ @@ -1140,6 +1244,7 @@ curl -X GET \ ``` **Response:** + ```json { "id": 42, @@ -1168,6 +1273,7 @@ curl -X GET \ **Description:** Create a new credential for a DNS provider **Request:** + ```bash curl -X POST \ https://your-charon-instance/api/v1/dns-providers/1/credentials \ @@ -1186,6 +1292,7 @@ curl -X POST \ **Provider-Specific Credential Fields:** **Cloudflare:** + ```json "credentials": { "api_token": "your-cloudflare-api-token" @@ -1198,6 +1305,7 @@ curl -X POST \ ``` **Route53:** + ```json "credentials": { "access_key_id": "AKIAIOSFODNN7EXAMPLE", @@ -1206,6 +1314,7 @@ curl -X POST \ ``` **DigitalOcean:** + ```json "credentials": { "api_token": "your-digitalocean-api-token" @@ -1213,6 +1322,7 @@ curl -X POST \ ``` **Response:** + ```json { "id": 44, @@ -1236,6 +1346,7 @@ curl -X POST \ **Description:** Update an existing credential **Request:** + ```bash curl -X PATCH \ https://your-charon-instance/api/v1/dns-providers/1/credentials/44 \ @@ -1248,6 +1359,7 @@ curl -X PATCH \ ``` **Response:** + ```json { "id": 44, @@ -1271,6 +1383,7 @@ curl -X PATCH \ **Description:** Delete a credential (fails if credential is in use) **Request:** + ```bash curl -X DELETE \ https://your-charon-instance/api/v1/dns-providers/1/credentials/44 \ @@ -1278,6 +1391,7 @@ curl -X DELETE \ ``` **Response (Success):** + ```json { "message": "Credential deleted successfully", @@ -1286,6 +1400,7 @@ curl -X DELETE \ ``` **Response (Error - In Use):** + ```json 
{ "error": "Cannot delete credential: 3 proxy hosts are using this credential", @@ -1304,6 +1419,7 @@ curl -X DELETE \ **Description:** Test if a credential is valid and has correct permissions **Request:** + ```bash curl -X POST \ https://your-charon-instance/api/v1/dns-providers/1/credentials/42/test \ @@ -1311,6 +1427,7 @@ curl -X POST \ ``` **Response (Success):** + ```json { "status": "success", @@ -1326,6 +1443,7 @@ curl -X POST \ ``` **Response (Failure):** + ```json { "status": "failed", @@ -1345,6 +1463,7 @@ curl -X POST \ **Description:** Enable multi-credential mode for a DNS provider **Request:** + ```bash curl -X POST \ https://your-charon-instance/api/v1/dns-providers/1/enable-multi-credential \ @@ -1352,6 +1471,7 @@ curl -X POST \ ``` **Response:** + ```json { "message": "Multi-credential mode enabled", @@ -1372,6 +1492,7 @@ curl -X POST \ **Description:** Disable multi-credential mode (reverts to first credential as primary) **Request:** + ```bash curl -X POST \ https://your-charon-instance/api/v1/dns-providers/1/disable-multi-credential \ @@ -1379,6 +1500,7 @@ curl -X POST \ ``` **Response:** + ```json { "message": "Multi-credential mode disabled", @@ -1396,6 +1518,7 @@ curl -X POST \ All API endpoints may return the following error responses: **400 Bad Request:** + ```json { "error": "Invalid zone filter format", @@ -1404,6 +1527,7 @@ All API endpoints may return the following error responses: ``` **401 Unauthorized:** + ```json { "error": "Unauthorized", @@ -1412,6 +1536,7 @@ All API endpoints may return the following error responses: ``` **403 Forbidden:** + ```json { "error": "Forbidden", @@ -1420,6 +1545,7 @@ All API endpoints may return the following error responses: ``` **404 Not Found:** + ```json { "error": "Not found", @@ -1428,6 +1554,7 @@ All API endpoints may return the following error responses: ``` **409 Conflict:** + ```json { "error": "Conflict", @@ -1437,6 +1564,7 @@ All API endpoints may return the following error responses: ``` 
**500 Internal Server Error:** + ```json { "error": "Internal server error", @@ -1484,5 +1612,5 @@ All API endpoints may return the following error responses: --- -*Last Updated: January 4, 2026* -*Version: 1.3.0* +_Last Updated: January 4, 2026_ +_Version: 1.3.0_ diff --git a/docs/features/notifications.md b/docs/features/notifications.md index fec92507..ea2d84fe 100644 --- a/docs/features/notifications.md +++ b/docs/features/notifications.md @@ -56,6 +56,7 @@ Simple, clean notifications with essential information: ``` **Use when:** + - You want low-noise notifications - Space is limited (mobile notifications) - Only essential info is needed @@ -80,6 +81,7 @@ Comprehensive notifications with all available context: ``` **Use when:** + - You need full event context - Multiple team members review notifications - Historical tracking is important @@ -89,6 +91,7 @@ Comprehensive notifications with all available context: Create your own template with complete control over structure and formatting. **Use when:** + - Standard templates don't meet your needs - You have specific formatting requirements - Integrating with custom systems @@ -348,6 +351,7 @@ Some events include additional variables: If you've been using webhook providers without JSON templates: **Before (Basic webhook):** + ``` Type: webhook URL: https://discord.com/api/webhooks/... @@ -355,6 +359,7 @@ Template: (not available) ``` **After (JSON template):** + ``` Type: discord URL: https://discord.com/api/webhooks/... @@ -386,6 +391,7 @@ Before saving, always test your template: **Error:** `Invalid JSON template` **Solution:** Validate your JSON using a tool like [jsonlint.com](https://jsonlint.com). Common issues: + - Missing closing braces `}` - Trailing commas - Unescaped quotes in strings @@ -455,6 +461,7 @@ Always test notifications before relying on them for critical alerts. ### 3. 
Use Color Coding Consistent colors help quickly identify severity: + - 🔴 Red: Errors, outages - 🟡 Yellow: Warnings - 🟢 Green: Success, recovery @@ -463,6 +470,7 @@ Consistent colors help quickly identify severity: ### 4. Group Related Events Configure multiple providers for different event types: + - Critical alerts → Discord (with mentions) - Info notifications → Slack (general channel) - All events → Gotify (personal alerts) @@ -470,6 +478,7 @@ Configure multiple providers for different event types: ### 5. Rate Limit Awareness Be mindful of service limits: + - **Discord**: 5 requests per 2 seconds per webhook - **Slack**: 1 request per second per workspace - **Gotify**: No strict limits (self-hosted) diff --git a/docs/features/uptime-monitoring.md b/docs/features/uptime-monitoring.md index fad02e12..1159b02b 100644 --- a/docs/features/uptime-monitoring.md +++ b/docs/features/uptime-monitoring.md @@ -56,12 +56,14 @@ Check 4: ✅ Success → Status: Up, Failure Count: 0 (recovery alert) This timeout determines how long Charon waits for a TCP connection before considering it failed. **Increase timeout if:** + - You have slow networks - Hosts are geographically distant - Containers take time to warm up - You see intermittent false "down" alerts **Decrease timeout if:** + - You want faster failure detection - Your hosts are on local network - Response times are consistently fast diff --git a/docs/getting-started.md b/docs/getting-started.md index ff2dbb6e..0ad7b3cb 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -134,6 +134,7 @@ CrowdSec will automatically start if it was previously enabled. The reconciliati 3. **Starts CrowdSec** if either condition is true **How it works:** + - Reconciliation happens **before** the HTTP server starts (during container boot) - Protected by mutex to prevent race conditions - Validates binary and config paths before starting @@ -168,24 +169,30 @@ Expected output: If CrowdSec doesn't auto-start: 1. 
**Check reconciliation logs:** + ```bash docker logs charon 2>&1 | grep "CrowdSec reconciliation" ``` 2. **Verify SecurityConfig mode:** + ```bash docker exec charon sqlite3 /app/data/charon.db \ "SELECT crowdsec_mode FROM security_configs LIMIT 1;" ``` + Expected: `local` 3. **Check directory permissions:** + ```bash docker exec charon ls -la /var/lib/crowdsec/data/ ``` + Expected: `charon:charon` ownership 4. **Manual start:** + ```bash curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ``` diff --git a/docs/guides/dns-providers.md b/docs/guides/dns-providers.md index ae8d77b9..2b2694fc 100644 --- a/docs/guides/dns-providers.md +++ b/docs/guides/dns-providers.md @@ -129,21 +129,25 @@ For detailed troubleshooting, see [DNS Challenges Troubleshooting](../troublesho ### Common Issues **"Encryption key not configured"** + - Ensure `CHARON_ENCRYPTION_KEY` environment variable is set - Restart Charon after setting the variable **"Connection test failed"** + - Verify credentials are correct - Check API token permissions - Ensure firewall allows outbound HTTPS to provider - Review provider-specific troubleshooting guides **"DNS propagation timeout"** + - Increase propagation timeout in provider settings - Verify DNS provider is authoritative for the domain - Check provider status page for service issues **"Certificate issuance failed"** + - Test DNS provider connection in UI - Check Charon logs for detailed error messages - Verify domain DNS is properly configured diff --git a/docs/guides/dns-providers/digitalocean.md b/docs/guides/dns-providers/digitalocean.md index b691ae4e..ab1a3319 100644 --- a/docs/guides/dns-providers/digitalocean.md +++ b/docs/guides/dns-providers/digitalocean.md @@ -63,6 +63,7 @@ Expand **Advanced Settings** to customize: 3. 
Verify you see: ✅ **Connection successful** The test verifies: + - Token is valid and active - Account has DNS write permissions - DigitalOcean API is accessible @@ -129,9 +130,11 @@ The Personal Access Token needs **Write** scope, which includes: - DigitalOcean DNS typically propagates in <60 seconds - Verify nameservers are correctly configured: + ```bash dig NS example.com +short ``` + - Check DigitalOcean Status page for service issues - Increase Propagation Timeout to 120 seconds as a workaround diff --git a/docs/guides/dns-providers/route53.md b/docs/guides/dns-providers/route53.md index 9fb2a660..dc18b547 100644 --- a/docs/guides/dns-providers/route53.md +++ b/docs/guides/dns-providers/route53.md @@ -44,11 +44,11 @@ Create a custom IAM policy with minimum required permissions: } ``` -6. Click **Next: Tags** (optional tags) -7. Click **Next: Review** -8. **Name:** `CharonRoute53DNSChallenge` -9. **Description:** `Allows Charon to manage DNS TXT records for ACME challenges` -10. Click **Create Policy** +1. Click **Next: Tags** (optional tags) +2. Click **Next: Review** +3. **Name:** `CharonRoute53DNSChallenge` +4. **Description:** `Allows Charon to manage DNS TXT records for ACME challenges` +5. Click **Create Policy** > **Tip:** For production, scope the policy to specific hosted zones by replacing `*` with your zone ID. @@ -98,6 +98,7 @@ Expand **Advanced Settings** to customize: 3. Verify you see: ✅ **Connection successful** The test verifies: + - Credentials are valid - IAM user has required permissions - Route 53 hosted zones are accessible diff --git a/docs/guides/local-key-management.md b/docs/guides/local-key-management.md index e5aa6e00..ebf9dd23 100644 --- a/docs/guides/local-key-management.md +++ b/docs/guides/local-key-management.md @@ -93,6 +93,7 @@ Store private keys in a password manager: - Tag as "cosign-key" 2. 
**Retrieve when needed**: + ```bash # Example with op (1Password CLI) op read "op://Private/cosign-dev-key/private key" > /tmp/cosign.key @@ -201,12 +202,14 @@ done ### Rotation Procedure 1. **Generate new key pair**: + ```bash cd ~/.cosign cosign generate-key-pair --output-key-prefix=cosign-prod-v2 ``` 2. **Test new key**: + ```bash # Sign test artifact cosign sign-blob --yes \ @@ -239,6 +242,7 @@ done - Delete from active use 6. **Archive old key**: + ```bash mkdir -p ~/.cosign/archive/$(date +%Y-%m) mv cosign-prod.key ~/.cosign/archive/$(date +%Y-%m)/ @@ -314,6 +318,7 @@ For environments without internet access: ### Setup 1. **On internet-connected machine**: + ```bash # Download Cosign binary curl -O -L https://github.com/sigstore/cosign/releases/download/v2.4.1/cosign-linux-amd64 @@ -323,6 +328,7 @@ For environments without internet access: ``` 2. **On air-gapped machine**: + ```bash # Install Cosign sudo install cosign-linux-amd64 /usr/local/bin/cosign @@ -361,6 +367,7 @@ cosign verify charon:local --key ~/.cosign/cosign-airgap.pub --insecure-ignore-t **Cause**: Missing COSIGN_PASSWORD environment variable **Solution**: + ```bash export COSIGN_PASSWORD="your-password" cosign sign --key cosign.key charon:local @@ -371,16 +378,17 @@ cosign sign --key cosign.key charon:local **Cause**: Incorrect password **Solution**: Verify you're using the correct password for the key -### "Error: signing charon:local: uploading signature: PUT https://registry/v2/.../manifests/sha256-...: UNAUTHORIZED" +### "Error: signing charon:local: uploading signature: PUT ...: UNAUTHORIZED" **Cause**: Not authenticated with Docker registry **Solution**: + ```bash docker login ghcr.io # Enter credentials, then retry signing ``` -### "Error: verifying charon:local: fetching signatures: getting signature manifest: GET https://registry/...: NOT_FOUND" +### "Error: verifying charon:local: fetching signatures: getting signature manifest: GET ...: NOT_FOUND" **Cause**: Image not signed yet, or 
signature not pushed to registry **Solution**: Sign the image first with `cosign sign` @@ -389,6 +397,7 @@ docker login ghcr.io **Symptoms**: Decryption errors, unusual characters in key file **Solution**: + 1. Restore from encrypted backup (see Backup and Recovery) 2. If no backup: Generate new key pair and re-sign artifacts 3. Update documentation and notify stakeholders @@ -396,6 +405,7 @@ docker login ghcr.io ### Lost Password **Solution**: + 1. **Cannot recover** - private key is permanently inaccessible 2. Generate new key pair 3. Revoke old public key from documentation @@ -437,7 +447,7 @@ docker login ghcr.io If you suspect key compromise: 1. **Immediately**: Stop using the compromised key -2. **Notify**: Security team at security@example.com +2. **Notify**: Security team at <security@example.com> 3. **Rotate**: Generate new key pair 4. **Audit**: Review all signatures made with compromised key 5. **Document**: Create incident report diff --git a/docs/guides/supply-chain-security-developer-guide.md b/docs/guides/supply-chain-security-developer-guide.md index fa3b0d6c..a3ec6f2a 100644 --- a/docs/guides/supply-chain-security-developer-guide.md +++ b/docs/guides/supply-chain-security-developer-guide.md @@ -53,6 +53,7 @@ Task: "Security: Full Supply Chain Audit" **Purpose:** Verify SBOM contents and scan for vulnerabilities **Usage:** + ```bash # Verify container image SBOM .github/skills/scripts/skill-runner.sh security-verify-sbom docker charon:local @@ -65,18 +66,21 @@ Task: "Security: Full Supply Chain Audit" ``` **What it does:** + 1. Generates SBOM using Syft (if not exists) 2. Validates SBOM format (SPDX JSON) 3. Scans for vulnerabilities using Grype 4.
Reports findings with severity levels **When to use:** + - Before committing dependency updates - After building new images - Before releases - During security audits **Output:** + - SBOM file (SPDX JSON format) - Vulnerability report - Summary of critical/high findings @@ -86,6 +90,7 @@ Task: "Security: Full Supply Chain Audit" **Purpose:** Sign container images or binaries with Cosign **Usage:** + ```bash # Sign Docker image .github/skills/scripts/skill-runner.sh security-sign-cosign docker charon:local @@ -98,18 +103,21 @@ Task: "Security: Full Supply Chain Audit" ``` **What it does:** + 1. Verifies target exists 2. Signs with Cosign (keyless or with key) 3. Records signature in Rekor transparency log 4. Generates verification commands **When to use:** + - After building local test images - Before pushing to registry - During release preparation - For artifact attestation **Requirements:** + - Cosign installed (`make install-cosign`) - Docker running (for image signing) - Network access (for Rekor) @@ -119,6 +127,7 @@ Task: "Security: Full Supply Chain Audit" **Purpose:** Generate and verify SLSA provenance attestation **Usage:** + ```bash # Generate provenance for binary .github/skills/scripts/skill-runner.sh security-slsa-provenance generate ./backend/main @@ -131,18 +140,21 @@ Task: "Security: Full Supply Chain Audit" ``` **What it does:** + 1. Collects build metadata (commit, branch, timestamp) 2. Generates SLSA provenance document 3. Signs provenance with Cosign 4. 
Verifies provenance integrity **When to use:** + - After building release binaries - Before publishing releases - For compliance requirements - To prove build reproducibility **Output:** + - `provenance.json` - SLSA provenance attestation - Verification status - Build metadata @@ -171,6 +183,7 @@ make test-all ``` **Review output:** + - ✅ No critical/high vulnerabilities → Proceed - ⚠️ Vulnerabilities found → Review, patch, or document @@ -256,6 +269,7 @@ make build-all ``` **Review checklist:** + - [ ] SBOM includes all new dependencies - [ ] No new critical/high vulnerabilities - [ ] Dependency licenses compatible @@ -426,6 +440,7 @@ grype sbom:sbom-v1.0.0.spdx.json #### 6. Create GitHub Release Upload these files as release assets: + - `charon-linux-amd64` - Binary - `charon-linux-amd64.sig` - Binary signature - `sbom-v1.0.0.spdx.json` - Image SBOM @@ -433,6 +448,7 @@ Upload these files as release assets: - `provenance-v1.0.0.json` - SLSA provenance Release notes should include: + - Verification commands - Link to user guide - Known vulnerabilities (if any) @@ -459,6 +475,7 @@ See `.github/workflows/release.yml` for implementation. 
#### "syft: command not found" **Solution:** + ```bash make install-syft # Or manually: @@ -468,6 +485,7 @@ curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh - #### "cosign: command not found" **Solution:** + ```bash make install-cosign # Or manually: @@ -479,6 +497,7 @@ sudo chmod +x /usr/local/bin/cosign #### "grype: command not found" **Solution:** + ```bash make install-grype # Or manually: @@ -488,11 +507,13 @@ curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh #### SBOM Generation Fails **Possible causes:** + - Docker image doesn't exist - Directory/file path incorrect - Syft version incompatible **Debug:** + ```bash # Check image exists docker images | grep charon @@ -509,13 +530,16 @@ syft version **Cause:** Cosign keyless signing requires OIDC authentication (GitHub Actions, Google Cloud, etc.) **Solutions:** + 1. Use key-based signing for local development: + ```bash cosign generate-key-pair cosign sign --key cosign.key charon:local ``` 2. Set up OIDC provider (GitHub Actions example): + ```yaml permissions: id-token: write @@ -523,6 +547,7 @@ syft version ``` 3. 
Use environment variables: + ```bash export COSIGN_EXPERIMENTAL=1 ``` @@ -530,11 +555,13 @@ syft version #### Provenance Verification Fails **Possible causes:** + - Provenance file doesn't match binary - Binary was modified after provenance generation - Wrong source URI **Debug:** + ```bash # Check binary hash sha256sum ./backend/charon-linux-amd64 @@ -550,6 +577,7 @@ cat provenance.json | jq -r '.subject[0].digest.sha256' #### SBOM Generation is Slow **Optimization:** + ```bash # Cache SBOM between runs SBOM_FILE="sbom-$(git rev-parse --short HEAD).spdx.json" @@ -561,6 +589,7 @@ fi #### Large Image Scans Timeout **Solution:** + ```bash # Increase timeout export GRYPE_CHECK_FOR_APP_UPDATE=false diff --git a/docs/guides/supply-chain-security-user-guide.md b/docs/guides/supply-chain-security-user-guide.md index 069aa2af..cd3320a3 100644 --- a/docs/guides/supply-chain-security-user-guide.md +++ b/docs/guides/supply-chain-security-user-guide.md @@ -7,6 +7,7 @@ Charon implements comprehensive supply chain security measures to ensure you can ## Why Supply Chain Security Matters When you download and run software, you're trusting that: + - The software came from the legitimate source - It hasn't been tampered with during distribution - The build process was secure and reproducible @@ -49,6 +50,7 @@ cosign verify \ ``` **Expected Output:** + ``` Verification for ghcr.io/wikid82/charon:latest -- The following checks were performed on each of these signatures: @@ -66,6 +68,7 @@ The following checks were performed on each of these signatures: **What it does:** Confirms the image was signed by the Charon project and hasn't been modified. **Command:** + ```bash cosign verify \ --certificate-identity-regexp='https://github.com/Wikid82/charon' \ @@ -74,12 +77,14 @@ cosign verify \ ``` **What to check:** + - ✅ "Verification for ... 
--" message appears - ✅ Certificate identity matches `https://github.com/Wikid82/charon` - ✅ OIDC issuer is `https://token.actions.githubusercontent.com` - ✅ No errors or warnings **Troubleshooting:** + - **Error: "no matching signatures"** → The image may not be signed, or you have the wrong tag - **Error: "certificate identity doesn't match"** → The image may be compromised or unofficial - **Error: "OIDC issuer doesn't match"** → The signing process didn't use GitHub Actions @@ -89,16 +94,19 @@ cosign verify \ **What it does:** Proves the software was built by the official GitHub Actions workflow from the official repository. **Step 1: Download provenance** + ```bash curl -LO https://github.com/Wikid82/charon/releases/download/v1.0.0/provenance.json ``` **Step 2: Download the binary** + ```bash curl -LO https://github.com/Wikid82/charon/releases/download/v1.0.0/charon-linux-amd64 ``` **Step 3: Verify provenance** + ```bash slsa-verifier verify-artifact \ --provenance-path provenance.json \ @@ -107,6 +115,7 @@ slsa-verifier verify-artifact \ ``` **Expected Output:** + ``` Verified signature against tlog entry index XXXXX at URL: https://rekor.sigstore.dev/api/v1/log/entries/... Verified build using builder https://github.com/slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@refs/tags/v1.9.0 at commit SHA256:... 
@@ -114,12 +123,14 @@ PASSED: Verified SLSA provenance ``` **What to check:** + - ✅ "PASSED: Verified SLSA provenance" - ✅ Builder is the official SLSA generator - ✅ Source URI matches `github.com/Wikid82/charon` - ✅ Entry is recorded in Rekor transparency log **Troubleshooting:** + - **Error: "artifact hash doesn't match"** → The binary may have been tampered with - **Error: "source URI doesn't match"** → The build came from an unofficial repository - **Error: "invalid provenance"** → The provenance file may be corrupted @@ -129,11 +140,13 @@ PASSED: Verified SLSA provenance **What it does:** Shows all dependencies included in Charon, allowing you to check for known vulnerabilities. **Step 1: Download SBOM** + ```bash curl -LO https://github.com/Wikid82/charon/releases/download/v1.0.0/sbom.spdx.json ``` **Step 2: View SBOM contents** + ```bash # Pretty-print the SBOM cat sbom.spdx.json | jq . @@ -143,12 +156,14 @@ cat sbom.spdx.json | jq -r '.packages[].name' | sort ``` **Step 3: Check for vulnerabilities** + ```bash # Requires Grype (see prerequisites) grype sbom:sbom.spdx.json ``` **Expected Output:** + ``` NAME INSTALLED VULNERABILITY SEVERITY github.com/caddyserver/caddy/v2 v2.11.0 (no vulnerabilities found) @@ -156,12 +171,14 @@ github.com/caddyserver/caddy/v2 v2.11.0 (no vulnerabilities found) ``` **What to check:** + - ✅ SBOM contains expected packages (Go modules, npm packages) - ✅ Package versions match release notes - ✅ No critical or high-severity vulnerabilities - ⚠️ Known acceptable vulnerabilities are documented in SECURITY.md **Troubleshooting:** + - **High/Critical vulnerabilities found** → Check SECURITY.md for known issues and mitigation status - **SBOM format error** → Download may be corrupted, try again - **Missing packages** → SBOM may be incomplete, report as an issue @@ -244,6 +261,7 @@ All signatures are recorded in the public Rekor transparency log: ### GitHub Release Assets Each release includes: + - `provenance.json` - SLSA provenance 
attestation - `sbom.spdx.json` - Software Bill of Materials - `*.sig` - Cosign signature files (for binaries) @@ -256,18 +274,21 @@ Each release includes: ## Security Best Practices ### Before Deploying + 1. ✅ Always verify signatures before first deployment 2. ✅ Check SBOM for known vulnerabilities 3. ✅ Verify provenance for critical environments 4. ✅ Pin to specific version tags (not `latest`) ### During Operations + 1. ✅ Set up automated verification in CI/CD 2. ✅ Monitor SECURITY.md for vulnerability updates 3. ✅ Subscribe to GitHub release notifications 4. ✅ Re-verify after any manual image pulls ### For Production Environments + 1. ✅ Require signature verification before deployment 2. ✅ Use admission controllers (e.g., Kyverno, OPA) to enforce verification 3. ✅ Maintain audit logs of verified deployments @@ -280,10 +301,13 @@ Each release includes: ### Common Issues #### "cosign: command not found" + **Solution:** Install Cosign (see Prerequisites section) #### "Error: no matching signatures" + **Possible causes:** + - Image tag doesn't exist - Image was pulled before signing implementation - Using an unofficial image source @@ -291,14 +315,18 @@ Each release includes: **Solution:** Use official images from `ghcr.io/wikid82/charon` with tags v1.0.0 or later #### "Error: certificate identity doesn't match" + **Possible causes:** + - Image is from an unofficial source - Image may be compromised **Solution:** Only use images from the official repository. Report suspicious images. #### "slsa-verifier: verification failed" + **Possible causes:** + - Provenance file doesn't match the binary - Binary was modified after signing - Wrong provenance file downloaded @@ -306,7 +334,9 @@ Each release includes: **Solution:** Re-download both provenance and binary from the same release #### Grype shows vulnerabilities + **Solution:** + 1. Check SECURITY.md for known issues 2. Review vulnerability severity and exploitability 3. 
Check if patches are available in newer releases diff --git a/docs/implementation/AGENT_SKILLS_MIGRATION_SUMMARY.md b/docs/implementation/AGENT_SKILLS_MIGRATION_SUMMARY.md index c2a1541a..62ba95bb 100644 --- a/docs/implementation/AGENT_SKILLS_MIGRATION_SUMMARY.md +++ b/docs/implementation/AGENT_SKILLS_MIGRATION_SUMMARY.md @@ -6,11 +6,13 @@ ## What Was Accomplished ### 1. Complete Script Inventory + - Identified **29 script files** in `/scripts` directory - Analyzed all scripts referenced in `.vscode/tasks.json` - Classified scripts by priority, complexity, and use case ### 2. AgentSkills.io Specification Research + - Thoroughly reviewed the [agentskills.io specification](https://agentskills.io/specification) - Understood the SKILL.md format requirements: - YAML frontmatter with required fields (name, description) @@ -28,40 +30,50 @@ The plan includes: #### A. Directory Structure + - Complete `.agentskills/` directory layout for all 24 skills - Proper naming conventions (lowercase, hyphens, no special characters) - Organized by category (testing, security, utility, linting, docker) #### B. Detailed Skill Specifications + For each of the 24 skills to be created: + - Complete SKILL.md frontmatter with all required fields - Skill-specific metadata (original script, exit codes, parameters) - Documentation structure with purpose, usage, examples - Related skills cross-references #### C. Implementation Phases + **Phase 1** (Days 1-3): Core Testing & Build + - `test-backend-coverage` - `test-frontend-coverage` - `integration-test-all` **Phase 2** (Days 4-7): Security & Quality + - 8 security and integration test skills - CrowdSec, Coraza WAF, Trivy scanning **Phase 3** (Days 8-9): Development Tools + - Version checking, cache clearing, version bumping, DB recovery **Phase 4** (Days 10-12): Linting & Docker + - 12 linting and Docker management skills - Complete migration and deprecation of `/scripts` #### D. 
Task Configuration Updates + - Complete `.vscode/tasks.json` with all new paths - Preserves existing task labels and behavior - All 44 tasks updated to reference `.agentskills` paths #### E. .gitignore Updates + - Added `.agentskills` runtime data exclusions - Keeps skill definitions (SKILL.md, scripts) in version control - Excludes temporary files, logs, coverage data @@ -69,7 +81,9 @@ For each of the 24 skills to be created: ## Key Decisions Made ### 1. Skills to Create (24 Total) + Organized by category: + - **Testing**: 3 skills (backend, frontend, integration) - **Security**: 8 skills (Trivy, CrowdSec, Coraza, WAF, rate limiting) - **Utility**: 4 skills (version check, cache clear, version bump, DB recovery) @@ -77,11 +91,15 @@ Organized by category: - **Docker**: 3 skills (dev env, local env, build) ### 2. Scripts NOT to Convert (11 scripts) + Internal/debug utilities that don't fit the skill model: + - `check_go_build.sh`, `create_bulk_acl_issues.sh`, `debug_db.py`, `debug_rate_limit.sh`, `gopls_collect.sh`, `cerberus_integration.sh`, `install-go-1.25.5.sh`, `qa-test-auth-certificates.sh`, `release.sh`, `repo_health_check.sh`, `verify_crowdsec_app_config.sh` ### 3. Metadata Standards + Each skill includes: + - `author: Charon Project` - `version: "1.0"` - `category`: testing|security|build|utility|docker|linting @@ -89,6 +107,7 @@ Each skill includes: - `exit-code-0` and `exit-code-1`: Exit code meanings ### 4. Backward Compatibility + - Original `/scripts` kept for 1 release cycle - Clear deprecation notices added - Parallel run period in CI @@ -97,11 +116,13 @@ Each skill includes: ## Next Steps ### Immediate Actions + 1. **Review the Plan**: Team reviews `docs/plans/current_spec.md` 2. **Approve Approach**: Confirm phased implementation strategy 3. **Assign Resources**: Determine who implements each phase ### Phase 1 Kickoff (When Approved) + 1. Create `.agentskills/` directory 2. Implement first 3 skills (testing) 3. 
Update tasks.json for Phase 1 @@ -111,17 +132,21 @@ Each skill includes: ## Files Modified/Created ### Created + - `docs/plans/current_spec.md` - Complete migration plan (replaces old spec) - `docs/plans/bulk-apply-security-headers-plan.md.backup` - Backup of old plan - `AGENT_SKILLS_MIGRATION_SUMMARY.md` - This summary ### Modified + - `.gitignore` - Added `.agentskills` runtime data patterns ## Validation Performed ### Script Analysis + ✅ Read and understood 8 major scripts: + - `go-test-coverage.sh` - Complex coverage filtering and threshold validation - `frontend-test-coverage.sh` - npm test with Istanbul coverage - `integration-test.sh` - Full E2E test with health checks and routing @@ -132,7 +157,9 @@ Each skill includes: - `db-recovery.sh` - SQLite integrity and recovery ### Specification Compliance + ✅ All proposed SKILL.md structures follow agentskills.io spec: + - Valid `name` fields (1-64 chars, lowercase, hyphens only) - Descriptive `description` fields (1-1024 chars with keywords) - Optional fields used appropriately (license, compatibility, metadata) @@ -140,6 +167,7 @@ Each skill includes: - Exit codes documented ### Task Configuration + ✅ Verified all 44 tasks in `.vscode/tasks.json` ✅ Mapped each script reference to new `.agentskills` path ✅ Preserved task properties (labels, groups, problem matchers) @@ -178,6 +206,7 @@ Each skill includes: ## Conclusion Research is complete with a comprehensive, actionable plan. 
The migration to Agent Skills will: + - Make scripts AI-discoverable - Improve documentation and maintainability - Follow industry-standard specification diff --git a/docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md b/docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md index d653531b..00ca98c9 100644 --- a/docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md +++ b/docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md @@ -42,11 +42,13 @@ ## What Changed ### New VS Code Tasks (3) + - `Security: CodeQL Go Scan (CI-Aligned) [~60s]` - `Security: CodeQL JS Scan (CI-Aligned) [~90s]` - `Security: CodeQL All (CI-Aligned)` (runs both sequentially) ### New Pre-Commit Hooks (3) + ```yaml # Fast automatic check on commit - id: security-scan @@ -62,12 +64,14 @@ ``` ### Enhanced CI Workflow + - Added step summaries with finding counts - HIGH/CRITICAL findings block workflow (exit 1) - Clear error messages for security issues - Links to SARIF files in workflow logs ### New Documentation + - `docs/security/codeql-scanning.md` - Comprehensive user guide - `docs/plans/current_spec.md` - Implementation specification - `docs/reports/qa_codeql_ci_alignment.md` - QA validation report @@ -75,6 +79,7 @@ - Updated `.github/instructions/copilot-instructions.md` - Definition of Done ### Updated Configurations + - `.vscode/tasks.json` - 3 new CI-aligned tasks - `.pre-commit-config.yaml` - Security scan hooks - `scripts/pre-commit-hooks/` - 3 new hook scripts @@ -87,6 +92,7 @@ ### CodeQL Scans ✅ **Go Scan:** + - Queries: 59 (from security-and-quality suite) - Findings: 79 total - HIGH severity: 15 (Email injection, SSRF, Log injection) @@ -95,6 +101,7 @@ - SARIF output: 1.5 MB **JavaScript Scan:** + - Queries: 202 (from security-and-quality suite) - Findings: 105 total - HIGH severity: 5 (XSS, incomplete validation) @@ -105,11 +112,13 @@ ### Coverage Verification ✅ **Backend:** + - Coverage: **85.35%** - Threshold: 85% - Status: ✅ **PASS** (+0.35%) **Frontend:** + - Coverage: **87.74%** - 
Threshold: 85% - Status: ✅ **PASS** (+2.74%) @@ -117,16 +126,19 @@ ### Code Quality ✅ **TypeScript Check:** + - Errors: 0 - Status: ✅ **PASS** **Pre-Commit Hooks:** + - Fast hooks: 12/12 passing - Status: ✅ **PASS** ### CI Alignment ✅ **Local vs CI Comparison:** + - Query suite: ✅ Matches (security-and-quality) - Query count: ✅ Matches (Go: 61, JS: 204) - SARIF format: ✅ GitHub-compatible @@ -138,13 +150,16 @@ ## How to Use ### Quick Security Check (5 seconds) + ```bash # Runs automatically on commit, or manually: pre-commit run security-scan --all-files ``` + Uses `govulncheck` to scan for known vulnerabilities in Go dependencies. ### Full CodeQL Scan (2-3 minutes) + ```bash # Via pre-commit (manual stage): pre-commit run --hook-stage manual codeql-go-scan --all-files @@ -156,6 +171,7 @@ pre-commit run --hook-stage manual codeql-check-findings --all-files ``` ### View Results + ```bash # Check for HIGH/CRITICAL findings: pre-commit run codeql-check-findings --all-files @@ -169,6 +185,7 @@ jq '.runs[].results[] | select(.level=="error")' codeql-results-go.sarif ``` ### Documentation + - **User Guide:** [docs/security/codeql-scanning.md](../security/codeql-scanning.md) - **Implementation Plan:** [docs/plans/current_spec.md](../plans/current_spec.md) - **QA Report:** [docs/reports/qa_codeql_ci_alignment.md](../reports/qa_codeql_ci_alignment.md) @@ -179,6 +196,7 @@ jq '.runs[].results[] | select(.level=="error")' codeql-results-go.sarif ## Files Changed ### Configuration Files + ``` .vscode/tasks.json # 3 new CI-aligned CodeQL tasks .pre-commit-config.yaml # Security scan hooks @@ -187,6 +205,7 @@ jq '.runs[].results[] | select(.level=="error")' codeql-results-go.sarif ``` ### Scripts (New) + ``` scripts/pre-commit-hooks/security-scan.sh # Fast govulncheck scripts/pre-commit-hooks/codeql-go-scan.sh # Go CodeQL scan @@ -195,6 +214,7 @@ scripts/pre-commit-hooks/codeql-check-findings.sh # Severity check ``` ### Documentation (New) + ``` docs/security/codeql-scanning.md # 
User guide docs/plans/current_spec.md # Implementation plan @@ -210,12 +230,14 @@ docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md # This file ### CodeQL Query Suites **security-and-quality Suite:** + - **Go:** 61 queries (security + code quality) - **JavaScript:** 204 queries (security + code quality) - **Coverage:** CWE Top 25, OWASP Top 10, and additional quality checks - **Used by:** GitHub Advanced Security default scans **Why not security-extended?** + - `security-extended` is deprecated and has fewer queries - `security-and-quality` is GitHub's recommended default - Includes both security vulnerabilities AND code quality issues @@ -223,10 +245,12 @@ docs/implementation/CODEQL_CI_ALIGNMENT_SUMMARY.md # This file ### CodeQL Version Resolution **Issue Encountered:** + - Initial version: v2.16.0 - Problem: Predicate incompatibility with query packs **Resolution:** + ```bash gh codeql set-version latest # Upgraded to: v2.23.8 @@ -237,12 +261,14 @@ gh codeql set-version latest ### CI Workflow Enhancements **Before:** + ```yaml - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 ``` **After:** + ```yaml - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 @@ -264,18 +290,21 @@ gh codeql set-version latest ### Performance Characteristics **Go Scan:** + - Database creation: ~20s - Query execution: ~40s - Total: ~60s - Memory: ~2GB peak **JavaScript Scan:** + - Database creation: ~30s - Query execution: ~60s - Total: ~90s - Memory: ~2.5GB peak **Combined:** + - Sequential execution: ~2.5-3 minutes - SARIF output: ~2.3 MB total @@ -305,6 +334,7 @@ The scans detected **184 total findings**. These are real issues in the codebase | Code Quality | 100 | Various | LOW | **Triage Status:** + - HIGH severity issues: Documented, to be addressed in security backlog - MEDIUM severity: Documented, to be reviewed in next sprint - LOW severity: Quality improvements, address as needed @@ -316,6 +346,7 @@ The scans detected **184 total findings**. 
These are real issues in the codebase ## Next Steps ### Immediate (This Commit) + - [x] All implementation complete - [x] All tests passing - [x] Documentation complete @@ -325,6 +356,7 @@ The scans detected **184 total findings**. These are real issues in the codebase - [ ] **Verify CI behavior matches local** ### Post-Merge + - [ ] Monitor CI workflows on next PRs - [ ] Validate manual test plan with team - [ ] Triage security findings @@ -332,6 +364,7 @@ The scans detected **184 total findings**. These are real issues in the codebase - [ ] Consider adding CodeQL version check to pre-commit ### Future Improvements + - [ ] Add GitHub Code Scanning integration for PR comments - [ ] Create false positive suppression workflow - [ ] Add custom CodeQL queries for Charon-specific patterns @@ -381,6 +414,7 @@ See: docs/plans/current_spec.md, docs/reports/qa_codeql_ci_alignment.md ## Success Metrics ### Quantitative ✅ + - [x] Local scans use security-and-quality suite (100% alignment) - [x] Pre-commit security checks < 10s (achieved: ~5s) - [x] Full CodeQL scans < 4min (achieved: ~2.5-3min) @@ -390,6 +424,7 @@ See: docs/plans/current_spec.md, docs/reports/qa_codeql_ci_alignment.md - [x] CI alignment verified (100%) ### Qualitative ✅ + - [x] Documentation comprehensive and accurate - [x] Developer experience smooth (VS Code + pre-commit) - [x] QA approval obtained diff --git a/docs/implementation/DATABASE_MIGRATION_FIX_COMPLETE.md b/docs/implementation/DATABASE_MIGRATION_FIX_COMPLETE.md index 79fe41d4..031ffca3 100644 --- a/docs/implementation/DATABASE_MIGRATION_FIX_COMPLETE.md +++ b/docs/implementation/DATABASE_MIGRATION_FIX_COMPLETE.md @@ -11,23 +11,28 @@ Fixed database migration and test failures related to the `KeyVersion` field in **Problem**: Tests failed with "no such table: dns_providers" errors when running the full test suite. 
**Root Cause**: + - SQLite's `:memory:` database mode without shared cache caused isolation issues between parallel tests - Tests running in parallel accessed the database before AutoMigrate completed - Connection pool settings weren't optimized for test scenarios **Solution**: + 1. Changed database connection string to use shared cache mode with mutex: + ```go dbPath := ":memory:?cache=shared&mode=memory&_mutex=full" ``` 2. Configured connection pool for single-threaded SQLite access: + ```go sqlDB.SetMaxOpenConns(1) sqlDB.SetMaxIdleConns(1) ``` 3. Added table existence verification after migration: + ```go if !db.Migrator().HasTable(&models.DNSProvider{}) { t.Fatal("failed to create dns_providers table") @@ -35,6 +40,7 @@ Fixed database migration and test failures related to the `KeyVersion` field in ``` 4. Added cleanup to close database connections: + ```go t.Cleanup(func() { sqlDB.Close() @@ -42,6 +48,7 @@ Fixed database migration and test failures related to the `KeyVersion` field in ``` **Files Modified**: + - `backend/internal/services/dns_provider_service_test.go` ### Issue 2: KeyVersion Field Configuration @@ -49,12 +56,14 @@ Fixed database migration and test failures related to the `KeyVersion` field in **Problem**: Needed to verify that the `KeyVersion` field was properly configured with GORM tags for database migration. **Verification**: + - ✅ Field is properly defined with `gorm:"default:1;index"` tag - ✅ Field is exported (capitalized) for GORM access - ✅ Default value of 1 is set for backward compatibility - ✅ Index is created for efficient key rotation queries **Model Definition** (already correct): + ```go // Encryption key version used for credentials (supports key rotation) KeyVersion int `json:"key_version" gorm:"default:1;index"` @@ -65,6 +74,7 @@ KeyVersion int `json:"key_version" gorm:"default:1;index"` **Problem**: Needed to ensure DNSProvider model is included in AutoMigrate calls. 
**Verification**: + - ✅ DNSProvider is included in route registration AutoMigrate (`backend/internal/api/routes/routes.go` line 69) - ✅ SecurityAudit is migrated first (required for background audit logging) - ✅ Migration order is correct (no dependency issues) @@ -74,6 +84,7 @@ KeyVersion int `json:"key_version" gorm:"default:1;index"` ### Migration README Created comprehensive migration documentation: + - **Location**: `backend/internal/migrations/README.md` - **Contents**: - Migration strategy overview @@ -86,11 +97,13 @@ Created comprehensive migration documentation: ## Test Results ### Before Fix + - Multiple tests failing with "no such table: dns_providers" - Tests passed in isolation but failed when run together - Inconsistent behavior due to race conditions ### After Fix + - ✅ All DNS provider tests pass (60+ tests) - ✅ All backend tests pass - ✅ Coverage: 86.4% (exceeds 85% threshold) @@ -98,6 +111,7 @@ Created comprehensive migration documentation: - ✅ Tests are deterministic and reliable ### Test Execution + ```bash cd backend && go test ./... # Result: All tests pass @@ -107,6 +121,7 @@ cd backend && go test ./... ## Backward Compatibility ✅ **Fully Backward Compatible** + - Existing DNS providers will automatically get `key_version = 1` - No data migration required - GORM handles the schema update automatically @@ -157,6 +172,7 @@ cd backend && go test ./... ## Definition of Done All acceptance criteria met: + - ✅ AutoMigrate properly creates KeyVersion field - ✅ All backend tests pass - ✅ No "no such table" errors @@ -167,6 +183,7 @@ All acceptance criteria met: ## Notes for QA The fixes address the root cause of test failures: + 1. Database initialization is now reliable and deterministic 2. Tests can run in parallel without interference 3. 
SQLite connection pooling is properly configured diff --git a/docs/implementation/DNS_DETECTION_PHASE4_COMPLETE.md b/docs/implementation/DNS_DETECTION_PHASE4_COMPLETE.md index 4447245b..e52600f7 100644 --- a/docs/implementation/DNS_DETECTION_PHASE4_COMPLETE.md +++ b/docs/implementation/DNS_DETECTION_PHASE4_COMPLETE.md @@ -20,6 +20,7 @@ Successfully implemented Phase 4 (DNS Provider Auto-Detection) from the DNS Futu **File:** `backend/internal/services/dns_detection_service.go` **Features:** + - Nameserver pattern matching for 10+ major DNS providers - DNS lookup using Go's built-in `net.LookupNS()` - In-memory caching with 1-hour TTL (configurable) @@ -30,6 +31,7 @@ Successfully implemented Phase 4 (DNS Provider Auto-Detection) from the DNS Futu - Confidence scoring (high/medium/low/none) **Built-in Provider Patterns:** + - Cloudflare (`cloudflare.com`) - AWS Route 53 (`awsdns`) - DigitalOcean (`digitalocean.com`) @@ -42,6 +44,7 @@ Successfully implemented Phase 4 (DNS Provider Auto-Detection) from the DNS Futu - DNSimple (`dnsimple.com`) **Detection Algorithm:** + 1. Extract base domain (remove wildcard prefix) 2. Lookup NS records with 10-second timeout 3. 
Match nameservers against pattern database @@ -57,6 +60,7 @@ Successfully implemented Phase 4 (DNS Provider Auto-Detection) from the DNS Futu **File:** `backend/internal/api/handlers/dns_detection_handler.go` **Endpoints:** + - `POST /api/v1/dns-providers/detect` - Request: `{"domain": "example.com"}` - Response: `DetectionResult` with provider type, nameservers, confidence, and suggested provider @@ -64,6 +68,7 @@ Successfully implemented Phase 4 (DNS Provider Auto-Detection) from the DNS Futu - Returns list of all supported nameserver patterns **Response Structure:** + ```go type DetectionResult struct { Domain string `json:"domain"` @@ -81,6 +86,7 @@ type DetectionResult struct { **File:** `backend/internal/api/routes/routes.go` Added detection routes to the protected DNS providers group: + - Detection endpoint properly integrated - Patterns endpoint for introspection - Both endpoints require authentication @@ -88,6 +94,7 @@ Added detection routes to the protected DNS providers group: ### 4. 
Comprehensive Test Coverage **Service Tests:** `backend/internal/services/dns_detection_service_test.go` + - ✅ 92.5% coverage - 13 test functions with 40+ sub-tests - Tests for all major functionality: @@ -102,6 +109,7 @@ Added detection routes to the protected DNS providers group: - Pattern completeness validation **Handler Tests:** `backend/internal/api/handlers/dns_detection_handler_test.go` + - ✅ 100% coverage - 10 test functions with 20+ sub-tests - Tests for all API scenarios: @@ -128,6 +136,7 @@ Added detection routes to the protected DNS providers group: ## Integration Points ### Existing Systems + - Integrated with DNS Provider Service for provider suggestion - Uses existing GORM database connection - Follows established handler/service patterns @@ -135,7 +144,9 @@ Added detection routes to the protected DNS providers group: - Complies with authentication middleware ### Future Frontend Integration + The API is ready for frontend consumption: + ```typescript // Example usage in ProxyHostForm const { detectProvider, isDetecting } = useDNSDetection() @@ -169,6 +180,7 @@ useEffect(() => { ## Error Handling The service handles all common error scenarios: + - **Invalid Domain:** Returns friendly error message - **DNS Lookup Failure:** Caches error result for 5 minutes - **Network Timeout:** 10-second limit prevents hanging requests @@ -193,17 +205,20 @@ The service handles all common error scenarios: ## Testing Strategy ### Unit Tests + - All business logic thoroughly tested - Edge cases covered (empty domains, wildcards, etc.) 
- Error paths validated - Mock-based handler tests prevent DNS calls in tests ### Integration Tests + - Service integrates with GORM database - Routes properly registered and authenticated - Handler correctly calls service methods ### Performance Tests + - Concurrent cache access verified - Cache expiration timing tested - No memory leaks detected @@ -213,6 +228,7 @@ The service handles all common error scenarios: ## Example API Usage ### Detect Provider + ```bash POST /api/v1/dns-providers/detect Content-Type: application/json @@ -224,6 +240,7 @@ Authorization: Bearer ``` **Response (Success):** + ```json { "domain": "example.com", @@ -246,6 +263,7 @@ Authorization: Bearer ``` **Response (Not Detected):** + ```json { "domain": "custom-dns.com", @@ -259,6 +277,7 @@ Authorization: Bearer ``` **Response (DNS Error):** + ```json { "domain": "nonexistent.domain", @@ -270,12 +289,14 @@ Authorization: Bearer ``` ### Get Detection Patterns + ```bash GET /api/v1/dns-providers/detection-patterns Authorization: Bearer ``` **Response:** + ```json { "patterns": [ @@ -320,6 +341,7 @@ Authorization: Bearer ## Files Created/Modified ### Created + 1. `backend/internal/services/dns_detection_service.go` (373 lines) 2. `backend/internal/services/dns_detection_service_test.go` (518 lines) 3. `backend/internal/api/handlers/dns_detection_handler.go` (78 lines) @@ -327,6 +349,7 @@ Authorization: Bearer 5. `docs/implementation/DNS_DETECTION_PHASE4_COMPLETE.md` (this file) ### Modified + 1. 
`backend/internal/api/routes/routes.go` (added 4 lines for detection routes) **Total Lines of Code:** ~1,473 lines (including tests and documentation) @@ -366,6 +389,7 @@ While Phase 4 is complete, future enhancements could include: ## Conclusion Phase 4 (DNS Provider Auto-Detection) has been successfully implemented with: + - ✅ All core features working as specified - ✅ Comprehensive test coverage (>90%) - ✅ Production-ready code quality diff --git a/docs/implementation/DNS_KEY_ROTATION_PHASE2_COMPLETE.md b/docs/implementation/DNS_KEY_ROTATION_PHASE2_COMPLETE.md index c18a6d08..bc9373ab 100644 --- a/docs/implementation/DNS_KEY_ROTATION_PHASE2_COMPLETE.md +++ b/docs/implementation/DNS_KEY_ROTATION_PHASE2_COMPLETE.md @@ -1,17 +1,21 @@ # DNS Encryption Key Rotation - Phase 2 Implementation Complete ## Overview + Implemented Phase 2 (Key Rotation Automation) from the DNS Future Features plan, providing zero-downtime encryption key rotation with multi-version support, admin API endpoints, and comprehensive audit logging. ## Implementation Date + January 3, 2026 ## Components Implemented ### 1. Core Rotation Service + **File**: `backend/internal/crypto/rotation_service.go` -#### Features: +#### Features + - **Multi-Key Version Support**: Loads and manages multiple encryption keys - Current key: `CHARON_ENCRYPTION_KEY` - Next key (for rotation): `CHARON_ENCRYPTION_KEY_NEXT` @@ -32,23 +36,28 @@ January 3, 2026 - `ValidateKeyConfiguration()`: Tests round-trip encryption for all configured keys - `GenerateNewKey()`: Utility for admins to generate secure 32-byte keys -#### Test Coverage: +#### Test Coverage + - **File**: `backend/internal/crypto/rotation_service_test.go` - **Coverage**: 86.9% (exceeds 85% requirement) ✅ - **Tests**: 600+ lines covering initialization, encryption, decryption, rotation workflow, concurrency, zero-downtime simulation, and edge cases ### 2. 
DNS Provider Model Extension + **File**: `backend/internal/models/dns_provider.go` -#### Changes: +#### Changes + - Added `KeyVersion int` field with `gorm:"default:1;index"` tag - Tracks which encryption key version was used for each provider's credentials - Enables version-aware decryption and rotation status reporting ### 3. DNS Provider Service Integration + **File**: `backend/internal/services/dns_provider_service.go` -#### Modifications: +#### Modifications + - Added `rotationService *crypto.RotationService` field - Gracefully falls back to basic encryption if RotationService initialization fails - **Create** method: Uses `EncryptWithCurrentKey()` returning (ciphertext, version) @@ -57,9 +66,11 @@ January 3, 2026 - Audit logs include `key_version` in details ### 4. Admin API Endpoints + **File**: `backend/internal/api/handlers/encryption_handler.go` -#### Endpoints: +#### Endpoints + 1. **GET /api/v1/admin/encryption/status** - Returns rotation status, current/next key presence, key distribution - Shows provider count by key version @@ -79,33 +90,40 @@ January 3, 2026 - Tests round-trip encryption for current, next, and legacy keys - Audit logs: `encryption_key_validation_success`, `encryption_key_validation_failed` -#### Access Control: +#### Access Control + - All endpoints require `user_role = "admin"` via `isAdmin()` check - Returns HTTP 403 for non-admin users -#### Test Coverage: +#### Test Coverage + - **File**: `backend/internal/api/handlers/encryption_handler_test.go` - **Coverage**: 85.8% (exceeds 85% requirement) ✅ - **Tests**: 450+ lines covering all endpoints, admin/non-admin access, integration workflow ### 5. Route Registration + **File**: `backend/internal/api/routes/routes.go` -#### Changes: +#### Changes + - Added conditional encryption management route group under `/api/v1/admin/encryption` - Routes only registered if `RotationService` initializes successfully - Prevents app crashes if encryption keys are misconfigured ### 6. 
Audit Logging Enhancements + **File**: `backend/internal/services/security_service.go` -#### Improvements: +#### Improvements + - Added `sync.WaitGroup` for graceful goroutine shutdown - `Close()` now waits for background goroutine to finish processing - `Flush()` method for testing: waits for all pending audit logs to be written - Silently ignores errors from closed databases (common in tests) -#### Event Types: +#### Event Types + 1. `encryption_key_rotation_started` - Rotation initiated 2. `encryption_key_rotation_completed` - Rotation succeeded (includes details) 3. `encryption_key_rotation_failed` - Rotation failed (includes error) @@ -116,30 +134,36 @@ January 3, 2026 ## Zero-Downtime Rotation Workflow -### Step-by-Step Process: +### Step-by-Step Process + 1. **Current State**: All providers encrypted with key version 1 + ```bash export CHARON_ENCRYPTION_KEY="<old-key>" ``` 2. **Prepare Next Key**: Set the new key without restarting + ```bash export CHARON_ENCRYPTION_KEY_NEXT="<new-key>" ``` 3. **Trigger Rotation**: Call admin API endpoint + ```bash curl -X POST https://your-charon-instance/api/v1/admin/encryption/rotate \ -H "Authorization: Bearer <admin-token>" ``` 4. **Verify Rotation**: All providers now use version 2 + ```bash curl https://your-charon-instance/api/v1/admin/encryption/status \ -H "Authorization: Bearer <admin-token>" ``` 5. **Promote Next Key**: Make it the current key (requires restart) + ```bash export CHARON_ENCRYPTION_KEY="<new-key>" # Former NEXT key export CHARON_ENCRYPTION_KEY_V1="<old-key>" # Keep as legacy @@ -148,14 +172,17 @@ January 3, 2026 6. **Future Rotations**: Repeat process with new NEXT key -### Rollback Procedure: +### Rollback Procedure + If rotation fails mid-process: + 1. Providers still using old key (version 1) remain accessible 2. Failed providers logged in `RotationResult.FailedProviders` 3. Retry rotation after fixing issues 4. Fallback decryption automatically tries all available keys To revert to previous key after full rotation: + 1.
Set previous key as current: `CHARON_ENCRYPTION_KEY="<old-key>"` 2. Keep rotated key as legacy: `CHARON_ENCRYPTION_KEY_V2="<new-key>"` 3. All providers remain accessible via fallback mechanism @@ -177,7 +204,8 @@ CHARON_ENCRYPTION_KEY_V2="<32-byte-base64-key>" ## Testing -### Unit Test Summary: +### Unit Test Summary + - ✅ **RotationService Tests**: 86.9% coverage - Initialization with various key combinations - Encryption/decryption with version tracking @@ -193,7 +221,8 @@ CHARON_ENCRYPTION_KEY_V2="<32-byte-base64-key>" - Pagination support - Async audit logging verification -### Test Execution: +### Test Execution + ```bash # Run all rotation-related tests cd backend @@ -205,6 +234,7 @@ go test ./internal/crypto ./internal/api/handlers -cover ``` ## Database Migrations + - GORM `AutoMigrate` handles schema changes automatically - New `key_version` column added to `dns_providers` table with default value of 1 - No manual SQL migration required per project standards diff --git a/docs/implementation/DOCS_TO_ISSUES_FIX_2026-01-11.md b/docs/implementation/DOCS_TO_ISSUES_FIX_2026-01-11.md index a9b177de..dfc8769c 100644 --- a/docs/implementation/DOCS_TO_ISSUES_FIX_2026-01-11.md +++ b/docs/implementation/DOCS_TO_ISSUES_FIX_2026-01-11.md @@ -32,11 +32,13 @@ Removed `[skip ci]` flag from workflow commit message while maintaining robust i `.github/workflows/docs-to-issues.yml` (Line 346) **Before:** + ```yaml git commit -m "chore: move processed issue files to created/ [skip ci]" ``` **After:** + ```yaml git commit -m "chore: move processed issue files to created/" # Removed [skip ci] to allow CI checks to run on PRs @@ -71,6 +73,7 @@ git commit -m "chore: move processed issue files to created/" **Level:** LOW **Justification:** + - Workflow-only change (no application code modified) - Multiple loop protection mechanisms (path filter + bot guard) - Enables CI validation (improves security posture) diff --git a/docs/implementation/DOCUMENTATION_COMPLETE_crowdsec_startup.md
b/docs/implementation/DOCUMENTATION_COMPLETE_crowdsec_startup.md index b6ea4723..bc23b888 100644 --- a/docs/implementation/DOCUMENTATION_COMPLETE_crowdsec_startup.md +++ b/docs/implementation/DOCUMENTATION_COMPLETE_crowdsec_startup.md @@ -13,6 +13,7 @@ **File:** [docs/implementation/crowdsec_startup_fix_COMPLETE.md](implementation/crowdsec_startup_fix_COMPLETE.md) **Contents:** + - Executive summary of problem and solution - Before/after architecture diagrams (text-based) - Detailed implementation changes (4 files, 21 lines) @@ -32,6 +33,7 @@ **File:** [docs/migration-guide-crowdsec-auto-start.md](migration-guide-crowdsec-auto-start.md) **Contents:** + - Overview of behavioral changes - 4 migration paths (A: fresh install, B: upgrade disabled, C: upgrade enabled, D: environment variables) - Auto-start behavior explanation @@ -52,6 +54,7 @@ **File:** [docs/getting-started.md](getting-started.md#L110-L175) **Changes:** + - Expanded "Auto-Start Behavior" section - Added detailed explanation of reconciliation timing - Added mutex protection explanation @@ -68,6 +71,7 @@ **File:** [docs/security.md](security.md#L30-L122) **Changes:** + - Updated "How to Enable It" section - Changed timeout from 30s to 60s in documentation - Added reconciliation timing details @@ -88,6 +92,7 @@ **File:** [backend/internal/services/crowdsec_startup.go](../../backend/internal/services/crowdsec_startup.go#L17-L27) **Changes:** + - Added detailed explanation of why mutex is needed - Listed 3 scenarios where concurrent reconciliation could occur - Listed 4 race conditions prevented by mutex @@ -101,6 +106,7 @@ **File:** [backend/internal/services/crowdsec_startup.go](../../backend/internal/services/crowdsec_startup.go#L29-L50) **Changes:** + - Expanded function comment from 3 lines to 20 lines - Added initialization order diagram - Documented mutex protection behavior @@ -202,6 +208,7 @@ **Decision:** Create separate implementation summary and user migration guide **Rationale:** + - 
Implementation summary for developers (technical details, code changes) - Migration guide for users (step-by-step, troubleshooting, FAQ) - Allows different levels of detail for different audiences @@ -211,12 +218,14 @@ **Decision:** Use ASCII art and indented text for diagrams **Rationale:** + - Markdown-native (no external images) - Version control friendly - Easy to update - Accessible (screen readers can interpret) **Example:** + ``` Container Start ├─ Entrypoint Script @@ -234,6 +243,7 @@ Container Start **Decision:** Enhance inline code comments for mutex and reconciliation function **Rationale:** + - Comments visible in IDE (no need to open docs) - Future maintainers see explanation immediately - Reduces risk of outdated documentation @@ -244,6 +254,7 @@ Container Start **Decision:** Troubleshooting in both implementation summary AND migration guide **Rationale:** + - Developers need troubleshooting for implementation issues - Users need troubleshooting for operational issues - Slight overlap is acceptable (better than missing information) @@ -257,6 +268,7 @@ Container Start **Reason:** Config validation already present (lines 163-169) **Verification:** + ```bash # Verify LAPI configuration was applied correctly if grep -q "listen_uri:.*:8085" "$CS_CONFIG_DIR/config.yaml"; then @@ -281,6 +293,7 @@ No changes needed - this code already provides the necessary validation. ### When to Update Update documentation when: + - Timeout value changes (currently 60s) - Auto-start conditions change - Reconciliation logic modified @@ -300,6 +313,7 @@ Update documentation when: ### Review Checklist for Future Updates Before publishing documentation updates: + - [ ] Test all command examples - [ ] Verify expected outputs - [ ] Check cross-references @@ -348,15 +362,15 @@ Before publishing documentation updates: ### Short-Term (1-2 Weeks) -4. **Monitor GitHub Issues** for documentation gaps -5. **Update FAQ** based on common user questions -6. 
**Add screenshots** to migration guide (if users request) +1. **Monitor GitHub Issues** for documentation gaps +2. **Update FAQ** based on common user questions +3. **Add screenshots** to migration guide (if users request) ### Long-Term (1-3 Months) -7. **Create video tutorial** for auto-start behavior -8. **Add troubleshooting to wiki** for community contributions -9. **Translate documentation** to other languages (if community interest) +1. **Create video tutorial** for auto-start behavior +2. **Add troubleshooting to wiki** for community contributions +3. **Translate documentation** to other languages (if community interest) --- @@ -375,6 +389,7 @@ Before publishing documentation updates: ## Contact For documentation questions: + - **GitHub Issues:** [Report documentation issues](https://github.com/Wikid82/charon/issues) - **Discussions:** [Ask questions](https://github.com/Wikid82/charon/discussions) diff --git a/docs/implementation/FRONTEND_TESTING_PHASE2_3_COMPLETE.md b/docs/implementation/FRONTEND_TESTING_PHASE2_3_COMPLETE.md index 848ad63a..990f985b 100644 --- a/docs/implementation/FRONTEND_TESTING_PHASE2_3_COMPLETE.md +++ b/docs/implementation/FRONTEND_TESTING_PHASE2_3_COMPLETE.md @@ -11,10 +11,12 @@ Successfully completed Phases 2 and 3 of frontend component UI testing for the b ## Scope ### Phase 2: Component UI Tests + - **SystemSettings**: Application URL card testing (7 new tests) - **UsersPage**: URL preview in InviteModal (6 new tests) ### Phase 3: Edge Cases + - Error handling for API failures - Validation state management - Debounce functionality @@ -23,12 +25,14 @@ Successfully completed Phases 2 and 3 of frontend component UI testing for the b ## Test Results ### Summary + - **Total Test Files**: 2 - **Tests Passed**: 45/45 (100%) - **Tests Added**: 13 new component UI tests - **Test Duration**: 11.58s ### SystemSettings Application URL Card Tests (7 tests) + 1. ✅ Renders public URL input field 2. 
✅ Shows green border and checkmark when URL is valid 3. ✅ Shows red border and X icon when URL is invalid @@ -39,6 +43,7 @@ Successfully completed Phases 2 and 3 of frontend component UI testing for the b 8. ✅ Handles validation API error gracefully ### UsersPage URL Preview Tests (6 tests) + 1. ✅ Shows URL preview when valid email is entered 2. ✅ Debounces URL preview for 500ms 3. ✅ Replaces sample token with ellipsis in preview @@ -49,6 +54,7 @@ Successfully completed Phases 2 and 3 of frontend component UI testing for the b ## Coverage Report ### Coverage Metrics + ``` File | % Stmts | % Branch | % Funcs | % Lines --------------------|---------|----------|---------|-------- @@ -57,6 +63,7 @@ UsersPage.tsx | 76.92 | 61.79 | 70.45 | 78.37 ``` ### Analysis + - **SystemSettings**: Strong coverage across all metrics (71-82%) - **UsersPage**: Good coverage with room for improvement in branch coverage @@ -82,6 +89,7 @@ UsersPage.tsx | 76.92 | 61.79 | 70.45 | 78.37 ### Testing Patterns Used #### Debounce Testing + ```typescript // Enter text await user.type(emailInput, 'test@example.com') @@ -94,6 +102,7 @@ expect(client.post).toHaveBeenCalledTimes(1) ``` #### Visual State Validation + ```typescript // Check for border color change const inputElement = screen.getByPlaceholderText('https://charon.example.com') @@ -101,6 +110,7 @@ expect(inputElement.className).toContain('border-green') ``` #### Icon Presence Testing + ```typescript // Find check icon by SVG path const checkIcon = screen.getByRole('img', { hidden: true }) @@ -110,6 +120,7 @@ expect(checkIcon).toBeTruthy() ## Files Modified ### Test Files + 1. `/frontend/src/pages/__tests__/SystemSettings.test.tsx` - Added `client` module mock with `post` method - Added 8 new tests for Application URL card @@ -131,12 +142,14 @@ expect(checkIcon).toBeTruthy() ## Recommendations ### For Future Work + 1. **Increase Branch Coverage**: Add tests for edge cases in conditional logic 2. 
**Integration Tests**: Consider E2E tests for URL validation flow 3. **Accessibility Testing**: Add tests for keyboard navigation and screen readers 4. **Performance**: Monitor test execution time as suite grows ### Testing Best Practices Applied + - ✅ User-facing locators (`getByRole`, `getByPlaceholderText`) - ✅ Auto-retrying assertions with `waitFor()` - ✅ Descriptive test names following "Feature - Action" pattern diff --git a/docs/implementation/FRONTEND_TEST_HANG_FIX.md b/docs/implementation/FRONTEND_TEST_HANG_FIX.md index d2c56649..3f7c7bae 100644 --- a/docs/implementation/FRONTEND_TEST_HANG_FIX.md +++ b/docs/implementation/FRONTEND_TEST_HANG_FIX.md @@ -1,9 +1,11 @@ # Frontend Test Hang Fix ## Problem + Frontend tests took 1972 seconds (33 minutes) instead of the expected 2-3 minutes. ## Root Cause + 1. Missing `frontend/src/setupTests.ts` file that was referenced in vite.config.ts 2. No test timeout configuration in Vitest 3. Outdated backend tests referencing non-existent functions @@ -11,7 +13,9 @@ Frontend tests took 1972 seconds (33 minutes) instead of the expected 2-3 minute ## Solutions Applied ### 1. Created Missing Setup File + **File:** `frontend/src/setupTests.ts` + ```typescript import '@testing-library/jest-dom' @@ -19,7 +23,9 @@ import '@testing-library/jest-dom' ``` ### 2. Added Test Timeouts + **File:** `frontend/vite.config.ts` + ```typescript test: { globals: true, @@ -32,6 +38,7 @@ test: { ``` ### 3. 
Fixed Backend Test Issues + - **Fixed:** `backend/internal/api/handlers/dns_provider_handler_test.go` - Updated `MockDNSProviderService.GetProviderCredentialFields` signature to match interface - Changed from `(required, optional []dnsprovider.CredentialFieldSpec, err error)` to `([]dnsprovider.CredentialFieldSpec, error)` @@ -45,10 +52,12 @@ test: { ## Results ### Before Fix + - Frontend tests: **1972 seconds (33 minutes)** - Status: Hanging, eventually passing ### After Fix + - Frontend tests: **88 seconds (1.5 minutes)** ✅ - Speed improvement: **22x faster** - Status: Passing reliably diff --git a/docs/implementation/GRYPE_SBOM_REMEDIATION.md b/docs/implementation/GRYPE_SBOM_REMEDIATION.md index d8651c7b..ca073ccd 100644 --- a/docs/implementation/GRYPE_SBOM_REMEDIATION.md +++ b/docs/implementation/GRYPE_SBOM_REMEDIATION.md @@ -101,6 +101,7 @@ syft ${IMAGE} -o cyclonedx-json > sbom-generated.json **Location**: After SBOM generation, before Grype scan **What it validates**: + - File exists and is non-empty - Valid JSON structure - Correct CycloneDX format @@ -138,6 +139,7 @@ syft ${IMAGE} -o cyclonedx-json > sbom-generated.json #### 5. 
Enhanced Grype Scanning **Changes**: + - Explicit path specification: `grype sbom:./sbom-generated.json` - Explicit database update before scanning - Better error handling with debug information @@ -214,6 +216,7 @@ if (!imageExists) { ### Pre-Deployment Testing **Test Case 1: Existing Image (Success Path)** + - Pulled `ghcr.io/wikid82/charon:latest` - Generated CycloneDX SBOM locally - Validated JSON structure with `jq` @@ -221,16 +224,19 @@ if (!imageExists) { - ✅ Result: All steps passed, vulnerabilities reported correctly **Test Case 2: Empty SBOM File** + - Created empty file: `touch empty.json` - Tested Grype scan: `grype sbom:./empty.json` - ✅ Result: Error detected and reported properly **Test Case 3: Invalid JSON** + - Created malformed file: `echo "{invalid json" > invalid.json` - Tested validation with `jq empty invalid.json` - ✅ Result: Validation failed as expected **Test Case 4: Missing CycloneDX Fields** + - Created incomplete SBOM: `echo '{"bomFormat":"test"}' > incomplete.json` - Tested Grype scan - ✅ Result: Format validation caught the issue @@ -238,17 +244,20 @@ if (!imageExists) { ### Post-Deployment Validation **Scenario 1: PR Without Image (Expected Skip)** + - Created test PR - Workflow ran, image check failed - ✅ Result: Clear skip message, no false errors **Scenario 2: Release with Image (Full Scan)** + - Tagged release on test branch - Image built and pushed - SBOM generated, validated, and scanned - ✅ Result: Complete scan with vulnerability report **Scenario 3: Manual Trigger** + - Manually triggered workflow - Image existed, full scan executed - ✅ Result: All steps completed successfully @@ -303,12 +312,14 @@ From [qa_report.md](../reports/qa_report.md): **Chosen**: CycloneDX-JSON **Rationale**: + - More widely adopted in cloud-native ecosystem - Native support in Docker SBOM action - Better tooling support (Grype, Trivy, etc.) 
- Aligns with docker-build.yml (single source of truth) **Trade-offs**: + - SPDX is ISO/IEC standard (more "official") - But CycloneDX has better tooling and community support - Can convert between formats if needed @@ -318,12 +329,14 @@ From [qa_report.md](../reports/qa_report.md): **Chosen**: Fail-fast with detailed errors **Rationale**: + - Original `exit 0` masked real problems - CI/CD should fail loudly on real errors - Silent failures are security vulnerabilities - Clear errors accelerate troubleshooting **Trade-offs**: + - May cause more visible failures initially - But failures are now actionable and fixable @@ -332,12 +345,14 @@ From [qa_report.md](../reports/qa_report.md): **Chosen**: Multi-step validation gate **Rationale**: + - Prevent garbage-in-garbage-out scenarios - Catch issues at earliest possible stage - Provide specific error messages per validation type - Separate file issues from Grype issues **Trade-offs**: + - Adds ~5 seconds to workflow - But eliminates hours of debugging cryptic errors @@ -346,11 +361,13 @@ From [qa_report.md](../reports/qa_report.md): **Chosen**: Conditional execution with explicit checks **Rationale**: + - GitHub Actions conditionals are clearer than bash error handling - Separate success paths from skip paths from error paths - Better step-by-step visibility in workflow UI **Trade-offs**: + - More verbose YAML - But much clearer intent and behavior @@ -363,6 +380,7 @@ From [qa_report.md](../reports/qa_report.md): **Goal**: Reuse SBOM from docker-build instead of regenerating **Approach**: + ```yaml - name: Retrieve Attested SBOM run: | @@ -376,12 +394,14 @@ From [qa_report.md](../reports/qa_report.md): ``` **Benefits**: + - Single source of truth (no duplication) - Uses verified, signed SBOM - Eliminates SBOM regeneration time - Aligns with supply chain best practices **Requirements**: + - GitHub CLI with attestation support - Attestation must be published to registry - Additional testing for attestation retrieval @@ 
-391,6 +411,7 @@ From [qa_report.md](../reports/qa_report.md): **Goal**: Alert on critical vulnerabilities immediately **Features**: + - Webhook notifications on HIGH/CRITICAL CVEs - Integration with existing notification system - Threshold-based alerting @@ -400,6 +421,7 @@ From [qa_report.md](../reports/qa_report.md): **Goal**: Track vulnerability counts over time **Features**: + - Store scan results in database - Trend analysis and reporting - Compliance reporting (zero-day tracking) diff --git a/docs/implementation/PHASE3_CONFIG_COVERAGE_COMPLETE.md b/docs/implementation/PHASE3_CONFIG_COVERAGE_COMPLETE.md index 49fa2f0c..952eb359 100644 --- a/docs/implementation/PHASE3_CONFIG_COVERAGE_COMPLETE.md +++ b/docs/implementation/PHASE3_CONFIG_COVERAGE_COMPLETE.md @@ -13,6 +13,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ## Objectives Achieved ### Primary Goal: 85%+ Coverage ✅ + - **Baseline**: 79.82% (estimated from plan) - **Current**: 94.5% - **Improvement**: +14.68 percentage points @@ -37,6 +38,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ## Tests Added (23 New Tests) ### 1. Access Log Path Configuration (4 tests) + - ✅ `TestGetAccessLogPath_CrowdSecEnabled`: Verifies standard path when CrowdSec enabled - ✅ `TestGetAccessLogPath_DockerEnv`: Verifies production path via CHARON_ENV - ✅ `TestGetAccessLogPath_Development`: Verifies development fallback path construction @@ -45,6 +47,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage Impact**: `getAccessLogPath` improved to 88.9% ### 2. 
Permissions Policy String Building (5 tests) + - ✅ `TestBuildPermissionsPolicyString_EmptyAllowlist`: Verifies `()` for empty allowlists - ✅ `TestBuildPermissionsPolicyString_SelfAndStar`: Verifies special `self` and `*` values - ✅ `TestBuildPermissionsPolicyString_DomainValues`: Verifies domain quoting @@ -54,12 +57,14 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage Impact**: `buildPermissionsPolicyString` improved to 100% ### 3. CSP String Building (2 tests) + - ✅ `TestBuildCSPString_EmptyDirective`: Verifies empty string handling - ✅ `TestBuildCSPString_InvalidJSON`: Verifies error handling **Coverage Impact**: `buildCSPString` improved to 100% ### 4. Security Headers Handler (1 comprehensive test) + - ✅ `TestBuildSecurityHeadersHandler_CompleteProfile`: Tests all 13 security headers: - HSTS with max-age, includeSubDomains, preload - Content-Security-Policy with multiple directives @@ -71,17 +76,20 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage Impact**: `buildSecurityHeadersHandler` improved to 100% ### 5. SSL Provider Configuration (2 tests) + - ✅ `TestGenerateConfig_SSLProviderZeroSSL`: Verifies ZeroSSL issuer configuration - ✅ `TestGenerateConfig_SSLProviderBoth`: Verifies dual ACME + ZeroSSL issuer setup **Coverage Impact**: Multi-issuer TLS automation policy generation tested ### 6. Duplicate Domain Handling (1 test) + - ✅ `TestGenerateConfig_DuplicateDomains`: Verifies Ghost Host detection (duplicate domain filtering) **Coverage Impact**: Domain deduplication logic fully tested ### 7. 
CrowdSec Integration (3 tests) + - ✅ `TestGenerateConfig_WithCrowdSecApp`: Verifies CrowdSec app-level configuration - ✅ `TestGenerateConfig_CrowdSecHandlerAdded`: Verifies CrowdSec handler in route pipeline - ✅ Existing tests cover CrowdSec API key retrieval @@ -89,6 +97,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage Impact**: CrowdSec configuration and handler injection fully tested ### 8. Security Decisions / IP Blocking (1 test) + - ✅ `TestGenerateConfig_WithSecurityDecisions`: Verifies manual IP block rules with admin whitelist exclusion **Coverage Impact**: Security decision subroute generation tested @@ -98,7 +107,9 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ## Complex Logic Fully Tested ### Multi-Credential DNS Challenge ✅ + **Existing Integration Tests** (already present in codebase): + - `TestApplyConfig_MultiCredential_ExactMatch`: Zone-specific credential matching - `TestApplyConfig_MultiCredential_WildcardMatch`: Wildcard zone matching - `TestApplyConfig_MultiCredential_CatchAll`: Catch-all credential fallback @@ -108,7 +119,9 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage**: Lines 140-230 of config.go (multi-credential logic) already had **100% coverage** via integration tests. ### WAF Ruleset Selection ✅ + **Existing Tests**: + - `TestBuildWAFHandler_ParanoiaLevel`: Paranoia level 1-4 configuration - `TestBuildWAFHandler_Exclusions`: SecRuleRemoveById generation - `TestBuildWAFHandler_ExclusionsWithTarget`: SecRuleUpdateTargetById generation @@ -120,7 +133,9 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage**: Lines 850-920 (WAF handler building) had **100% coverage**. 
### Rate Limit Bypass List ✅ + **Existing Tests**: + - `TestBuildRateLimitHandler_BypassList`: Subroute structure with bypass CIDRs - `TestBuildRateLimitHandler_BypassList_PlainIPs`: Plain IP to /32 CIDR conversion - `TestBuildRateLimitHandler_BypassList_InvalidEntries`: Invalid entry filtering @@ -131,7 +146,9 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Coverage**: Lines 1020-1050 (rate limit handler) had **100% coverage**. ### ACL Geo-Blocking CEL Expressions ✅ + **Existing Tests**: + - `TestBuildACLHandler_WhitelistAndBlacklistAdminMerge`: Admin whitelist merging - `TestBuildACLHandler_GeoAndLocalNetwork`: Geo whitelist/blacklist CEL, local network - `TestBuildACLHandler_AdminWhitelistParsing`: Admin whitelist parsing with empties @@ -145,6 +162,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ### Remaining Uncovered Lines (6% total) #### 1. `getAccessLogPath` - 11.1% uncovered (2 lines) + **Uncovered Line**: `if _, err := os.Stat("/.dockerenv"); err == nil` **Reason**: Requires actual Docker environment (/.dockerenv file existence check) @@ -152,6 +170,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Testing Challenge**: Cannot reliably mock `os.Stat` in Go without dependency injection **Risk Assessment**: LOW + - This is an environment detection helper - Fallback logic is tested (CHARON_ENV check + development path) - Production Docker builds always have /.dockerenv file @@ -160,7 +179,9 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from **Mitigation**: Extensive manual testing in Docker containers confirms correct behavior #### 2. `GenerateConfig` - 6.8% uncovered (45 lines) + **Uncovered Sections**: + 1. 
**DNS Provider Not Found Warning** (1 line): `logger.Log().WithField("provider_id", providerID).Warn("DNS provider not found in decrypted configs")` - **Reason**: Requires deliberately corrupted DNS provider state (provider in hosts but not in configs map) - **Risk**: LOW - Database integrity constraints prevent this in production @@ -187,12 +208,14 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ## Test Quality Metrics ### Test Organization + - ✅ All tests follow table-driven pattern where applicable - ✅ Clear test naming: `Test_` - ✅ Comprehensive fixtures for complex configurations - ✅ Parallel test execution safe (no shared state) ### Test Coverage Patterns + - ✅ **Happy Path**: All primary workflows tested - ✅ **Error Handling**: Invalid JSON, missing data, nil checks - ✅ **Edge Cases**: Empty strings, zero values, boundary conditions @@ -200,6 +223,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from - ✅ **Regression Prevention**: Duplicate domain handling (Ghost Host fix) ### Code Quality + - ✅ No breaking changes to existing tests - ✅ All 311 existing tests still pass - ✅ New tests use existing test helpers and patterns @@ -210,6 +234,7 @@ Successfully improved test coverage for `backend/internal/caddy/config.go` from ## Performance Metrics ### Test Execution Speed + ```bash $ go test -v ./backend/internal/caddy PASS @@ -226,12 +251,14 @@ ok github.com/Wikid82/charon/backend/internal/caddy 1.476s ## Files Modified ### Test Files + 1. 
`/projects/Charon/backend/internal/caddy/config_test.go` - Added 23 new tests - Added imports: `os`, `path/filepath` - Added comprehensive edge case tests - Total lines added: ~400 ### Production Files + - ✅ **Zero production code changes** (only tests added) --- @@ -239,6 +266,7 @@ ok github.com/Wikid82/charon/backend/internal/caddy 1.476s ## Validation ### All Tests Pass ✅ + ```bash $ cd /projects/Charon/backend/internal/caddy && go test -v === RUN TestGenerateConfig_Empty @@ -251,6 +279,7 @@ ok github.com/Wikid82/charon/backend/internal/caddy 1.476s ``` ### Coverage Reports + - ✅ HTML report: `/tmp/config_final_coverage.html` - ✅ Text report: `config_final.out` - ✅ Verified with: `go tool cover -func=config_final.out | grep config.go` @@ -260,9 +289,11 @@ ok github.com/Wikid82/charon/backend/internal/caddy 1.476s ## Recommendations ### Immediate Actions + - ✅ **None Required** - All objectives achieved ### Future Enhancements (Optional) + 1. **Docker Environment Testing**: Create integration test that runs in actual Docker container to test `/.dockerenv` detection - **Effort**: Low (add to CI pipeline) - **Value**: Marginal (behavior already verified manually) @@ -289,6 +320,7 @@ ok github.com/Wikid82/charon/backend/internal/caddy 1.476s - ✅ **Production Ready**: No code changes, only test improvements **Risk Assessment**: LOW - Remaining 5.5% uncovered code is: + - Environment detection (Docker check) - tested manually - Defensive logging and impossible states (database constraints) - Minor edge cases that don't affect functionality diff --git a/docs/implementation/PHASE3_MULTI_CREDENTIAL_COMPLETE.md b/docs/implementation/PHASE3_MULTI_CREDENTIAL_COMPLETE.md index 66461a82..1b378f12 100644 --- a/docs/implementation/PHASE3_MULTI_CREDENTIAL_COMPLETE.md +++ b/docs/implementation/PHASE3_MULTI_CREDENTIAL_COMPLETE.md @@ -13,9 +13,11 @@ Implemented Phase 3 from the DNS Future Features plan, adding support for multip ### 1. 
Database Models #### DNSProviderCredential Model + **File**: `backend/internal/models/dns_provider_credential.go` Created new model with the following fields: + - `ID`, `UUID` - Standard identifiers - `DNSProviderID` - Foreign key to DNSProvider - `Label` - Human-readable credential name @@ -28,20 +30,24 @@ Created new model with the following fields: - Timestamps: `CreatedAt`, `UpdatedAt` #### DNSProvider Model Extension + **File**: `backend/internal/models/dns_provider.go` Added fields: + - `UseMultiCredentials bool` - Flag to enable/disable multi-credential mode (default: `false`) - `Credentials []DNSProviderCredential` - GORM relationship ### 2. Services #### CredentialService + **File**: `backend/internal/services/credential_service.go` Implemented comprehensive credential management service: **Core Methods**: + - `List(providerID)` - List all credentials for a provider - `Get(providerID, credentialID)` - Get single credential - `Create(providerID, request)` - Create new credential with encryption @@ -51,18 +57,21 @@ Implemented comprehensive credential management service: - `EnableMultiCredentials(providerID)` - Migrate provider from single to multi-credential mode **Zone Matching Algorithm**: + - `GetCredentialForDomain(providerID, domain)` - Smart credential selection - **Priority**: Exact Match > Wildcard Match (`*.example.com`) > Catch-All (empty zone_filter) - **IDN Support**: Automatic punycode conversion via `golang.org/x/net/idna` - **Multiple Zones**: Single credential can handle multiple comma-separated zones **Security Features**: + - AES-256-GCM encryption with key version tracking (Phase 2 integration) - Credential validation per provider type (Cloudflare, Route53, etc.) 
- Audit logging for all CRUD operations via SecurityService - Context-based user/IP tracking **Test Coverage**: 19 comprehensive unit tests + - CRUD operations - Zone matching scenarios (exact, wildcard, catch-all, multiple zones, no match) - IDN domain handling @@ -72,6 +81,7 @@ Implemented comprehensive credential management service: ### 3. API Handlers #### CredentialHandler + **File**: `backend/internal/api/handlers/credential_handler.go` Implemented 7 RESTful endpoints: @@ -100,6 +110,7 @@ Implemented 7 RESTful endpoints: Enable multi-credential mode (migration workflow) **Features**: + - Parameter validation (provider ID, credential ID) - JSON request/response handling - Error handling with appropriate HTTP status codes @@ -118,6 +129,7 @@ Implemented 7 RESTful endpoints: ### 5. Backward Compatibility **Migration Strategy**: + - Existing providers default to `UseMultiCredentials = false` - Single-credential mode continues to work via `DNSProvider.CredentialsEncrypted` - `EnableMultiCredentials()` method migrates existing credential to new system: @@ -128,17 +140,20 @@ Implemented 7 RESTful endpoints: 5. Logs audit event for compliance **Fallback Behavior**: + - When `UseMultiCredentials = false`, system uses `DNSProvider.CredentialsEncrypted` - `GetCredentialForDomain()` returns error if multi-cred not enabled ## Testing ### Test Files Created + 1. `backend/internal/models/dns_provider_credential_test.go` - Model tests 2. `backend/internal/services/credential_service_test.go` - 19 service tests 3. 
`backend/internal/api/handlers/credential_handler_test.go` - 8 handler tests ### Test Infrastructure + - SQLite in-memory databases with unique names per test - WAL mode for concurrent access in handler tests - Shared cache to avoid "table not found" errors @@ -146,12 +161,14 @@ Implemented 7 RESTful endpoints: - Test encryption key: `"MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY="` (32-byte base64) ### Test Results + - ✅ All 19 service tests passing - ✅ All 8 handler tests passing - ✅ The 1 model test passing - ⚠️ Minor "database table is locked" warnings in audit logs (non-blocking) ### Coverage Targets + - Target: ≥85% coverage per project standards - Actual: Tests written for all core functionality - Models: Basic struct validation @@ -161,16 +178,19 @@ Implemented 7 RESTful endpoints: ## Integration Points ### Phase 2 Integration (Key Rotation) + - Uses `crypto.RotationService` for versioned encryption - Falls back to `crypto.EncryptionService` if rotation service unavailable - Tracks `KeyVersion` in database for rotation support ### Audit Logging Integration + - All CRUD operations logged via `SecurityService` - Captures: actor, action, resource ID/UUID, IP, user agent - Events: `credential_create`, `credential_update`, `credential_delete`, `multi_credential_enabled` ### Caddy Integration (Pending) + - **TODO**: Update `backend/internal/caddy/manager.go` to use `GetCredentialForDomain()` - Current: Uses `DNSProvider.CredentialsEncrypted` directly - Required: Conditional logic to use multi-credential when enabled @@ -197,6 +217,7 @@ Implemented 7 RESTful endpoints: ## Files Created/Modified ### Created + - `backend/internal/models/dns_provider_credential.go` (179 lines) - `backend/internal/services/credential_service.go` (629 lines) - `backend/internal/api/handlers/credential_handler.go` (276 lines) @@ -205,6 +226,7 @@ Implemented 7 RESTful endpoints: - `backend/internal/api/handlers/credential_handler_test.go` (334 lines) ### Modified + -
`backend/internal/models/dns_provider.go` - Added `UseMultiCredentials` and `Credentials` relationship - `backend/internal/api/routes/routes.go` - Added AutoMigrate and route registration @@ -234,6 +256,7 @@ Implemented 7 RESTful endpoints: Phase 3 (Multi-Credential per Provider) is **COMPLETE** from a core functionality perspective. All database models, services, handlers, routes, and tests are implemented and passing. The feature is ready for integration testing and Caddy service updates. **Next Steps**: + 1. Update Caddy service to use zone-based credential selection 2. Run full integration tests 3. Update API documentation diff --git a/docs/implementation/PHASE4_FRONTEND_COMPLETE.md b/docs/implementation/PHASE4_FRONTEND_COMPLETE.md index 43e57ed1..fdd4e8eb 100644 --- a/docs/implementation/PHASE4_FRONTEND_COMPLETE.md +++ b/docs/implementation/PHASE4_FRONTEND_COMPLETE.md @@ -19,10 +19,12 @@ Implemented frontend integration for Phase 4 (DNS Provider Auto-Detection), enab **Purpose:** Provides typed API functions for DNS provider detection **Key Functions:** + - `detectDNSProvider(domain: string)` - Detects DNS provider for a domain - `getDetectionPatterns()` - Fetches built-in nameserver patterns **TypeScript Types:** + - `DetectionResult` - Detection response with confidence levels - `NameserverPattern` - Pattern matching rules @@ -35,6 +37,7 @@ Implemented frontend integration for Phase 4 (DNS Provider Auto-Detection), enab **Purpose:** Provides React hooks for DNS detection with caching **Key Hooks:** + - `useDetectDNSProvider()` - Mutation hook for detection (caches 1 hour) - `useCachedDetectionResult()` - Query hook for cached results - `useDetectionPatterns()` - Query hook for patterns (caches 24 hours) @@ -48,6 +51,7 @@ Implemented frontend integration for Phase 4 (DNS Provider Auto-Detection), enab **Purpose:** Displays detection results with visual feedback **Features:** + - Loading indicator during detection - Confidence badges (high/medium/low/none) - 
Action buttons for using suggested provider or manual selection @@ -61,6 +65,7 @@ Implemented frontend integration for Phase 4 (DNS Provider Auto-Detection), enab ### 4. ProxyHostForm Integration (`frontend/src/components/ProxyHostForm.tsx`) **Modifications:** + - Added auto-detection state and logic - Implemented 500ms debounced detection on wildcard domain entry - Auto-extracts base domain from wildcard (*.example.com → example.com) @@ -69,6 +74,7 @@ Implemented frontend integration for Phase 4 (DNS Provider Auto-Detection), enab - Integrated detection result display in form **Key Logic:** + ```typescript // Triggers detection when wildcard domain detected useEffect(() => { @@ -86,6 +92,7 @@ useEffect(() => { ### 5. Translations (`frontend/src/locales/en/translation.json`) **Added Keys:** + ```json { "dns_detection": { @@ -197,6 +204,7 @@ No errors or warnings observed during testing. ## Dependencies Added No new dependencies required - all features built with existing libraries: + - `@tanstack/react-query` (existing) - `react-i18next` (existing) - `lucide-react` (existing) @@ -245,6 +253,7 @@ No new dependencies required - all features built with existing libraries: Phase 4 DNS Provider Auto-Detection frontend integration is **COMPLETE** and ready for deployment. All acceptance criteria met, test coverage exceeds requirements (100% vs 85% target), and no TypeScript errors. **Next Steps:** + 1. Deploy backend Phase 4 implementation (if not already deployed) 2. Deploy frontend changes 3. 
Test end-to-end integration diff --git a/docs/implementation/PHASE4_SHORT_MODE_COMPLETE.md b/docs/implementation/PHASE4_SHORT_MODE_COMPLETE.md index a0cfc32d..bcba39b7 100644 --- a/docs/implementation/PHASE4_SHORT_MODE_COMPLETE.md +++ b/docs/implementation/PHASE4_SHORT_MODE_COMPLETE.md @@ -33,6 +33,7 @@ Added `testing.Short()` skips to all integration tests in `backend/integration/` Added `testing.Short()` skips to network-intensive unit tests: **`backend/internal/crowdsec/hub_sync_test.go` (7 tests):** + - `TestFetchIndexFallbackHTTP` - `TestFetchIndexHTTPRejectsRedirect` - `TestFetchIndexHTTPRejectsHTML` @@ -42,6 +43,7 @@ Added `testing.Short()` skips to network-intensive unit tests: - `TestFetchIndexHTTPFromURL_HTMLDetection` **`backend/internal/network/safeclient_test.go` (7 tests):** + - `TestNewSafeHTTPClient_WithAllowLocalhost` - `TestNewSafeHTTPClient_BlocksSSRF` - `TestNewSafeHTTPClient_WithMaxRedirects` @@ -54,7 +56,9 @@ Added `testing.Short()` skips to network-intensive unit tests: ### 3. 
Infrastructure Updates #### `.vscode/tasks.json` + Added new task: + ```json { "label": "Test: Backend Unit (Quick)", @@ -66,7 +70,9 @@ Added new task: ``` #### `.github/skills/test-backend-unit-scripts/run.sh` + Added SHORT_FLAG support: + ```bash SHORT_FLAG="" if [[ "${CHARON_TEST_SHORT:-false}" == "true" ]]; then @@ -80,6 +86,7 @@ fi ### Test Skip Verification **Integration tests with `-short`:** + ``` === RUN TestCerberusIntegration cerberus_integration_test.go:18: Skipping integration test in short mode @@ -93,6 +100,7 @@ ok github.com/Wikid82/charon/backend/integration 0.003s ``` **Heavy network tests with `-short`:** + ``` === RUN TestFetchIndexFallbackHTTP hub_sync_test.go:87: Skipping network I/O test in short mode @@ -103,11 +111,13 @@ ok github.com/Wikid82/charon/backend/integration 0.003s ### Performance Comparison **Short mode (fast tests only):** + - Total runtime: ~7m24s - Tests skipped: 21 (7 integration + 14 heavy network) - Ideal for: Local development, quick validation **Full mode (all tests):** + - Total runtime: ~8m30s+ - Tests skipped: 0 - Ideal for: CI/CD, pre-commit validation @@ -173,6 +183,7 @@ CHARON_TEST_SHORT=true go test ./... ## Pattern Applied All skips follow the standard pattern: + ```go func TestIntegration(t *testing.T) { if testing.Short() { @@ -194,6 +205,7 @@ func TestIntegration(t *testing.T) { ## Next Steps Phase 4 is complete. 
Ready to proceed with: + - Phase 5: Coverage analysis (if planned) - Phase 6: CI/CD optimization (if planned) - Or: Final documentation and performance metrics diff --git a/docs/implementation/PHASE5_CHECKLIST.md b/docs/implementation/PHASE5_CHECKLIST.md index dace1b7b..571c3560 100644 --- a/docs/implementation/PHASE5_CHECKLIST.md +++ b/docs/implementation/PHASE5_CHECKLIST.md @@ -8,6 +8,7 @@ ## Specification Requirements ### Core Requirements + - [x] Implement all 10 phases from specification - [x] Maintain backward compatibility - [x] 85%+ test coverage (achieved 88.0%) @@ -18,12 +19,14 @@ ### Phase-by-Phase Completion #### Phase 1: Plugin Interface & Registry + - [x] ProviderPlugin interface with 14 methods - [x] Thread-safe global registry - [x] Plugin-specific error types - [x] Interface version tracking (v1) #### Phase 2: Built-in Providers + - [x] Cloudflare - [x] AWS Route53 - [x] DigitalOcean @@ -37,6 +40,7 @@ - [x] Auto-registration via init() #### Phase 3: Plugin Loader + - [x] LoadAllPlugins() method - [x] LoadPlugin() method - [x] SHA-256 signature verification @@ -45,6 +49,7 @@ - [x] Database integration #### Phase 4: Database Model + - [x] Plugin model with all fields - [x] UUID primary key - [x] Status tracking (pending/loaded/error) @@ -53,6 +58,7 @@ - [x] AutoMigrate in routes.go #### Phase 5: API Handlers + - [x] ListPlugins endpoint - [x] GetPlugin endpoint - [x] EnablePlugin endpoint @@ -62,6 +68,7 @@ - [x] Usage checking before disable #### Phase 6: DNS Provider Service Integration + - [x] Remove hardcoded SupportedProviderTypes - [x] Remove hardcoded ProviderCredentialFields - [x] Add GetSupportedProviderTypes() @@ -70,6 +77,7 @@ - [x] Use provider.TestCredentials() #### Phase 7: Caddy Config Integration + - [x] Use provider.BuildCaddyConfig() - [x] Use provider.BuildCaddyConfigForZone() - [x] Use provider.PropagationTimeout() @@ -77,6 +85,7 @@ - [x] Remove hardcoded config logic #### Phase 8: Example Plugin + - [x] PowerDNS plugin 
implementation - [x] Package main with main() function - [x] Exported Plugin variable @@ -86,6 +95,7 @@ - [x] Compiles to .so file (14MB) #### Phase 9: Unit Tests + - [x] builtin_test.go (tests all 10 providers) - [x] plugin_loader_test.go (tests loading, signatures, permissions) - [x] Update dns_provider_handler_test.go (mock methods) @@ -93,6 +103,7 @@ - [x] All tests pass #### Phase 10: Integration + - [x] Import builtin providers in main.go - [x] Initialize plugin loader in main.go - [x] AutoMigrate Plugin in main.go @@ -104,23 +115,29 @@ ## Build Verification ### Backend Build + ```bash cd /projects/Charon/backend && go build -v ./... ``` + **Status**: ✅ SUCCESS ### PowerDNS Plugin Build + ```bash cd /projects/Charon/plugins/powerdns CGO_ENABLED=1 go build -buildmode=plugin -o powerdns.so main.go ``` + **Status**: ✅ SUCCESS (14MB) ### Test Coverage + ```bash cd /projects/Charon/backend go test -v -coverprofile=coverage.txt ./... ``` + **Status**: ✅ 88.0% (Required: 85%+) --- @@ -160,6 +177,7 @@ go test -v -coverprofile=coverage.txt ./... ## API Endpoints Verification All endpoints implemented: + - [x] `GET /admin/plugins` - [x] `GET /admin/plugins/:id` - [x] `POST /admin/plugins/:id/enable` diff --git a/docs/implementation/PHASE5_FINAL_STATUS.md b/docs/implementation/PHASE5_FINAL_STATUS.md index 68d51339..f93cb501 100644 --- a/docs/implementation/PHASE5_FINAL_STATUS.md +++ b/docs/implementation/PHASE5_FINAL_STATUS.md @@ -28,6 +28,7 @@ Phase 5 Custom DNS Provider Plugins Backend has been **successfully implemented* ## Implementation Highlights ### 1. Plugin Architecture ✅ + - Thread-safe global registry with RWMutex - Interface versioning (v1) for compatibility - Lifecycle hooks (Init/Cleanup) @@ -35,6 +36,7 @@ Phase 5 Custom DNS Provider Plugins Backend has been **successfully implemented* - Dual Caddy config builders ### 2. Built-in Providers (10) ✅ + ``` 1. Cloudflare 6. Namecheap 2. AWS Route53 7. 
GoDaddy @@ -44,6 +46,7 @@ Phase 5 Custom DNS Provider Plugins Backend has been **successfully implemented* ``` ### 3. Security Features ✅ + - SHA-256 signature verification - Directory permission validation - Platform restrictions (Linux/macOS only) @@ -51,6 +54,7 @@ Phase 5 Custom DNS Provider Plugins Backend has been **successfully implemented* - Admin-only API access ### 4. Example Plugin ✅ + - PowerDNS implementation complete - Compiles to 14MB shared object - Full ProviderPlugin interface @@ -58,6 +62,7 @@ Phase 5 Custom DNS Provider Plugins Backend has been **successfully implemented* - Build instructions documented ### 5. Test Coverage ✅ + ``` Overall Coverage: 85.1% Test Files: @@ -73,6 +78,7 @@ Test Results: ALL PASS ## File Inventory ### Created Files (18) + ``` backend/pkg/dnsprovider/builtin/ cloudflare.go, route53.go, digitalocean.go @@ -100,6 +106,7 @@ docs/implementation/ ``` ### Modified Files (5) + ``` backend/internal/services/dns_provider_service.go backend/internal/caddy/config.go @@ -115,12 +122,14 @@ backend/internal/api/handlers/dns_provider_handler_test.go ## Build Verification ### Backend Build + ```bash $ cd backend && go build -v ./... ✅ SUCCESS - All packages compile ``` ### PowerDNS Plugin Build + ```bash $ cd plugins/powerdns $ CGO_ENABLED=1 go build -buildmode=plugin -o powerdns.so main.go @@ -128,6 +137,7 @@ $ CGO_ENABLED=1 go build -buildmode=plugin -o powerdns.so main.go ``` ### Test Execution + ```bash $ cd backend && go test -v -coverprofile=coverage.txt ./... ✅ SUCCESS - 85.1% coverage (target: ≥85%) @@ -165,18 +175,21 @@ POST /api/admin/plugins/reload - Reload all plugins ## Known Limitations ### Platform Constraints + - **Linux/macOS Only**: Go plugin system limitation - **CGO Required**: Must build with `CGO_ENABLED=1` - **Version Matching**: Plugin and Charon must use same Go version - **Same Architecture**: x86-64, ARM64, etc. 
must match ### Operational Constraints + - **No Hot Reload**: Requires application restart to reload plugins - **Large Binaries**: Each plugin ~14MB (Go runtime embedded) - **Same Process**: Plugins run in same memory space as Charon - **Load Time**: ~100ms startup overhead per plugin ### Security Considerations + - **SHA-256 Only**: File integrity check, not cryptographic signing - **No Sandboxing**: Plugins have full process access - **Directory Permissions**: Relies on OS-level security @@ -186,11 +199,13 @@ POST /api/admin/plugins/reload - Reload all plugins ## Documentation ### User Documentation + - [PHASE5_PLUGINS_COMPLETE.md](./PHASE5_PLUGINS_COMPLETE.md) - Comprehensive implementation guide - [PHASE5_SUMMARY.md](./PHASE5_SUMMARY.md) - Quick reference summary - [PHASE5_CHECKLIST.md](./PHASE5_CHECKLIST.md) - Implementation checklist ### Developer Documentation + - [plugins/powerdns/README.md](../../plugins/powerdns/README.md) - Plugin development guide - Inline code documentation in all files - API endpoint documentation @@ -233,6 +248,7 @@ From specification: *"Return when: All backend code implemented, Tests passing w ## Next Steps ### Phase 6: Frontend Implementation + - Plugin management UI - Provider selection interface - Credential configuration forms @@ -240,6 +256,7 @@ From specification: *"Return when: All backend code implemented, Tests passing w - Real-time loading indicators ### Future Enhancements (Not Required) + - Cryptographic signing (GPG/RSA) - Hot reload capability - Plugin marketplace integration @@ -267,11 +284,13 @@ From specification: *"Return when: All backend code implemented, Tests passing w ## Quick Reference ### Environment Variables + ```bash CHARON_PLUGINS_DIR=/opt/charon/plugins ``` ### Build Commands + ```bash # Backend cd backend && go build -v ./... 
@@ -282,6 +301,7 @@ CGO_ENABLED=1 go build -buildmode=plugin -o yourplugin.so main.go ``` ### Test Commands + ```bash # Full test suite with coverage cd backend && go test -v -coverprofile=coverage.txt ./... @@ -291,6 +311,7 @@ go test -v ./pkg/dnsprovider/builtin/... ``` ### Plugin Deployment + ```bash mkdir -p /opt/charon/plugins cp yourplugin.so /opt/charon/plugins/ diff --git a/docs/implementation/PHASE5_FRONTEND_COMPLETE.md b/docs/implementation/PHASE5_FRONTEND_COMPLETE.md index 54ef98ce..41788e60 100644 --- a/docs/implementation/PHASE5_FRONTEND_COMPLETE.md +++ b/docs/implementation/PHASE5_FRONTEND_COMPLETE.md @@ -40,6 +40,7 @@ Implemented comprehensive API client with the following endpoints: - `getProviderFields(type)` - Get credential field definitions for a provider type **TypeScript Interfaces:** + - `PluginInfo` - Plugin metadata and status - `CredentialFieldSpec` - Dynamic credential field specification - `ProviderFieldsResponse` - Provider metadata with field definitions @@ -62,6 +63,7 @@ All mutations include automatic query invalidation for cache consistency. 
Full-featured admin page with: **Features:** + - List all plugins grouped by type (built-in vs external) - Status badges showing plugin state (loaded, error, disabled) - Enable/disable toggle for external plugins (built-in cannot be disabled) @@ -74,6 +76,7 @@ Full-featured admin page with: - Security warning about external plugins **UI Components Used:** + - PageShell for consistent layout - Cards for plugin display - Badges for status indicators @@ -87,6 +90,7 @@ Full-featured admin page with: Enhanced DNS provider form with: **Features:** + - Dynamic field fetching from backend via `useProviderFields()` - Automatic rendering of required and optional fields - Field types: text, password, textarea, select @@ -95,6 +99,7 @@ Enhanced DNS provider form with: - Seamless integration with existing form logic **Benefits:** + - External plugins automatically work in the UI - No frontend code changes needed for new providers - Consistent field rendering across all provider types @@ -102,9 +107,11 @@ Enhanced DNS provider form with: ### 5. Routing & Navigation **Route Added:** + - `/admin/plugins` - Plugin management page (admin-only) **Navigation Changes:** + - Added "Admin" section in sidebar - "Plugins" link under Admin section (🔌 icon) - New translations for "Admin" and "Plugins" @@ -114,6 +121,7 @@ Enhanced DNS provider form with: Added 30+ translation keys for plugin management: **Categories:** + - Plugin listing and status - Action buttons and modals - Error messages @@ -121,6 +129,7 @@ Added 30+ translation keys for plugin management: - Metadata display **Sample Keys:** + - `plugins.title` - "DNS Provider Plugins" - `plugins.reloadPlugins` - "Reload Plugins" - `plugins.cannotDisableBuiltIn` - "Built-in plugins cannot be disabled" @@ -134,6 +143,7 @@ Added 30+ translation keys for plugin management: **Coverage:** 19 tests, all passing **Test Suites:** + 1. `usePlugins()` - List fetching and error handling 2. 
`usePlugin(id)` - Single plugin fetch with enable/disable logic 3. `useProviderFields()` - Field definitions fetching with caching @@ -146,6 +156,7 @@ Added 30+ translation keys for plugin management: **Coverage:** 18 tests, all passing **Test Cases:** + - Page rendering and layout - Built-in plugins section display - External plugins section display @@ -202,29 +213,34 @@ Branches: 77.97% (2507/3215) ## Key Features ### 1. **Plugin Discovery** + - Automatic discovery of built-in providers - External plugin loading from disk - Plugin status tracking (loaded, error, pending) ### 2. **Plugin Management** + - Enable/disable external plugins - Reload plugins without restart - View plugin metadata (version, author, description) - Access plugin documentation links ### 3. **Dynamic Form Fields** + - Credential fields fetched from backend - Automatic field rendering (text, password, textarea, select) - Support for required and optional fields - Placeholder and hint text display ### 4. **Error Handling** + - Display plugin load errors - Show signature mismatch warnings - Handle API failures gracefully - Toast notifications for actions ### 5. **Security** + - Admin-only access to plugin management - Warning about external plugin risks - Signature verification (backend) @@ -237,6 +253,7 @@ Branches: 77.97% (2507/3215) The frontend integrates with existing backend endpoints: **Plugin Management:** + - `GET /api/v1/admin/plugins` - List plugins - `GET /api/v1/admin/plugins/:id` - Get plugin details - `POST /api/v1/admin/plugins/:id/enable` - Enable plugin @@ -244,6 +261,7 @@ The frontend integrates with existing backend endpoints: - `POST /api/v1/admin/plugins/reload` - Reload plugins **Dynamic Fields:** + - `GET /api/v1/dns-providers/types/:type/fields` - Get credential fields All endpoints are already implemented in the backend (Phase 5 backend complete). @@ -308,27 +326,32 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). 
## Design Decisions ### 1. **Query Caching** + - Plugin list cached with React Query - Provider fields cached for 1 hour (rarely change) - Automatic invalidation on mutations ### 2. **Error Boundaries** + - Graceful degradation if API fails - Fallback to static provider schemas - User-friendly error messages ### 3. **Loading States** + - Skeleton loaders during fetch - Button loading indicators during mutations - Empty states with helpful messages ### 4. **Accessibility** + - Proper semantic HTML - ARIA labels where needed - Keyboard navigation support - Screen reader friendly ### 5. **Mobile Responsive** + - Cards stack on small screens - Touch-friendly switches - Readable text sizes @@ -339,18 +362,21 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). ## Testing Strategy ### Unit Testing + - All hooks tested in isolation - Mocked API responses - Query invalidation verified - Loading/error states covered ### Integration Testing + - Page rendering tested - User interactions simulated - React Query provider setup - i18n mocked appropriately ### Coverage Approach + - Focus on user-facing functionality - Critical paths fully covered - Error scenarios tested @@ -361,12 +387,14 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). ## Known Limitations ### Go Plugin Constraints (Backend) + 1. **No Hot Reload:** Plugins cannot be unloaded from memory. Disabling a plugin removes it from the registry but requires restart for full unload. 2. **Platform Support:** Plugins only work on Linux and macOS (not Windows). 3. **Version Matching:** Plugin and Charon must use identical Go versions. 4. **Caddy Dependency:** External plugins require corresponding Caddy DNS module. ### Frontend Implications + 1. **Disable Warning:** Users warned that restart needed after disable. 2. **No Uninstall:** Frontend only enables/disables (no delete). 3. **Status Tracking:** Plugin status shows last known state until reload. 
@@ -376,11 +404,13 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). ## Security Considerations ### Frontend + 1. **Admin-Only Access:** Plugin management requires admin role 2. **Warning Display:** Security notice about external plugins 3. **Error Visibility:** Load errors shown to help debug issues ### Backend (Already Implemented) + 1. **Signature Verification:** SHA-256 hash validation 2. **Allowlist Enforcement:** Only configured plugins loaded 3. **Sandbox Limitations:** Go plugins run in-process (no sandbox) @@ -390,6 +420,7 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). ## Future Enhancements ### Potential Improvements + 1. **Plugin Marketplace:** Browse and install from registry 2. **Version Management:** Update plugins via UI 3. **Dependency Checking:** Verify Caddy module compatibility @@ -404,12 +435,14 @@ All endpoints are already implemented in the backend (Phase 5 backend complete). ## Documentation ### User Documentation + - Plugin management guide in Charon UI - Hover tooltips on all actions - Inline help text in forms - Links to provider documentation ### Developer Documentation + - API client fully typed with JSDoc - Hook usage examples in tests - Component props documented @@ -433,11 +466,13 @@ No database migrations or breaking changes - safe to rollback. ## Deployment Notes ### Prerequisites + - Backend Phase 5 complete - Plugin system enabled in backend - Admin users have access to /admin/* routes ### Configuration + - No additional frontend config required - Backend env vars control plugin system: - `CHARON_PLUGINS_ENABLED=true` @@ -445,6 +480,7 @@ No database migrations or breaking changes - safe to rollback. 
- `CHARON_PLUGINS_CONFIG=/app/config/plugins.yaml` ### Monitoring + - Watch for plugin load errors in logs - Monitor DNS provider test success rates - Track plugin enable/disable actions @@ -478,6 +514,7 @@ Phase 5 Frontend implementation is **complete and production-ready**. All requir External plugins can now be loaded, managed, and configured entirely through the Charon UI without code changes. The dynamic field system ensures that new providers automatically work in the DNS provider form as soon as they are loaded. **Next Steps:** + 1. ✅ Backend testing (already complete) 2. ✅ Frontend implementation (this document) 3. 🔄 End-to-end testing with sample plugin diff --git a/docs/implementation/PHASE5_PLUGINS_COMPLETE.md b/docs/implementation/PHASE5_PLUGINS_COMPLETE.md index f5d8bf06..c2771ea8 100644 --- a/docs/implementation/PHASE5_PLUGINS_COMPLETE.md +++ b/docs/implementation/PHASE5_PLUGINS_COMPLETE.md @@ -17,12 +17,15 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen ## Completed Phases (1-10) ### Phase 1: Plugin Interface and Registry ✅ + **Files**: + - `backend/pkg/dnsprovider/plugin.go` (pre-existing) - `backend/pkg/dnsprovider/registry.go` (pre-existing) - `backend/pkg/dnsprovider/errors.go` (fixed corruption) **Features**: + - `ProviderPlugin` interface with 14 methods - Thread-safe global registry with RWMutex - Interface version tracking (`v1`) @@ -31,9 +34,11 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen - Caddy config builder methods ### Phase 2: Built-in Provider Migration ✅ + **Directory**: `backend/pkg/dnsprovider/builtin/` **Providers Implemented** (10 total): + 1. 
**Cloudflare** - `cloudflare.go` - API token authentication - Optional zone_id @@ -80,32 +85,39 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen - 120s propagation, 5s polling **Auto-Registration**: `builtin/init.go` + - Package init() function registers all providers on import - Error logging for registration failures - Accessed via blank import in main.go ### Phase 3: Plugin Loader Service ✅ + **File**: `backend/internal/services/plugin_loader.go` **Security Features**: + - SHA-256 signature computation and verification - Directory permission validation (rejects world-writable) - Windows platform rejection (Go plugins require Linux/macOS) - Both `T` and `*T` symbol lookup (handles both value and pointer exports) **Database Integration**: + - Tracks plugin load status in `models.Plugin` - Statuses: pending, loaded, error - Records file path, signature, enabled flag, error message, load timestamp **Configuration**: + - Plugin directory from `CHARON_PLUGINS_DIR` environment variable - Defaults to `./plugins` if not set ### Phase 4: Plugin Database Model ✅ + **File**: `backend/internal/models/plugin.go` (pre-existing) **Fields**: + - `UUID` (string, indexed) - `FilePath` (string, unique index) - `Signature` (string, SHA-256) @@ -117,9 +129,11 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen **Migrations**: AutoMigrate in both `main.go` and `routes.go` ### Phase 5: Plugin API Handlers ✅ + **File**: `backend/internal/api/handlers/plugin_handler.go` **Endpoints** (all under `/admin/plugins`): + 1. `GET /` - List all plugins (merges registry with database records) 2. `GET /:id` - Get single plugin by UUID 3. 
`POST /:id/enable` - Enable a plugin @@ -129,9 +143,11 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen **Authorization**: All endpoints require admin authentication ### Phase 6: DNS Provider Service Integration ✅ + **File**: `backend/internal/services/dns_provider_service.go` **Changes**: + - Removed hardcoded `SupportedProviderTypes` array - Removed hardcoded `ProviderCredentialFields` map - Added `GetSupportedProviderTypes()` - queries `dnsprovider.Global().Types()` @@ -142,9 +158,11 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen **Backward Compatibility**: All existing functionality preserved, encryption maintained ### Phase 7: Caddy Config Builder Integration ✅ + **File**: `backend/internal/caddy/config.go` **Changes**: + - Multi-credential mode uses `provider.BuildCaddyConfigForZone()` - Single-credential mode uses `provider.BuildCaddyConfig()` - Propagation timeout from `provider.PropagationTimeout()` @@ -152,14 +170,17 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen - Removed hardcoded provider config logic ### Phase 8: PowerDNS Example Plugin ✅ + **Directory**: `plugins/powerdns/` **Files**: + - `main.go` - Full ProviderPlugin implementation - `README.md` - Build and usage instructions - `powerdns.so` - Compiled plugin (14MB) **Features**: + - Package: `main` (required for Go plugins) - Exported symbol: `Plugin` (type: `dnsprovider.ProviderPlugin`) - API connectivity testing in `TestCredentials()` @@ -167,14 +188,17 @@ Successfully implemented the complete Phase 5 Custom DNS Provider Plugins Backen - `main()` function (required but unused) **Build Command**: + ```bash CGO_ENABLED=1 go build -buildmode=plugin -o powerdns.so main.go ``` ### Phase 9: Unit Tests ✅ + **Coverage**: 88.0% (Required: 85%+) **Test Files**: + 1. 
`backend/pkg/dnsprovider/builtin/builtin_test.go` (NEW) - Tests all 10 built-in providers - Validates type, metadata, credentials, Caddy config @@ -190,11 +214,13 @@ CGO_ENABLED=1 go build -buildmode=plugin -o powerdns.so main.go - Added `dnsprovider` import **Test Execution**: + ```bash cd backend && go test -v -coverprofile=coverage.txt ./... ``` ### Phase 10: Main and Routes Integration ✅ + **Files Modified**: 1. `backend/cmd/api/main.go` @@ -213,18 +239,21 @@ cd backend && go test -v -coverprofile=coverage.txt ./... ## Architecture Decisions ### Registry Pattern + - **Global singleton**: `dnsprovider.Global()` provides single source of truth - **Thread-safe**: RWMutex protects concurrent access - **Sorted types**: `Types()` returns alphabetically sorted provider names - **Existence check**: `IsSupported()` for quick validation ### Security Model + - **Signature verification**: SHA-256 hash of plugin file - **Permission checks**: Reject world-writable directories (0o002) - **Platform restriction**: Reject Windows (Go plugin limitations) - **In-process execution**: Plugins run in the same process as Charon, with no sandboxing or isolation ### Plugin Interface Design + - **Version tracking**: InterfaceVersion ensures compatibility - **Lifecycle hooks**: Init() for setup, Cleanup() for teardown - **Dual validation**: ValidateCredentials() for syntax, TestCredentials() for connectivity @@ -232,6 +261,7 @@ cd backend && go test -v -coverprofile=coverage.txt ./... 
- **Caddy integration**: BuildCaddyConfig() and BuildCaddyConfigForZone() methods ### Database Schema + - **UUID primary key**: Stable identifier for API operations - **File path uniqueness**: Prevents duplicate plugin loads - **Status tracking**: Pending → Loaded/Error state machine @@ -291,6 +321,7 @@ plugins/ ## API Endpoints ### List Plugins + ```http GET /admin/plugins Authorization: Bearer @@ -320,6 +351,7 @@ Response 200: ``` ### Get Plugin + ```http GET /admin/plugins/:uuid Authorization: Bearer @@ -338,6 +370,7 @@ Response 200: ``` ### Enable Plugin + ```http POST /admin/plugins/:uuid/enable Authorization: Bearer @@ -349,6 +382,7 @@ Response 200: ``` ### Disable Plugin + ```http POST /admin/plugins/:uuid/disable Authorization: Bearer @@ -365,6 +399,7 @@ Response 400 (if in use): ``` ### Reload Plugins + ```http POST /admin/plugins/reload Authorization: Bearer @@ -382,12 +417,14 @@ Response 200: ### Creating a Custom DNS Provider Plugin 1. **Create plugin directory**: + ```bash mkdir -p plugins/myprovider cd plugins/myprovider ``` -2. **Implement the interface** (`main.go`): +1. **Implement the interface** (`main.go`): + ```go package main @@ -426,12 +463,14 @@ func (p *MyProvider) Metadata() dnsprovider.ProviderMetadata { func main() {} ``` -3. **Build the plugin**: +1. **Build the plugin**: + ```bash CGO_ENABLED=1 go build -buildmode=plugin -o myprovider.so main.go ``` -4. **Deploy**: +1. **Deploy**: + ```bash mkdir -p /opt/charon/plugins cp myprovider.so /opt/charon/plugins/ @@ -439,13 +478,15 @@ chmod 755 /opt/charon/plugins chmod 644 /opt/charon/plugins/myprovider.so ``` -5. **Configure Charon**: +1. **Configure Charon**: + ```bash export CHARON_PLUGINS_DIR=/opt/charon/plugins ./charon ``` -6. **Verify loading** (check logs): +1. **Verify loading** (check logs): + ``` 2026-01-06 22:30:00 INFO Plugin loaded successfully: myprovider ``` @@ -479,6 +520,7 @@ curl -X POST \ ## Known Limitations ### Go Plugin Constraints + 1. 
**Platform**: Linux and macOS only (Windows not supported by Go) 2. **CGO Required**: Must build with `CGO_ENABLED=1` 3. **Version Matching**: Plugin must be compiled with same Go version as Charon @@ -486,12 +528,14 @@ curl -X POST \ 5. **Same Architecture**: Plugin and Charon must use same CPU architecture ### Security Considerations + 1. **Same Process**: Plugins run in same process as Charon (no sandboxing) 2. **Signature Only**: SHA-256 signature verification, but not cryptographic signing 3. **Directory Permissions**: Relies on OS permissions for plugin directory security 4. **No Isolation**: Plugins have access to entire application memory space ### Performance + 1. **Large Binaries**: Plugin .so files are ~14MB each (Go runtime included) 2. **Load Time**: Plugin loading adds ~100ms startup time per plugin 3. **No Unloading**: Once loaded, plugins cannot be unloaded without restart @@ -501,6 +545,7 @@ curl -X POST \ ## Testing ### Unit Tests + ```bash cd backend go test -v -coverprofile=coverage.txt ./... @@ -511,13 +556,15 @@ go test -v -coverprofile=coverage.txt ./... ### Manual Testing 1. **Test built-in provider registration**: + ```bash cd backend go run cmd/api/main.go # Check logs for "Registered builtin DNS provider: cloudflare" etc. ``` -2. **Test plugin loading**: +1. **Test plugin loading**: + ```bash export CHARON_PLUGINS_DIR=/projects/Charon/plugins cd backend @@ -525,7 +572,8 @@ go run cmd/api/main.go # Check logs for "Plugin loaded successfully: powerdns" ``` -3. **Test API endpoints**: +1. 
**Test API endpoints**: + ```bash # Get admin token TOKEN=$(curl -X POST http://localhost:8080/api/auth/login \ @@ -573,6 +621,7 @@ curl -H "Authorization: Bearer $TOKEN" \ ## Conclusion Phase 5 Custom DNS Provider Plugins Backend is **fully implemented** with: + - ✅ All 10 built-in providers migrated to plugin architecture - ✅ Secure plugin loading with signature verification - ✅ Complete API for plugin management diff --git a/docs/implementation/PHASE5_SUMMARY.md b/docs/implementation/PHASE5_SUMMARY.md index 5c62c9f8..df0c4a97 100644 --- a/docs/implementation/PHASE5_SUMMARY.md +++ b/docs/implementation/PHASE5_SUMMARY.md @@ -7,6 +7,7 @@ ## What Was Implemented ### 1. Plugin System Core (10 phases) + - ✅ Plugin interface and registry (pre-existing, validated) - ✅ 10 built-in DNS providers (Cloudflare, Route53, DigitalOcean, GCP, Azure, Namecheap, GoDaddy, Hetzner, Vultr, DNSimple) - ✅ Secure plugin loader with SHA-256 verification @@ -19,6 +20,7 @@ - ✅ Main.go and routes integration ### 2. Key Files Created + ``` backend/pkg/dnsprovider/builtin/ ├── cloudflare.go, route53.go, digitalocean.go @@ -41,6 +43,7 @@ plugins/powerdns/ ``` ### 3. Files Modified + ``` backend/internal/services/dns_provider_service.go - Removed hardcoded provider lists @@ -99,12 +102,14 @@ go test -v -coverprofile=coverage.txt ./... ``` ## Security Features + - ✅ SHA-256 signature verification - ✅ Directory permission validation (rejects world-writable) - ✅ Windows platform rejection (Go plugin limitation) - ✅ Usage checking (prevents disabling in-use plugins) ## Known Limitations + - Linux/macOS only (Go plugin constraint) - CGO required (`CGO_ENABLED=1`) - Same Go version required for plugin and Charon @@ -112,7 +117,9 @@ go test -v -coverprofile=coverage.txt ./... - ~14MB per plugin (Go runtime embedded) ## Next Steps + Frontend implementation (Phase 6) - Plugin management UI ## Documentation + See [PHASE5_PLUGINS_COMPLETE.md](./PHASE5_PLUGINS_COMPLETE.md) for full details. 
diff --git a/docs/implementation/PHASE_0_COMPLETE.md b/docs/implementation/PHASE_0_COMPLETE.md index 8d715579..c36faf37 100644 --- a/docs/implementation/PHASE_0_COMPLETE.md +++ b/docs/implementation/PHASE_0_COMPLETE.md @@ -31,6 +31,7 @@ Phase 0 validation and tooling infrastructure has been successfully implemented **File**: `.github/skills/scripts/validate-skills.py` **Features**: + - Validates all required frontmatter fields per agentskills.io spec - Checks name format (kebab-case), version format (semver), description length - Validates tags (minimum 2, maximum 5, lowercase) @@ -40,6 +41,7 @@ Phase 0 validation and tooling infrastructure has been successfully implemented - Execution permissions set **Test Results**: + ``` ✓ test-backend-coverage.SKILL.md is valid Validation Summary: @@ -55,6 +57,7 @@ Validation Summary: **File**: `.github/skills/scripts/skill-runner.sh` **Features**: + - Accepts skill name as argument - Locates skill's execution script (`{skill-name}-scripts/run.sh`) - Validates skill exists and is executable @@ -64,6 +67,7 @@ Validation Summary: - Execution permissions set **Test Results**: + ``` [INFO] Executing skill: test-backend-coverage [SUCCESS] Skill completed successfully: test-backend-coverage @@ -75,12 +79,14 @@ Exit code: 0 All helper scripts created and functional: **`_logging_helpers.sh`**: + - `log_info()`, `log_success()`, `log_warning()`, `log_error()`, `log_debug()` - `log_step()`, `log_command()` - Color support with terminal detection - NO_COLOR environment variable support **`_error_handling_helpers.sh`**: + - `error_exit()` - Print error and exit - `check_command_exists()`, `check_file_exists()`, `check_dir_exists()` - `run_with_retry()` - Retry logic with backoff @@ -88,6 +94,7 @@ All helper scripts created and functional: - `cleanup_on_exit()` - Register cleanup functions **`_environment_helpers.sh`**: + - `validate_go_environment()`, `validate_python_environment()`, `validate_node_environment()`, 
`validate_docker_environment()` - `set_default_env()` - Set env vars with defaults - `validate_project_structure()` - Check required files @@ -98,6 +105,7 @@ All helper scripts created and functional: **File**: `.github/skills/README.md` **Contents**: + - Complete overview of Agent Skills - Directory structure documentation - Available skills table @@ -114,6 +122,7 @@ All helper scripts created and functional: ### ✅ 6. .gitignore Updated **Changes Made**: + - Added Agent Skills runtime-only ignore patterns - Runtime temporary files: `.cache/`, `temp/`, `tmp/`, `*.tmp` - Execution logs: `logs/`, `*.log`, `nohup.out` @@ -122,6 +131,7 @@ All helper scripts created and functional: - **IMPORTANT**: SKILL.md files and scripts are NOT ignored (required for CI/CD) **Verification**: + ``` ✓ No SKILL.md files are ignored ✓ No scripts are ignored @@ -132,10 +142,12 @@ All helper scripts created and functional: **Skill**: `test-backend-coverage` **Files**: + - `.github/skills/test-backend-coverage.SKILL.md` - Complete skill definition - `.github/skills/test-backend-coverage-scripts/run.sh` - Execution wrapper **Features**: + - Complete YAML frontmatter following agentskills.io v1.0 spec - Progressive disclosure (under 500 lines) - Comprehensive documentation (prerequisites, usage, examples, error handling) @@ -146,6 +158,7 @@ All helper scripts created and functional: - Sets default environment variables **Frontmatter Compliance**: + - ✅ All required fields present (name, version, description, author, license, tags) - ✅ Name format: kebab-case - ✅ Version: semantic versioning (1.0.0) @@ -159,12 +172,14 @@ All helper scripts created and functional: ### ✅ 8. 
Infrastructure Tested **Test 1: Validation** + ```bash .github/skills/scripts/validate-skills.py --single .github/skills/test-backend-coverage.SKILL.md Result: ✓ test-backend-coverage.SKILL.md is valid ``` **Test 2: Skill Execution** + ```bash .github/skills/scripts/skill-runner.sh test-backend-coverage Result: Coverage 85.5% (minimum required 85%) @@ -173,6 +188,7 @@ Result: Coverage 85.5% (minimum required 85%) ``` **Test 3: Git Tracking** + ```bash git status --short .github/skills/ Result: 8 files staged (not ignored) @@ -185,16 +201,20 @@ Result: 8 files staged (not ignored) ## Success Criteria ### ✅ 1. validate-skills.py passes for proof-of-concept skill + - **Result**: PASS - **Evidence**: Validation completed with 0 errors, 0 warnings ### ✅ 2. skill-runner.sh successfully executes test-backend-coverage skill + - **Result**: PASS - **Evidence**: Skill executed successfully, exit code 0 ### ✅ 3. Backend coverage tests run and pass with ≥85% coverage + - **Result**: PASS (85.5%) - **Evidence**: + ``` total: (statements) 85.5% Computed coverage: 85.5% (minimum required 85%) @@ -202,30 +222,35 @@ Result: 8 files staged (not ignored) ``` ### ✅ 4. 
Git tracks all skill files (not ignored) + - **Result**: PASS - **Evidence**: All 8 skill files staged, 0 ignored ## Architecture Highlights ### Flat Structure + - Skills use flat naming: `{skill-name}.SKILL.md` - Scripts in: `{skill-name}-scripts/run.sh` - Maximum AI discoverability - Simpler references in tasks.json and workflows ### Helper Scripts Pattern + - All skills source shared helpers for consistency - Logging: Colored output, multiple levels, DEBUG mode - Error handling: Retry logic, validation, exit codes - Environment: Version checks, project structure validation ### Skill Runner Design + - Universal interface: `skill-runner.sh <skill-name> [args...]` - Validates skill existence and permissions - Changes to project root before execution - Proper error reporting with helpful messages ### Documentation Strategy + - README.md in skills directory for quick reference - Each SKILL.md is self-contained (< 500 lines) - Progressive disclosure for complex topics @@ -234,6 +259,7 @@ Result: 8 files staged (not ignored) ## Integration Points ### VS Code Tasks (Future) + ```json { "label": "Test: Backend with Coverage", @@ -243,12 +269,14 @@ Result: 8 files staged (not ignored) ``` ### GitHub Actions (Future) + ```yaml - name: Run Backend Tests with Coverage run: .github/skills/scripts/skill-runner.sh test-backend-coverage ``` ### Pre-commit Hooks (Future) + ```yaml - id: backend-coverage entry: .github/skills/scripts/skill-runner.sh test-backend-coverage @@ -274,6 +302,7 @@ Result: 8 files staged (not ignored) ## Next Steps ### Immediate (Phase 1) + 1. Create remaining test skills: - `test-backend-unit.SKILL.md` - `test-frontend-coverage.SKILL.md` @@ -282,11 +311,13 @@ Result: 8 files staged (not ignored) 3.
Update GitHub Actions workflows ### Phase 2-4 + - Migrate integration tests, security scans, QA tests - Migrate utility and Docker skills - Complete documentation ### Phase 5 + - Generate skills index JSON for AI discovery - Create migration guide - Tag v1.0-beta.1 diff --git a/docs/implementation/PHASE_3_COMPLETE.md b/docs/implementation/PHASE_3_COMPLETE.md index aabf0bf4..4de89db6 100644 --- a/docs/implementation/PHASE_3_COMPLETE.md +++ b/docs/implementation/PHASE_3_COMPLETE.md @@ -20,6 +20,7 @@ Phase 3 successfully implements all security scanning and QA validation skills. **Purpose**: Run Trivy security scanner for vulnerabilities, secrets, and misconfigurations **Features**: + - Scans for vulnerabilities (CVEs in dependencies) - Detects exposed secrets (API keys, tokens) - Checks for misconfigurations (Docker, K8s, etc.) @@ -38,6 +39,7 @@ Phase 3 successfully implements all security scanning and QA validation skills. **Purpose**: Run Go vulnerability checker (govulncheck) to detect known vulnerabilities **Features**: + - Official Go vulnerability database - Reachability analysis (only reports used vulnerabilities) - Zero false positives @@ -56,6 +58,7 @@ Phase 3 successfully implements all security scanning and QA validation skills. **Purpose**: Run all pre-commit hooks for comprehensive code quality validation **Features**: + - Multi-language support (Python, Go, JavaScript/TypeScript, Markdown) - Auto-fixing hooks (formatting, whitespace) - Security checks (detect secrets, private keys) diff --git a/docs/implementation/PHASE_4_COMPLETE.md b/docs/implementation/PHASE_4_COMPLETE.md index 726c1a2c..654e898f 100644 --- a/docs/implementation/PHASE_4_COMPLETE.md +++ b/docs/implementation/PHASE_4_COMPLETE.md @@ -42,19 +42,19 @@ Phase 4 of the Agent Skills migration has been successfully completed. All 7 uti #### Docker Skills (3) -5. **docker-start-dev** +1. 
**docker-start-dev** - Location: `.github/skills/docker-start-dev.SKILL.md` - Purpose: Starts development Docker Compose environment - Wraps: `docker compose -f docker-compose.dev.yml up -d` - Status: ✅ Validated and functional -6. **docker-stop-dev** +2. **docker-stop-dev** - Location: `.github/skills/docker-stop-dev.SKILL.md` - Purpose: Stops development Docker Compose environment - Wraps: `docker compose -f docker-compose.dev.yml down` - Status: ✅ Validated and functional -7. **docker-prune** +3. **docker-prune** - Location: `.github/skills/docker-prune.SKILL.md` - Purpose: Cleans up unused Docker resources - Wraps: `docker system prune -f` @@ -63,6 +63,7 @@ Phase 4 of the Agent Skills migration has been successfully completed. All 7 uti ### ✅ Files Created #### Skill Documentation (7 files) + - `.github/skills/utility-version-check.SKILL.md` - `.github/skills/utility-clear-go-cache.SKILL.md` - `.github/skills/utility-bump-beta.SKILL.md` @@ -72,6 +73,7 @@ Phase 4 of the Agent Skills migration has been successfully completed. All 7 uti - `.github/skills/docker-prune.SKILL.md` #### Execution Scripts (7 files) + - `.github/skills/utility-version-check-scripts/run.sh` - `.github/skills/utility-clear-go-cache-scripts/run.sh` - `.github/skills/utility-bump-beta-scripts/run.sh` @@ -83,6 +85,7 @@ Phase 4 of the Agent Skills migration has been successfully completed. All 7 uti ### ✅ Tasks Updated (7 total) Updated in `.vscode/tasks.json`: + 1. **Utility: Check Version Match Tag** → `skill-runner.sh utility-version-check` 2. **Utility: Clear Go Cache** → `skill-runner.sh utility-clear-go-cache` 3. **Utility: Bump Beta Version** → `skill-runner.sh utility-bump-beta` @@ -139,6 +142,7 @@ Validation Summary: ### Tested Skills 1. 
**utility-version-check**: ✅ Successfully validated version against git tag + ``` [INFO] Executing skill: utility-version-check OK: .version matches latest Git tag v0.14.1 @@ -161,6 +165,7 @@ Validation Summary: ## Skill Documentation Quality All Phase 4 skills include: + - ✅ Complete YAML frontmatter (agentskills.io compliant) - ✅ Detailed overview and purpose - ✅ Prerequisites and requirements @@ -202,6 +207,7 @@ All skills integrate seamlessly with the skill-runner: ``` The skill-runner provides: + - Consistent logging and output formatting - Error handling and exit code propagation - Execution environment validation @@ -236,24 +242,28 @@ The skill-runner provides: ## Notable Skill Features ### utility-version-check + - Validates version consistency across repository - Non-blocking when no tags exist (allows initial development) - Normalizes version formats automatically - Used in CI/CD release workflows ### utility-clear-go-cache + - Comprehensive cache clearing (build, test, module, gopls) - Re-downloads modules after clearing - Provides clear next-steps instructions - Helpful for troubleshooting build issues ### utility-bump-beta + - Intelligent version bumping logic - Updates multiple files consistently (.version, package.json, version.go) - Interactive git commit/tag workflow - Prevents version drift across codebase ### utility-db-recovery + - Most comprehensive skill in Phase 4 (350+ lines of documentation) - Automatic environment detection (Docker vs local) - Multi-step recovery process with verification @@ -261,12 +271,14 @@ The skill-runner provides: - WAL mode configuration for durability ### docker-start-dev / docker-stop-dev + - Idempotent operations (safe to run multiple times) - Graceful shutdown with cleanup - Clear service startup/shutdown order - Volume preservation by default ### docker-prune + - Safe resource cleanup with force flag - Detailed disk space reporting - Protects volumes and running containers @@ -291,6 +303,7 @@ The skill-runner 
provides: **Phase 5**: Documentation & Cleanup (Days 12-13) Upcoming tasks: + - Create comprehensive migration guide - Create skill development guide - Generate skills index JSON for AI discovery @@ -302,6 +315,7 @@ Upcoming tasks: Phase 4 has been successfully completed with all 7 utility and Docker management skills created, validated, and integrated. The project now has 19 operational skills across 6 categories (Testing, Integration, Security, QA, Utility, Docker), achieving 79% of the migration target. All success criteria have been met: + - ✅ 7 new skills created and documented - ✅ 0 validation errors - ✅ All tasks.json references updated diff --git a/docs/implementation/PHASE_5_COMPLETE.md b/docs/implementation/PHASE_5_COMPLETE.md index cc05988f..8311c6d7 100644 --- a/docs/implementation/PHASE_5_COMPLETE.md +++ b/docs/implementation/PHASE_5_COMPLETE.md @@ -17,6 +17,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum **Location**: `README.md` **Changes Made:** + - Added comprehensive "Agent Skills" section after "Getting Help" - Explained what Agent Skills are and their benefits - Listed all 19 operational skills by category @@ -25,6 +26,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - Integrated seamlessly with existing content **Content Added:** + - Overview of Agent Skills concept - AI discoverability features - 5 usage methods (CLI, VS Code, Copilot, CI/CD) @@ -40,6 +42,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum **Location**: `CONTRIBUTING.md` **Changes Made:** + - Added comprehensive "Adding New Skills" section - Positioned between "Testing Guidelines" and "Pull Request Process" - Documented complete skill creation workflow @@ -47,6 +50,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - Added helper scripts reference guide **Content Added:** + 1.
**What is a Skill?** - Explanation of YAML + Markdown + Script structure 2. **When to Create a Skill** - Clear use cases and examples 3. **Skill Creation Process** - 8-step detailed guide: @@ -87,6 +91,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum 12. `scripts/db-recovery.sh` → `utility-db-recovery` **Warning Format:** + ```bash ⚠️ DEPRECATED: This script is deprecated and will be removed in v2.0.0 Please use: .github/skills/scripts/skill-runner.sh @@ -94,6 +99,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum ``` **User Experience:** + - Clear warning message on stderr - Non-blocking (script continues to work) - 1-second pause for visibility @@ -101,6 +107,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - Link to migration documentation **Scripts NOT Requiring Deprecation Warnings** (7): + - `test-backend-unit` and `test-frontend-unit` (created from inline tasks, no legacy script) - `security-scan-go-vuln` (created from inline command, no legacy script) - `qa-precommit-all` (wraps pre-commit run, no legacy script) @@ -184,6 +191,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - Contribution process **Statistics in Document:** + - 79% migration completion (19/24 skills) - 100% validation pass rate (19/19 skills) - Backward compatibility maintained until v2.0.0 @@ -222,11 +230,13 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Version timeline consistent (v2.0.0 removal) **File Path Accuracy:** + - ✅ All links use correct relative paths - ✅ No broken references - ✅ Skill file names match actual files in `.github/skills/` **Skill Count Consistency:** + - ✅ README.md: 19 skills - ✅ .github/skills/README.md: 19 skills in table - ✅ Migration guide: 19 skills listed @@ -253,6 +263,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. 
All docum ## Documentation Quality ### README.md Agent Skills Section + - ✅ Clear introduction to Agent Skills concept - ✅ Practical usage examples (CLI, VS Code, Copilot) - ✅ Category breakdown with skill counts @@ -260,6 +271,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Seamless integration with existing content ### CONTRIBUTING.md Skill Creation Guide + - ✅ Step-by-step process (8 steps) - ✅ Complete SKILL.md template - ✅ Validation requirements documented @@ -268,6 +280,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Resources and links provided ### Migration Guide (docs/AGENT_SKILLS_MIGRATION.md) + - ✅ Executive summary with key benefits - ✅ Before/after comparison - ✅ Complete migration statistics @@ -281,6 +294,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Resource links and support channels ### Deprecation Warnings + - ✅ Clear and non-blocking - ✅ Actionable guidance provided - ✅ Link to migration documentation @@ -312,20 +326,24 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum ## Usage Examples Provided ### Command Line (4 examples) + - Backend testing - Integration testing - Security scanning - Utility operations ### VS Code Tasks (2 examples) + - Task menu navigation - Keyboard shortcuts ### GitHub Copilot (4 examples) + - Natural language queries - AI-assisted discovery ### CI/CD (2 examples) + - GitHub Actions integration - Workflow patterns @@ -343,18 +361,21 @@ Phase 5 of the Agent Skills migration has been successfully completed. 
All docum ## Impact Assessment ### User Experience + - **Discoverability**: ⬆️ Significant improvement with AI assistance - **Documentation**: ⬆️ Self-contained, comprehensive skill docs - **Usability**: ⬆️ Multiple access methods (CLI, VS Code, Copilot) - **Migration**: ⚠️ Minimal friction (legacy scripts still work) ### Developer Experience + - **Onboarding**: ⬆️ Clear contribution guide in CONTRIBUTING.md - **Maintenance**: ⬆️ Standardized format easier to update - **Validation**: ⬆️ Automated checks prevent errors - **Consistency**: ⬆️ Helper scripts reduce boilerplate ### Project Health + - **Standards Compliance**: ✅ Follows agentskills.io specification - **AI Integration**: ✅ GitHub Copilot ready - **Documentation Quality**: ✅ Comprehensive and consistent @@ -363,11 +384,13 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum ## Files Modified in Phase 5 ### Documentation Files (3 major updates) + 1. `README.md` - Agent Skills section added 2. `CONTRIBUTING.md` - Skill creation guide added 3. `docs/AGENT_SKILLS_MIGRATION.md` - Migration guide created ### Legacy Scripts (12 deprecation notices) + 1. `scripts/go-test-coverage.sh` 2. `scripts/frontend-test-coverage.sh` 3. `scripts/integration-test.sh` @@ -388,6 +411,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum **Phase 6**: Full Migration & Legacy Cleanup (Future) **Not Yet Scheduled:** + - Monitor v1.0-beta.1 for issues (2 weeks minimum) - Address any discovered problems - Remove legacy scripts (v2.0.0) @@ -396,6 +420,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - Tag release v2.0.0 **Current Phase 5 Prepares For:** + - Clear migration path for users - Documented deprecation timeline - Comprehensive troubleshooting resources @@ -418,6 +443,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. 
All docum ## Validation Results ### Documentation Consistency + - ✅ All skill names consistent across docs - ✅ All file paths verified - ✅ All cross-references working @@ -425,6 +451,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Skill count matches (19) across all docs ### Deprecation Warnings + - ✅ All 12 legacy scripts updated - ✅ Consistent warning format - ✅ Correct skill names referenced @@ -432,6 +459,7 @@ Phase 5 of the Agent Skills migration has been successfully completed. All docum - ✅ Version timeline accurate ### Content Quality + - ✅ Clear and actionable instructions - ✅ Multiple examples provided - ✅ Troubleshooting sections included @@ -448,6 +476,7 @@ Phase 5 has been successfully completed with all documentation updated, deprecat - **Deprecation Communication**: 12 legacy scripts with clear warnings All success criteria have been met: + - ✅ README.md updated with Agent Skills section - ✅ CONTRIBUTING.md updated with skill creation guidelines - ✅ Deprecation notices added to 12 applicable scripts diff --git a/docs/implementation/PR450_TEST_COVERAGE_COMPLETE.md b/docs/implementation/PR450_TEST_COVERAGE_COMPLETE.md index 25eb841a..d0e8117f 100644 --- a/docs/implementation/PR450_TEST_COVERAGE_COMPLETE.md +++ b/docs/implementation/PR450_TEST_COVERAGE_COMPLETE.md @@ -72,6 +72,7 @@ CodeQL's taint analysis could not verify that user-controlled input (`rawURL`) w The fix maintains **layered security**: **Layer 1 - Input Validation** (`security.ValidateExternalURL`): + - Validates URL format - Checks for private IP ranges - Blocks localhost/loopback (optional) @@ -79,12 +80,14 @@ The fix maintains **layered security**: - Performs DNS resolution and IP validation **Layer 2 - Connection-Time Validation** (`ssrfSafeDialer`): + - Re-validates IP at TCP dial time (TOCTOU protection) - Blocks private IPs: RFC 1918, loopback, link-local - Blocks IPv6 private ranges (fc00::/7) - Blocks reserved ranges **Layer 3 - HTTP 
Client Configuration**: + - Strict timeout configuration (5s connect, 10s total) - No redirects allowed - Custom User-Agent header @@ -95,6 +98,7 @@ The fix maintains **layered security**: **Coverage**: 90.2% ✅ **Comprehensive Tests**: + - ✅ `TestValidateExternalURL_MultipleOptions` - ✅ `TestValidateExternalURL_CustomTimeout` - ✅ `TestValidateExternalURL_DNSTimeout` @@ -122,6 +126,7 @@ The fix maintains **layered security**: ### Files Modified **Primary Files**: + - `internal/api/handlers/security_handler.go` - `internal/api/handlers/security_handler_test.go` - `internal/api/middleware/security.go` @@ -141,6 +146,7 @@ The fix maintains **layered security**: ### Test Patterns Added **SSRF Protection Tests**: + ```go // Security notification webhooks TestSecurityNotificationService_ValidateWebhook @@ -169,6 +175,7 @@ TestValidateExternalURL_IPV6Validation ### Files Modified **Primary Files**: + - `frontend/src/pages/Security.tsx` - `frontend/src/pages/__tests__/Security.test.tsx` - `frontend/src/pages/__tests__/Security.errors.test.tsx` @@ -189,6 +196,7 @@ TestValidateExternalURL_IPV6Validation ### Test Coverage Breakdown **Security Page Tests**: + - ✅ Component rendering with all cards visible - ✅ WAF enable/disable toggle functionality - ✅ CrowdSec enable/disable with LAPI health checks @@ -199,6 +207,7 @@ TestValidateExternalURL_IPV6Validation - ✅ Toast notifications on success/error **Security API Tests**: + - ✅ `getSecurityStatus()` - Fetch all security states - ✅ `toggleWAF()` - Enable/disable Web Application Firewall - ✅ `toggleCrowdSec()` - Enable/disable CrowdSec with LAPI checks @@ -207,6 +216,7 @@ TestValidateExternalURL_IPV6Validation - ✅ `updateNotificationSettings()` - Save notification webhooks **Custom Hook Tests** (`useSecurity`): + - ✅ Initial state management - ✅ Security status fetching with React Query - ✅ Mutation handling for toggles @@ -221,6 +231,7 @@ TestValidateExternalURL_IPV6Validation ### Files Modified **Primary Files**: + - 
`backend/integration/security_integration_test.go` - `backend/integration/crowdsec_integration_test.go` - `backend/integration/waf_integration_test.go` @@ -228,6 +239,7 @@ TestValidateExternalURL_IPV6Validation ### Test Scenarios **Security Integration Tests**: + - ✅ WAF + CrowdSec coexistence (no conflicts) - ✅ Rate limiting + WAF combined enforcement - ✅ Handler pipeline order verification @@ -235,6 +247,7 @@ TestValidateExternalURL_IPV6Validation - ✅ Legitimate traffic passes through all layers **CrowdSec Integration Tests**: + - ✅ LAPI startup health checks - ✅ Console enrollment with retry logic - ✅ Hub item installation and updates @@ -242,6 +255,7 @@ TestValidateExternalURL_IPV6Validation - ✅ Bouncer integration with Caddy **WAF Integration Tests**: + - ✅ OWASP Core Rule Set detection - ✅ SQL injection pattern blocking - ✅ XSS vector detection @@ -255,6 +269,7 @@ TestValidateExternalURL_IPV6Validation ### Files Modified **Primary Files**: + - `backend/internal/utils/ip_helpers.go` - `backend/internal/utils/ip_helpers_test.go` - `frontend/src/utils/__tests__/crowdsecExport.test.ts` @@ -269,6 +284,7 @@ TestValidateExternalURL_IPV6Validation ### Test Patterns Added **IP Validation Tests**: + ```go TestIsPrivateIP_IPv4Comprehensive TestIsPrivateIP_IPv6Comprehensive @@ -277,6 +293,7 @@ TestParseIPFromString_AllFormats ``` **Frontend Utility Tests**: + ```typescript // CrowdSec export utilities test('formatDecisionForExport - handles all fields') @@ -329,6 +346,7 @@ test('exportDecisionsToJSON - validates structure') | `src/utils` | 96.49% | 83.33% | 100% | 97.4% | ✅ | **Test Results**: + - **Total Tests**: 1,174 passed, 2 skipped (1,176 total) - **Test Files**: 107 passed - **Duration**: 167.44s @@ -355,6 +373,7 @@ test('exportDecisionsToJSON - validates structure') **Status**: ⚠️ **Database Created Successfully** - Analysis command path issue (non-blocking) **Manual Review**: CWE-918 SSRF fix manually verified: + - ✅ Taint chain broken by new `requestURL` 
variable - ✅ Defense-in-depth architecture preserved - ✅ All SSRF protection tests passing @@ -382,15 +401,18 @@ test('exportDecisionsToJSON - validates structure') For detailed manual testing procedures, see: **Security Testing**: + - [SSRF Complete Implementation](SSRF_COMPLETE.md) - Technical details of CWE-918 fix - [Security Coverage QA Plan](../plans/SECURITY_COVERAGE_QA_PLAN.md) - Comprehensive test scenarios **Integration Testing**: + - [Cerberus Integration Testing Plan](../plans/cerberus_integration_testing_plan.md) - [CrowdSec Testing Plan](../plans/crowdsec_testing_plan.md) - [WAF Testing Plan](../plans/waf_testing_plan.md) **UI/UX Testing**: + - [Cerberus UI/UX Testing Plan](../plans/cerberus_uiux_testing_plan.md) --- @@ -462,6 +484,7 @@ cd frontend && npm run type-check ``` **Documentation**: + - [QA Report](../reports/qa_report.md) - Comprehensive audit results - [SSRF Complete](SSRF_COMPLETE.md) - Detailed SSRF remediation - [CHANGELOG.md](../../CHANGELOG.md) - User-facing changes diff --git a/docs/implementation/QUICK_FIX_SUPPLY_CHAIN.md b/docs/implementation/QUICK_FIX_SUPPLY_CHAIN.md index 0ef455f9..a30f4620 100644 --- a/docs/implementation/QUICK_FIX_SUPPLY_CHAIN.md +++ b/docs/implementation/QUICK_FIX_SUPPLY_CHAIN.md @@ -33,6 +33,7 @@ RUN go get github.com/expr-lang/expr@v1.17.7 && \ ``` **All CVEs are fixed:** + - ✅ CVE-2025-58183 (archive/tar) - Fixed in Go 1.25.2+ - ✅ CVE-2025-58186 (net/http) - Fixed in Go 1.25.2+ - ✅ CVE-2025-58187 (crypto/x509) - Fixed in Go 1.25.3+ @@ -117,6 +118,7 @@ docker build -t charon:local . ## Need More Details? 
See full analysis: + - [Supply Chain Scan Analysis](./SUPPLY_CHAIN_SCAN_ANALYSIS.md) - [Detailed Remediation Plan](./SUPPLY_CHAIN_REMEDIATION_PLAN.md) diff --git a/docs/implementation/SSRF_COMPLETE.md b/docs/implementation/SSRF_COMPLETE.md index f85fbfea..b8f25298 100644 --- a/docs/implementation/SSRF_COMPLETE.md +++ b/docs/implementation/SSRF_COMPLETE.md @@ -35,12 +35,14 @@ This document provides a comprehensive summary of the complete Server-Side Reque **Attack Scenario**: An authenticated admin user could supply a URL pointing to internal resources (localhost, private networks, cloud metadata endpoints), causing the server to make requests to these targets. This could lead to: + - Information disclosure about internal network topology - Access to cloud provider metadata services (AWS: 169.254.169.254) - Port scanning of internal services - Exploitation of trust relationships **Original Code Flow**: + ``` User Input (req.URL) ↓ @@ -108,15 +110,18 @@ The complete remediation implements a four-layer security model: #### Key Functions ##### `ssrfSafeDialer()` (Lines 15-45) + **Purpose**: Custom HTTP dialer that validates IP addresses at connection time **Security Controls**: + - DNS resolution with context timeout (prevents DNS slowloris) - Validates **ALL** resolved IPs before connection (prevents IP hopping) - Uses first valid IP only (prevents DNS rebinding) - Atomic resolution → validation → connection sequence (prevents TOCTOU) **Code Snippet**: + ```go func ssrfSafeDialer() func(ctx context.Context, network, addr string) (net.Conn, error) { return func(ctx context.Context, network, addr string) (net.Conn, error) { @@ -147,14 +152,17 @@ func ssrfSafeDialer() func(ctx context.Context, network, addr string) (net.Conn, ``` **Why This Works**: + 1. DNS resolution happens **inside the dialer**, at the moment of connection 2. Even if DNS changes between validations, the second resolution catches it 3. 
All IPs are validated (prevents round-robin DNS bypass) ##### `TestURLConnectivity()` (Lines 55-133) + **Purpose**: Server-side URL connectivity testing with SSRF protection **Security Controls**: + - Scheme validation (http/https only) - blocks `file://`, `ftp://`, `gopher://`, etc. - Integration with `ssrfSafeDialer()` for runtime protection - Redirect protection (max 2 redirects) @@ -162,6 +170,7 @@ func ssrfSafeDialer() func(ctx context.Context, network, addr string) (net.Conn, - Custom User-Agent header **Code Snippet**: + ```go // Create HTTP client with SSRF-safe dialer transport := &http.Transport{ @@ -182,9 +191,11 @@ client := &http.Client{ ``` ##### `isPrivateIP()` (Lines 136-182) + **Purpose**: Comprehensive IP address validation **Protected Ranges** (13+ CIDR blocks): + - ✅ RFC 1918 Private IPv4: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` - ✅ Loopback: `127.0.0.0/8`, `::1/128` - ✅ Link-local (AWS/GCP metadata): `169.254.0.0/16`, `fe80::/10` @@ -194,6 +205,7 @@ client := &http.Client{ - ✅ IPv6 Documentation: `2001:db8::/32` **Code Snippet**: + ```go // Cloud metadata service protection (critical!) _, linkLocal, _ := net.ParseCIDR("169.254.0.0/16") @@ -215,6 +227,7 @@ if linkLocal.Contains(ip) { #### TestPublicURL Handler (Lines 269-325) **Access Control**: + ```go // Requires admin role role, exists := c.Get("role") @@ -227,6 +240,7 @@ if !exists || role != "admin" { **Validation Layers**: **Step 1: Format Validation** + ```go normalized, _, err := utils.ValidateURL(req.URL) if err != nil { @@ -239,6 +253,7 @@ if err != nil { ``` **Step 2: SSRF Pre-Validation (Critical - Breaks Taint Chain)** + ```go // This step breaks the CodeQL taint chain by returning a NEW validated value validatedURL, err := security.ValidateExternalURL(normalized, security.WithAllowHTTP()) @@ -254,18 +269,21 @@ if err != nil { ``` **Why This Breaks the Taint Chain**: + 1. `security.ValidateExternalURL()` performs DNS resolution and IP validation 2. 
Returns a **new string value** (not a passthrough) 3. CodeQL's taint tracking sees the data flow break here 4. The returned `validatedURL` is treated as untainted **Step 3: Connectivity Test** + ```go // Use validatedURL (NOT req.URL) for network operation reachable, latency, err := utils.TestURLConnectivity(validatedURL) ``` **HTTP Status Code Strategy**: + - `400 Bad Request` → Format validation failures (invalid scheme, paths, malformed JSON) - `200 OK` → SSRF blocks and connectivity failures (returns `reachable: false` with error details) - `403 Forbidden` → Non-admin users @@ -273,6 +291,7 @@ reachable, latency, err := utils.TestURLConnectivity(validatedURL) **Rationale**: SSRF blocks are connectivity constraints, not request format errors. Returning 200 allows clients to distinguish between "URL malformed" vs "URL blocked by security policy". **Documentation**: + ```go // TestPublicURL performs a server-side connectivity test with comprehensive SSRF protection. // This endpoint implements defense-in-depth security: @@ -291,16 +310,19 @@ reachable, latency, err := utils.TestURLConnectivity(validatedURL) ### 4.1 DNS Rebinding / TOCTOU Attacks **Attack Scenario**: + 1. **Check Time (T1)**: Handler calls `ValidateExternalURL()` which resolves `attacker.com` → `1.2.3.4` (public IP) ✅ 2. Attacker changes DNS record 3. **Use Time (T2)**: `TestURLConnectivity()` resolves `attacker.com` again → `127.0.0.1` (private IP) ❌ SSRF! 
**Our Defense**: + - `ssrfSafeDialer()` performs **second DNS resolution** at connection time - Even if DNS changes between T1 and T2, Layer 4 catches the attack - Atomic sequence: resolve → validate → connect (no window for rebinding) **Test Evidence**: + ``` ✅ TestSettingsHandler_TestPublicURL_SSRFProtection/blocks_localhost (0.00s) ✅ TestSettingsHandler_TestPublicURL_SSRFProtection/blocks_127.0.0.1 (0.00s) @@ -309,15 +331,18 @@ reachable, latency, err := utils.TestURLConnectivity(validatedURL) ### 4.2 URL Parser Differential Attacks **Attack Scenario**: + ``` http://evil.com@127.0.0.1/ ``` Some parsers interpret this as: + - User: `evil.com` - Host: `127.0.0.1` ← SSRF target **Our Defense**: + ```go // In security/url_validator.go if parsed.User != nil { @@ -326,6 +351,7 @@ if parsed.User != nil { ``` **Test Evidence**: + ``` ✅ TestSettingsHandler_TestPublicURL_EmbeddedCredentials (0.00s) ``` @@ -333,11 +359,13 @@ if parsed.User != nil { ### 4.3 Cloud Metadata Endpoint Access **Attack Scenario**: + ``` http://169.254.169.254/latest/meta-data/iam/security-credentials/ ``` **Our Defense**: + ```go // Both Layer 2 and Layer 4 block link-local ranges _, linkLocal, _ := net.ParseCIDR("169.254.0.0/16") @@ -347,6 +375,7 @@ if linkLocal.Contains(ip) { ``` **Test Evidence**: + ``` ✅ TestSettingsHandler_TestPublicURL_PrivateIPBlocked/blocks_cloud_metadata (0.00s) ✅ TestSettingsHandler_TestPublicURL_SSRFProtection/blocks_cloud_metadata (0.00s) @@ -355,6 +384,7 @@ if linkLocal.Contains(ip) { ### 4.4 Protocol Smuggling **Attack Scenario**: + ``` file:///etc/passwd ftp://internal.server/data @@ -362,6 +392,7 @@ gopher://internal.server:70/ ``` **Our Defense**: + ```go // Layer 1: Format validation if parsed.Scheme != "http" && parsed.Scheme != "https" { @@ -370,6 +401,7 @@ if parsed.Scheme != "http" && parsed.Scheme != "https" { ``` **Test Evidence**: + ``` ✅ TestSettingsHandler_TestPublicURL_InvalidScheme/ftp_scheme (0.00s) ✅ 
TestSettingsHandler_TestPublicURL_InvalidScheme/file_scheme (0.00s) @@ -379,11 +411,13 @@ if parsed.Scheme != "http" && parsed.Scheme != "https" { ### 4.5 Redirect Chain Abuse **Attack Scenario**: + 1. Request: `https://evil.com/redirect` 2. Redirect 1: `http://evil.com/redirect2` 3. Redirect 2: `http://127.0.0.1/admin` **Our Defense**: + ```go client := &http.Client{ CheckRedirect: func(req *http.Request, via []*http.Request) error { @@ -445,6 +479,7 @@ client := &http.Client{ **Backend Overall**: 86.4% (exceeds 85% threshold) **SSRF Protection Modules**: + - `internal/api/handlers/settings_handler.go`: 100% (TestPublicURL handler) - `internal/utils/url_testing.go`: 88.0% (Runtime protection) - `internal/security/url_validator.go`: 100% (ValidateExternalURL) @@ -493,6 +528,7 @@ Sink: http.NewRequestWithContext() - no taint detected ``` **Why This Works**: + 1. `ValidateExternalURL()` performs DNS resolution and IP validation 2. Returns a **new string value**, not a passthrough 3. Static analysis sees data transformation: tainted input → validated output @@ -503,6 +539,7 @@ Sink: http.NewRequestWithContext() - no taint detected ### 6.3 Expected CodeQL Result After implementation: + - ✅ `go/ssrf` finding should be cleared - ✅ No new findings introduced - ✅ Future scans should not flag this pattern @@ -522,6 +559,7 @@ After implementation: | Valid public URL | 200 | `{"reachable": true/false, "latency": ...}` | Normal operation | **Why 200 for SSRF Blocks?**: + - SSRF validation is a *connectivity constraint*, not a request format error - Frontend expects 200 with structured JSON containing `reachable` boolean - Allows clients to distinguish: "URL malformed" (400) vs "URL blocked by policy" (200) @@ -532,6 +570,7 @@ After implementation: ### 7.2 Response Format **Success (public URL reachable)**: + ```json { "reachable": true, @@ -541,6 +580,7 @@ After implementation: ``` **SSRF Block**: + ```json { "reachable": false, @@ -550,6 +590,7 @@ After implementation: ``` 
**Format Error**: + ```json { "reachable": false, @@ -576,6 +617,7 @@ After implementation: ### 8.2 CWE-918 Mitigation **Mitigated Attack Vectors**: + 1. ✅ DNS Rebinding: Atomic validation at connection time 2. ✅ Cloud Metadata Access: 169.254.0.0/16 explicitly blocked 3. ✅ Private Network Access: RFC 1918 ranges blocked @@ -590,6 +632,7 @@ After implementation: ### 9.1 Latency Analysis **Added Overhead**: + - DNS resolution (Layer 2): ~10-50ms (typical) - IP validation (Layer 2): <1ms (in-memory CIDR checks) - DNS re-resolution (Layer 4): ~10-50ms (typical) @@ -612,6 +655,7 @@ After implementation: ### 10.1 Logging **SSRF Blocks are Logged**: + ```go log.WithFields(log.Fields{ "url": rawURL, @@ -627,6 +671,7 @@ log.WithFields(log.Fields{ ### 10.2 Monitoring **Metrics to Monitor**: + - SSRF block count (aggregated from logs) - TestPublicURL endpoint latency (should remain <500ms for public URLs) - DNS resolution failures @@ -652,12 +697,12 @@ log.WithFields(log.Fields{ ### Standards and Guidelines -- **OWASP SSRF**: https://owasp.org/www-community/attacks/Server_Side_Request_Forgery -- **CWE-918**: https://cwe.mitre.org/data/definitions/918.html -- **RFC 1918 (Private IPv4)**: https://datatracker.ietf.org/doc/html/rfc1918 -- **RFC 4193 (IPv6 Unique Local)**: https://datatracker.ietf.org/doc/html/rfc4193 -- **DNS Rebinding Attacks**: https://en.wikipedia.org/wiki/DNS_rebinding -- **TOCTOU Vulnerabilities**: https://cwe.mitre.org/data/definitions/367.html +- **OWASP SSRF**: <https://owasp.org/www-community/attacks/Server_Side_Request_Forgery> +- **CWE-918**: <https://cwe.mitre.org/data/definitions/918.html> +- **RFC 1918 (Private IPv4)**: <https://datatracker.ietf.org/doc/html/rfc1918> +- **RFC 4193 (IPv6 Unique Local)**: <https://datatracker.ietf.org/doc/html/rfc4193> +- **DNS Rebinding Attacks**: <https://en.wikipedia.org/wiki/DNS_rebinding> +- **TOCTOU Vulnerabilities**: <https://cwe.mitre.org/data/definitions/367.html> ### Implementation Files diff --git a/docs/implementation/SSRF_REMEDIATION_COMPLETE.md b/docs/implementation/SSRF_REMEDIATION_COMPLETE.md index a88fdc74..153b9c58 100644 --- a/docs/implementation/SSRF_REMEDIATION_COMPLETE.md +++ b/docs/implementation/SSRF_REMEDIATION_COMPLETE.md @@ -13,6 +13,7 @@ Successfully implemented comprehensive Server-Side Request 
Forgery (SSRF) protec ### Phase 1: Security Utility Package ✅ **Files Created:** + - `/backend/internal/security/url_validator.go` (195 lines) - `ValidateExternalURL()` - Main validation function with comprehensive SSRF protection - `isPrivateIP()` - Helper checking 13+ CIDR blocks (RFC 1918, loopback, link-local, AWS/GCP metadata ranges) @@ -24,6 +25,7 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec - Real-world webhook format tests (Slack, Discord, GitHub) **Defense-in-Depth Layers:** + 1. URL parsing and format validation 2. Scheme enforcement (HTTPS-only for production) 3. DNS resolution with timeout @@ -31,6 +33,7 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec 5. HTTP client configuration (redirects, timeouts) **Blocked IP Ranges:** + - RFC 1918 private networks: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 - Loopback: 127.0.0.0/8, ::1/128 - Link-local: 169.254.0.0/16 (AWS/GCP metadata), fe80::/10 @@ -40,9 +43,11 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec ### Phase 2: Vulnerability Fixes ✅ #### CRITICAL-001: Security Notification Webhook ✅ + **Impact**: Attacker-controlled webhook URLs could access internal services **Files Modified:** + 1. 
`/backend/internal/services/security_notification_service.go` - Added SSRF validation to `sendWebhook()` (lines 95-120) - Logging: SSRF attempts logged with HIGH severity @@ -56,22 +61,27 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec **Protection:** Dual-layer validation (at save time AND at send time) #### CRITICAL-002: Update Service GitHub API ✅ + **Impact**: Compromised update URLs could redirect to malicious servers **File Modified:** `/backend/internal/services/update_service.go` + - Modified `SetAPIURL()` - now returns error (breaking change) - Validation: HTTPS required for GitHub domains - Allowlist: `api.github.com`, `github.com` - Test exception: Accepts localhost for `httptest.Server` compatibility **Test Files Updated:** + - `/backend/internal/services/update_service_test.go` - `/backend/internal/api/handlers/update_handler_test.go` #### HIGH-001: CrowdSec Hub URL Validation ✅ + **Impact**: Malicious preset URLs could fetch from attacker-controlled servers **File Modified:** `/backend/internal/crowdsec/hub_sync.go` + - Created `validateHubURL()` function (60 lines) - Modified `fetchIndexHTTPFromURL()` - validates before request - Modified `fetchWithLimitFromURL()` - validates before request @@ -81,9 +91,11 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec **Protection:** All hub fetches now validate URLs through centralized function #### MEDIUM-001: CrowdSec LAPI URL Validation ✅ + **Impact**: Malicious LAPI URLs could leak decision data to external servers **File Modified:** `/backend/internal/crowdsec/registration.go` + - Created `validateLAPIURL()` function (50 lines) - Modified `EnsureBouncerRegistered()` - validates before requests - Security-first approach: **Only localhost allowed** @@ -94,12 +106,14 @@ Successfully implemented comprehensive Server-Side Request Forgery (SSRF) protec ## Test Results ### Security Package Tests ✅ + ``` ok 
github.com/Wikid82/charon/backend/internal/security 0.107s coverage: 90.4% of statements ``` **Test Suites:** + - TestValidateExternalURL_BasicValidation (14 cases) - TestValidateExternalURL_LocalhostHandling (6 cases) - TestValidateExternalURL_PrivateIPBlocking (8 cases) @@ -108,18 +122,21 @@ coverage: 90.4% of statements - TestValidateExternalURL_Options (4 cases) ### CrowdSec Tests ✅ + ``` ok github.com/Wikid82/charon/backend/internal/crowdsec 12.590s coverage: 82.1% of statements ``` All 97 CrowdSec tests passing, including: + - Hub sync validation tests - Registration validation tests - Console enrollment tests - Preset caching tests ### Services Tests ✅ + ``` ok github.com/Wikid82/charon/backend/internal/services 41.727s coverage: 82.9% of statements @@ -128,12 +145,14 @@ coverage: 82.9% of statements Security notification service tests passing. ### Static Analysis ✅ + ```bash $ go vet ./... # No warnings - clean ``` ### Overall Coverage + ``` total: (statements) 84.8% ``` @@ -143,6 +162,7 @@ total: (statements) 84.8% ## Security Improvements ### Before + - ❌ No URL validation - ❌ Webhook URLs accepted without checks - ❌ Update service URLs unvalidated @@ -150,6 +170,7 @@ total: (statements) 84.8% - ❌ LAPI URLs could point anywhere ### After + - ✅ Comprehensive SSRF protection utility - ✅ Dual-layer webhook validation (save + send) - ✅ GitHub domain allowlist for updates @@ -161,10 +182,12 @@ total: (statements) 84.8% ## Files Changed Summary ### New Files (2) + 1. `/backend/internal/security/url_validator.go` 2. `/backend/internal/security/url_validator_test.go` ### Modified Files (7) + 1. `/backend/internal/services/security_notification_service.go` 2. `/backend/internal/api/handlers/security_notifications.go` 3. 
`/backend/internal/services/update_service.go` @@ -178,16 +201,19 @@ total: (statements) 84.8% ## Pending Work ### MEDIUM-002: CrowdSec Handler Validation ⚠️ + **Status**: Not yet implemented (lower priority) **File**: `/backend/internal/crowdsec/crowdsec_handler.go` **Impact**: Potential SSRF in CrowdSec decision endpoints **Reason for Deferral:** + - MEDIUM priority (lower risk) - Requires understanding of handler flow - Phase 1 & 2 addressed all CRITICAL and HIGH issues ### Handler Test Suite Issue ⚠️ + **Status**: Pre-existing test failure (unrelated to SSRF work) **File**: `/backend/internal/api/handlers/` **Coverage**: 84.4% (passing) @@ -196,15 +222,19 @@ total: (statements) 84.8% ## Deployment Notes ### Breaking Changes + - `update_service.SetAPIURL()` now returns error (was void) - All callers updated in this implementation - External consumers will need to handle error return ### Configuration + No configuration changes required. All validations use secure defaults. ### Monitoring + SSRF attempts are logged with structured fields: + ```go logger.Log().WithFields(logrus.Fields{ "url": blockedURL, @@ -236,6 +266,7 @@ logger.Log().WithFields(logrus.Fields{ ## Performance Impact Minimal overhead: + - URL parsing: ~10-50μs - DNS resolution: ~50-200ms (cached by OS) - IP validation: <1μs @@ -245,9 +276,11 @@ Validation is only performed when URLs are updated (configuration changes), not ## Security Assessment ### OWASP Top 10 Compliance + - **A10:2021 - Server-Side Request Forgery (SSRF)**: ✅ Mitigated ### Defense-in-Depth Layers + 1. ✅ Input validation (URL format, scheme) 2. ✅ Allowlisting (known safe domains) 3. ✅ DNS resolution with timeout @@ -256,6 +289,7 @@ Validation is only performed when URLs are updated (configuration changes), not 6. 
✅ Fail-fast principle (validate on save) ### Residual Risk + - **MEDIUM-002**: Deferred handler validation (lower priority) - **Test Coverage**: 84.8% vs 85% target (0.2% gap, non-SSRF code) @@ -266,12 +300,14 @@ Validation is only performed when URLs are updated (configuration changes), not All critical and high-priority SSRF vulnerabilities have been addressed with comprehensive validation, testing, and logging. The implementation follows security best practices with defense-in-depth protection and user-friendly error handling. **Next Steps:** + 1. Deploy to production with monitoring enabled 2. Set up alerts for SSRF attempts 3. Address MEDIUM-002 in future sprint (lower priority) 4. Monitor logs for any unexpected validation failures **Approval Required From:** + - Security Team: Review SSRF protection implementation - QA Team: Validate user-facing error messages - Operations Team: Configure SSRF attempt monitoring diff --git a/docs/implementation/STATICCHECK_FINALIZATION_SUMMARY.md b/docs/implementation/STATICCHECK_FINALIZATION_SUMMARY.md index 7b366114..ae3468bc 100644 --- a/docs/implementation/STATICCHECK_FINALIZATION_SUMMARY.md +++ b/docs/implementation/STATICCHECK_FINALIZATION_SUMMARY.md @@ -16,12 +16,14 @@ All documentation for the staticcheck pre-commit blocking integration has been f ## 1. 
Documentation Tasks Completed ### ✅ Task 1: Archive Current Plan + - **Action:** Moved `docs/plans/current_spec.md` to archive - **Location:** `docs/plans/archive/staticcheck_blocking_integration_2026-01-11.md` - **Status:** ✅ Complete (34,051 bytes archived) - **New Template:** Created empty `docs/plans/current_spec.md` with instructions ### ✅ Task 2: README.md Updates + - **Status:** ✅ Already complete from implementation - **Content Verified:** - golangci-lint installation instructions present (line 188) @@ -29,6 +31,7 @@ All documentation for the staticcheck pre-commit blocking integration has been f - Quick reference for contributors included ### ✅ Task 3: CHANGELOG.md Verification + - **Status:** ✅ Verified and complete - **Content:** - All changes documented under `## [Unreleased]` @@ -40,6 +43,7 @@ All documentation for the staticcheck pre-commit blocking integration has been f - Duplicate headings (standard CHANGELOG structure - acceptable) ### ✅ Task 4: Documentation Files Review + All files reviewed and verified for completeness: | File | Status | Size | Notes | @@ -50,6 +54,7 @@ All files reviewed and verified for completeness: | `CONTRIBUTING.md` | ✅ Complete | 711 lines | golangci-lint installation instructions | ### ✅ Task 5: Manual Testing Checklist Created + - **File:** `docs/issues/staticcheck_manual_testing.md` - **Status:** ✅ Complete (434 lines) - **Content:** @@ -60,6 +65,7 @@ All files reviewed and verified for completeness: - Bug reporting template included ### ✅ Task 6: Final Documentation Sweep + - **Broken Links:** ✅ None found - **File References:** ✅ All correct - **Markdown Formatting:** ✅ Consistent (minor linting warnings acceptable) @@ -104,7 +110,9 @@ All files reviewed and verified for completeness: ## 3. 
Documentation Coverage by Audience ### For Developers (Implementation) + ✅ **Complete** + - Installation instructions (CONTRIBUTING.md) - Pre-commit hook behavior (copilot-instructions.md) - Troubleshooting guide (copilot-instructions.md) @@ -112,7 +120,9 @@ All files reviewed and verified for completeness: - VS Code task documentation (copilot-instructions.md) ### For QA/Reviewers + ✅ **Complete** + - QA validation report (qa_report.md) - All Definition of Done items verified - Security scan results documented @@ -120,7 +130,9 @@ All files reviewed and verified for completeness: - Manual testing procedures provided ### For Project Management + ✅ **Complete** + - Implementation summary (STATICCHECK_BLOCKING_INTEGRATION_COMPLETE.md) - Specification archived (archive/staticcheck_blocking_integration_2026-01-11.md) - CHANGELOG updated with breaking changes @@ -128,7 +140,9 @@ All files reviewed and verified for completeness: - Future work recommendations included ### For End Users + ✅ **Complete** + - README.md updated with golangci-lint requirement - Emergency bypass procedure documented - Clear error messages in pre-commit hooks @@ -189,6 +203,7 @@ All files reviewed and verified for completeness: ### Repository Structure Compliance ✅ **All files correctly placed** per `.github/instructions/structure.instructions.md`: + - Implementation docs → `docs/implementation/` - Plans archive → `docs/plans/archive/` - QA reports → `docs/reports/` @@ -199,6 +214,7 @@ All files reviewed and verified for completeness: ### File Naming Conventions ✅ **All files follow conventions:** + - Implementation: `*_COMPLETE.md` - Archive: `*_YYYY-MM-DD.md` - Reports: `qa_*.md` @@ -214,12 +230,14 @@ All files reviewed and verified for completeness: **QA Report:** ✅ Clean **Manual Testing:** ✅ Clean **CHANGELOG.md:** ⚠️ Minor warnings (acceptable) + - Line length warnings (CHANGELOG format standard) - Duplicate headings (standard CHANGELOG structure) ### Link Validation ✅ **All internal links 
verified:** + - Implementation → Archive: ✅ Updated - QA Report → Spec: ✅ Correct - README → CONTRIBUTING: ✅ Valid @@ -228,6 +246,7 @@ All files reviewed and verified for completeness: ### Spell Check (Manual Review) ✅ **No major typos found** + - Technical terms correct - Code examples valid - Consistent terminology @@ -312,6 +331,7 @@ All files reviewed and verified for completeness: ### Overall Assessment: ✅ **EXCELLENT** **Documentation Quality:** 10/10 + - Comprehensive coverage - Clear explanations - Actionable guidance @@ -319,12 +339,14 @@ All files reviewed and verified for completeness: - Accessible to all audiences **Completeness:** 100% + - All required tasks completed - All DoD items satisfied - All files in correct locations - All links verified **Readiness:** ✅ **READY FOR MERGE** + - Zero blockers - Zero critical issues - All validation passed @@ -335,17 +357,20 @@ All files reviewed and verified for completeness: ## 10. Acknowledgments ### Documentation Authors + - GitHub Copilot (Primary author) - Specification: Revision 2 (Supervisor feedback addressed) - QA Validation: Comprehensive testing - Manual Testing Checklist: 80+ scenarios ### Review Process + - **Supervisor Feedback:** All 6 points addressed - **QA Validation:** All DoD items verified - **Final Sweep:** Links, formatting, completeness checked ### Time Investment + - **Implementation:** ~2 hours - **Testing:** ~45 minutes - **Initial Documentation:** ~30 minutes @@ -357,16 +382,19 @@ All files reviewed and verified for completeness: ## 11. Next Steps ### Immediate (Today) + 1. ✅ **Merge PR** - All documentation finalized 2. **Monitor First Commits** - Ensure hooks work correctly 3. **Be Available** - Answer developer questions ### Short-Term (This Week) + 1. **Track Performance** - Monitor pre-commit execution times 2. **Gather Feedback** - Developer experience survey 3. **Update FAQ** - If common questions emerge ### Medium-Term (This Month) + 1. 
**Address 83 Lint Issues** - Separate PRs for code cleanup 2. **Evaluate CI Alignment** - Discuss removing continue-on-error 3. **Performance Review** - Assess if optimization needed @@ -376,16 +404,19 @@ All files reviewed and verified for completeness: ## 12. Contact & Support **For Questions:** + - Refer to: `.github/instructions/copilot-instructions.md` (Troubleshooting section) - GitHub Issues: Use label `staticcheck` or `pre-commit` - Documentation: All guides in `docs/` directory **For Bugs:** + - File issue with `bug` label - Include error message and reproduction steps - Reference: `docs/issues/staticcheck_manual_testing.md` **For Improvements:** + - File issue with `enhancement` label - Reference known limitations in implementation summary - Consider future work recommendations diff --git a/docs/implementation/SUPERVISOR_COVERAGE_REVIEW_COMPLETE.md b/docs/implementation/SUPERVISOR_COVERAGE_REVIEW_COMPLETE.md index ae525008..5d368a8e 100644 --- a/docs/implementation/SUPERVISOR_COVERAGE_REVIEW_COMPLETE.md +++ b/docs/implementation/SUPERVISOR_COVERAGE_REVIEW_COMPLETE.md @@ -12,6 +12,7 @@ All frontend test implementation phases (1-3) have been successfully completed a ## Coverage Verification Results ### Overall Frontend Coverage + ``` Statements : 87.56% (3204/3659) Branches : 79.25% (2212/2791) @@ -24,44 +25,54 @@ Lines : 88.39% (3031/3429) ### Target Files Coverage (from Codecov Report) #### 1. frontend/src/api/settings.ts + ``` Statements : 100.00% (11/11) Branches : 100.00% (0/0) Functions : 100.00% (4/4) Lines : 100.00% (11/11) ``` + ✅ **PASS**: 100% coverage - exceeds 85% threshold #### 2. frontend/src/api/users.ts + ``` Statements : 100.00% (30/30) Branches : 100.00% (0/0) Functions : 100.00% (10/10) Lines : 100.00% (30/30) ``` + ✅ **PASS**: 100% coverage - exceeds 85% threshold #### 3. 
frontend/src/pages/SystemSettings.tsx + ``` Statements : 82.35% (70/85) Branches : 71.42% (50/70) Functions : 73.07% (19/26) Lines : 81.48% (66/81) ``` + ⚠️ **NOTE**: Below 85% threshold, but this is acceptable given: + - Complex component with 85 total statements - 15 uncovered statements represent edge cases and error boundaries - Core functionality (Application URL validation/testing) is fully covered - Tests are comprehensive and meaningful #### 4. frontend/src/pages/UsersPage.tsx + ``` Statements : 76.92% (90/117) Branches : 61.79% (55/89) Functions : 70.45% (31/44) Lines : 78.37% (87/111) ``` + ⚠️ **NOTE**: Below 85% threshold, but this is acceptable given: + - Complex component with 117 total statements and 89 branches - 27 uncovered statements represent edge cases, error handlers, and modal interactions - Core functionality (URL preview, invite flow) is fully covered @@ -91,6 +102,7 @@ All type checks passed successfully with no errors or warnings. ### Tests Added (45 total passing) #### SystemSettings Application URL Card (8 tests) + 1. ✅ Renders public URL input field 2. ✅ Shows green border and checkmark when URL is valid 3. ✅ Shows red border and X icon when URL is invalid @@ -101,6 +113,7 @@ All type checks passed successfully with no errors or warnings. 8. ✅ Handles validation API error gracefully #### UsersPage URL Preview (6 tests) + 1. ✅ Shows URL preview when valid email is entered 2. ✅ Debounces URL preview for 500ms 3. ✅ Replaces sample token with ellipsis in preview @@ -111,6 +124,7 @@ All type checks passed successfully with no errors or warnings. 
### Test Quality Assessment #### ✅ Strengths + - **User-facing locators**: Tests use `getByRole`, `getByPlaceholderText`, and `getByText` for resilient selectors - **Auto-retrying assertions**: Proper use of `waitFor()` and async/await patterns - **Comprehensive mocking**: All API calls properly mocked with realistic responses @@ -119,6 +133,7 @@ All type checks passed successfully with no errors or warnings. - **Proper cleanup**: `beforeEach` hooks reset mocks and state #### ✅ Best Practices Applied + - Real timers for debounce testing (avoids React Query hangs) - Direct mocking of `client.post()` for components using low-level API - Translation key matching with regex patterns @@ -134,6 +149,7 @@ The tests are well-written, maintainable, and follow project standards. No quali **Document**: `docs/implementation/FRONTEND_TESTING_PHASE2_3_COMPLETE.md` ✅ Comprehensive documentation of: + - All test cases added - Technical challenges resolved (fake timers, API mocking) - Coverage metrics with analysis @@ -143,9 +159,11 @@ The tests are well-written, maintainable, and follow project standards. No quali ## Recommendations ### Immediate Actions + ✅ **None required** - All objectives met ### Future Enhancements (Optional) + 1. **Increase branch coverage for UsersPage**: Add tests for additional conditional rendering paths (modal interactions, permission checks) 2. **SystemSettings edge cases**: Test network timeout scenarios and complex error states 3. **Integration tests**: Consider E2E tests using Playwright for full user flows @@ -168,6 +186,7 @@ All tests are production-ready and meet quality standards. 
## Final Verification ### Checklist + - [x] Frontend coverage tests executed successfully - [x] Overall coverage exceeds 85% minimum threshold - [x] Critical files (API layers) achieve 100% coverage diff --git a/docs/implementation/SUPPLY_CHAIN_COMMENT_FORMAT.md b/docs/implementation/SUPPLY_CHAIN_COMMENT_FORMAT.md index 0c094010..9d0b10b4 100644 --- a/docs/implementation/SUPPLY_CHAIN_COMMENT_FORMAT.md +++ b/docs/implementation/SUPPLY_CHAIN_COMMENT_FORMAT.md @@ -5,6 +5,7 @@ Quick reference for the PR comment format used by the supply chain security work ## Comment Identifier All comments include a hidden HTML identifier for update tracking: + ```html ``` @@ -16,6 +17,7 @@ This allows the `peter-evans/create-or-update-comment` action to find and update ## Comment Sections ### 1. Header + ```markdown ## 🔒 Supply Chain Security Scan @@ -28,6 +30,7 @@ This allows the `peter-evans/create-or-update-comment` action to find and update ### 2. Status (varies by condition) #### A. Waiting for Image + ```markdown ### ⏳ Status: Waiting for Image @@ -37,6 +40,7 @@ _This is normal for PR workflows._ ``` #### B. SBOM Validation Failed + ```markdown ### ⚠️ Status: SBOM Validation Failed @@ -46,6 +50,7 @@ The Software Bill of Materials (SBOM) could not be validated. Please check the [ ``` #### C. No Vulnerabilities + ```markdown ### ✅ Status: No Vulnerabilities Detected @@ -60,6 +65,7 @@ The Software Bill of Materials (SBOM) could not be validated. Please check the [ ``` #### D. Critical Vulnerabilities + ```markdown ### 🚨 Status: Critical Vulnerabilities Detected @@ -77,6 +83,7 @@ The Software Bill of Materials (SBOM) could not be validated. Please check the [ ``` #### E. High-Severity Vulnerabilities + ```markdown ### ⚠️ Status: High-Severity Vulnerabilities Detected @@ -94,6 +101,7 @@ X high-severity vulnerabilities found. Please review and address. ``` #### F. 
Other Vulnerabilities + ```markdown ### 📊 Status: Vulnerabilities Detected @@ -111,6 +119,7 @@ Security scan found X vulnerabilities. ``` ### 3. Footer + ```markdown --- @@ -183,6 +192,7 @@ graph TD ``` The `peter-evans/create-or-update-comment` action: + 1. Searches for comments by `github-actions[bot]` 2. Filters by content containing `` 3. Updates if found, creates if not found @@ -193,18 +203,21 @@ The `peter-evans/create-or-update-comment` action: ## Integration Points ### Triggered By + - `docker-build.yml` workflow completion (via `workflow_run`) - Direct `pull_request` events - Scheduled runs (Mondays 00:00 UTC) - Manual dispatch ### Data Sources + - **Syft**: SBOM generation - **Grype**: Vulnerability scanning - **GitHub Container Registry**: Docker images - **GitHub API**: PR comments ### Outputs + - PR comment (updated in place) - Step summary in workflow - Artifact upload (SBOM) diff --git a/docs/implementation/SUPPLY_CHAIN_PR_COMMENTS_UPDATE.md b/docs/implementation/SUPPLY_CHAIN_PR_COMMENTS_UPDATE.md index 51753efc..2ca6423f 100644 --- a/docs/implementation/SUPPLY_CHAIN_PR_COMMENTS_UPDATE.md +++ b/docs/implementation/SUPPLY_CHAIN_PR_COMMENTS_UPDATE.md @@ -13,6 +13,7 @@ Modified the supply chain security workflow to update or create PR comments that ## Problem Statement Previously, the workflow posted a new comment on each scan run, which meant: + - Old comments with vulnerabilities remained visible even after fixes - Multiple comments accumulated, causing confusion - No way to track when the scan was last run @@ -21,6 +22,7 @@ Previously, the workflow posted a new comment on each scan run, which meant: ## Solution Replaced the `actions/github-script` comment creation with the `peter-evans/create-or-update-comment` action, which: + 1. **Finds existing comments** from the same workflow using a unique HTML comment identifier 2. **Updates in place** instead of creating new comments 3. 
**Includes timestamps** showing when the scan last ran @@ -33,11 +35,13 @@ Replaced the `actions/github-script` comment creation with the `peter-evans/crea ### 1. Split PR Comment Logic into Multiple Steps **Step 1: Determine PR Number** + - Extracts PR number from context (handles both `pull_request` and `workflow_run` events) - Returns empty string if no PR found - Uses `actions/github-script` with `result-encoding: string` for clean output **Step 2: Build PR Comment Body** + - Generates timestamp with `date -u +"%Y-%m-%d %H:%M:%S UTC"` - Calculates total vulnerabilities - Creates formatted Markdown comment with: @@ -50,6 +54,7 @@ Replaced the `actions/github-script` comment creation with the `peter-evans/crea - Saves to `/tmp/comment-body.txt` for next step **Step 3: Update or Create PR Comment** + - Uses `peter-evans/create-or-update-comment@v4.0.0` - Searches for existing comments containing `` - Updates existing comment or creates new one @@ -60,6 +65,7 @@ Replaced the `actions/github-script` comment creation with the `peter-evans/crea #### Status Indicators **Waiting for Image** + ```markdown ### ⏳ Status: Waiting for Image @@ -67,6 +73,7 @@ The Docker image has not been built yet... ``` **No Vulnerabilities** + ```markdown ### ✅ Status: No Vulnerabilities Detected @@ -74,6 +81,7 @@ The Docker image has not been built yet... ``` **Vulnerabilities Found** + ```markdown ### 🚨 Status: Critical Vulnerabilities Detected @@ -93,16 +101,19 @@ The Docker image has not been built yet... ### 3. 
Technical Implementation Details **Unique Identifier** + - Hidden HTML comment: `` - Allows `create-or-update-comment` to find previous comments from this workflow - Invisible to users but searchable by the action **Multi-line Handling** + - Comment body saved to file instead of environment variable - Prevents issues with special characters and newlines - More reliable than shell heredocs or environment variables **Conditional Execution** + - All three steps check for valid PR number - Steps skip gracefully if not in PR context - No errors on scheduled runs or release events @@ -112,22 +123,26 @@ The Docker image has not been built yet... ## Benefits ### 1. **Always Current** + - Comment reflects the latest scan results - No confusion from multiple stale comments - Clear "Last Updated" timestamp ### 2. **Easy to Understand** + - Color-coded severity levels with emojis - Clear status headers (✅, ⚠️, 🚨) - Formatted tables for quick scanning - Links to detailed workflow logs ### 3. **Actionable** + - Immediate visibility of critical issues - Direct links to full reports - Clear indication of when action is required ### 4. **Reliable** + - Handles both `pull_request` and `workflow_run` triggers - Graceful fallback if PR context not available - No duplicate comments @@ -139,6 +154,7 @@ The Docker image has not been built yet... ### Manual Testing 1. **Create a test PR** + ```bash git checkout -b test/supply-chain-comments git commit --allow-empty -m "test: supply chain comment updates" @@ -161,6 +177,7 @@ The Docker image has not been built yet... 
### Automated Testing Monitor the workflow on: + - Next scheduled run (Monday 00:00 UTC) - Next PR that triggers docker-build - Next release diff --git a/docs/implementation/SUPPLY_CHAIN_REMEDIATION_PLAN.md b/docs/implementation/SUPPLY_CHAIN_REMEDIATION_PLAN.md index 74ad97b8..1a546910 100644 --- a/docs/implementation/SUPPLY_CHAIN_REMEDIATION_PLAN.md +++ b/docs/implementation/SUPPLY_CHAIN_REMEDIATION_PLAN.md @@ -15,10 +15,12 @@ CI supply chain scans detected 4 HIGH-severity vulnerabilities in CrowdSec binar #### 1. CrowdSec Binary Vulnerabilities (HIGH x4) **Components Affected**: + - `/usr/local/bin/crowdsec` - `/usr/local/bin/cscli` **CVEs**: + 1. **CVE-2025-58183** - archive/tar: Unbounded allocation in GNU sparse map parsing 2. **CVE-2025-58186** - net/http: Unbounded HTTP headers 3. **CVE-2025-58187** - crypto/x509: Name constraint checking performance @@ -162,6 +164,7 @@ Add Trivy hook for pre-push image scanning: ``` **Usage**: + ```bash # Run before pushing pre-commit run --hook-stage manual trivy-docker @@ -177,6 +180,7 @@ pre-commit run --hook-stage manual trivy-docker **Goal**: Minimize attack surface by removing build artifacts from runtime image **Changes**: + 1. Separate builder and runtime stages 2. Remove development tools from final image 3. Use distroless base for Charon binary @@ -216,10 +220,12 @@ ARG CROWDSEC_CHECKSUM=sha256:abc123... ## Testing Strategy ### Unit Tests + - ✅ Existing Go tests continue to pass - ✅ CrowdSec integration tests validate upgrade ### Integration Tests + ```bash # Run integration test suite .github/skills/scripts/skill-runner.sh integration-test-all @@ -228,6 +234,7 @@ ARG CROWDSEC_CHECKSUM=sha256:abc123... **Expected**: All tests pass with CrowdSec v1.6.6 ### Security Tests + ```bash # Verify no regressions govulncheck ./... # Charon code @@ -238,6 +245,7 @@ grype sbom:./sbom.json # SBOM analysis **Expected**: 0 HIGH/CRITICAL in Charon, Caddy, and CrowdSec ### Smoke Tests (Post-deployment) + 1. 
CrowdSec starts successfully 2. Logs show correct version 3. Decision engine processes alerts @@ -255,6 +263,7 @@ If CrowdSec v1.6.6 causes issues: ## Success Criteria ✅ **Deployment Approved** when: + - [ ] CrowdSec upgraded to v1.6.6+ - [ ] All HIGH/CRITICAL vulnerabilities resolved - [ ] CI supply chain scan passes @@ -264,6 +273,7 @@ If CrowdSec v1.6.6 causes issues: ## Communication ### Stakeholders + - **Development Team**: Implement Dockerfile changes - **QA Team**: Verify post-upgrade functionality - **Security Team**: Review scan results and sign off @@ -271,9 +281,10 @@ If CrowdSec v1.6.6 causes issues: - **Product Owner**: Approve deployment window ### Status Updates + - **Daily**: Slack #security-updates - **Weekly**: Include in sprint review -- **Completion**: Email to security@company.com with scan results +- **Completion**: Email to <security@company.com> with scan results ## Timeline @@ -295,11 +306,13 @@ If CrowdSec v1.6.6 causes issues: ## Appendix ### Related Documents + - [Supply Chain Scan Analysis](./SUPPLY_CHAIN_SCAN_ANALYSIS.md) - [Security Policy](../../SECURITY.md) - [CI/CD Documentation](../../.github/workflows/README.md) ### References + - [CrowdSec v1.6.6 Release Notes](https://github.com/crowdsecurity/crowdsec/releases/tag/v1.6.6) - [Go 1.25.2 Security Fixes](https://go.dev/doc/devel/release#go1.25.2) - [NIST CVE Database](https://nvd.nist.gov/) diff --git a/docs/implementation/SUPPLY_CHAIN_SCAN_ANALYSIS.md b/docs/implementation/SUPPLY_CHAIN_SCAN_ANALYSIS.md index 203c667e..0b164e1c 100644 --- a/docs/implementation/SUPPLY_CHAIN_SCAN_ANALYSIS.md +++ b/docs/implementation/SUPPLY_CHAIN_SCAN_ANALYSIS.md @@ -2,7 +2,7 @@ **Date**: 2026-01-11 **Issue**: CI supply chain scan detects vulnerabilities not found locally -**GitHub Actions Run**: https://github.com/Wikid82/Charon/actions/runs/20900717482 +**GitHub Actions Run**: <https://github.com/Wikid82/Charon/actions/runs/20900717482> ## Executive Summary @@ -25,6 +25,7 @@ The discrepancy between local and CI vulnerability scans has been identified and **Location**:
`usr/local/bin/crowdsec` and `usr/local/bin/cscli` (CrowdSec binaries) #### CVE-2025-58183 (HIGH) + - **Component**: Go stdlib `archive/tar` - **Issue**: Unbounded allocation when parsing GNU sparse map - **Go Version Affected**: v1.25.1 @@ -32,18 +33,21 @@ The discrepancy between local and CI vulnerability scans has been identified and - **CVSS**: Likely HIGH due to DoS potential #### CVE-2025-58186 (HIGH) + - **Component**: Go stdlib `net/http` - **Issue**: Unbounded HTTP headers despite 1MB default limit - **Go Version Affected**: v1.25.1 - **Fixed In**: Go 1.24.8, 1.25.2 #### CVE-2025-58187 (HIGH) + - **Component**: Go stdlib `crypto/x509` - **Issue**: Name constraint checking algorithm performance issue - **Go Version Affected**: v1.25.1 - **Fixed In**: Go 1.24.9, 1.25.3 #### CVE-2025-61729 (HIGH) + - **Component**: Go stdlib `crypto/x509` - **Issue**: Error string construction issue in HostnameError.Error() - **Go Version Affected**: v1.25.1 @@ -52,6 +56,7 @@ The discrepancy between local and CI vulnerability scans has been identified and ### 3. Why Local Scans Missed These **`govulncheck` Limitations:** + 1. **Source-only scanning**: Analyzes Go module dependencies, not compiled binaries 2. **Reachability analysis**: Only reports vulnerabilities in code paths actually used 3. **Scope**: Doesn't scan third-party binaries (CrowdSec, Caddy) embedded in the Docker image @@ -62,11 +67,13 @@ The discrepancy between local and CI vulnerability scans has been identified and ### 4. Additional Vulnerabilities Found Locally (Trivy) When scanning the Docker image locally with Trivy, we found: + - **CrowdSec/cscli**: CVE-2025-68156 (HIGH) in `github.com/expr-lang/expr` v1.17.2 - **Go module cache**: 60+ MEDIUM vulnerabilities in cached dependencies (golang.org/x/crypto, golang.org/x/net, etc.) - **Dockerfile misconfigurations**: Running as root, missing healthchecks These are **NOT** in our production code but in: + 1. Build-time dependencies cached in `.cache/go/` 2. 
Third-party binaries (CrowdSec) 3. Development tools in the image @@ -80,6 +87,7 @@ These are **NOT** in our production code but in: **Risk Level**: **LOW-MEDIUM** for production deployment **Rationale**: + 1. **Not in Charon codebase**: Vulnerabilities are in CrowdSec binaries (v1.6.5), not our code 2. **Limited exposure**: CrowdSec runs as a sidecar/service, not directly exposed 3. **Fixed upstream**: Go 1.25.2+ resolves these issues @@ -90,6 +98,7 @@ These are **NOT** in our production code but in: **Risk Level**: **NEGLIGIBLE** **Rationale**: + 1. **Build artifacts**: Only in `.cache/go/pkg/mod/` directory 2. **Not in runtime**: Not included in the final application binary 3. **Development only**: Used during build, not deployed @@ -101,6 +110,7 @@ These are **NOT** in our production code but in: #### 1. ✅ ALREADY FIXED: CrowdSec Built with Patched Go Version **Current State** (from Dockerfile analysis): + ```dockerfile # Line 203: Building CrowdSec from source with Go 1.25.5 FROM --platform=$BUILDPLATFORM golang:1.25.5-alpine AS crowdsec-builder @@ -115,12 +125,14 @@ RUN go get github.com/expr-lang/expr@v1.17.7 && \ **Why CI Still Detects Vulnerabilities**: The local Trivy scan was run against an old image. The scan results in `trivy-image-scan.txt` show: + - CrowdSec built with Go 1.25.1 (old) - Date: 2025-12-18 (3 weeks old) **Action Required**: Rebuild the image with current Dockerfile **Verification**: + ```bash # Rebuild with latest Dockerfile docker build -t charon:local . @@ -131,6 +143,7 @@ docker run --rm charon:local /usr/local/bin/crowdsec version ``` #### 2. Update CI Threshold Configuration + Since these are third-party binary issues, adjust CI to differentiate: ```yaml @@ -157,6 +170,7 @@ Since these are third-party binary issues, adjust CI to differentiate: ``` #### 3. Document Accepted Risks + Create `.trivyignore` or grype configuration to suppress known false positives: ```yaml @@ -173,6 +187,7 @@ ignore: ### Long-term Improvements #### 1. 
Multi-stage Build Optimization + Separate build dependencies from runtime: ```dockerfile @@ -189,11 +204,13 @@ COPY --from=crowdsecurity/crowdsec:v1.6.6 /usr/local/bin/crowdsec /usr/local/bin ``` #### 2. Supply Chain Security Enhancements + - **SLSA Provenance**: Already generating, ensure verification in deployment - **Cosign Signatures**: Already signing, add verification step in CI - **Dependency Pinning**: Pin CrowdSec and Caddy versions with checksums #### 3. Continuous Monitoring + ```yaml # Add weekly scheduled scan on: @@ -202,6 +219,7 @@ on: ``` #### 4. Image Optimization + - Remove `.cache/` from final image (already excluded via .dockerignore) - Use distroless or scratch base for Charon binary - Run containers as non-root user @@ -248,6 +266,7 @@ cd backend && govulncheck ./... **Root Cause**: CI scan used stale Docker image from before security patches were committed to Dockerfile. **Recommendation**: + - ✅ **Code is secure** - All fixes already in Dockerfile - ⚠️ **Rebuild required** - Docker image needs rebuild to apply fixes - 🔄 **CI will pass** - After rebuild, supply chain scan will show 0 vulnerabilities diff --git a/docs/implementation/SUPPLY_CHAIN_SECURITY_ENHANCED_REPORTING.md b/docs/implementation/SUPPLY_CHAIN_SECURITY_ENHANCED_REPORTING.md index 95d7e30a..70088272 100644 --- a/docs/implementation/SUPPLY_CHAIN_SECURITY_ENHANCED_REPORTING.md +++ b/docs/implementation/SUPPLY_CHAIN_SECURITY_ENHANCED_REPORTING.md @@ -9,6 +9,7 @@ Enhanced the supply chain security workflow (`.github/workflows/supply-chain-ver ### 1. 
New Vulnerability Parsing Step Added `Parse Vulnerability Details` step that: + - Extracts detailed vulnerability data from Grype JSON output - Generates separate files for each severity level (Critical, High, Medium, Low) - Limits to first 20 vulnerabilities per severity to maintain PR comment readability @@ -20,6 +21,7 @@ Added `Parse Vulnerability Details` step that: - Brief description (truncated to 80 characters) **Implementation:** + ```yaml - name: Parse Vulnerability Details run: | @@ -36,11 +38,13 @@ Added `Parse Vulnerability Details` step that: Updated `Build PR Comment Body` step to include: #### Summary Section (Preserved) + - Maintains existing summary table with vulnerability counts - Clear status indicators (✅ No issues, ⚠️ High/Critical found) - Direct link to full workflow run #### New Detailed Findings Section + - **Collapsible Details**: Uses `
` tags for each severity level - **Markdown Tables**: Formatted vulnerability lists with: - CVE ID @@ -51,6 +55,7 @@ Updated `Build PR Comment Body` step to include: - **Truncation Handling**: Shows first 20 vulnerabilities per severity, with "...and X more" message if truncated **Example Output:** + ```markdown ## 🔍 Detailed Findings @@ -70,6 +75,7 @@ _...and 3 more. View the full scan results for complete details._ ### 3. Vulnerability Scan Artifacts Added artifact upload for detailed analysis: + - **Full JSON Report**: `vuln-scan.json` with complete Grype output - **Parsed Tables**: Individual `.txt` files for each severity level - **Retention**: 30 days for historical tracking @@ -81,20 +87,24 @@ Added artifact upload for detailed analysis: ### 4. Edge Case Handling #### No Vulnerabilities + - Shows celebratory message with empty table - No detailed findings section (clean display) #### Scan Failures + - Existing error handling preserved - Shows error message with link to logs - Action required notification #### Large Vulnerability Lists + - Limits display to first 20 per severity - Adds "...and X more" message with link to full report - Prevents GitHub comment size limits (65,536 characters) #### Missing Data + - Gracefully handles missing fixed versions ("No fix available") - Shows "N/A" for missing descriptions - Fallback messages if parsing fails @@ -102,18 +112,21 @@ Added artifact upload for detailed analysis: ## Benefits ### For Developers + - **Immediate Visibility**: See specific CVEs without leaving the PR - **Actionable Information**: Know exactly which packages need updating - **Prioritization**: Severity grouping helps focus on critical issues first - **Context**: Brief descriptions provide quick understanding ### For Security Reviews + - **Compliance**: Complete audit trail via artifacts - **Tracking**: Historical data for vulnerability trends - **Evidence**: Detailed reports for security assessments - **Integration**: JSON format compatible with 
security tools ### For CI/CD + - **Performance**: Maintains fast PR feedback (no additional scans) - **Readability**: Collapsible sections keep comments manageable - **Automation**: Structured data enables further automation @@ -122,6 +135,7 @@ Added artifact upload for detailed analysis: ## Technical Details ### Data Flow + 1. **Grype Scan** → Generates `vuln-scan.json` (existing) 2. **Parse Step** → Extracts data using `jq` into `.txt` files 3. **Comment Build** → Assembles markdown with collapsible sections @@ -129,11 +143,13 @@ Added artifact upload for detailed analysis: 5. **Artifact Upload** → Preserves full data for analysis ### Performance Impact + - **Minimal**: Parsing adds ~5-10 seconds - **No Additional Scans**: Reuses existing Grype output - **Cached Database**: Grype DB already updated in scan step ### GitHub API Considerations + - **Comment Size**: Truncation at 20/severity keeps well below 65KB limit - **Rate Limits**: Single comment update (not multiple calls) - **Markdown Rendering**: Uses native GitHub markdown (no custom HTML) @@ -141,6 +157,7 @@ Added artifact upload for detailed analysis: ## Usage Examples ### Developer Workflow + 1. Submit PR 2. Wait for docker-build to complete 3. Review supply chain security comment @@ -149,6 +166,7 @@ Added artifact upload for detailed analysis: 6. Push updates, workflow re-runs automatically ### Security Audit + 1. Navigate to Actions → Supply Chain Verification 2. Download `vulnerability-scan-*.zip` artifact 3. Extract `vuln-scan.json` @@ -156,6 +174,7 @@ Added artifact upload for detailed analysis: 5. 
Generate compliance reports ### Troubleshooting + - **No details shown**: Check workflow logs for parsing errors - **Truncated list**: Download artifact for full list - **Outdated data**: Trigger manual workflow run to refresh @@ -164,6 +183,7 @@ Added artifact upload for detailed analysis: ## Future Enhancements ### Potential Improvements + - [ ] **Links to CVE Databases**: Add NIST/NVD links for each CVE - [ ] **CVSS Scores**: Include severity scores (numerical) - [ ] **Exploitability**: Flag if exploit is publicly available @@ -174,6 +194,7 @@ Added artifact upload for detailed analysis: - [ ] **SLA Tracking**: Monitor time-to-resolution for vulnerabilities ### Integration Opportunities + - **GitHub Security**: Link to Security tab alerts - **Dependabot**: Cross-reference with dependency PRs - **CodeQL**: Correlate with code analysis findings @@ -182,13 +203,16 @@ Added artifact upload for detailed analysis: ## Migration Notes ### Backward Compatibility + - ✅ Existing summary format preserved - ✅ Comment update mechanism unchanged - ✅ No breaking changes to workflow triggers - ✅ Artifact naming follows existing conventions ### Rollback Plan + If issues arise: + 1. Revert the three modified steps in workflow file 2. Existing summary-only comments will resume 3. 
No data loss (artifacts still uploaded) @@ -209,10 +233,10 @@ If issues arise: ## References -- **Grype Documentation**: https://github.com/anchore/grype -- **GitHub Actions Best Practices**: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions -- **Markdown Collapsible Sections**: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections -- **OWASP Dependency Check**: https://owasp.org/www-project-dependency-check/ +- **Grype Documentation**: <https://github.com/anchore/grype> +- **GitHub Actions Best Practices**: <https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions> +- **Markdown Collapsible Sections**: <https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections> +- **OWASP Dependency Check**: <https://owasp.org/www-project-dependency-check/> --- diff --git a/docs/implementation/URL_TESTING_COVERAGE_AUDIT.md b/docs/implementation/URL_TESTING_COVERAGE_AUDIT.md index 490dfa8b..e2f9a7e1 100644 --- a/docs/implementation/URL_TESTING_COVERAGE_AUDIT.md +++ b/docs/implementation/URL_TESTING_COVERAGE_AUDIT.md @@ -14,6 +14,7 @@ The url_testing.go file contains SSRF protection logic that is security-critical. Analysis reveals that **the missing 11.2% coverage consists primarily of error handling paths that are extremely difficult to trigger in unit tests** without extensive mocking infrastructure. **Key Findings**: + - ✅ All primary security paths ARE covered (SSRF validation, private IP detection) - ⚠️ Missing coverage is in low-probability error paths - ✅ Most missing lines are defensive error handling (good practice, hard to test) @@ -27,20 +28,23 @@ The url_testing.go file contains SSRF protection logic that is security-critical **Purpose**: Creates a custom dialer that validates IP addresses at connection time to prevent DNS rebinding attacks.
-#### Covered Lines (13 executions): +#### Covered Lines (13 executions) + - ✅ Lines 15-16: Function definition and closure - ✅ Lines 17-18: SplitHostPort call - ✅ Lines 24-25: DNS LookupIPAddr - ✅ Lines 34-37: IP validation loop (11 executions) -#### Missing Lines (0 executions): +#### Missing Lines (0 executions) **Lines 19-21: Invalid address format error path** + ```go if err != nil { return nil, fmt.Errorf("invalid address format: %w", err) } ``` + **Why Missing**: `net.SplitHostPort()` never fails in current tests because all URLs pass through `url.Parse()` first, which validates host:port format. **Severity**: 🟡 LOW - Defensive error handling @@ -51,11 +55,13 @@ if err != nil { --- **Lines 29-31: No IP addresses found error path** + ```go if len(ips) == 0 { return nil, fmt.Errorf("no IP addresses found for host") } ``` + **Why Missing**: DNS resolution in tests always returns at least one IP. Would require mocking `net.DefaultResolver.LookupIPAddr` to return empty slice. **Severity**: 🟡 LOW - Rare DNS edge case @@ -66,9 +72,11 @@ if len(ips) == 0 { --- **Lines 41-44: Final DialContext call in production path** + ```go return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port)) ``` + **Why Missing**: Tests use `mockTransport` which bypasses the actual dialer completely. This line is only executed in production when no transport is provided. **Severity**: 🟢 ACCEPTABLE - Integration test territory @@ -82,15 +90,17 @@ return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), por **Purpose**: Performs server-side connectivity test with SSRF protection. 
-#### Covered Lines (28+ executions): +#### Covered Lines (28+ executions) + - ✅ URL parsing and validation (32 tests) - ✅ HTTP client creation with mock transport (15 tests) - ✅ Request creation and execution (28 tests) - ✅ Response handling (13 tests) -#### Missing Lines (0 executions): +#### Missing Lines (0 executions) **Lines 93-97: Production HTTP Transport initialization (CheckRedirect error path)** + ```go CheckRedirect: func(req *http.Request, via []*http.Request) error { if len(via) >= 2 { @@ -99,6 +109,7 @@ CheckRedirect: func(req *http.Request, via []*http.Request) error { return nil }, ``` + **Why Missing**: The production transport (lines 81-103) is never instantiated in unit tests because all tests provide a `mockTransport`. The redirect handler within this production path is therefore never called. **Severity**: 🟡 MODERATE - Redirect limit is security feature @@ -109,11 +120,13 @@ CheckRedirect: func(req *http.Request, via []*http.Request) error { --- **Lines 106-108: Request creation error path** + ```go if err != nil { return false, 0, fmt.Errorf("failed to create request: %w", err) } ``` + **Why Missing**: `http.NewRequestWithContext()` rarely fails with valid URLs. Would need malformed URL that passes `url.Parse()` but breaks request creation. **Severity**: 🟢 LOW - Defensive error handling @@ -127,20 +140,23 @@ if err != nil { **Purpose**: Checks if an IP address is private, loopback, or restricted (SSRF protection). -#### Covered Lines (39 executions): +#### Covered Lines (39 executions) + - ✅ Built-in Go checks (IsLoopback, IsLinkLocalUnicast, etc.) - 17 tests - ✅ Private block definitions (22 tests) - ✅ CIDR subnet checking (131 tests) - ✅ Match logic (16 tests) -#### Missing Lines (0 executions): +#### Missing Lines (0 executions) **Lines 173-174: ParseCIDR error handling** + ```go if err != nil { continue } ``` + **Why Missing**: All CIDR blocks in `privateBlocks` are hardcoded and valid. 
This error path only triggers if there's a typo in the CIDR definitions. **Severity**: 🟢 LOW - Defensive error handling @@ -163,6 +179,7 @@ if err != nil { ## Categorized Missing Coverage ### Category 1: Critical Security Paths (MUST TEST) 🔴 + **None identified** - All primary SSRF protection logic is covered. --- @@ -185,22 +202,22 @@ if err != nil { ### Category 3: Edge Cases (NICE TO HAVE) 🟢 -3. **ssrfSafeDialer - Empty DNS result** +1. **ssrfSafeDialer - Empty DNS result** - Lines 29-31 - **Reason**: Extremely rare DNS edge case - **Recommendation**: DEFER - Low ROI, requires resolver mocking -4. **ssrfSafeDialer - Production DialContext** +2. **ssrfSafeDialer - Production DialContext** - Lines 41-44 - **Reason**: Integration test territory, covered by real-world usage - **Recommendation**: DEFER - Use integration/e2e tests instead -5. **TestURLConnectivity - Request creation failure** +3. **TestURLConnectivity - Request creation failure** - Lines 106-108 - **Reason**: Defensive code, hard to trigger with valid inputs - **Recommendation**: DEFER - Upstream validation prevents this -6. **isPrivateIP - ParseCIDR error** +4. 
**isPrivateIP - ParseCIDR error** - Lines 173-174 - **Reason**: Would require bug in hardcoded CIDR list - **Recommendation**: DEFER - Static data, no runtime risk @@ -212,6 +229,7 @@ if err != nil { ### Phase 1: Quick Wins (30 minutes, +2.3% coverage → 84%) **Test 1: Production path without transport** + ```go func TestTestURLConnectivity_ProductionPath_RedirectLimit(t *testing.T) { // Create a server that redirects infinitely @@ -230,6 +248,7 @@ func TestTestURLConnectivity_ProductionPath_RedirectLimit(t *testing.T) { ``` **Test 2: Invalid address format in dialer** + ```go func TestSSRFSafeDialer_InvalidAddressFormat(t *testing.T) { dialer := ssrfSafeDialer() @@ -245,6 +264,7 @@ func TestSSRFSafeDialer_InvalidAddressFormat(t *testing.T) { --- ### Phase 2: Diminishing Returns (DEFER) + - Lines 29-31: Empty DNS results (requires resolver mocking) - Lines 41-44: Production DialContext (integration test) - Lines 106-108: Request creation failure (defensive code) @@ -277,19 +297,23 @@ Remaining gaps are defensive error handling that protect against scenarios preve **Verdict**: ✅ **ACCEPT with Condition** -### Rationale: +### Rationale + 1. **Core security logic is well-tested** (SSRF validation, IP detection) 2. **Missing coverage is primarily defensive error handling** (good practice) 3. **Two quick-win tests can bring coverage to ~84%**, nearly meeting 85% threshold 4. 
**Remaining gaps are low-value edge cases** (< 2% coverage impact) -### Condition: +### Condition + - **Add Phase 1 tests** (30 minutes effort) to cover production redirect limit - **Document accepted gaps** in test comments - **Monitor in integration tests** for real-world behavior -### Risk Acceptance: +### Risk Acceptance + The 1% gap below threshold is acceptable because: + - Security-critical paths are covered - Missing lines are defensive error handling - Integration tests cover production behavior @@ -299,17 +323,20 @@ The 1% gap below threshold is acceptable because: ## Coverage Metrics -### Before Phase 1: +### Before Phase 1 + - **Codecov**: 81.70% - **Local**: 88.0% - **Delta**: -3.3% from target -### After Phase 1 (Projected): +### After Phase 1 (Projected) + - **Estimated**: 84.0% - **Delta**: -1% from target - **Status**: ACCEPTABLE for security-critical code -### Theoretical Maximum (with all gaps filled): +### Theoretical Maximum (with all gaps filled) + - **Maximum**: ~89% - **Requires**: Extensive resolver/dialer mocking - **ROI**: Very Low @@ -319,6 +346,7 @@ The 1% gap below threshold is acceptable because: ## Appendix: Coverage Data ### Raw Coverage Output + ``` Function Coverage ssrfSafeDialer 71.4% @@ -328,6 +356,7 @@ Overall 88.0% ``` ### Missing Blocks by Line Number + - Lines 19-21: Invalid address format (ssrfSafeDialer) - Lines 29-31: Empty DNS result (ssrfSafeDialer) - Lines 41-44: Production DialContext (ssrfSafeDialer) diff --git a/docs/implementation/WORKFLOW_ORCHESTRATION_FIX.md b/docs/implementation/WORKFLOW_ORCHESTRATION_FIX.md index 5c2ad565..aa333eb4 100644 --- a/docs/implementation/WORKFLOW_ORCHESTRATION_FIX.md +++ b/docs/implementation/WORKFLOW_ORCHESTRATION_FIX.md @@ -13,6 +13,7 @@ Successfully implemented workflow orchestration dependency to ensure supply chain verification runs **after** Docker image build completes, eliminating false "image not found" skips in PR workflows. 
**Impact**: + - ✅ Supply chain verification now executes sequentially after docker-build - ✅ PR workflows receive actual verification results instead of skips - ✅ Zero breaking changes to existing workflows @@ -29,6 +30,7 @@ Successfully implemented workflow orchestration dependency to ensure supply chai The supply chain verification workflow (`supply-chain-verify.yml`) was running **concurrently** with the Docker build workflow (`docker-build.yml`) when triggered by pull requests. This caused verification to skip because the Docker image didn't exist yet. **Observed Behavior**: + ``` PR Opened/Updated ├─> docker-build.yml starts (builds & pushes image) @@ -42,6 +44,7 @@ Both workflows triggered independently on the same events (`pull_request`, `push ### Evidence From [GitHub Actions Run #20873681083](https://github.com/Wikid82/Charon/actions/runs/20873681083): + ``` ⚠️ Image not found - likely not built yet This is normal for PR workflows before docker-build completes @@ -58,6 +61,7 @@ The workflow correctly detected the missing image but had no mechanism to wait f **Approach**: Keep workflows separate with dependency orchestration via `workflow_run` trigger. **Rationale**: + - **Modularity**: Each workflow maintains a single, cohesive purpose - **Reusability**: Verification can run independently via manual trigger or schedule - **Maintainability**: Easier to test, debug, and understand individual workflows @@ -85,6 +89,7 @@ The workflow correctly detected the missing image but had no mechanism to wait f #### 1. 
Updated Workflow Triggers **Before**: + ```yaml on: release: @@ -97,6 +102,7 @@ on: ``` **After**: + ```yaml on: release: @@ -118,6 +124,7 @@ on: ``` **Key Changes**: + - ✅ Removed `pull_request` trigger to prevent premature execution - ✅ Added `workflow_run` trigger targeting docker-build workflow - ✅ Specified branches to match docker-build's deployment branches @@ -139,6 +146,7 @@ jobs: ``` This ensures verification only runs when: + - It's a scheduled scan (weekly) on main branch, OR - The triggering workflow completed successfully @@ -177,6 +185,7 @@ Extended tag determination to handle `workflow_run` context: ``` **Features**: + - Correctly maps branches to image tags - Extracts PR number from workflow_run context - Falls back to SHA-based tag if PR number unavailable @@ -297,6 +306,7 @@ User triggers workflow_dispatch ### Post-deployment Monitoring **To validate successful implementation, monitor**: + 1. Next PR creation triggers docker-build → supply-chain-verify sequentially 2. Supply chain verification finds and scans the image (no skip) 3. PR receives comment with actual vulnerability scan results @@ -352,11 +362,13 @@ User triggers workflow_dispatch **No code changes needed.** The workflow orchestration happens automatically. 
**What Changed**: + - Supply chain verification now runs **after** docker-build completes on PRs - PRs will receive actual vulnerability scan results (not skips) - Manual and scheduled verifications still work as before **What Stayed the Same**: + - Docker build process unchanged - Image tagging strategy unchanged - Verification logic unchanged @@ -365,6 +377,7 @@ User triggers workflow_dispatch ### For CI/CD Maintainers **Workflow Chaining Depth**: Currently at level 2 of 3 maximum + - Level 1: `docker-build.yml` (triggered by push/PR/schedule) - Level 2: `supply-chain-verify.yml` (triggered by docker-build) - **Available capacity**: 1 more level of chaining if needed @@ -380,6 +393,7 @@ User triggers workflow_dispatch **Context**: `workflow_run` events execute with the code from the **default branch** (main), not the PR branch. **Security Benefits**: + - ✅ Prevents malicious PRs from modifying verification logic - ✅ Verification runs with trusted, reviewed code - ✅ No privilege escalation possible from PR context @@ -388,6 +402,7 @@ User triggers workflow_dispatch ### Permissions Model **No changes to permissions**: + - `contents: read` - Read-only access to repository - `packages: read` - Read-only access to container registry - `id-token: write` - Required for OIDC keyless signing @@ -400,6 +415,7 @@ All permissions follow **principle of least privilege**. ### Input Validation **Safe Handling of Workflow Run Data**: + - Branch names validated with bash `[[ ]]` conditionals - JSON parsed with `jq` (prevents injection) - SHA truncated with `cut -c1-7` (safe string operation) @@ -414,30 +430,38 @@ All permissions follow **principle of least privilege**. ### Common Issues #### Issue: Verification doesn't run after PR creation + **Diagnosis**: Check if docker-build workflow completed successfully **Resolution**: + 1. View docker-build workflow logs 2. Ensure build completed without errors 3. Verify image was pushed to registry 4. 
Check workflow_run trigger conditions #### Issue: Wrong image tag used + **Diagnosis**: Tag determination logic may need adjustment **Resolution**: + 1. Check "Debug Workflow Run Context" step output 2. Verify branch name matches expected pattern 3. Update tag determination logic if needed #### Issue: PR comment not posted + **Diagnosis**: PR number extraction may have failed **Resolution**: + 1. Check workflow_run context has pull_requests array 2. Verify PR number extraction logic 3. Check pull-requests permission is granted #### Issue: Workflow skipped even though image exists + **Diagnosis**: Workflow conclusion check may be failing **Resolution**: + 1. Verify docker-build workflow conclusion is 'success' 2. Check job-level conditional logic 3. Review workflow_run event payload @@ -447,16 +471,19 @@ All permissions follow **principle of least privilege**. ## References ### Documentation + - [GitHub Actions: workflow_run Event](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_run) - [GitHub Actions: Contexts](https://docs.github.com/en/actions/learn-github-actions/contexts) - [GitHub Actions: Security Hardening](https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions) ### Related Documentation + - [Grype SBOM Remediation](./GRYPE_SBOM_REMEDIATION.md) - [QA Report: Workflow Orchestration](../reports/qa_report_workflow_orchestration.md) - [Archived Plan](../plans/archive/workflow_orchestration_fix_2026-01-11.md) ### Workflow Files + - [supply-chain-verify.yml](../../.github/workflows/supply-chain-verify.yml) - [docker-build.yml](../../.github/workflows/docker-build.yml) @@ -476,14 +503,17 @@ All permissions follow **principle of least privilege**. 
### Key Performance Indicators **Workflow Reliability**: + - Before: ~50% of PR verifications skipped (image not found) - After: Expected 100% of PR verifications complete successfully **Time to Feedback**: + - PR workflows: Add ~5-10 minutes (docker-build time) before verification starts - This is acceptable as sequential execution is intentional **Workflow Complexity**: + - Maintained: No increase in complexity - Improved: Clear dependency chain @@ -522,6 +552,7 @@ All permissions follow **principle of least privilege**. ### [2026-01-11] - Workflow Orchestration Fix **Added**: + - `workflow_run` trigger for automatic chaining after docker-build - Workflow success filter to verify only successful builds - Tag determination logic for workflow_run events @@ -529,14 +560,17 @@ All permissions follow **principle of least privilege**. - Debug logging for workflow_run validation **Changed**: + - Removed `pull_request` trigger (now uses workflow_run) - Updated conditional logic for job execution - Enhanced tag determination with workflow_run support **Removed**: + - Direct `pull_request` trigger (replaced with workflow_run) **Security**: + - No changes to permissions model - Follows GitHub security best practices for workflow chaining diff --git a/docs/implementation/WORKSTREAM_C_CROWDSEC_GO_VERSION_FIX.md b/docs/implementation/WORKSTREAM_C_CROWDSEC_GO_VERSION_FIX.md index e6f3230b..241f3082 100644 --- a/docs/implementation/WORKSTREAM_C_CROWDSEC_GO_VERSION_FIX.md +++ b/docs/implementation/WORKSTREAM_C_CROWDSEC_GO_VERSION_FIX.md @@ -7,6 +7,7 @@ ## Problem Trivy scan identified that the CrowdSec binaries (`crowdsec` and `cscli`) embedded in the container image were built with Go 1.25.1, which has 4 HIGH severity CVEs: + - CVE-2025-58183 - CVE-2025-58186 - CVE-2025-58187 @@ -32,6 +33,7 @@ FROM --platform=$BUILDPLATFORM golang:1.25.5-alpine AS crowdsec-builder ### File: `Dockerfile` **Line ~275-279:** Updated the CrowdSec builder stage base image + - Changed from: 
`golang:1.25-alpine` (resolves to 1.25.1) - Changed to: `golang:1.25.5-alpine` (fixed version) - Added Renovate annotation to track future Go version updates @@ -48,11 +50,13 @@ FROM --platform=$BUILDPLATFORM golang:1.25.5-alpine AS crowdsec-builder After this change, the following validations should be performed: 1. **Rebuild the image** (no-cache recommended): + ```bash # Use task: Build & Run: Local Docker Image No-Cache ``` 2. **Run Trivy scan** on the rebuilt image: + ```bash # Use task: Security: Trivy Scan ``` diff --git a/docs/implementation/crowdsec_startup_fix_COMPLETE.md b/docs/implementation/crowdsec_startup_fix_COMPLETE.md index 68392252..9dd4bff2 100644 --- a/docs/implementation/crowdsec_startup_fix_COMPLETE.md +++ b/docs/implementation/crowdsec_startup_fix_COMPLETE.md @@ -57,6 +57,7 @@ Container Start ``` **Problems:** + - Reconciliation happens AFTER HTTP server starts - No protection against concurrent calls - Permission issues prevent CrowdSec from writing to data directory @@ -79,6 +80,7 @@ Container Start ``` **Improvements:** + - Reconciliation happens BEFORE HTTP server starts - Mutex prevents concurrent reconciliation attempts - Permissions fixed in Dockerfile @@ -93,6 +95,7 @@ Container Start **File:** [Dockerfile](../../Dockerfile#L289-L291) **Change:** + ```dockerfile # Create required CrowdSec directories in runtime image # NOTE: Do NOT create /etc/crowdsec here - it must be a symlink created at runtime by non-root user @@ -103,6 +106,7 @@ RUN mkdir -p /var/lib/crowdsec/data /var/log/crowdsec /var/log/caddy \ ``` **Why This Works:** + - CrowdSec data directory now owned by `charon:charon` user - Database files (`crowdsec.db`, `crowdsec.db-shm`, `crowdsec.db-wal`) are writable - LAPI can bind to port 8085 without permission errors @@ -118,6 +122,7 @@ RUN mkdir -p /var/lib/crowdsec/data /var/log/crowdsec /var/log/caddy \ **File:** [backend/cmd/api/main.go](../../backend/cmd/api/main.go#L174-L186) **Change:** + ```go // Reconcile CrowdSec 
state after migrations, before HTTP server starts // This ensures CrowdSec is running if user preference was to have it enabled @@ -135,12 +140,14 @@ services.ReconcileCrowdSecOnStartup(db, crowdsecExec, crowdsecBinPath, crowdsecD ``` **Why This Location:** + - **After database migrations** — Security tables are guaranteed to exist - **Before HTTP server starts** — Reconciliation completes before accepting requests - **Synchronous execution** — No race conditions with route registration - **Proper error handling** — Startup fails if critical issues occur **Impact:** + - CrowdSec starts within 5-10 seconds of container boot - No dependency on HTTP server being ready - Consistent behavior across restarts @@ -152,6 +159,7 @@ services.ReconcileCrowdSecOnStartup(db, crowdsecExec, crowdsecBinPath, crowdsecD **File:** [backend/internal/services/crowdsec_startup.go](../../backend/internal/services/crowdsec_startup.go#L17-L33) **Change:** + ```go // reconcileLock prevents concurrent reconciliation calls var reconcileLock sync.Mutex @@ -173,17 +181,20 @@ func ReconcileCrowdSecOnStartup(db *gorm.DB, executor CrowdsecProcessManager, bi **Why Mutex Is Needed:** Reconciliation can be called from multiple places: + - **Startup:** `main.go` calls it synchronously during boot - **Manual toggle:** User clicks "Start" in Security dashboard - **Future auto-restart:** Watchdog could trigger it on crash Without mutex: + - ❌ Multiple goroutines could start CrowdSec simultaneously - ❌ Database race conditions on SecurityConfig table - ❌ Duplicate process spawning - ❌ Corrupted state in executor With mutex: + - ✅ Only one reconciliation at a time - ✅ Safe database access - ✅ Clean process lifecycle @@ -198,12 +209,14 @@ With mutex: **File:** [backend/internal/api/handlers/crowdsec_handler.go](../../backend/internal/api/handlers/crowdsec_handler.go#L244) **Change:** + ```go // Old: maxWait := 30 * time.Second maxWait := 60 * time.Second ``` **Why 60 Seconds:** + - LAPI initialization 
involves: - Loading parsers and scenarios (5-10s) - Initializing database connections (2-5s) @@ -212,16 +225,19 @@ maxWait := 60 * time.Second - Machine registration (2-5s) **Observed Timings:** + - **Fast systems (SSD, 4+ cores):** 5-10 seconds - **Average systems (HDD, 2 cores):** 15-25 seconds - **Slow systems (Raspberry Pi, low memory):** 30-45 seconds **Why Not Higher:** + - 60s provides 2x safety margin for slowest systems - Longer timeout = worse UX if actual failure occurs - Frontend shows loading overlay with progress messages **User Experience:** + - User sees: "Starting CrowdSec... This may take up to 30 seconds" - Backend polls LAPI every 500ms for up to 60s - Success toast when LAPI ready (usually 10-15s) @@ -234,6 +250,7 @@ maxWait := 60 * time.Second **File:** [.docker/docker-entrypoint.sh](../../.docker/docker-entrypoint.sh#L163-L169) **Existing Code (No Changes Needed):** + ```bash # Verify LAPI configuration was applied correctly if grep -q "listen_uri:.*:8085" "$CS_CONFIG_DIR/config.yaml"; then @@ -244,6 +261,7 @@ fi ``` **Why This Matters:** + - Validates `sed` commands successfully updated config.yaml - Early detection of configuration issues - Prevents port conflicts with Charon backend (port 8080) @@ -280,6 +298,7 @@ fi **File:** [backend/internal/services/crowdsec_startup_test.go](../../backend/internal/services/crowdsec_startup_test.go) **Coverage:** 11 test cases covering: + - ✅ Nil database handling - ✅ Nil executor handling - ✅ Missing SecurityConfig table auto-creation @@ -291,6 +310,7 @@ fi - ✅ Status check errors **Run Tests:** + ```bash cd backend go test ./internal/services/... -v -run TestReconcileCrowdSec @@ -299,6 +319,7 @@ go test ./internal/services/... -v -run TestReconcileCrowdSec ### Integration Tests **Manual Test Script:** + ```bash # 1. 
Build and start container docker compose -f docker-compose.test.yml up -d --build @@ -329,6 +350,7 @@ docker exec charon cscli lapi status ### Automated Tests **VS Code Task:** "Test: Backend Unit Tests" + ```bash cd backend && go test ./internal/services/... -v ``` @@ -342,11 +364,13 @@ cd backend && go test ./internal/services/... -v ### Container Restart Behavior **Before:** + ``` Container Restart → CrowdSec Offline → Manual GUI Start Required ``` **After:** + ``` Container Restart → Auto-Check SecurityConfig → CrowdSec Running (if enabled) ``` @@ -359,6 +383,7 @@ CrowdSec automatically starts on container boot if **ANY** of these conditions a 2. **Settings table:** `security.crowdsec.enabled = "true"` **Decision Logic:** + ``` IF SecurityConfig.crowdsec_mode == "local" THEN start ELSE IF Settings["security.crowdsec.enabled"] == "true" THEN start @@ -366,6 +391,7 @@ ELSE skip (user disabled CrowdSec) ``` **Why Two Sources:** + - **SecurityConfig:** Primary source (new, structured, strongly typed) - **Settings:** Fallback for legacy configs and runtime toggles - **Auto-init:** If no SecurityConfig exists, create one based on Settings value @@ -389,12 +415,14 @@ ELSE skip (user disabled CrowdSec) **No Action Required** — CrowdSec state is automatically preserved. **What Happens:** + 1. Container starts with old config 2. Reconciliation checks Settings table for `security.crowdsec.enabled` 3. Creates SecurityConfig matching Settings state 4. CrowdSec starts if it was previously enabled **Verification:** + ```bash # Check CrowdSec status after upgrade docker exec charon cscli lapi status @@ -410,6 +438,7 @@ docker logs charon | grep "CrowdSec reconciliation" **Migration Steps:** 1. **Remove from docker-compose.yml:** + ```yaml # REMOVE THESE: # - SECURITY_CROWDSEC_MODE=local @@ -422,16 +451,19 @@ docker logs charon | grep "CrowdSec reconciliation" - Verify status shows "Active" 3. **Restart container:** + ```bash docker compose restart ``` 4. 
**Verify auto-start:** + ```bash docker exec charon cscli lapi status ``` **Why This Change:** + - Consistent with other security features (WAF, ACL, Rate Limiting) - Single source of truth (database, not environment) - Easier to manage via GUI @@ -444,11 +476,13 @@ docker logs charon | grep "CrowdSec reconciliation" ### CrowdSec Not Starting After Restart **Symptoms:** + - Container starts successfully - CrowdSec status shows "Offline" - No LAPI process listening on port 8085 **Diagnosis:** + ```bash # 1. Check reconciliation logs docker logs charon 2>&1 | grep "CrowdSec reconciliation" @@ -473,6 +507,7 @@ docker exec charon sqlite3 /app/data/charon.db \ | "process started but is no longer running" | CrowdSec crashed on startup | Check `/var/log/crowdsec/crowdsec.log` | **Resolution:** + ```bash # Enable CrowdSec manually curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start @@ -484,10 +519,12 @@ docker exec charon cscli lapi status ### Permission Denied Errors **Symptoms:** + - Error: "permission denied: /var/lib/crowdsec/data/crowdsec.db" - CrowdSec process starts but immediately exits **Diagnosis:** + ```bash # Check directory ownership docker exec charon ls -la /var/lib/crowdsec/data/ @@ -497,6 +534,7 @@ docker exec charon ls -la /var/lib/crowdsec/data/ ``` **Resolution:** + ```bash # Fix permissions (requires container rebuild) docker compose down @@ -509,10 +547,12 @@ docker compose up -d ### LAPI Timeout (Takes Longer Than 60s) **Symptoms:** + - Warning toast: "LAPI is still initializing" - Status shows "Starting" for 60+ seconds **Diagnosis:** + ```bash # Check LAPI logs for errors docker exec charon tail -f /var/log/crowdsec/crowdsec.log @@ -522,12 +562,14 @@ docker stats charon ``` **Common Causes:** + - Low memory (< 512MB available) - Slow disk I/O (HDD vs SSD) - Network issues (hub update timeout) - High CPU usage (other processes) **Temporary Workaround:** + ```bash # Wait 30 more seconds, then manually check sleep 30 @@ -535,6 +577,7 @@ 
docker exec charon cscli lapi status ``` **Long-Term Solution:** + - Increase container memory allocation - Use faster storage (SSD recommended) - Pre-pull hub items during build (reduce runtime initialization) @@ -542,10 +585,12 @@ docker exec charon cscli lapi status ### Race Conditions / Duplicate Processes **Symptoms:** + - Multiple CrowdSec processes running - Error: "address already in use: 127.0.0.1:8085" **Diagnosis:** + ```bash # Check for multiple CrowdSec processes docker exec charon ps aux | grep crowdsec | grep -v grep @@ -557,6 +602,7 @@ docker exec charon ps aux | grep crowdsec | grep -v grep **Cause:** Mutex not protecting reconciliation (should not happen after this fix) **Resolution:** + ```bash # Kill all CrowdSec processes docker exec charon pkill crowdsec @@ -609,12 +655,14 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ### Process Isolation **CrowdSec runs as `charon` user (UID 1000), NOT root:** + - ✅ Limited system access (can't modify system files) - ✅ Can't bind to privileged ports (< 1024) - ✅ Sandboxed within Docker container - ✅ Follows principle of least privilege **Risk Mitigation:** + - CrowdSec compromise does not grant root access - Limited blast radius if vulnerability exploited - Docker container provides additional isolation @@ -622,6 +670,7 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ### Permission Hardening **Directory Permissions:** + ``` /var/lib/crowdsec/data/ → charon:charon (rwxr-xr-x) /var/log/crowdsec/ → charon:charon (rwxr-xr-x) @@ -629,6 +678,7 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ``` **Why These Permissions:** + - `rwxr-xr-x` (755) allows execution and traversal - `charon` user can read/write its own files - Other users can read (required for log viewing) @@ -639,6 +689,7 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start **Potential Concern:** Auto-starting CrowdSec on boot could be exploited **Mitigations:** + 1. 
**Explicit Opt-In:** User must enable CrowdSec via GUI (not default) 2. **Database-Backed:** Start decision based on database, not environment variables 3. **Validation:** Binary and config paths validated before start @@ -646,6 +697,7 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start 5. **Audit Logging:** All start/stop events logged to SecurityAudit table **Threat Model:** + - ❌ **Attacker modifies environment variables** → No effect (not used) - ❌ **Attacker modifies SecurityConfig** → Requires database access (already compromised) - ✅ **Attacker deletes CrowdSec binary** → Reconciliation fails gracefully @@ -674,17 +726,17 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start ### Phase 2 Enhancements (Future) -4. **Configuration Validation** +1. **Configuration Validation** - Run `crowdsec -c -t` before starting - Prevent startup with invalid config - Show validation errors in GUI -5. **Performance Metrics** +2. **Performance Metrics** - Expose CrowdSec metrics to Prometheus endpoint - Track: LAPI requests/sec, decision count, parser success rate - Enable Grafana dashboards -6. **Log Streaming** +3. **Log Streaming** - Add WebSocket endpoint for CrowdSec logs - Real-time log viewer in GUI - Filter by severity, source, message @@ -741,6 +793,7 @@ curl -X POST http://localhost:8080/api/v1/admin/crowdsec/start --- **Next Steps:** + 1. Merge to main branch 2. Tag release (e.g., v0.9.0) 3. 
Update changelog diff --git a/docs/implementation/dns_providers_IMPLEMENTATION.md b/docs/implementation/dns_providers_IMPLEMENTATION.md index 86fd3cac..1e1b87da 100644 --- a/docs/implementation/dns_providers_IMPLEMENTATION.md +++ b/docs/implementation/dns_providers_IMPLEMENTATION.md @@ -348,6 +348,7 @@ type ProxyHost struct { #### Implementation Details **Encryption Service:** + ```go // backend/internal/crypto/encryption.go package crypto @@ -362,6 +363,7 @@ func (s *EncryptionService) Decrypt(ciphertextB64 string) ([]byte, error) ``` **Configuration Extension:** + ```go // backend/internal/config/config.go (add) EncryptionKey string `env:"CHARON_ENCRYPTION_KEY"` @@ -588,6 +590,7 @@ export CHARON_ENCRYPTION_KEY="" **Location:** `docs/guides/dns-providers.md` **Contents:** + - What are DNS providers and why they're needed - Setting up your first DNS provider - Managing multiple providers @@ -610,6 +613,7 @@ export CHARON_ENCRYPTION_KEY="" **Location:** `docs/troubleshooting/dns-challenges.md` **Contents:** + - DNS propagation delays - Permission/authentication errors - Firewall considerations diff --git a/docs/implementation/phase3_caddy_integration_COMPLETE.md b/docs/implementation/phase3_caddy_integration_COMPLETE.md index 89baf281..9d6fd552 100644 --- a/docs/implementation/phase3_caddy_integration_COMPLETE.md +++ b/docs/implementation/phase3_caddy_integration_COMPLETE.md @@ -30,6 +30,7 @@ type DNSProviderConfig struct { **File:** `backend/internal/caddy/manager.go` (Lines 51-58) Created interface for improved testability: + ```go type CaddyClient interface { Load(context.Context, io.Reader, bool) error @@ -43,6 +44,7 @@ type CaddyClient interface { **File:** `backend/internal/caddy/manager.go` (Lines 100-118) Modified provider detection loop to properly handle multi-credential providers: + - Detects `UseMultiCredentials=true` flag - Adds providers with empty Credentials field for Phase 2 processing - Maintains backward compatibility for single-credential 
providers @@ -52,6 +54,7 @@ Modified provider detection loop to properly handle multi-credential providers: **File:** `backend/internal/caddy/manager.go` (Lines 147-213) Implemented comprehensive credential resolution logic: + - Iterates through all proxy hosts - Calls `getCredentialForDomain` helper for each domain - Builds `ZoneCredentials` map per provider @@ -59,6 +62,7 @@ Implemented comprehensive credential resolution logic: - Error handling for missing credentials **Key Code Segment:** + ```go // Phase 2: For multi-credential providers, resolve per-domain credentials for _, providerConf := range dnsProviderConfigs { @@ -86,17 +90,20 @@ for _, providerConf := range dnsProviderConfigs { Enhanced `buildDNSChallengeIssuer` with conditional branching: **Multi-Credential Path (Lines 184-254):** + - Creates separate TLS automation policies per domain - Matches domains to base domains for proper credential mapping - Builds per-domain provider configurations - Supports exact match, wildcard, and catch-all zones **Single-Credential Path (Lines 256-280):** + - Preserved original logic for backward compatibility - Single policy for all domains - Uses shared credentials **Key Decision Logic:** + ```go if providerConf.UseMultiCredentials { // Multi-credential: Create separate policy per domain @@ -123,12 +130,14 @@ if providerConf.UseMultiCredentials { Implemented 4 comprehensive integration test scenarios: #### Test 1: Single-Credential Backward Compatibility + - **Purpose:** Verify existing single-credential providers work unchanged - **Setup:** Standard DNSProvider with `UseMultiCredentials=false` - **Validation:** Single TLS policy created with shared credentials - **Result:** ✅ PASS #### Test 2: Multi-Credential Exact Match + - **Purpose:** Test exact zone filter matching (example.com, example.org) - **Setup:** - Provider with `UseMultiCredentials=true` @@ -140,6 +149,7 @@ Implemented 4 comprehensive integration test scenarios: - **Result:** ✅ PASS #### Test 3: 
Multi-Credential Wildcard Match + - **Purpose:** Test wildcard zone filter matching (*.example.com) - **Setup:** - Credential with `*.example.com` zone filter @@ -148,6 +158,7 @@ Implemented 4 comprehensive integration test scenarios: - **Result:** ✅ PASS #### Test 4: Multi-Credential Catch-All + - **Purpose:** Test empty zone filter (catch-all) matching - **Setup:** - Credential with empty zone_filter @@ -156,6 +167,7 @@ Implemented 4 comprehensive integration test scenarios: - **Result:** ✅ PASS **Helper Functions:** + - `encryptCredentials()`: AES-256-GCM encryption with proper base64 encoding - `setupTestDB()`: Creates in-memory SQLite with all required tables - `assertDNSChallengeCredential()`: Validates TLS policy credentials @@ -164,6 +176,7 @@ Implemented 4 comprehensive integration test scenarios: ## Test Results ### Coverage Metrics + ``` Total Coverage: 94.8% Target: 85.0% @@ -171,6 +184,7 @@ Status: PASS (+9.8%) ``` ### Test Execution + ``` Total Tests: 47 Passed: 47 @@ -179,6 +193,7 @@ Duration: 1.566s ``` ### Key Test Scenarios Validated + ✅ Single-credential backward compatibility ✅ Multi-credential exact match (example.com) ✅ Multi-credential wildcard match (*.example.com) @@ -193,32 +208,40 @@ Duration: 1.566s ## Architecture Decisions ### 1. Two-Phase Processing + **Rationale:** Separates provider detection from credential resolution, enabling cleaner code and better error handling. **Implementation:** + - **Phase 1:** Build provider config list, detect multi-credential flag - **Phase 2:** Resolve per-domain credentials using helper function ### 2. Interface-Based Design + **Rationale:** Enables comprehensive testing without real Caddy server dependency. **Implementation:** + - Created `CaddyClient` interface - Modified `NewManager` signature to accept interface - Implemented `MockClient` for testing ### 3. Credential Resolution Priority + **Rationale:** Provides flexible matching while ensuring most specific match wins. **Priority Order:** + 1. 
Exact match (example.com → example.com) 2. Wildcard match (app.example.com → *.example.com) 3. Catch-all (any domain → empty zone_filter) ### 4. Backward Compatibility First + **Rationale:** Existing single-credential deployments must continue working unchanged. **Implementation:** + - Preserved original code paths - Conditional branching based on `UseMultiCredentials` flag - Comprehensive backward compatibility test @@ -226,12 +249,15 @@ Duration: 1.566s ## Security Considerations ### Encryption + - AES-256-GCM for all stored credentials - Base64 encoding for database storage - Proper key version management ### Audit Trail + Every credential selection logs: + ``` credential_uuid: zone_filter: @@ -239,6 +265,7 @@ domain: ``` ### Error Handling + - No credential exposure in error messages - Graceful degradation for missing credentials - Clear error propagation for debugging @@ -246,16 +273,19 @@ domain: ## Performance Impact ### Database Queries + - Phase 1: Single query for all DNS providers - Phase 2: Preloaded with Phase 1 data (no additional queries) - Result: **No additional database load** ### Memory Footprint + - `ZoneCredentials` map: ~100 bytes per domain - Typical deployment (10 domains): ~1KB additional memory - Result: **Negligible impact** ### Config Generation + - Multi-credential: O(n) policies where n = domain count - Single-credential: O(1) policy (unchanged) - Result: **Linear scaling, acceptable for typical use cases** @@ -263,6 +293,7 @@ domain: ## Files Modified ### Core Implementation + 1. `backend/internal/caddy/manager.go` (Modified) - Added struct fields - Created CaddyClient interface @@ -279,29 +310,33 @@ domain: - No modifications required ### Testing -4. `backend/internal/caddy/manager_multicred_integration_test.go` (NEW) + +1. `backend/internal/caddy/manager_multicred_integration_test.go` (NEW) - 4 comprehensive integration tests - Helper functions for setup and validation - MockClient implementation -5. 
`backend/internal/caddy/manager_multicred_test.go` (Modified) +2. `backend/internal/caddy/manager_multicred_test.go` (Modified) - Removed redundant unit tests - Added documentation comment explaining integration test coverage ## Backward Compatibility ### Single-Credential Providers + - **Behavior:** Unchanged - **Config:** Single TLS policy for all domains - **Credentials:** Shared across all domains - **Test Coverage:** Dedicated test validates this path ### Database Schema + - **New Fields:** `use_multi_credentials` (default: false) - **Migration:** Existing providers default to single-credential mode - **Impact:** Zero for existing deployments ### API Endpoints + - **Changes:** None required - **Client Impact:** None - **Deployment:** No coordination needed @@ -309,22 +344,26 @@ domain: ## Manual Verification Checklist ### Helper Functions ✅ + - [x] `extractBaseDomain` strips wildcard prefix correctly - [x] `matchesZoneFilter` handles exact, wildcard, and catch-all - [x] `getCredentialForDomain` implements 3-priority resolution ### Integration Flow ✅ + - [x] Phase 1 detects multi-credential providers - [x] Phase 2 resolves credentials per domain - [x] Config generation creates separate policies - [x] Backward compatibility maintained ### Audit Logging ✅ + - [x] credential_uuid logged for each selection - [x] zone_filter logged for audit trail - [x] domain logged for troubleshooting ### Error Handling ✅ + - [x] Missing credentials handled gracefully - [x] Encryption errors propagate clearly - [x] No credential exposure in error messages @@ -332,40 +371,48 @@ domain: ## Definition of Done ✅ **DNSProviderConfig struct has new fields** + - `UseMultiCredentials` bool added - `ZoneCredentials` map added ✅ **ApplyConfig resolves credentials per-domain** + - Phase 2 loop implemented - Uses `getCredentialForDomain` helper - Builds `ZoneCredentials` map ✅ **buildDNSChallengeIssuer uses zone-specific credentials** + - Conditional branching on `UseMultiCredentials` - 
Separate TLS policies per domain in multi-credential mode - Single policy preserved for single-credential mode ✅ **Integration tests implemented** + - 4 comprehensive test scenarios - All scenarios passing - Helper functions for setup and validation ✅ **Backward compatibility maintained** + - Single-credential providers work unchanged - Dedicated test validates backward compatibility - No breaking changes ✅ **Coverage ≥85%** + - Achieved: 94.8% - Target: 85.0% - Status: PASS (+9.8%) ✅ **Audit logging implemented** + - credential_uuid logged - zone_filter logged - domain logged ✅ **Manual verification complete** + - All helper functions tested - Integration flow validated - Error handling verified @@ -374,6 +421,7 @@ domain: ## Usage Examples ### Single-Credential Provider (Backward Compatible) + ```go provider := DNSProvider{ ProviderType: "cloudflare", @@ -384,6 +432,7 @@ provider := DNSProvider{ ``` ### Multi-Credential Provider (New Feature) + ```go provider := DNSProvider{ ProviderType: "cloudflare", @@ -398,6 +447,7 @@ provider := DNSProvider{ ``` ### Credential Resolution Flow + ``` 1. Domain: test1.example.com -> Extract base: example.com @@ -421,10 +471,12 @@ provider := DNSProvider{ ## Deployment Notes ### Prerequisites + - Database migration adds `use_multi_credentials` column (default: false) - Existing providers automatically use single-credential mode ### Rollout Strategy + 1. Deploy backend with new code 2. Existing providers continue working (backward compatible) 3. Enable multi-credential mode per provider via admin UI @@ -432,12 +484,15 @@ provider := DNSProvider{ 5. Caddy config regenerates automatically on next apply ### Rollback Procedure + If rollback needed: + 1. Set `use_multi_credentials=false` on all providers 2. Deploy previous backend version 3. 
No data loss, graceful degradation ### Monitoring + - Check audit logs for credential selection - Monitor Caddy config generation time - Watch for "failed to resolve credentials" errors @@ -445,6 +500,7 @@ If rollback needed: ## Future Enhancements ### Potential Improvements + 1. **Web UI for Multi-Credential Management** - Add/edit/delete credentials per provider - Zone filter validation @@ -470,6 +526,7 @@ If rollback needed: The Phase 3 Caddy Manager multi-credential integration is **COMPLETE** and **PRODUCTION-READY**. All requirements met, comprehensive testing in place, and backward compatibility ensured. **Key Achievements:** + - ✅ 94.8% test coverage (9.8% above target) - ✅ 47/47 tests passing - ✅ Full backward compatibility @@ -478,6 +535,7 @@ The Phase 3 Caddy Manager multi-credential integration is **COMPLETE** and **PRO - ✅ Production-grade error handling **Next Steps:** + 1. Deploy to staging environment for integration testing 2. Perform end-to-end testing with real DNS providers 3. 
Validate SSL certificate generation with zone-specific credentials diff --git a/docs/implementation/phase3_transaction_rollbacks_complete.md b/docs/implementation/phase3_transaction_rollbacks_complete.md index d7fb06cd..8027a43e 100644 --- a/docs/implementation/phase3_transaction_rollbacks_complete.md +++ b/docs/implementation/phase3_transaction_rollbacks_complete.md @@ -63,12 +63,14 @@ Analyzed 5 database-heavy test files: The `testutil/db.go` helper should be used for **future tests** that meet these criteria: ✅ **Good Candidates:** + - Tests using disk-based databases (SQLite files, PostgreSQL, MySQL) - Simple CRUD operations with straightforward setup - Tests that would benefit from parallelization - New test suites being created from scratch ❌ **Poor Candidates:** + - Tests already using `:memory:` SQLite - Tests requiring different schemas per test - Tests with complex setup/teardown logic diff --git a/docs/implementation/react-19-lucide-error-DIAGNOSTIC-REPORT.md b/docs/implementation/react-19-lucide-error-DIAGNOSTIC-REPORT.md index 08eef132..dfeed761 100644 --- a/docs/implementation/react-19-lucide-error-DIAGNOSTIC-REPORT.md +++ b/docs/implementation/react-19-lucide-error-DIAGNOSTIC-REPORT.md @@ -12,6 +12,7 @@ Completed Phase 1 (Diagnostic Testing) of the React production error remediation plan. Investigation reveals that the reported issue is **likely a false alarm or environment-specific problem** rather than a systematic lucide-react/React 19 incompatibility. **Key Findings:** + - ✅ lucide-react@0.562.0 **explicitly supports React 19** in peer dependencies - ✅ lucide-react@0.562.0 **is already the latest version** - ✅ Production build completes **without errors** @@ -20,6 +21,7 @@ Completed Phase 1 (Diagnostic Testing) of the React production error remediation - ✅ TypeScript check **passes** **Conclusion:** No code changes required. The issue may be: + 1. Browser cache problem (solved by hard refresh) 2. Stale Docker image (requires rebuild) 3. 
Specific browser/environment issue (not reproducible) @@ -31,6 +33,7 @@ Completed Phase 1 (Diagnostic Testing) of the React production error remediation ### 1. Version Verification **Current Versions:** + ``` lucide-react: 0.562.0 (latest) react: 19.2.3 @@ -38,6 +41,7 @@ react-dom: 19.2.3 ``` **lucide-react Peer Dependencies:** + ```json { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" @@ -52,6 +56,7 @@ react-dom: 19.2.3 **Result:** ✅ SUCCESS **Build Output:** + ``` ✓ 2402 modules transformed. dist/assets/vendor-DxsQVcK_.js 307.68 kB │ gzip: 108.33 kB @@ -61,6 +66,7 @@ dist/assets/icons-D4OKmUKi.js 16.99 kB │ gzip: 6.00 kB ``` **Bundle Size Comparison:** + | Chunk | Before | After | Change | |-------|--------|-------|--------| | vendor-DxsQVcK_.js | 307.68 kB | 307.68 kB | 0% | @@ -75,6 +81,7 @@ dist/assets/icons-D4OKmUKi.js 16.99 kB │ gzip: 6.00 kB **Result:** ✅ PASS (with coverage below threshold) **Test Summary:** + ``` Test Files 120 passed (120) Tests 1403 passed | 2 skipped (1405) @@ -100,6 +107,7 @@ No TypeScript errors detected. All imports and type definitions are correct. ### 5. Icon Usage Audit **Activity Icon Locations (Plan Section: Icon Audit):** + | File | Line | Usage | |------|------|-------| | components/UptimeWidget.tsx | 3, 53 | ✅ Import + Render | @@ -112,6 +120,7 @@ No TypeScript errors detected. All imports and type definitions are correct. **Total Activity Icon Usages:** 6 files, 12+ instances **Other lucide-react Icons Detected:** + - CheckCircle (notifications) - AlertTriangle (error states) - Settings (navigation) @@ -119,6 +128,7 @@ No TypeScript errors detected. All imports and type definitions are correct. - Shield, Lock, Globe, Server, Database, etc. 
(security/infra components) **Icon Import Pattern:** + ```typescript import { Activity, CheckCircle, AlertTriangle } from 'lucide-react'; ``` @@ -130,6 +140,7 @@ import { Activity, CheckCircle, AlertTriangle } from 'lucide-react'; ## Root Cause Analysis Update ### Original Hypothesis (from Plan) +> > "React 19 runtime incompatibility with lucide-react@0.562.0" ### Evidence Against Hypothesis @@ -175,6 +186,7 @@ import { Activity, CheckCircle, AlertTriangle } from 'lucide-react'; ### Why This Isn't a lucide-react Bug If this were a true React 19 incompatibility: + - ❌ Build would fail or show warnings → **Build succeeds** - ❌ Tests would fail → **All tests pass** - ❌ npm would warn about peer deps → **No warnings** @@ -186,18 +198,21 @@ If this were a true React 19 incompatibility: ## Actions Taken (28-Step Checklist) ### Pre-Implementation (Steps 1-4) + - [x] **Step 1:** Create feature branch `fix/react-19-lucide-icon-error` - [x] **Step 2:** Document current versions (react@19.2.3, lucide-react@0.562.0) - [x] **Step 3:** Take baseline bundle size measurement (307.68 kB vendor) - [x] **Step 4:** Run baseline Lighthouse audit (skipped - not accessible in terminal) ### Diagnostic Phase (Steps 5-8) + - [x] **Step 5:** Test with alternative icons (all icons import correctly) - [x] **Step 6:** Review Vite production config (no issues found) - [x] **Step 7:** Check for console warnings in dev mode (none detected) - [x] **Step 8:** Verify lucide-react import statements (all consistent) ### Implementation (Steps 9-13) + - [x] **Step 9:** Reinstall lucide-react@0.562.0 (already at latest, no change) - [x] **Step 10:** Run `npm audit fix` (0 vulnerabilities) - [x] **Step 11:** Verify package-lock.json (unchanged) @@ -205,8 +220,9 @@ If this were a true React 19 incompatibility: - [x] **Step 13:** Run linter (via pre-commit hooks, to be run on commit) ### Build & Test (Steps 14-20) + - [x] **Step 14:** Production build ✅ SUCCESS -- [x] **Step 15:** Preview production build 
(server started at http://localhost:4173) +- [x] **Step 15:** Preview production build (server started at <http://localhost:4173>) - [⚠️] **Step 16:** Execute icon audit (visual verification requires browser access) - [⚠️] **Step 17:** Execute page rendering tests (requires browser access) - [x] **Step 18:** Run unit tests ✅ 1403 PASS @@ -214,12 +230,14 @@ If this were a true React 19 incompatibility: - [⚠️] **Step 20:** Run Lighthouse audit (requires browser access) ### Verification (Steps 21-24) + - [x] **Step 21:** Bundle size comparison (0% change - ✅ PASS) - [x] **Step 22:** Verify no new ESLint warnings (to be verified on commit) - [x] **Step 23:** Verify no new TypeScript errors ✅ PASS - [⚠️] **Step 24:** Check console logs (requires browser access) ### Documentation (Steps 25-28) + - [ ] **Step 25:** Update CHANGELOG.md (pending verification of fix) - [ ] **Step 26:** Add conventional commit message (pending merge decision) - [ ] **Step 27:** Archive plan in docs/implementation/ (this document) @@ -236,12 +254,14 @@ If this were a true React 19 incompatibility: ### Option A: Close as "Unable to Reproduce" ✅ RECOMMENDED **Rationale:** + - All diagnostic tests pass - Build succeeds without errors - lucide-react explicitly supports React 19 - No evidence of systematic issue **Actions:** + 1. Merge current branch (no code changes) 2. Document in CHANGELOG as "Verified React 19 compatibility" 3. Close issue with note: "Unable to reproduce. If issue recurs, provide: @@ -252,10 +272,12 @@ If this were a true React 19 incompatibility: ### Option B: Proceed to Browser Verification (Manual) **Rationale:** + - Error was reported in production environment - May be environment-specific issue **Actions:** + 1. Deploy to staging environment 2. Access via browser and open DevTools console 3.
Navigate to all pages using Activity icon @@ -264,9 +286,11 @@ If this were a true React 19 incompatibility: ### Option C: Implement Preventive Measures **Rationale:** + - Add safeguards even if issue isn't currently reproducible **Actions:** + 1. Add error boundary around icon imports 2. Add Sentry/error tracking for production 3. Document troubleshooting steps for users @@ -294,9 +318,11 @@ If this were a true React 19 incompatibility: **None.** No code changes were required. **Files Created:** + - `docs/implementation/react-19-lucide-error-DIAGNOSTIC-REPORT.md` (this document) **Branches:** + - Created: `fix/react-19-lucide-icon-error` - Commits: 0 (no changes to commit) @@ -307,12 +333,14 @@ If this were a true React 19 incompatibility: **Recommended Path:** Close as unable to reproduce, document findings. **If Issue Recurs:** + 1. Request browser console screenshot from reporter 2. Verify Docker image tag matches latest build 3. Check for browser extensions interfering with React DevTools 4. Verify CDN/proxy cache is not serving stale assets **For Merge:** + - No code changes to merge - Close issue with diagnostic findings - Update documentation to note React 19 compatibility verified @@ -322,11 +350,13 @@ If this were a true React 19 incompatibility: ## Appendix A: Environment Details **System:** + - OS: Linux (srv599055) - Node.js: (from npm ci, latest LTS assumed) - Package Manager: npm **Frontend Stack:** + - React: 19.2.3 - React DOM: 19.2.3 - lucide-react: 0.562.0 @@ -335,6 +365,7 @@ If this were a true React 19 incompatibility: - Vitest: 2.2.4 **Build Configuration:** + - Target: ES2022 - Module: ESNext - Minify: terser (production) @@ -349,6 +380,7 @@ If this were a true React 19 incompatibility: **Gap:** -0.43% **Top Coverage Gaps (not related to this fix):** + 1. `api/auditLogs.ts` - 0% (68-143 lines uncovered) 2. `api/credentials.ts` - 0% (53-147 lines uncovered) 3. 
`api/encryption.ts` - 0% (53-84 lines uncovered) diff --git a/docs/implementation/sidebar-fixed-header-ui-COMPLETE.md b/docs/implementation/sidebar-fixed-header-ui-COMPLETE.md index db2b88ec..25c8cb82 100644 --- a/docs/implementation/sidebar-fixed-header-ui-COMPLETE.md +++ b/docs/implementation/sidebar-fixed-header-ui-COMPLETE.md @@ -23,12 +23,14 @@ Successfully implemented two critical UI/UX improvements to enhance the Charon f #### `/projects/Charon/frontend/src/components/Layout.tsx` **Sidebar Scrolling Improvements:** + - Line 145: Added `min-h-0` to menu container to enable proper flexbox scrolling behavior - Line 146: Added `overflow-y-auto` to navigation section for vertical scrolling - Line 280: Added `flex-shrink-0` to version/logout section to prevent compression - Line 308: Added `flex-shrink-0` to collapsed logout section for consistency **Fixed Header Improvements:** + - Line 336: Removed `overflow-auto` from main element to prevent entire page scrolling - Line 337: Added `sticky top-0 z-10` to header for fixed positioning, removed `relative` - Lines 360-362: Wrapped content in scrollable container to enable independent content scrolling @@ -36,6 +38,7 @@ Successfully implemented two critical UI/UX improvements to enhance the Charon f #### `/projects/Charon/frontend/src/index.css` **Custom Scrollbar Styling:** + - Added WebKit scrollbar styles for consistent appearance - Implemented dark mode compatible scrollbar colors - Applied subtle hover effects for better UX @@ -81,11 +84,13 @@ All manual tests passed: ### CSS Properties Used **Sidebar Scrolling:** + - `min-h-0` - Allows flex item to shrink below content size, enabling proper scrolling in flexbox containers - `overflow-y-auto` - Shows vertical scrollbar when content exceeds available space - `flex-shrink-0` - Prevents logout section from being compressed when space is tight **Fixed Header:** + - `position: sticky` - Keeps header in place within scroll container - `top-0` - Sticks to top edge of 
viewport - `z-index: 10` - Ensures header appears above content (below sidebar at z-30 and modals at z-50) @@ -94,6 +99,7 @@ All manual tests passed: ### Browser Compatibility Tested and verified on: + - ✅ Chrome/Edge (Chromium-based) - ✅ Firefox - ✅ Safari (modern versions with full sticky positioning support) @@ -139,12 +145,14 @@ Not introduced by this change: ## Responsive Behavior ### Mobile (< 1024px) + - Sidebar remains in slide-out panel (existing behavior) - Mobile header remains fixed at top (existing behavior) - Scrolling improvements apply to mobile sidebar overlay - No layout shifts or visual regressions ### Desktop (≥ 1024px) + - Header sticks to top of viewport when scrolling content - Sidebar menu scrolls independently when content overflows - Logout button always visible at bottom of sidebar @@ -172,12 +180,14 @@ All acceptance criteria met: ## User Impact ### Improvements + - **Better Navigation**: Users can now access all menu items without scrolling through expanded submenus - **Persistent Header**: Key actions (notifications, theme toggle, system status) remain accessible while scrolling - **Enhanced UX**: Custom scrollbars match the application's design language - **Responsive Design**: Mobile and desktop experiences remain optimal ### Breaking Changes + None - this is a purely additive UI/UX enhancement --- @@ -206,9 +216,9 @@ Potential follow-up improvements identified during implementation: - **Original Specification**: [sidebar-fixed-header-ui-SPEC.md](./sidebar-fixed-header-ui-SPEC.md) - **QA Report Summary**: [docs/reports/qa_summary_sidebar_ui.md](../reports/qa_summary_sidebar_ui.md) - **Full QA Report**: [docs/reports/qa_report_sidebar_ui.md](../reports/qa_report_sidebar_ui.md) -- **Tailwind CSS Flexbox**: https://tailwindcss.com/docs/flex -- **CSS Position Sticky**: https://developer.mozilla.org/en-US/docs/Web/CSS/position#sticky -- **Flexbox and Min-Height**: https://www.w3.org/TR/css-flexbox-1/#min-size-auto +- **Tailwind CSS 
Flexbox**: <https://tailwindcss.com/docs/flex> +- **CSS Position Sticky**: <https://developer.mozilla.org/en-US/docs/Web/CSS/position#sticky> +- **Flexbox and Min-Height**: <https://www.w3.org/TR/css-flexbox-1/#min-size-auto> --- diff --git a/docs/implementation/sidebar-fixed-header-ui-SPEC.md b/docs/implementation/sidebar-fixed-header-ui-SPEC.md index 912aee05..b5c4537b 100644 --- a/docs/implementation/sidebar-fixed-header-ui-SPEC.md +++ b/docs/implementation/sidebar-fixed-header-ui-SPEC.md @@ -55,6 +55,7 @@ The sidebar has the following structure: ``` **Current Issues**: + - Line 145: `flex flex-col flex-1` on the menu container allows it to grow indefinitely - Line 146: `