diff --git a/docs/plans/crowdsec_nonroot_fix_spec.md b/docs/plans/crowdsec_nonroot_fix_spec.md new file mode 100644 index 00000000..f8964505 --- /dev/null +++ b/docs/plans/crowdsec_nonroot_fix_spec.md @@ -0,0 +1,380 @@ +# CrowdSec Non-Root Migration Fix Specification + +## Executive Summary + +The Charon container was migrated from root to non-root user (UID/GID 1000, username `charon`). This broke CrowdSec because of permission issues and config path mismatches. This document outlines the exact changes needed to fix CrowdSec operation under non-root. + +**Status:** Research Complete - Ready for Implementation +**Last Updated:** 2024-12-22 +**Priority:** CRITICAL + +--- + +## Root Cause Analysis + +### What Happened + +1. **Container User Change**: Root → `charon` (UID 1000) +2. **Log File Permission Issue**: CrowdSec config points to `/var/log/crowdsec.log` but non-root cannot create files in `/var/log/` +3. **Config Path Mismatch**: CrowdSec expects configs at `/etc/crowdsec/` but they're stored in `/app/data/crowdsec/config/` +4. 
**Missing Symlink**: Entrypoint doesn't create `/etc/crowdsec` → `/app/data/crowdsec/config` symlink + +### Current Container State + +**User**: `charon` (UID 1000, GID 1000) + +**Directory Permissions**: +``` +✓ /var/log/crowdsec/ - charon:charon (correct) +✓ /var/log/caddy/ - charon:charon (correct) +✓ /app/data/crowdsec/ - charon:charon (correct) +✗ /etc/crowdsec/ - exists but NOT symlinked to persistent storage +``` + +**CrowdSec Config Issues** (`/app/data/crowdsec/config/config.yaml`): +```yaml +common: + log_media: file + log_dir: /var/log/ # ✗ Wrong - should be /var/log/crowdsec/ + +config_paths: + config_dir: /etc/crowdsec/ # ✗ Not symlinked to persistent storage + data_dir: /var/lib/crowdsec/data/ # ✗ Wrong - should be /app/data/crowdsec/data + simulation_path: /etc/crowdsec/simulation.yaml # ✗ File doesn't exist + hub_dir: /etc/crowdsec/hub/ # ✓ Works via actual directory +``` + +--- + +## Fix Implementation Plan + +### File 1: `.docker/docker-entrypoint.sh` + +**Priority**: CRITICAL +**Lines to Modify**: 48-120 (CrowdSec initialization section) + +#### Changes Required + +1. **Fix log directory path** in config.yaml +2. **Create symlink** `/etc/crowdsec` → `/app/data/crowdsec/config` +3. **Update envsubst variables** to use correct paths +4. **Update data_dir path** in config.yaml + +#### Implementation + +Replace the section from line 48 onwards with: + +```bash +# ============================================================================ +# CrowdSec Initialization +# ============================================================================ +# Note: CrowdSec agent is not auto-started. Lifecycle is GUI-controlled via backend handlers. + +# Initialize CrowdSec configuration if cscli is present +if command -v cscli >/dev/null; then + echo "Initializing CrowdSec configuration..." 
+ + # Define persistent paths + CS_PERSIST_DIR="/app/data/crowdsec" + CS_CONFIG_DIR="$CS_PERSIST_DIR/config" + CS_DATA_DIR="$CS_PERSIST_DIR/data" + CS_LOG_DIR="/var/log/crowdsec" + + # Ensure persistent directories exist (within writable volume) + mkdir -p "$CS_CONFIG_DIR" 2>/dev/null || echo "Warning: Cannot create $CS_CONFIG_DIR" + mkdir -p "$CS_DATA_DIR" 2>/dev/null || echo "Warning: Cannot create $CS_DATA_DIR" + # Log directories are created at build time with correct ownership + mkdir -p "$CS_LOG_DIR" 2>/dev/null || true + mkdir -p /var/log/caddy 2>/dev/null || true + + # Initialize persistent config if key files are missing + if [ ! -f "$CS_CONFIG_DIR/config.yaml" ]; then + echo "Initializing persistent CrowdSec configuration..." + if [ -d "/etc/crowdsec.dist" ]; then + cp -r /etc/crowdsec.dist/* "$CS_CONFIG_DIR/" 2>/dev/null || echo "Warning: Could not copy dist config" + fi + fi + + # Create symlink from /etc/crowdsec to persistent config BEFORE envsubst + # This ensures cscli and other tools can find configs at the standard path + if [ ! -L "/etc/crowdsec" ]; then + echo "Creating symlink: /etc/crowdsec -> $CS_CONFIG_DIR" + # Remove directory if it exists (from Dockerfile) + if [ -d "/etc/crowdsec" ] && [ ! -L "/etc/crowdsec" ]; then + # Move any existing configs to persistent storage first + if [ -n "$(ls -A /etc/crowdsec 2>/dev/null)" ]; then + echo "Migrating existing /etc/crowdsec files to persistent storage..." + cp -rn /etc/crowdsec/* "$CS_CONFIG_DIR/" 2>/dev/null || true + fi + rm -rf /etc/crowdsec + fi + ln -sf "$CS_CONFIG_DIR" /etc/crowdsec + echo "Symlink created successfully" + else + echo "Symlink already exists: /etc/crowdsec -> $(readlink /etc/crowdsec)" + fi + + # Create/update acquisition config for Caddy logs + if [ ! -f "/etc/crowdsec/acquis.yaml" ] || [ ! -s "/etc/crowdsec/acquis.yaml" ]; then + echo "Creating acquisition configuration for Caddy logs..." 
+ cat > /etc/crowdsec/acquis.yaml << 'ACQUIS_EOF' +# Caddy access logs acquisition +# CrowdSec will monitor these files for security events +source: file +filenames: + - /var/log/caddy/access.log + - /var/log/caddy/*.log +labels: + type: caddy +ACQUIS_EOF + fi + + # Ensure hub directory exists in persistent storage + mkdir -p /etc/crowdsec/hub + + # Perform variable substitution with CORRECT paths + export CFG="$CS_CONFIG_DIR" + export DATA="$CS_DATA_DIR" + export PID="$CS_PERSIST_DIR/crowdsec.pid" + export LOG="$CS_LOG_DIR/crowdsec.log" + + # Process config.yaml and user.yaml with envsubst + for file in /etc/crowdsec/config.yaml /etc/crowdsec/user.yaml; do + if [ -f "$file" ]; then + envsubst < "$file" > "$file.tmp" && mv "$file.tmp" "$file" + fi + done + + # Fix log_dir path in config.yaml (must be /var/log/crowdsec/ not /var/log/); patterns are end-anchored so re-running on an already-fixed persistent config is a no-op + if [ -f "/etc/crowdsec/config.yaml" ]; then + echo "Updating config.yaml paths for non-root operation..." + sed -i 's|log_dir: /var/log/$|log_dir: /var/log/crowdsec/|g' /etc/crowdsec/config.yaml + sed -i 's|log_dir: /var/log$|log_dir: /var/log/crowdsec|g' /etc/crowdsec/config.yaml + sed -i 's|data_dir: /var/lib/crowdsec/data/|data_dir: /app/data/crowdsec/data/|g' /etc/crowdsec/config.yaml + sed -i 's|data_dir: /var/lib/crowdsec/data$|data_dir: /app/data/crowdsec/data|g' /etc/crowdsec/config.yaml + fi + + # Configure CrowdSec LAPI to use port 8085 to avoid conflict with Charon (port 8080) + if [ -f "/etc/crowdsec/config.yaml" ]; then + sed -i 's|listen_uri: 127.0.0.1:8080|listen_uri: 127.0.0.1:8085|g' /etc/crowdsec/config.yaml + sed -i 's|listen_uri: 0.0.0.0:8080|listen_uri: 127.0.0.1:8085|g' /etc/crowdsec/config.yaml + fi + + # Update local_api_credentials.yaml to use correct port + if [ -f "/etc/crowdsec/local_api_credentials.yaml" ]; then + sed -i 's|url: http://127.0.0.1:8080|url: http://127.0.0.1:8085|g' /etc/crowdsec/local_api_credentials.yaml + sed -i 's|url: http://localhost:8080|url: http://127.0.0.1:8085|g' 
/etc/crowdsec/local_api_credentials.yaml + fi + + # Update hub index to ensure CrowdSec can start + if [ ! -f "/etc/crowdsec/hub/.index.json" ]; then + echo "Updating CrowdSec hub index..." + timeout 60s cscli hub update 2>/dev/null || echo "⚠️ Hub update timed out or failed, continuing..." + fi + + # Ensure local machine is registered (auto-heal for volume/config mismatch) + echo "Registering local machine..." + cscli machines add -a --force 2>/dev/null || echo "Warning: Machine registration may have failed" + + # Install hub items (parsers, scenarios, collections) if local mode enabled + if [ "$SECURITY_CROWDSEC_MODE" = "local" ]; then + echo "Installing CrowdSec hub items..." + if [ -x /usr/local/bin/install_hub_items.sh ]; then + /usr/local/bin/install_hub_items.sh 2>/dev/null || echo "Warning: Some hub items may not have installed" + fi + fi +fi + +# CrowdSec Lifecycle Management: +# CrowdSec configuration is initialized above (symlinks, directories, hub updates) +# However, the CrowdSec agent is NOT auto-started in the entrypoint. +# Instead, CrowdSec lifecycle is managed by the backend handlers via GUI controls. +echo "CrowdSec configuration initialized. Agent lifecycle is GUI-controlled." +``` + +--- + +### File 2: `Dockerfile` + +**Priority**: HIGH +**Lines to Modify**: 286-294, 305-309 + +#### Changes Required + +1. **Don't create `/etc/crowdsec` as a directory** - will be symlink at runtime +2. **Keep `/etc/crowdsec.dist` for template storage** +3. 
**Update ownership commands** to not assume `/etc/crowdsec` is a directory + +#### Implementation + +**Replace lines 286-294:** + +```dockerfile +# Create required CrowdSec directories in runtime image +# Note: /etc/crowdsec will be a SYMLINK to /app/data/crowdsec/config (created at runtime) +# We keep /etc/crowdsec.dist as the source template +RUN mkdir -p /etc/crowdsec.dist /etc/crowdsec.dist/acquis.d /etc/crowdsec.dist/bouncers \ + /etc/crowdsec.dist/hub /etc/crowdsec.dist/notifications \ + /var/lib/crowdsec/data /var/log/crowdsec /var/log/caddy \ + /app/data/crowdsec/config /app/data/crowdsec/data +``` + +**Replace lines 305-309 (ownership section):** + +```dockerfile +# Security: Set ownership of all application directories to non-root charon user +# Note: /etc/crowdsec will be created as symlink at runtime by entrypoint +RUN chown -R charon:charon /app /config /var/log/crowdsec /var/log/caddy && \ + chown -R charon:charon /etc/crowdsec.dist 2>/dev/null || true && \ + chown -R charon:charon /var/lib/crowdsec 2>/dev/null || true +``` + +--- + +## Verification Checklist + +After implementation, verify these conditions: + +### 1. Container Startup +```bash +docker logs charon 2>&1 | grep -i crowdsec +# Expected: "CrowdSec configuration initialized" +# Expected: "Creating symlink: /etc/crowdsec -> /app/data/crowdsec/config" +# No errors about permissions or missing files +``` + +### 2. Symlink Creation +```bash +docker exec charon ls -la /etc/crowdsec +# Expected: lrwxrwxrwx ... /etc/crowdsec -> /app/data/crowdsec/config +``` + +### 3. Config File Paths +```bash +docker exec charon grep -E "log_dir|data_dir|config_dir" /app/data/crowdsec/config/config.yaml +# Expected: +# log_dir: /var/log/crowdsec/ +# data_dir: /app/data/crowdsec/data/ +# config_dir: /etc/crowdsec/ (resolves via symlink) +``` + +### 4. Log Directory Writability +```bash +docker exec charon test -w /var/log/crowdsec/ && echo "writable" || echo "not writable" +# Expected: writable +``` + +### 5. 
CrowdSec Start via API +```bash +# Enable CrowdSec via API +curl -X POST -H "Authorization: Bearer $TOKEN" http://localhost:8080/api/v1/admin/crowdsec/start + +# Check status +curl -H "Authorization: Bearer $TOKEN" http://localhost:8080/api/v1/admin/crowdsec/status +# Expected: {"running":true,"pid":XXXX,"lapi_ready":true} +``` + +### 6. Manual Process Start (Direct Test) +```bash +docker exec charon /usr/local/bin/crowdsec -c /app/data/crowdsec/config/config.yaml +# Should start without permission errors +# Check logs: docker exec charon cat /var/log/crowdsec/crowdsec.log +``` + +### 7. LAPI Connectivity +```bash +docker exec charon cscli lapi status +# Expected: "You can successfully interact with Local API (LAPI)" +``` + +--- + +## Testing Strategy + +### Phase 1: Clean Start Test +1. Remove existing volume: `docker volume rm charon_data` +2. Start fresh container: `docker compose up -d` +3. Verify symlink and config paths +4. Enable CrowdSec via UI +5. Verify process starts successfully + +### Phase 2: Upgrade Test (Migration Scenario) +1. Use existing volume with old directory structure +2. Start updated container +3. Verify entrypoint migrates old configs +4. Verify symlink creation +5. Enable CrowdSec via UI + +### Phase 3: Lifecycle Test +1. Start CrowdSec via API +2. Verify LAPI becomes ready +3. Stop CrowdSec via API +4. Restart container +5. Verify CrowdSec auto-starts if enabled + +### Phase 4: Hub Operations Test +1. Run `cscli hub update` +2. Install test preset via API +3. Verify files stored in correct locations +4. Check cache permissions + +--- + +## Rollback Plan + +If issues occur after implementation: + +1. **Immediate Rollback**: Revert to previous container image +2. **Config Recovery**: Backup script creates timestamped copies +3. 
**Manual Fix**: Mount volume and fix symlink/paths manually + +--- + +## Implementation Priority + +| Priority | Task | Impact | Complexity | +|----------|------|--------|------------| +| CRITICAL | Fix `.docker/docker-entrypoint.sh` | CrowdSec won't start | Medium | +| HIGH | Update `Dockerfile` directory creation | Prevents symlink creation | Low | +| MEDIUM | Add verification tests | CI/CD coverage | Medium | +| LOW | Document in migration guide | User awareness | Low | + +--- + +## Related Files (No Changes Needed) + +✓ `backend/internal/api/handlers/crowdsec_exec.go` - Uses correct paths +✓ `backend/internal/config/config.go` - Default config is correct +✓ `backend/internal/services/crowdsec_startup.go` - Logic is correct +✓ `configs/crowdsec/acquis.yaml` - Already correct +✓ `configs/crowdsec/install_hub_items.sh` - Already correct +✓ `configs/crowdsec/register_bouncer.sh` - Already correct + +--- + +## Additional Notes + +- **CrowdSec LAPI Port**: 8085 (correctly configured to avoid port conflict with Charon on 8080) +- **Acquisition Config**: Correctly points to `/var/log/caddy/*.log` +- **Hub Cache**: Stored in `/app/data/crowdsec/hub_cache/` (writable by charon user) +- **Bouncer API Key**: Expected at `/etc/crowdsec/bouncers/caddy-bouncer.key` (will resolve via symlink) +- **PID File**: Stored at `/app/data/crowdsec/crowdsec.pid` (correct location) + +--- + +## Success Criteria + +Implementation is complete when: + +1. ✅ Container starts without CrowdSec errors +2. ✅ `/etc/crowdsec` symlink exists and points to persistent storage +3. ✅ Config files use correct paths (`/var/log/crowdsec/`, `/app/data/crowdsec/data/`) +4. ✅ CrowdSec can be started via UI without permission errors +5. ✅ LAPI becomes ready within 30 seconds +6. ✅ `cscli` commands work correctly (hub update, preset install, etc.) +7. 
✅ Process survives container restarts when enabled + +--- + +*Research completed: December 22, 2024* +*Ready for implementation* diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md index bb64e7b7..091d8049 100644 --- a/docs/plans/current_spec.md +++ b/docs/plans/current_spec.md @@ -1,667 +1,1013 @@ -# URL Test Button Navigation Bug - Implementation Plan +# Current Project Specification -**Status**: Ready for Implementation -**Priority**: High -**Affected Component**: System Settings - Application URL Test -**Last Updated**: December 22, 2025 (Security Review Completed) +## Active Issue: CrowdSec Non-Root Migration Fix - REVISED + +**Status**: Implementation Ready - Supervisor Review Complete +**Priority**: CRITICAL +**Last Updated**: 2024-12-22 (Revised after supervisor review) + +### Quick Summary + +The container migration from root to non-root user broke CrowdSec. Supervisor review identified **7 critical issues** that would cause the original fix to fail. This revised plan addresses all issues. + +**Root Cause**: Permission issues, missing symlink creation logic, and incomplete config template population. + +### Changes Required + +1. **Dockerfile** (Line ~332): Add config template population before final COPY +2. **Entrypoint Script** (Lines 68-73): Replace symlink verification with creation logic +3. **Entrypoint Script** (Line 100): Fix LOG variable to use directory-based path +4. **Entrypoint Script** (Line 51): Add hub_cache directory creation +5. **Entrypoint Script** (Line 99): Keep CFG pointing to `/etc/crowdsec` (resolves via symlink) +6. **Entrypoint Script** (Lines 56-62): Strengthen error handling in migration +7. **Verification Checklist**: Expand from 7 to 11 steps --- -## Security Review Summary +## Detailed Implementation Plan -**Critical vulnerabilities fixed in this revision:** +### Issue 1: Missing Config Template Population (HIGH PRIORITY) -1. 
✅ **DNS Rebinding Protection**: HTTP requests now use validated IP addresses instead of hostnames, preventing TOCTOU attacks -2. ✅ **Redirect Validation**: All redirect targets validated for private IPs before following -3. ✅ **Complete IP Blocklist**: 15 IPv4 + 6 IPv6 reserved ranges blocked (RFC-compliant) -4. ✅ **HTTPS Enforcement**: Only HTTPS URLs accepted for secure testing -5. ✅ **Port Restrictions**: Limited to 443/8443 only -6. ✅ **Hostname Blocklist**: Cloud metadata endpoints explicitly blocked -7. ✅ **Rate Limiting**: Middleware implementation with 5 tests/minute per user +**Location**: `Dockerfile` before line 332 (before final COPY commands) + +**Problem**: The Dockerfile doesn't populate `/etc/crowdsec.dist/` with CrowdSec default configs (`config.yaml`, `user.yaml`, etc.). This causes the entrypoint script to have nothing to copy when initializing persistent storage. + +**Current Code** (Lines 330-332): +```dockerfile +# Copy CrowdSec configuration templates from source +COPY configs/crowdsec/acquis.yaml /etc/crowdsec.dist/acquis.yaml +COPY configs/crowdsec/install_hub_items.sh /usr/local/bin/install_hub_items.sh +``` + +**Required Fix** (Add BEFORE line 330): +```dockerfile +# Generate CrowdSec default configs to .dist directory +RUN if command -v cscli >/dev/null; then \ + mkdir -p /etc/crowdsec.dist && \ + cscli config restore /etc/crowdsec.dist/ || \ + cp -r /etc/crowdsec/* /etc/crowdsec.dist/ 2>/dev/null || true; \ + fi +``` + +**Rationale**: The `cscli config restore` command generates all required default configs (`config.yaml`, `user.yaml`, `local_api_credentials.yaml`, etc.). If that fails, we fall back to copying any existing configs. This ensures the `.dist` directory is always populated for the entrypoint to use. + +**Risk**: Low - Command has multiple fallbacks and won't fail the build if CrowdSec is unavailable. 
--- -## Executive Summary +### Issue 2: Symlink Not Created (HIGH PRIORITY) -The URL test button in System Settings incorrectly uses `window.open()` instead of performing a server-side connectivity test. This causes the browser to open the URL in a new tab (blank screen if unreachable) rather than executing a proper health check. +**Location**: `.docker/docker-entrypoint.sh` lines 68-73 -**User Report**: Clicking test button for `http://100.98.12.109:8080/settings/https//charon.hatfieldhosted.com` opened blank blue screen. +**Problem**: The entrypoint only VERIFIES the symlink exists but never CREATES it. This is the root cause of CrowdSec failures. + +**Current Code** (Lines 68-73): +```bash +# Link /etc/crowdsec to persistent config for runtime compatibility +# Note: This symlink is created at build time; verify it exists +if [ -L "/etc/crowdsec" ]; then + echo "CrowdSec config symlink verified: /etc/crowdsec -> $CS_CONFIG_DIR" +else + echo "Warning: /etc/crowdsec symlink not found. CrowdSec may use volume config directly." +fi +``` + +**Required Fix** (Replace lines 68-73): +```bash +# Migrate existing directory to persistent storage if needed +if [ -d "/etc/crowdsec" ] && [ ! -L "/etc/crowdsec" ]; then + echo "Migrating /etc/crowdsec to persistent storage..." + if [ -n "$(ls -A /etc/crowdsec 2>/dev/null)" ]; then + cp -rn /etc/crowdsec/* "$CS_CONFIG_DIR/" || { + echo "ERROR: Failed to migrate configs" + exit 1 + } + fi + rm -rf /etc/crowdsec || { + echo "ERROR: Failed to remove old directory" + exit 1 + } +fi + +# Create symlink if it doesn't exist +if [ ! -L "/etc/crowdsec" ]; then + ln -sf "$CS_CONFIG_DIR" /etc/crowdsec || { + echo "ERROR: Failed to create symlink" + exit 1 + } + echo "Created symlink: /etc/crowdsec -> $CS_CONFIG_DIR" +fi +``` + +**Rationale**: This implements proper migration logic with fail-fast error handling. If `/etc/crowdsec` exists as a directory, we migrate its contents before creating the symlink. 
+ +**Risk**: Medium - Changes startup flow. Must test with both fresh and existing volumes. --- -## Current Implementation Analysis +### Issue 3: Wrong LOG Environment Variable -### Frontend: SystemSettings.tsx +**Location**: `.docker/docker-entrypoint.sh` line 100 -**File**: [frontend/src/pages/SystemSettings.tsx](frontend/src/pages/SystemSettings.tsx#L103-L118) +**Problem**: The `LOG` variable points directly to a file instead of using the log directory variable, breaking consistency. -```typescript -const testPublicURL = async () => { - if (!publicURL) { - toast.error(t('systemSettings.applicationUrl.invalidUrl')) - return - } - setPublicURLSaving(true) - try { - window.open(publicURL, '_blank') // ❌ Opens URL in browser instead of API test - toast.success('URL opened in new tab') - } catch { - toast.error('Failed to open URL') - } finally { - setPublicURLSaving(false) - } -} +**Current Code** (Line 100): +```bash +export LOG=/var/log/crowdsec.log ``` -**Button** (line 417): -```typescript - +**Required Fix** (Replace line 100): +```bash +export LOG="$CS_LOG_DIR/crowdsec.log" ``` -### Backend: Existing Validation Only - -**File**: [backend/internal/api/routes/routes.go](backend/internal/api/routes/routes.go#L195) - -```go -protected.POST("/settings/validate-url", settingsHandler.ValidatePublicURL) +**Required Addition** (Add after line 47 where other CS_* variables are defined): +```bash +CS_LOG_DIR="/var/log/crowdsec" ``` -**Handler**: [backend/internal/api/handlers/settings_handler.go](backend/internal/api/handlers/settings_handler.go#L229-L267) +**Rationale**: Ensures all CrowdSec paths are consistently managed through variables, making future changes easier. -This endpoint **only validates format** (scheme, no paths), does NOT test connectivity. +**Risk**: Low - Simple variable change with no behavioral impact. --- -## Root Cause +### Issue 4: Missing Hub Cache Directory -1. 
**Misnamed Function**: `testPublicURL()` implies connectivity test but performs navigation -2. **No Backend Endpoint**: Missing API for server-side reachability tests -3. **User Expectation**: "Test" button should verify connectivity, not open URL -4. **Malformed URL Issue**: User input `https//charon.hatfieldhosted.com` (missing colon) causes navigation failure +**Location**: `.docker/docker-entrypoint.sh` after line 51 + +**Problem**: The hub cache directory `/app/data/crowdsec/hub_cache/` is never explicitly created, causing hub operations to fail. + +**Current Code** (Lines 49-51): +```bash +# Ensure persistent directories exist (within writable volume) +mkdir -p "$CS_CONFIG_DIR" 2>/dev/null || echo "Warning: Cannot create $CS_CONFIG_DIR" +mkdir -p "$CS_DATA_DIR" 2>/dev/null || echo "Warning: Cannot create $CS_DATA_DIR" +``` + +**Required Fix** (Add after line 51): +```bash +mkdir -p "$CS_PERSIST_DIR/hub_cache" +``` + +**Rationale**: CrowdSec stores hub metadata in a separate cache directory. Without this, `cscli hub update` fails silently. + +**Risk**: Low - Simple directory creation with no side effects. --- -## Security: SSRF Protection Requirements +### Issue 5: CFG Variable Should Stay /etc/crowdsec -**CRITICAL**: Backend URL testing must prevent Server-Side Request Forgery attacks. +**Location**: `.docker/docker-entrypoint.sh` line 99 -### Required Protections +**Problem**: The original plan incorrectly suggested changing CFG to `$CS_CONFIG_DIR`, but it should remain `/etc/crowdsec` since it resolves to persistent storage via the symlink. -1. 
**Complete IP Blocklist**: Reject all private/reserved IPs - - IPv4: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` - - Loopback: `127.0.0.0/8`, IPv6 `::1/128` - - Link-local: `169.254.0.0/16`, IPv6 `fe80::/10` - - Cloud metadata: `169.254.169.254` (AWS/GCP/Azure) - - IPv6 ULA: `fc00::/7` - - Test/doc ranges: `192.0.2.0/24`, `198.51.100.0/24`, `203.0.113.0/24` - - Reserved: `0.0.0.0/8`, `240.0.0.0/4`, `255.255.255.255/32` - - CGNAT: `100.64.0.0/10` - - Multicast: `224.0.0.0/4`, IPv6 `ff00::/8` +**Current Code** (Line 99): +```bash +export CFG=/etc/crowdsec +``` -2. **DNS Rebinding Protection** (CRITICAL): - - Make HTTP request directly to validated IP address - - Use `req.Host` header for SNI/vhost routing - - Prevents TOCTOU attacks where DNS changes between check and use +**Required Action**: **KEEP AS-IS** - Do NOT change this line. -3. **Redirect Validation** (CRITICAL): - - Validate each redirect target's IP before following - - Max 2 redirects - - Block redirects to private IPs +**Rationale**: The CFG variable should point to `/etc/crowdsec` which resolves to `$CS_CONFIG_DIR` via symlink. This maintains compatibility with CrowdSec's expected paths while still using persistent storage. -4. **Hostname Blocklist**: - - `metadata.google.internal`, `metadata.goog`, `metadata` - - `169.254.169.254`, `localhost` - -5. **HTTPS Enforcement**: - - Require HTTPS scheme (reject HTTP for security) - - Warn users about insecure connections - -6. **Port Restrictions**: - - Allow only: 443 (HTTPS), 8443 (alternate HTTPS) - - Block all other ports including privileged ports - -7. **Rate Limiting**: 5 tests per minute per user - - Implement using `golang.org/x/time/rate` - - Per-user token bucket with burst allowance - -8. **Request Restrictions**: - - 5 second HTTP timeout - - 3 second DNS timeout - - HEAD method only (no full GET) - -9. **Admin-Only**: Require admin role (already enforced on `/settings/*`) +**Risk**: None - No change required. 
--- -## Implementation Plan +### Issue 6: Weak Migration Error Handling -### Backend: New API Endpoint +**Location**: `.docker/docker-entrypoint.sh` lines 56-62 -#### 1. Register Route with Rate Limiting +**Problem**: Too many `|| true` statements allow silent failures during config migration. -**File**: [backend/internal/api/routes/routes.go](backend/internal/api/routes/routes.go#L195) - -After line 195: -```go -// Create rate limiter for URL testing (5 requests per minute) -urlTestLimiter := middleware.NewRateLimiter(5.0/60.0, 5) -protected.POST("/settings/test-url", - urlTestLimiter.Limit(), - settingsHandler.TestPublicURL) +**Current Code** (Lines 56-62): +```bash +# Initialize persistent config if key files are missing +if [ ! -f "$CS_CONFIG_DIR/config.yaml" ]; then + echo "Initializing persistent CrowdSec configuration..." + if [ -d "/etc/crowdsec.dist" ]; then + cp -r /etc/crowdsec.dist/* "$CS_CONFIG_DIR/" 2>/dev/null || echo "Warning: Could not copy dist config" + elif [ -d "/etc/crowdsec" ] && [ ! -L "/etc/crowdsec" ]; then + # Fallback if .dist is missing + cp -r /etc/crowdsec/* "$CS_CONFIG_DIR/" 2>/dev/null || echo "Warning: Could not copy config" + fi +fi ``` -#### 2. 
Handler - -**File**: [backend/internal/api/handlers/settings_handler.go](backend/internal/api/handlers/settings_handler.go#L267) - -```go -// TestPublicURL performs server-side connectivity test with SSRF protection -func (h *SettingsHandler) TestPublicURL(c *gin.Context) { -role, _ := c.Get("role") -if role != "admin" { -c.JSON(http.StatusForbidden, gin.H{"error": "Admin access required"}) -return -} - -type TestURLRequest struct { -URL string `json:"url" binding:"required"` -} - -var req TestURLRequest -if err := c.ShouldBindJSON(&req); err != nil { -c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) -return -} - -// Validate format first -normalized, _, err := utils.ValidateURL(req.URL) -if err != nil { -c.JSON(http.StatusBadRequest, gin.H{ -"reachable": false, -"error": "Invalid URL format", -}) -return -} - -// Test connectivity (SSRF-safe) -reachable, latency, err := utils.TestURLConnectivity(normalized) -if err != nil { -c.JSON(http.StatusOK, gin.H{ -"reachable": false, -"error": err.Error(), -}) -return -} - -c.JSON(http.StatusOK, gin.H{ -"reachable": reachable, -"latency": latency, -"message": fmt.Sprintf("URL reachable (%.0fms)", latency), -}) -} +**Required Fix** (Replace lines 56-62): +```bash +# Initialize persistent config if key files are missing +if [ ! -f "$CS_CONFIG_DIR/config.yaml" ]; then + echo "Initializing persistent CrowdSec configuration..." + if [ -d "/etc/crowdsec.dist" ] && [ -n "$(ls -A /etc/crowdsec.dist 2>/dev/null)" ]; then + cp -r /etc/crowdsec.dist/* "$CS_CONFIG_DIR/" || { + echo "ERROR: Failed to copy config from /etc/crowdsec.dist" + exit 1 + } + echo "Successfully initialized config from .dist directory" + elif [ -d "/etc/crowdsec" ] && [ ! 
-L "/etc/crowdsec" ] && [ -n "$(ls -A /etc/crowdsec 2>/dev/null)" ]; then + cp -r /etc/crowdsec/* "$CS_CONFIG_DIR/" || { + echo "ERROR: Failed to copy config from /etc/crowdsec" + exit 1 + } + echo "Successfully initialized config from /etc/crowdsec" + else + echo "ERROR: No config source found (neither .dist nor /etc/crowdsec available)" + exit 1 + fi +fi ``` -#### 3. Utility Function with DNS Rebinding Protection +**Rationale**: Fail-fast approach ensures we detect misconfigurations early. Empty directory checks prevent copying empty directories. -**File**: Create `backend/internal/utils/url_test.go` - -```go -package utils - -import ( - "context" - "fmt" - "net" - "net/http" - "net/url" - "strings" - "time" -) - -// TestURLConnectivity checks if URL is reachable with comprehensive SSRF protection -// including DNS rebinding prevention, redirect validation, and complete IP blocklist -func TestURLConnectivity(rawURL string) (bool, float64, error) { - parsed, err := url.Parse(rawURL) - if err != nil { - return false, 0, fmt.Errorf("invalid URL: %w", err) - } - - host := parsed.Hostname() - port := parsed.Port() - if port == "" { - port = map[string]string{"https": "443", "http": "80"}[parsed.Scheme] - } - - // Enforce HTTPS for security - if parsed.Scheme != "https" { - return false, 0, fmt.Errorf("HTTPS required") - } - - // Validate port - allowedPorts := map[string]bool{"443": true, "8443": true} - if !allowedPorts[port] { - return false, 0, fmt.Errorf("port %s not allowed", port) - } - - // Block metadata hostnames explicitly - forbiddenHosts := []string{ - "metadata.google.internal", "metadata.goog", "metadata", - "169.254.169.254", "localhost", - } - for _, forbidden := range forbiddenHosts { - if strings.EqualFold(host, forbidden) { - return false, 0, fmt.Errorf("blocked hostname") - } - } - - // DNS resolution with timeout - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - ips, err := 
net.DefaultResolver.LookupIPAddr(ctx, host) - if err != nil { - return false, 0, fmt.Errorf("DNS failed: %w", err) - } - if len(ips) == 0 { - return false, 0, fmt.Errorf("no IPs found") - } - - // SSRF protection: block private IPs - for _, ip := range ips { - if isPrivateIP(ip.IP) { - return false, 0, fmt.Errorf("private IP blocked: %s", ip.IP) - } - } - - // DNS REBINDING PROTECTION: Use first validated IP for request - validatedIP := ips[0].IP.String() - - // Construct URL using validated IP to prevent TOCTOU attacks - var targetURL string - if port != "" { - targetURL = fmt.Sprintf("%s://%s:%s%s", parsed.Scheme, validatedIP, port, parsed.Path) - } else { - targetURL = fmt.Sprintf("%s://%s%s", parsed.Scheme, validatedIP, parsed.Path) - } - - // HTTP request with redirect validation - client := &http.Client{ - Timeout: 5 * time.Second, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) >= 2 { - return fmt.Errorf("too many redirects") - } - - // CRITICAL: Validate redirect target IPs - redirectHost := req.URL.Hostname() - redirectIPs, err := net.DefaultResolver.LookupIPAddr(ctx, redirectHost) - if err != nil { - return fmt.Errorf("redirect DNS failed: %w", err) - } - if len(redirectIPs) == 0 { - return fmt.Errorf("redirect DNS returned no IPs") - } - - // Check redirect target IPs - for _, ip := range redirectIPs { - if isPrivateIP(ip.IP) { - return fmt.Errorf("redirect to private IP blocked: %s", ip.IP) - } - } - return nil - }, - } - - start := time.Now() - req, err := http.NewRequestWithContext(ctx, http.MethodHead, targetURL, nil) - if err != nil { - return false, 0, fmt.Errorf("request creation failed: %w", err) - } - - // Set Host header to original hostname for SNI/vhost routing - req.Host = parsed.Host - req.Header.Set("User-Agent", "Charon-Health-Check/1.0") - - resp, err := client.Do(req) - latency := time.Since(start).Seconds() * 1000 - - if err != nil { - return false, 0, fmt.Errorf("connection failed: %w", err) - } - 
defer resp.Body.Close() - - if resp.StatusCode >= 200 && resp.StatusCode < 400 { - return true, latency, nil - } - - return false, latency, fmt.Errorf("status %d", resp.StatusCode) -} - -// isPrivateIP checks if an IP is in any private/reserved range -func isPrivateIP(ip net.IP) bool { - // Check special addresses - if ip.IsLoopback() || ip.IsLinkLocalUnicast() || - ip.IsLinkLocalMulticast() || ip.IsMulticast() { - return true - } - - // Check if it's IPv4 or IPv6 - if ip.To4() != nil { - // IPv4 private ranges (comprehensive RFC compliance) - privateBlocks := []string{ - "0.0.0.0/8", // Current network - "10.0.0.0/8", // Private - "100.64.0.0/10", // Shared address space (CGNAT) - "127.0.0.0/8", // Loopback - "169.254.0.0/16", // Link-local / Cloud metadata - "172.16.0.0/12", // Private - "192.0.0.0/24", // IETF protocol assignments - "192.0.2.0/24", // TEST-NET-1 - "192.168.0.0/16", // Private - "198.18.0.0/15", // Benchmarking - "198.51.100.0/24", // TEST-NET-2 - "203.0.113.0/24", // TEST-NET-3 - "224.0.0.0/4", // Multicast - "240.0.0.0/4", // Reserved - "255.255.255.255/32", // Broadcast - } - - for _, block := range privateBlocks { - _, subnet, _ := net.ParseCIDR(block) - if subnet.Contains(ip) { - return true - } - } - } else { - // IPv6 private ranges - privateBlocks := []string{ - "::1/128", // Loopback - "::/128", // Unspecified - "::ffff:0:0/96", // IPv4-mapped - "fe80::/10", // Link-local - "fc00::/7", // Unique local - "ff00::/8", // Multicast - } - - for _, block := range privateBlocks { - _, subnet, _ := net.ParseCIDR(block) - if subnet.Contains(ip) { - return true - } - } - } - - return false -} -``` - -#### 4. 
Rate Limiting Middleware - - **File**: Create `backend/internal/middleware/rate_limit.go` - -```go -package middleware - -import ( - "fmt" - "net/http" - "sync" - - "github.com/gin-gonic/gin" - "golang.org/x/time/rate" -) - -// RateLimiter keeps one rate.Limiter per key (the authenticated user). -// NOTE(review): entries are never evicted, so the map grows with the number -// of distinct keys seen. -type RateLimiter struct { - limiters map[string]*rate.Limiter - mu sync.RWMutex - rate rate.Limit - burst int -} - -// NewRateLimiter returns a RateLimiter whose per-key limiters are created -// with rate.Limit(rps) and the given burst. -func NewRateLimiter(rps float64, burst int) *RateLimiter { - return &RateLimiter{ - limiters: make(map[string]*rate.Limiter), - rate: rate.Limit(rps), - burst: burst, - } -} - -// getLimiter returns the limiter stored for key, creating and storing it on -// first use. The whole lookup runs under the write lock. -func (rl *RateLimiter) getLimiter(key string) *rate.Limiter { - rl.mu.Lock() - defer rl.mu.Unlock() - - limiter, exists := rl.limiters[key] - if !exists { - limiter = rate.NewLimiter(rl.rate, rl.burst) - rl.limiters[key] = limiter - } - return limiter -} - -// Limit returns Gin middleware that aborts with 401 when no "user_id" is set -// on the context and with 429 when that user's limiter denies the request. -func (rl *RateLimiter) Limit() gin.HandlerFunc { - return func(c *gin.Context) { - userID, exists := c.Get("user_id") - if !exists { - c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{ - "error": "Authentication required", - }) - return - } - - // Format the ID instead of asserting string: a non-string user_id - // (e.g. a numeric ID) would make userID.(string) panic - limiter := rl.getLimiter(fmt.Sprint(userID)) - // NOTE(review): the message below hard-codes the limit; keep it in - // sync with the arguments passed to NewRateLimiter - if !limiter.Allow() { - c.AbortWithStatusJSON(http.StatusTooManyRequests, gin.H{ - "error": "Rate limit exceeded. Maximum 5 tests per minute.", - }) - return - } - - c.Next() - } -} -``` - -### Frontend: Use API Instead of window.open - -#### 1. API Client - -**File**: [frontend/src/api/settings.ts](frontend/src/api/settings.ts#L40) - -```typescript -export const testPublicURL = async (url: string): Promise<{ - reachable: boolean - latency?: number - message?: string - error?: string -}> => { - const response = await client.post('/settings/test-url', { url }) - return response.data -} -``` - -#### 2. 
Component Update - - **File**: [frontend/src/pages/SystemSettings.tsx](frontend/src/pages/SystemSettings.tsx#L103-L118) - -Replace function: - -```typescript -// Asks the backend to test publicURL and reports the outcome via toast; -// the saving flag stays set for the duration of the request. -const testPublicURLHandler = async () => { - if (!publicURL) { - toast.error(t('systemSettings.applicationUrl.invalidUrl')) - return - } - setPublicURLSaving(true) - try { - const result = await testPublicURL(publicURL) - if (result.reachable) { - // latency is optional; omit the timing rather than render "undefinedms" - const fallback = - typeof result.latency === 'number' - ? `URL reachable (${result.latency.toFixed(0)}ms)` - : 'URL reachable' - toast.success(result.message || fallback) - } else { - toast.error(result.error || 'URL not reachable') - } - } catch (error) { - toast.error(error instanceof Error ? error.message : 'Test failed') - } finally { - setPublicURLSaving(false) - } -} -``` - -#### 3. Update Button (line 417) - -```typescript -