diff --git a/backend/internal/caddy/config.go b/backend/internal/caddy/config.go index 31d6336f..13ab92b3 100644 --- a/backend/internal/caddy/config.go +++ b/backend/internal/caddy/config.go @@ -701,6 +701,13 @@ func GenerateConfig(hosts []models.ProxyHost, storageDir, acmeEmail, frontendDir Handle: emergencyHandlers, Terminal: true, } + logger.Log().WithFields(map[string]any{ + "host_id": host.ID, + "host_uuid": host.UUID, + "unique_domains": uniqueDomains, + "has_paths": true, + "path_count": len(emergencyPaths), + }).Debug("[CONFIG DEBUG] Creating EMERGENCY route") routes = append(routes, emergencyRoute) mainHandlers := append(append([]Handler{}, securityHandlers...), handlers...) @@ -714,6 +721,14 @@ func GenerateConfig(hosts []models.ProxyHost, storageDir, acmeEmail, frontendDir Terminal: true, } + logger.Log().WithFields(map[string]any{ + "host_id": host.ID, + "host_uuid": host.UUID, + "unique_domains": uniqueDomains, + "has_paths": false, + "route_type": "main", + }).Debug("[CONFIG DEBUG] Creating MAIN route (no path matchers)") + routes = append(routes, route) } diff --git a/backend/internal/caddy/validator.go b/backend/internal/caddy/validator.go index da4f20a7..fb9e39c6 100644 --- a/backend/internal/caddy/validator.go +++ b/backend/internal/caddy/validator.go @@ -6,6 +6,8 @@ import ( "net" "strconv" "strings" + + "github.com/Wikid82/charon/backend/internal/logger" ) // Validate performs pre-flight validation on a Caddy config before applying it. @@ -18,8 +20,9 @@ func Validate(cfg *Config) error { return nil // Empty config is valid } - // Track seen hosts to detect duplicates - seenHosts := make(map[string]bool) + // Track seen hosts with their path configuration + // Value: "with_paths" or "without_paths" + seenHosts := make(map[string]string) for serverName, server := range cfg.Apps.HTTP.Servers { if len(server.Listen) == 0 { @@ -81,18 +84,48 @@ func validateListenAddr(addr string) error { return nil } -func validateRoute(route *Route, seenHosts map[string]bool) error { +func validateRoute(route *Route, seenHosts map[string]string) error { if len(route.Handle) == 0 { return fmt.Errorf("route has no handlers") } - // Check for duplicate host matchers + // Check for duplicate host matchers with incompatible path configurations + // Allow emergency+main pattern: one route with paths, one without for _, match := range route.Match { + hasPaths := len(match.Path) > 0 + pathConfig := "without_paths" + if hasPaths { + pathConfig = "with_paths" + } + for _, host := range match.Host { - if seenHosts[host] { - return fmt.Errorf("duplicate host matcher: %s", host) + logger.Log().WithFields(map[string]any{ + "host": host, + "has_paths": hasPaths, + "paths": match.Path, + "path_config": pathConfig, + }).Debug("[VALIDATOR] Checking host matcher") + + if existingConfig, seen := seenHosts[host]; seen { + // Host already seen - check if path configs are compatible + if existingConfig == pathConfig { + // Same path configuration = true duplicate + if pathConfig == "with_paths" { + logger.Log().WithField("host", host).Error("[VALIDATOR] Duplicate host with paths") + return fmt.Errorf("duplicate host with paths: %s", host) + } + logger.Log().WithField("host", host).Error("[VALIDATOR] Duplicate host without paths") + return fmt.Errorf("duplicate host without paths: %s", host) + } + // Different path configuration = emergency+main pattern (ALLOWED) + logger.Log().WithFields(map[string]any{ + "host": host, + "existing_config": existingConfig, + "new_config": pathConfig, + }).Debug("[VALIDATOR] Allowing emergency+main pattern") + continue } - seenHosts[host] = true + seenHosts[host] = pathConfig } } diff --git a/backend/internal/caddy/validator_emergency_test.go b/backend/internal/caddy/validator_emergency_test.go new file mode 100644 index 00000000..8e86f11e --- /dev/null +++ b/backend/internal/caddy/validator_emergency_test.go @@ -0,0 +1,287 @@ +package caddy + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestValidate_EmergencyPlusMainPattern tests the core fix: +// Allow duplicate host when one has path matchers (emergency) and one doesn't (main) +func TestValidate_EmergencyPlusMainPattern(t *testing.T) { + tests := []struct { + name string + routes []*Route + expectError bool + errorText string + }{ + { + name: "Emergency+Main Pattern (ALLOWED)", + routes: []*Route{ + { + // Emergency route WITH paths + Match: []Match{{ + Host: []string{"test.com"}, + Path: []string{"/api/v1/emergency/*", "/emergency/*"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }, + { + // Main route WITHOUT paths + Match: []Match{{ + Host: []string{"test.com"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }, + }, + expectError: false, + }, + { + name: "Reverse Order: Main+Emergency (ALLOWED)", + routes: []*Route{ + { + // Main route WITHOUT paths (comes first) + Match: []Match{{ + Host: []string{"example.com"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }, + { + // Emergency route WITH paths (comes second) + Match: []Match{{ + Host: []string{"example.com"}, + Path: []string{"/emergency/*"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }, + }, + expectError: false, + }, + { + name: "Duplicate Hosts With Same Paths (REJECTED)", + routes: []*Route{ + { + Match: []Match{{ + Host: []string{"test.com"}, + Path: []string{"/api/*"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app1:8080", false, "none", true), + }, + }, + { + Match: []Match{{ + Host: []string{"test.com"}, + Path: []string{"/admin/*"}, // Different paths, but both have paths + }}, + Handle: []Handler{ + ReverseProxyHandler("app2:8080", false, "none", true), + }, + }, + }, + expectError: true, + errorText: "duplicate host with paths", + }, + { + name: "Duplicate Hosts Without Paths (REJECTED)", + routes: []*Route{ + { + Match: []Match{{ + Host: []string{"test.com"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app1:8080", false, "none", true), + }, + }, + { + Match: []Match{{ + Host: []string{"test.com"}, // Same host, both without paths + }}, + Handle: []Handler{ + ReverseProxyHandler("app2:8080", false, "none", true), + }, + }, + }, + expectError: true, + errorText: "duplicate host without paths", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &Config{ + Apps: Apps{ + HTTP: &HTTPApp{ + Servers: map[string]*Server{ + "test": { + Listen: []string{":80"}, + Routes: tt.routes, + }, + }, + }, + }, + } + + err := Validate(config) + if tt.expectError { + require.Error(t, err, "Expected validation to fail") + if tt.errorText != "" { + require.Contains(t, err.Error(), tt.errorText) + } + } else { + require.NoError(t, err, "Expected validation to pass") + } + }) + } +} + +// TestValidate_MultipleHostsWithEmergencyPattern tests scalability: +// Verify validator handles 5, 10, and 18+ hosts with emergency+main pattern +func TestValidate_MultipleHostsWithEmergencyPattern(t *testing.T) { + hostCounts := []int{5, 10, 18} + + for _, count := range hostCounts { + t.Run("RouteCount_"+string(rune(count+'0')), func(t *testing.T) { + routes := make([]*Route, 0, count*2) // 2 routes per host + + for i := 0; i < count; i++ { + hostname := "host" + string(rune(i+'0')) + ".example.com" + + // Emergency route + routes = append(routes, &Route{ + Match: []Match{{ + Host: []string{hostname}, + Path: []string{"/api/v1/emergency/*", "/emergency/*"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }) + + // Main route + routes = append(routes, &Route{ + Match: []Match{{ + Host: []string{hostname}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + }) + } + + config := &Config{ + Apps: Apps{ + HTTP: &HTTPApp{ + Servers: map[string]*Server{ + "test": { + Listen: []string{":80"}, + Routes: routes, + }, + }, + }, + }, + } + + err := Validate(config) + require.NoError(t, err, "Expected validation to pass for %d hosts", count) + }) + } +} + +// TestValidate_RouteOrdering verifies route ordering is preserved +func TestValidate_RouteOrdering(t *testing.T) { + // Emergency route should be checked BEFORE main route + // This test ensures validator doesn't impose ordering constraints + config := &Config{ + Apps: Apps{ + HTTP: &HTTPApp{ + Servers: map[string]*Server{ + "test": { + Listen: []string{":80"}, + Routes: []*Route{ + { + // Route 0: Emergency (with paths) - should match /emergency/* first + Match: []Match{{ + Host: []string{"test.com"}, + Path: []string{"/emergency/*"}, + }}, + Handle: []Handler{ + Handler{ + "handler": "static_response", + "body": "Emergency bypass", + }, + }, + Terminal: true, + }, + { + // Route 1: Main (no paths) - matches everything else + Match: []Match{{ + Host: []string{"test.com"}, + }}, + Handle: []Handler{ + ReverseProxyHandler("app:8080", false, "none", true), + }, + Terminal: true, + }, + }, + }, + }, + }, + }, + } + + err := Validate(config) + require.NoError(t, err, "Route ordering should be preserved") +} + +// TestValidate_CaseInsensitiveHosts tests that host comparison is case-sensitive +// This is intentional - DNS is case-insensitive, but Caddy handles normalization +func TestValidate_CaseSensitiveHostnames(t *testing.T) { + // Note: This test documents current behavior. + // The validator treats "Test.com" and "test.com" as DIFFERENT strings. + // This is acceptable because config.go normalizes all hostnames to lowercase + // BEFORE calling the validator. By the time validator sees them, they're already + // normalized, so this scenario doesn't occur in production. + config := &Config{ + Apps: Apps{ + HTTP: &HTTPApp{ + Servers: map[string]*Server{ + "test": { + Listen: []string{":80"}, + Routes: []*Route{ + { + Match: []Match{{ + Host: []string{"Test.com"}, // Uppercase T + }}, + Handle: []Handler{ + ReverseProxyHandler("app1:8080", false, "none", true), + }, + }, + { + Match: []Match{{ + Host: []string{"test.com"}, // Lowercase t + }}, + Handle: []Handler{ + ReverseProxyHandler("app2:8080", false, "none", true), + }, + }, + }, + }, + }, + }, + }, + } + + // Current behavior: validator treats these as different hosts (case-sensitive string comparison) + // This is fine because config.go normalizes all domains to lowercase before validation + err := Validate(config) + require.NoError(t, err, "Validator treats different case as different hosts (config.go normalizes before validation)") +} diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md index c1f74122..4a5190dc 100644 --- a/docs/plans/current_spec.md +++ b/docs/plans/current_spec.md @@ -1,298 +1,689 @@ -# GORM ID Leak Security Vulnerability Fix +# Duplicate Proxy Host Bug Fix - Simplified Validator (SYSTEMIC ISSUE) -**Status**: ACTIVE -**Priority**: CRITICAL πŸ”΄ +**Status**: ACTIVE - MINIMAL FIX APPROACH +**Priority**: CRITICAL πŸ”΄πŸ”΄πŸ”΄ - ALL 18 ENABLED PROXY HOSTS DOWN **Created**: 2026-01-28 -**Security Issue**: SQL Injection Vulnerability via GORM `.First()` ID Leaking +**Updated**: 2026-01-28 (EXPANDED SCOPE - Systemic issue confirmed) +**Bug**: Caddy validator rejects emergency+main route pattern for EVERY proxy host (duplicate host with different path constraints) --- ## Executive Summary -**Critical Security Bug**: GORM's `.First(&model, id)` method with string ID fields causes SQL injection-like behavior by directly inserting the ID value into the WHERE clause without proper parameterization. This affects **12 production service methods** and **multiple test files**, causing widespread test failures. +**CRITICAL SYSTEMIC BUG**: Caddy's pre-flight validator rejects the emergency+main route pattern for **EVERY enabled proxy host**. The emergency route (with path matchers) and main route (without path matchers) share the same domain, causing "duplicate host matcher" error on ALL hosts. **Impact**: -- πŸ”΄ SQL syntax errors in production code -- πŸ”΄ Test suite failures (uptime_service_race_test.go) -- πŸ”΄ Potential SQL injection vector -- πŸ”΄ String-based UUIDs are leaking into raw SQL +- πŸ”΄πŸ”΄πŸ”΄ **ZERO routes loaded in Caddy** - ALL proxy hosts are down +- πŸ”΄ **18 enabled proxy hosts** cannot be activated (not just Host ID 24) +- πŸ”΄ Entire reverse proxy functionality is non-functional +- 🟑 Emergency bypass routes blocked for all hosts +- 🟑 Sequential failures: Host 24 β†’ Host 22 β†’ (pattern repeats for every host) +- 🟒 Backend health endpoint returns 200 OK (separate container health issue) -**Root Cause**: When calling `.First(&model, id)` where `model` has a string ID field, GORM directly inserts the ID as a raw SQL fragment instead of using parameterized queries. +**Root Cause**: Validator treats ALL duplicate hosts as errors without considering that routes with different path constraints are valid. The emergency+main route pattern is applied to EVERY proxy host by design, causing systematic rejection. + +**Minimal Fix**: Simplify validator to allow duplicate hosts when ONE has path matchers and ONE doesn't. **This will unblock ALL 18 enabled proxy hosts simultaneously**, restoring full reverse proxy functionality. Full overlap detection is future work. + +**Database**: NO issues - DNS is already case-insensitive. No migration needed. + +**Secondary Issues** (tracked but deferred): +- 🟑 Slow SQL queries (>200ms) on uptime_heartbeats and security_configs tables +- 🟑 Container health check fails despite 200 OK from health endpoint (may be timeout issue) --- -## SQL Injection Evidence +## Technical Analysis -### Actual Error from Tests +### Current Route Structure -``` -/uptime_service_race_test.go:100 unrecognized token: "639fb2d8" -SELECT * FROM \`uptime_hosts\` WHERE 639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87 AND \`uptime_hosts\`.\`id\` = "639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87" +For each proxy host, `GenerateConfig` creates TWO routes with the SAME domain list: + +1. **Emergency Route** (lines 571-584 in config.go): + ```go + emergencyRoute := &Route{ + Match: []Match{{ + Host: uniqueDomains, // immaculaterr.hatfieldhosted.com + Path: emergencyPaths, // /api/v1/emergency/* + }}, + Handle: emergencyHandlers, + Terminal: true, + } + ``` + +2. **Main Route** (lines 586-598 in config.go): + ```go + route := &Route{ + Match: []Match{{ + Host: uniqueDomains, // immaculaterr.hatfieldhosted.com (DUPLICATE!) + }}, + Handle: mainHandlers, + Terminal: true, + } + ``` + +### Why Validator Fails + +```go +// validator.go lines 89-93 +for _, host := range match.Host { + if seenHosts[host] { + return fmt.Errorf("duplicate host matcher: %s", host) + } + seenHosts[host] = true +} ``` -### What Should Happen vs What Actually Happens +The validator: +1. Processes emergency route: adds "immaculaterr.hatfieldhosted.com" to `seenHosts` +2. Processes main route: sees "immaculaterr.hatfieldhosted.com" again β†’ ERROR -| Expected SQL | Actual Broken SQL | -|--------------|-------------------| -| \`WHERE \\\`uptime_hosts\\\`.\\\`id\\\` = "639fb2d8-..."\` | \`WHERE 639fb2d8-... AND \\\`uptime_hosts\\\`.\\\`id\\\` = "639fb2d8-..."\` | +The validator does NOT consider: +- Path matchers that make routes non-overlapping +- Route ordering/priority (emergency route is checked first) +- Caddy's native ability to handle this correctly -**The Problem**: The UUID string \`639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87\` is being leaked as a raw SQL fragment. GORM interprets it as a SQL expression and injects it directly into the WHERE clause. +### Why Caddy Handles This Correctly + +Caddy processes routes in order: +1. First matches emergency route (host + path): `/api/v1/emergency/*` β†’ bypass security +2. Falls through to main route (host only): everything else β†’ apply security + +This is a **valid and intentional design pattern** - the validator is wrong to reject it. --- -## Technical Root Cause +## Solution: Simplified Validator Fix ⭐ CHOSEN APPROACH -### GORM Behavior with String IDs +**Approach**: Minimal fix to allow emergency+main route pattern specifically. -When calling \`.First(&model, id)\`: -1. If the model's ID field is \`uint\`, GORM correctly parameterizes: \`WHERE id = ?\` -2. If the model's ID field is \`string\`, GORM **leaks the ID as raw SQL**: \`WHERE AND id = ?\` +**Implementation**: +- Track hosts seen with path matchers vs without path matchers separately +- Allow duplicate host if ONE has paths and ONE doesn't (the emergency+main pattern) +- Reject if both routes have paths OR both have no paths -### Models Affected by String IDs +**Pros**: +- βœ… Minimal change - unblocks ALL 18 proxy hosts simultaneously +- βœ… Preserves current route structure +- βœ… Simple logic - easy to understand and maintain +- βœ… Fixes the systemic design pattern bug affecting entire reverse proxy -From codebase search, these models have string ID fields: +**Limitations** (Future Work): +- ⚠️ Does not detect complex path overlaps (e.g., `/api/*` vs `/api/v1/*`) +- ⚠️ Full path pattern analysis deferred to future enhancement +- ⚠️ Assumes emergency+main pattern is primary use case -| Model | ID Field | Location | -|-------|----------|----------| -| \`ManualChallenge\` | \`ID string\` | \`internal/models/manual_challenge.go:30\` | -| \`UptimeMonitor\` | \`ID string\` | (inferred from error logs) | -| \`UptimeHost\` | \`ID string\` | (inferred from error logs) | +**Changes Required**: +- `backend/internal/caddy/validator.go`: Simplified duplicate detection (two maps: withPaths/withoutPaths) +- Tests for emergency+main pattern, route ordering, rollback -**Note**: Even models with \`uint\` IDs are vulnerable if the parameter passed is a string! +**Deferred**: +- Database migration (DNS already case-insensitive) +- Complex path overlap detection (future enhancement) --- -## Vulnerability Inventory +## Phase 1: Root Cause Verification - SYSTEMIC SCOPE -### CRITICAL: Production Service Files (12 instances) +**Objective**: Confirm bug affects ALL enabled proxy hosts and document the systemic failure pattern. -1. **access_list_service.go:105** - \`s.db.First(&acl, id)\` -2. **remoteserver_service.go:68** - \`s.db.First(&server, id)\` -3. **notification_service.go:458** - \`s.DB.First(&t, "id = ?", id)\` -4. **uptime_service.go:353** - \`s.DB.First(&uptimeHost, "id = ?", hostID)\` -5. **uptime_service.go:845** - \`s.DB.First(&uptimeHost, "id = ?", hostID)\` -6. **uptime_service.go:999** - \`s.DB.First(&host, hostID)\` -7. **uptime_service.go:1101** - \`s.DB.First(&monitor, "id = ?", id)\` -8. **uptime_service.go:1115** - \`s.DB.First(&monitor, "id = ?", id)\` -9. **uptime_service.go:1143** - \`s.DB.First(&monitor, "id = ?", id)\` -10. **certificate_service.go:412** - \`s.db.First(&cert, id)\` -11. **dns_provider_service.go:118** - \`s.db.WithContext(ctx).First(&provider, id)\` -12. **proxyhost_service.go:108** - \`s.db.First(&host, id)\` +**Tasks**: -### HIGH PRIORITY: Test Files (8 instances causing failures) +1. **Verify Systemic Impact** ⭐ NEW: + - [ ] Query database for ALL enabled proxy hosts (should be 18) + - [ ] Verify Caddy has ZERO routes loaded (admin API check) + - [ ] Document sequential failure pattern (Host 24 disabled β†’ Host 22 fails next) + - [ ] Confirm EVERY enabled host triggers same validator error + - [ ] Test hypothesis: Disable all hosts except one β†’ still fails -#### uptime_service_race_test.go (5 instances) ⚠️ CAUSING TEST FAILURES +2. **Reproduce Error on Multiple Hosts**: + - [ ] Test Host ID 24 (immaculaterr.hatfieldhosted.com) - original failure + - [ ] Test Host ID 22 (dockhand.hatfieldhosted.com) - second failure after disabling 24 + - [ ] Test at least 3 additional hosts to confirm pattern + - [ ] Capture full error message from validator for each + - [ ] Document that error is identical across all hosts -1. **Line 100** πŸ”₯ - \`db.First(&host, host.ID)\` -2. **Line 106** πŸ”₯ - \`db.First(&host, host.ID)\` -3. **Line 152** - \`db.First(&host, host.ID)\` -4. **Line 255** - \`db.First(&updatedHost, "id = ?", host.ID)\` -5. **Line 398** - \`db.First(&updatedHost, "id = ?", host.ID)\` +3. **Analyze Generated Config for ALL Hosts**: + - [ ] Add debug logging to `GenerateConfig` before validation + - [ ] Log `uniqueDomains` list after deduplication for each host + - [ ] Log complete route structure before sending to validator + - [ ] Count how many routes contain each domain (should be 2: emergency + main) + - [ ] Verify emergency+main pattern exists for EVERY proxy host -#### Other Test Files - -6. **credential_service_test.go:426** - \`db.First(&updatedProvider, provider.ID)\` -7. **dns_provider_service_test.go:550** - \`db.First(&dbProvider, provider.ID)\` -8. **dns_provider_service_test.go:617** - \`db.First(&retrieved, provider.ID)\` -9. **security_headers_service_test.go:184** - \`db.First(&saved, profile.ID)\` - ---- - -## Fix Pattern: The Standard Safe Query - -### ❌ VULNERABLE Pattern -\`\`\`go -// NEVER DO THIS with string IDs or when ID type is uncertain: -db.First(&model, id) -db.First(&model, "id = ?", id) // Also wrong! Second param is value, not condition -\`\`\` - -### βœ… SAFE Pattern -\`\`\`go -// ALWAYS DO THIS: -db.Where("id = ?", id).First(&model) -\`\`\` - -### Why This Pattern is Safe - -1. **Explicit Parameterization**: \`id\` is treated as a parameter value, not SQL -2. **No Type Confusion**: Works for both \`uint\` and \`string\` IDs -3. **SQL Injection Safe**: GORM uses prepared statements with \`?\` placeholders -4. **Clear Intent**: Readable and obvious what the query does - ---- - -## Implementation Plan - -### Phase 1: Fix Production Services (CRITICAL) - -**Duration**: 20 minutes -**Priority**: πŸ”΄ CRITICAL - -**Files to Fix** (12 instances): -1. access_list_service.go (Line 105) -2. remoteserver_service.go (Line 68) -3. notification_service.go (Line 458) -4. uptime_service.go (Lines 353, 845, 999, 1101, 1115, 1143) -5. certificate_service.go (Line 412) -6. dns_provider_service.go (Line 118) -7. proxyhost_service.go (Line 108) - -### Phase 2: Fix Test Files - -**Duration**: 15 minutes -**Priority**: 🟑 HIGH - -**Files to Fix** (8+ instances): -1. uptime_service_race_test.go (Lines 100, 106, 152, 255, 398) -2. credential_service_test.go (Line 426) -3. dns_provider_service_test.go (Lines 550, 617) -4. security_headers_service_test.go (Line 184) - -### Phase 3: Fix DNS Provider Test Expectation - -**Duration**: 5 minutes -**Priority**: 🟒 MEDIUM - -**File**: dns_provider_service_test.go -**Lines**: 1382 (comment), 1385 (assertion) - -**Change**: -- Line 1382: Update comment to describe validation failure instead of decryption failure -- Line 1385: Change \`"DECRYPTION_ERROR"\` to \`"VALIDATION_ERROR"\` - -### Phase 4: Verification - -**Duration**: 10 minutes - -**Test Commands**: -\`\`\`bash -cd /projects/Charon/backend - -# Run race tests (primary failure source) -go test -v -race -run TestCheckHost ./internal/services/ - -# Run all uptime tests -go test -v -run Uptime ./internal/services/ - -# Run all service tests -go test ./internal/services/... - -# Verify no vulnerable patterns remain -grep -rn "\\.First(&.*," internal/services/ | grep -v "Where" -\`\`\` +4. **Trace Validation Flow**: + - [ ] Add debug logging to `validateRoute` function + - [ ] Log each host as it's added to `seenHosts` map + - [ ] Log route index and match conditions when duplicate detected + - [ ] Confirm emergency route (index 0) succeeds for all hosts + - [ ] Confirm main route (index 1) triggers duplicate error for all hosts **Success Criteria**: -- βœ… \`uptime_service_race_test.go\` tests pass -- βœ… No SQL syntax errors in logs -- βœ… All service tests pass -- βœ… DNS provider test expectation matches actual behavior -- βœ… Zero vulnerable \`.First(&model, id)\` patterns remain +- βœ… Confirmed: ALL 18 enabled proxy hosts trigger the same error +- βœ… Confirmed: Caddy has ZERO routes loaded (admin API returns empty) +- βœ… Confirmed: Sequential failure pattern documented (disable one β†’ next fails) +- βœ… Confirmed: Emergency+main route pattern exists for EVERY host +- βœ… Confirmed: Validator rejects at main route (index 1) for all hosts +- βœ… Confirmed: This is a design pattern bug, not a data issue + +**Files**: +- `backend/internal/caddy/config.go` - Add debug logging +- `backend/internal/caddy/validator.go` - Add debug logging +- `backend/internal/services/proxyhost_service.go` - Trigger config generation +- `docs/reports/duplicate_proxy_host_diagnosis.md` - Document systemic findings + +**Estimated Time**: 30 minutes (increased for systemic verification) --- -## Timeline Summary +## Phase 2: Fix Validator (Simplified Path Detection) -| Phase | Duration | Priority | Tasks | -|-------|----------|----------|-------| -| **Phase 1** | 20 min | πŸ”΄ CRITICAL | Fix 12 production service methods | -| **Phase 2** | 15 min | 🟑 HIGH | Fix 8+ test file instances | -| **Phase 3** | 5 min | 🟒 MEDIUM | Fix DNS provider test expectation | -| **Phase 4** | 10 min | 🟑 HIGH | Run full test suite verification | -| **TOTAL** | **50 min** | | Complete security fix | +**Objective**: MINIMAL fix to allow emergency+main route pattern (duplicate host where ONE has paths, ONE doesn't). + +**Implementation Strategy**: + +Simplify validator to handle the specific emergency+main pattern: +- Track hosts seen with paths vs without paths +- Allow duplicate hosts if ONE has path matchers, ONE doesn't +- This handles emergency route (has paths) + main route (no paths) + +**Algorithm**: + +```go +// Track hosts by whether they have path constraints +type hostTracking struct { + withPaths map[string]bool // hosts that have path matchers + withoutPaths map[string]bool // hosts without path matchers +} + +for each route: + for each match in route.Match: + for each host: + hasPaths := len(match.Path) > 0 + + if hasPaths: + // Check if we've seen this host WITHOUT paths + if tracking.withoutPaths[host]: + continue // ALLOWED: emergency (with) + main (without) + } + if tracking.withPaths[host]: + return error("duplicate host with paths") + } + tracking.withPaths[host] = true + } else { + // Check if we've seen this host WITH paths + if tracking.withPaths[host]: + continue // ALLOWED: emergency (with) + main (without) + } + if tracking.withoutPaths[host]: + return error("duplicate host without paths") + } + tracking.withoutPaths[host] = true + } +``` + +**Simplified Rules**: +1. Same host + both have paths = DUPLICATE ❌ +2. Same host + both have NO paths = DUPLICATE ❌ +3. Same host + one with paths, one without = ALLOWED βœ… (emergency+main pattern) + +**Future Work**: Full overlap detection for complex path patterns is deferred. + +**Tasks**: + +1. **Create Simple Tracking Structure**: + - [ ] Add `withPaths` and `withoutPaths` maps to validator + - [ ] Track hosts separately based on path presence + +2. **Update Validation Logic**: + - [ ] Check if match has path matchers (len(match.Path) > 0) + - [ ] For hosts with paths: allow if counterpart without paths exists + - [ ] For hosts without paths: allow if counterpart with paths exists + - [ ] Reject if both routes have same path configuration + +3. **Update Error Messages**: + - [ ] Clear error: "duplicate host with paths" or "duplicate host without paths" + - [ ] Document that this is minimal fix for emergency+main pattern + +**Success Criteria**: +- βœ… Emergency + main routes with same host pass validation (one has paths, one doesn't) +- βœ… True duplicates rejected (both with paths OR both without paths) +- βœ… Clear error messages when validation fails +- βœ… All existing tests continue to pass + +**Files**: +- `backend/internal/caddy/validator.go` - Simplified duplicate detection +- `backend/internal/caddy/validator_test.go` - Add test cases + +**Estimated Time**: 30 minutes (simplified approach) + +--- + +## Phase 3: Database Migration (DEFERRED) + +**Status**: ⏸️ DEFERRED - Not needed for this bug fix + +**Rationale**: +- DNS is already case-insensitive by RFC spec +- Caddy handles domains case-insensitively +- No database duplicates found in current data +- This bug is purely a code-level validation issue +- Database constraints can be added in future enhancement if needed + +**Future Consideration**: +If case-sensitive duplicates become an issue in production: +1. Add UNIQUE index on `LOWER(domain_names)` +2. Add `BeforeSave` hook to normalize domains +3. Update frontend validation + +**Estimated Time**: 0 minutes (deferred) + +--- + +## Phase 4: Testing & Verification + +**Objective**: Comprehensive testing to ensure fix works and no regressions. + +**Test Categories**: + +### Unit Tests + +1. **Validator Tests** (`validator_test.go`): + - [ ] Test: Single route with one host β†’ PASS + - [ ] Test: Two routes with different hosts β†’ PASS + - [ ] Test: Emergency + main route pattern (one with paths, one without) β†’ PASS βœ… NEW + - [ ] Test: Two routes with same host, both with paths β†’ FAIL + - [ ] Test: Two routes with same host, both without paths β†’ FAIL + - [ ] Test: Route ordering (emergency before main) β†’ PASS βœ… NEW + - [ ] Test: Multiple proxy hosts (5, 10, 18 hosts) β†’ PASS βœ… NEW + - [ ] Test: All hosts enabled simultaneously (real-world scenario) β†’ PASS βœ… NEW + +2. **Config Generation Tests** (`config_test.go`): + - [ ] Test: Single host generates emergency + main routes + - [ ] Test: Both routes have same domain list + - [ ] Test: Emergency route has path matchers + - [ ] Test: Main route has no path matchers + - [ ] Test: Route ordering preserved (emergency before main) + - [ ] Test: Deduplication map prevents domain appearing twice in `uniqueDomains` + +3. **Performance Tests** (NEW): + - [ ] Benchmark: Validation with 100 routes + - [ ] Benchmark: Validation with 1000 routes + - [ ] Verify: No more than 5% overhead vs old validator + - [ ] Profile: Memory usage with large configs + +### Integration Tests + - Multi-Host Scenario** ⭐ UPDATED: + - [ ] Create proxy_host with domain "ImmaculateRR.HatfieldHosted.com" + - [ ] Trigger config generation via `ApplyConfig` + - [ ] Verify validator passes + - [ ] Verify Caddy accepts config + - [ ] **Enable 5 hosts simultaneously** - verify all routes created + - [ ] **Enable 10 hosts simultaneously** - verify all routes created + - [ ] **Enable all 18 hosts** - verify complete config loads successfully + +2. **Emergency Bypass Test - Multiple Hosts**: + - [ ] Enable multiple proxy hosts with security features (WAF, rate limit) + - [ ] Verify emergency endpoint `/api/v1/emergency/security-reset` bypasses security on ALL hosts + - [ ] Verify main application routes have security checks on ALL hosts + - [ ] Confirm route ordering is correct for ALL hosts (emergency checked first) + +3. **Rollback Test - Systemic Impact**: + - [ ] Apply validator fix + - [ ] Enable ALL 18 proxy hosts successfully + - [ ] Verify Caddy loads all routes (admin API check) + - [ ] Rollback to old validator code + - [ ] Verify sequential failures (Host 24 β†’ Host 22 β†’ ...) + - [ ] Re-apply fix and confirm all 18 hosts work + +4. **Caddy AdmiALL Proxy Hosts** ⭐ UPDATED: + - [ ] Update database: `UPDATE proxy_hosts SET enabled = 1` (enable ALL hosts) + - [ ] Restart backend or trigger config reload + - [ ] Verify no "duplicate host matcher" errors for ANY host + - [ ] Verify Caddy logs show successful config load with all routes + - [ ] Query Caddy admin API: confirm 36+ routes loaded + - [ ] Test at least 5 different domains in browser + +2. **Cross-Browser Test - Multiple Hosts**: + - [ ] Test at least 3 different proxy host domains from multiple browsers + - [ ] Verify HTTPS redirects work correctly on all tested hosts + - [ ] Confirm no certificate warnings on any host + - [ ] Test emergency endpoint accessibility on all hosts + +3. **Load Test - All Hosts Enabled** ⭐ NEW: + - [ ] Enable all 18 proxy hosts + - [ ] Verify backend startup time is acceptable (<30s) + - [ ] Verify Caddy config reload time is acceptable (<5s) + - [ ] Monitor memory usage with full config loaded + - [ ] Verify no performance degradation vs single host + +**Success Criteria**: +- βœ… All unit tests pass (including multi-host scenarios) +- βœ… All integration tests pass (including 5, 10, 18 host scenarios) +- βœ… ALL 18 proxy hosts can be enabled simultaneously without errors +- βœ… Caddy admin API shows 36+ routes loaded (2 per host minimum) +- βœ… Emergency routes bypass security correctly on ALL hosts +- βœ… Route ordering verified for ALL hosts (emergency before main) +- βœ… Rollback test proves fix was necessary (sequential failures return) + - [ ] Test emergency endpoint accessibility + +**Success Criteria**: +- βœ… All unit tests p60 minutes (increased for multi-host testing) +- βœ… All integration tests pass +- βœ… Host ID 24 can be enabled without errors +- βœ… Emergency routes bypass security correctly +- βœ… Route ordering verified (emergency before main) +- βœ… Rollback test proves fix was necessary +- βœ… Performance benchmarks show <5% overhead +- βœ… No regressions in existing functionality + +**Estimated Time**: 45 minutes + +--- + +## Phase 5: Documentation & Deployment + +**Objective**: Document the fix, update runbooks, and prepare for deployment. + +**Tasks**: + +1. **Code Documentation**: + - [ ] Add comprehensive comments to validator route signature logic + - [ ] Document why duplicate hosts with different paths are allowed + - [ ] Add examples of valid and invalid route patterns + - [ ] Document edge cases and how they're handled + +2. **API Documentation**: + - [ ] Update `/docs/api.md` with validator behavior + - [ ] Document emergency+main route pattern + - [ ] Explain why duplicate hosts are allowed in this case + - [ ] Add note that DNS is case-insensitive by nature + +3. **Runbook Updates**: + - [ ] Create "Duplicate Host Matcher Error" troubleshooting section + - [ ] Document root cause and fix + - [ ] Add steps to diagnose similar issues + - [ ] Add validation bypass procedure (if needed for emergency) + +4. **Troubleshooting Guide**: + - [ ] Document "duplicate host matcher" error + - [ ] Explain emergency+main route pattern + - [ ] Provide steps to verify route ordering + - [ ] Add validation test procedure + +5. **Changelog**: + - [ ] Add entry to `CHANGELOG.md` under "Fixed" section: + ```markdown + ### Fixed + - **CRITICAL**: Fixed systemic "duplicate host matcher" error affecting ALL 18 enabled proxy hosts + - Simplified Caddy config validator to allow emergency+main route pattern (one with paths, one without) + - Restored full reverse proxy functionality - Caddy now correctly loads routes for all enabled hosts + - Emergency bypass routes now function correctly for all proxy hosts + ``` + +6. **Create Diagnostic Tool** (Optional Enhancement): + - [ ] Add admin API endpoint: `GET /api/v1/debug/caddy-routes` + - [ ] Returns current route structure with host/path matchers + - [ ] Highlights potential conflicts before validation + - [ ] Useful for troubleshooting future issues + +**Success Criteria**: +- βœ… Code is well-documented with clear explanations +- βœ… API docs reflect new behavior +- βœ… Runbook provides clear troubleshooting steps +- βœ… Migration is documented and tested +- βœ… Changelog is updated + +**Files**: +- `backend/internal/caddy/validator.go` - Inline comments +- `backend/internal/caddy/config.go` - Route generation comments +- `docs/api.md` - API documentation +- `docs/troubleshooting/duplicate-host-matcher.md` - NEW runbook +- `CHANGELOG.md` - Version entry + +**Estimated Time**: 30 minutes + +---Phase 6: Performance Investigation (DEFERRED - Optional) + +**Status**: ⏸️ DEFERRED - Secondary issue, not blocking proxy functionality +ALL 18 enabled proxy hosts can be enabled simultaneously without errors +- βœ… Caddy loads all routes successfully (36+ routes via admin API) +- βœ… Emergency routes bypass security features as designed on ALL hosts +- βœ… Main routes apply security features correctly on ALL hosts +- βœ… No false positives from validator for valid configs +- βœ… True duplicate routes still rejected appropriately +- βœ… Full reverse proxy functionality restored +- Slow queries on `security_configs` table +- May impact monitoring responsiveness but does not block proxy functionality + +**Tasks**: + +1. **Query Profiling**: + - [ ] Enable query logging in production + - [ ] Identify slowest queries with EXPLAIN ANALYZE + - [ ] Profile table sizes and row counts + - [ ] Check existing indexes + +2. **Index Analysis**: + - [ ] Analyze missing indexes on `uptime_heartbeats` + - [ ] Analyze missing indexes on `security_configs` + - [ ] Propose index additions if needed + - [ ] Test index performance impact + +3. **Optimization**: + - [ ] Add indexes if justified by query patterns + - [ ] Consider query optimization (LIMIT, pagination) + - [ ] Monitor performance after changes + - [ ] Document index strategy + +**Priority**: LOW - Does not block proxy functionality +**Estimated Time**: Deferred until Phase 2 is complete + +--- + +## Phase 7: Container Health Check In- SYSTEMIC SCOPE (30 min) +- [ ] Verify ALL 18 enabled hosts trigger validator error +- [ ] Test sequential failure pattern (disable one β†’ next fails) +- [ ] Confirm Caddy has ZERO routes loaded (admin API check) +- [ ] Verify emergency+main route pattern exists for EVERY host +- [ ] Add debug logging to config generation and validator +- [ ] Document systemic findings in diagnosis report + +### Phase 2: Fix Validator - SIMPLIFIED (30 min) +- [ ] Create simple tracking structure (withPaths/withoutPaths maps) +- [ ] Update validation logic to allow one-with-paths + one-without-paths +- [ ] Update error messages +- [ ] Write unit tests for emergency+main pattern +- [ ] Add multi-host test scenarios (5, 10, 18 hosts) +- [ ] Verify route ordering preserved + +### Phase 3: Database Migration (0 min) +- [x] DEFERRED - Not needed for this bug fix + +### Phase 4: Testing - MULTI-HOST SCENARIOS (60 min) +- [ ] Write/update validator unit tests (emergency+main pattern) +- [ ] Add multi-host test scenarios (5, 10, 18 hosts) +- [ ] Write/update config generation tests (route ordering, all hosts) +- [ ] Add performance benchmarks (validate handling 18+ hosts) +- [ ] Run integration tests with all hosts enabled +- [ ] Perform rollback test (verify sequential failures return) +- [ ] Re-enable ALL 18 hosts and verify Caddy loads all routes +- [ ] Verify Caddy admin API shows 36+ routes + +### Phase 5: Documentation (30 min) +- [ ] Add code comments explaining simplified approach +- [ ] Update API documentation +- [ ] Create troubleshooting guide emphasizing systemic nature +- [ ] Update changelog with CRITICAL scope +- [ ] Document that full overlap detection is future work +- [ ] Document multi-host verification steps + +### Phase 6: Performance Investigation (DEFERRED) +- [ ] DEFERRED - Slow SQL queries (uptime_heartbeats, security_configs) +- [ ] Track as separate issue if proxy functionality is restored + +### Phase 7: Health Check Investigation (DEFERRED) +- [ ] DEFERRED - Container health check fails despite 200 OK +- [ ] Track as separate issue if proxy functionality is restored + +**Total Estimated Time**: 2 hours 30 minutes (updated for systemic scope + +--- + +## + +## Success Metrics + +### Functionality +- βœ… Host ID 24 (immaculaterr.hatfieldhosted.com) can be enabled without errors +- βœ… Emergency routes bypass security features as designed +- βœ… Main routes apply security features correctly +- βœ… No false positives from validator for valid configs +- βœ… True duplicate routes still rejected appropriately + +### Performance +- βœ… Validation performance not significantly impacted (< 5% overhead) +- βœ… Config generation time unchanged +- βœ… Database query performance not affected by new index + +### Quality +- βœ… Zero regressions in existing tests +- βœ… New test coverage for path-aware validation +- βœ… Clear error messages for validation failures +- βœ… Code is maintainable and well-documented --- ## Risk Assessment -### Security Impact - -| Risk | Before Fix | After Fix | -|------|-----------|-----------| -| **SQL Injection** | πŸ”΄ HIGH | βœ… None | -| **Test Failures** | πŸ”΄ Blocking | βœ… All pass | -| **Production Bugs** | πŸ”΄ Likely | βœ… None | -| **Data Corruption** | 🟑 Possible | βœ… None | - -### Deployment Risk - -**Risk Level**: LOW - -**Justification**: -- Changes are surgical and well-defined -- Pattern is mechanical (\`.First(&model, id)\` β†’ \`.Where("id = ?", id).First(&model)\`) -- No business logic changes -- Comprehensive test coverage validates correctness +| Risk | Impact | Mitigation | +|------|--------|------------| +| **Validator Too Permissive** | High | Comprehensive test suite with negative test cases | +| **Route Ordering Issues** | Medium | Integration tests verify emergency routes checked first | +| **Migration Failure** | Low | Reversible migration + pre-flight data validation | +| **Case Normalization Breaks Existing Domains** | Low | Normalization is idempotent (lowercase β†’ lowercase) | +| **Performance Degradation** | Low | Profile validator changes, ensure <5% overhead | --- -## Code Change Summary +## Implementation Checklist -### Production Services: 12 Changes -- access_list_service.go: 1 fix -- remoteserver_service.go: 1 fix -- notification_service.go: 1 fix -- uptime_service.go: 6 fixes -- certificate_service.go: 1 fix -- dns_provider_service.go: 1 fix -- proxyhost_service.go: 1 fix +### Phase 1: Root Cause Verification (20 min) +- [ ] Reproduce error on demand +- [ ] Add debug logging to config generation +- [ ] Add debug logging to validator +- [ ] Confirm emergency + main route pattern +- [ ] Document findings -### Test Files: 8+ Changes -- uptime_service_race_test.go: 5 fixes -- credential_service_test.go: 1 fix -- dns_provider_service_test.go: 2 fixes (1 GORM, 1 expectation) -- security_headers_service_test.go: 1 fix +### Phase 2: Fix Validator - SIMPLIFIED (30 min) +- [ ] Create simple tracking structure (withPaths/withoutPaths maps) +- [ ] Update validation logic to allow one-with-paths + one-without-paths +- [ ] Update error messages +- [ ] Write unit tests for emergency+main pattern +- [ ] Verify route ordering preserved -### Total Lines Changed: ~25 lines -### Total Files Modified: 9 files +### Phase 3: Database Migration (0 min) +- [x] DEFERRED - Not needed for this bug fix + +### Phase 4: Testing (45 min) +- [ ] Write/update validator unit tests (emergency+main pattern) +- [ ] Write/update config generation tests (route ordering) +- [ ] Add performance benchmarks +- [ ] Run integration tests +- [ ] Perform rollback test +- [ ] Re-enable Host ID 24 verification + +### Phase 5: Documentation (30 min) +- [ ] Add code comments explaining simplified approach +- [ ] Update API documentation +- [ ] Create troubleshooting guide +- [ ] Update changelog +- [ ] Document that full overlap detection is future work + +**T**Re-enable ALL proxy hosts** (not just Host ID 24) +4. Verify Caddy loads all routes successfully (admin API check) +5. Verify emergency routes work correctly on all hosts + +### Post-Deployment +1. Verify ALL 18 proxy hosts are accessible +2. Verify Caddy admin API shows 36+ routes loaded +3. Test emergency endpoint bypasses security on multiple hosts +4. Monitor for "duplicate host matcher" errors (should be zero) +5. Verify full reverse proxy functionality restored +6. Monitor performance with all hosts enabled + +### Rollback Plan +If issues arise: +1. Rollback backend to previous version +2. Document which hosts fail (expect sequential pattern) +3. Review validator logs to identify cause +4. Disable problematic hosts temporarily if needed +5. Re-apply fix after investigation +3. Re-enable Host ID 24 if still disabled +4. Verify emergency routes work correctly + +### Post-Deployment +1. Verify Host ID 24 is accessible +2. Test emergency endpoint bypasses security +3. Monitor for "duplicate host matcher" errors +4. Check database constraint is enforcing uniqueness + +### Rollback Plan +If issues arise: +1. Rollback backend to previous version +2. Re-disable Host ID 24 if necessary +3. Review validator logs to identify cause +4. Investigate unexpected route patterns --- -## Success Criteria +## Future Enhancements -- [x] Root cause identified for SQL injection vulnerability -- [x] All vulnerable instances catalogued (20+ total) -- [x] Comprehensive fix plan documented -- [ ] All production service methods fixed (12 instances) -- [ ] All test files fixed (8+ instances) -- [ ] Test expectation corrected (DNS provider test) -- [ ] Race tests pass without SQL errors -- [ ] Full test suite passes +1. **Full Path Overlap Detection**: + - Current fix handles emergency+main pattern only (one-with-paths + one-without-paths) + - Future: Detect complex overlaps (e.g., `/api/*` vs `/api/v1/*`) + - Future: Validate path pattern specificity + - Future: Warn on ambiguous route priority + +2. **Visual Route Debugger**: + - Admin UI component showing route tree + - Highlights potential conflicts +## Known Secondary Issues (Tracked Separately) + +These issues were discovered during diagnosis but are NOT blocking proxy functionality: + +1. **Slow SQL Queries (Phase 6 - DEFERRED)**: + - `uptime_heartbeats` table queries >200ms + - `security_configs` table queries >200ms + - Impacts monitoring responsiveness, not proxy functionality + - **Action**: Track as separate performance issue after Phase 2 complete + +2. **Container Health Check Failure (Phase 7 - DEFERRED)**: + - Backend health endpoint returns 200 OK consistently + - Docker container marked as unhealthy + - May be timeout issue (3s too short?) + - Does not affect proxy functionality (backend is running) + - **Action**: Track as separate Docker configuration issue after Phase 2 complete --- -## Related Documentation +**Plan Status**: βœ… READY FOR IMPLEMENTATION (EXPANDED SCOPE) +**Next Action**: Begin Phase 1 - Root Cause Verification - SYSTEMIC SCOPE +**Assigned To**: Implementation Agent +**Priority**: CRITICAL πŸ”΄πŸ”΄πŸ”΄ - ALL 18 PROXY HOSTS DOWN, ZERO CADDY ROUTES LOADED +**Scope**: Systemic bug affecting entire reverse proxy functionality (not single-host issue) + - Warn (don't error) on suspicious patterns + - Suggest route optimizations + - Show effective route priority + - Highlight overlapping matchers -- **GORM Documentation**: https://gorm.io/docs/query.html -- **SQL Injection Prevention**: OWASP Top 10 (A03:2021 – Injection) -- **Parameterized Queries**: https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html +4. **Database Domain Normalization** (if needed): + - Add case-insensitive uniqueness constraint + - BeforeSave hook for normalization + - Frontend validation hints + - Only if case duplicates become production issue --- -## Notes - -### Why This Bug is Insidious - -1. **Silent Failure**: Works fine with \`uint\` IDs, fails with \`string\` IDs -2. **Type-Dependent**: Behavior changes based on model ID field type -3. **Misleading API**: \`.First(&model, id)\` looks safe but isn't -4. **Widespread**: Affects 20+ instances across codebase -5. **Hard to Debug**: SQL errors appear cryptic and non-obvious - -### Best Practices Going Forward - -βœ… **DO**: -- Always use \`.Where("id = ?", id).First(&model)\` -- Use parameterized queries for all dynamic values -- Test with both \`uint\` and \`string\` ID models - -❌ **DON'T**: -- Never use \`.First(&model, id)\` with string IDs -- Never trust GORM to handle ID types correctly -- Never pass SQL fragments as \`.First()\` parameters - ---- - -**Plan Status**: βœ… COMPLETE AND READY FOR IMPLEMENTATION - -**Next Action**: Begin Phase 1 - Fix Production Services +**Plan Status**: βœ… READY FOR IMPLEMENTATION +**Next Action**: Begin Phase 1 - Root Cause Verification +**Assigned To**: Implementation Agent +**Priority**: HIGH - Blocking Host ID 24 from being enabled diff --git a/docs/reports/duplicate_proxy_host_diagnosis.md b/docs/reports/duplicate_proxy_host_diagnosis.md new file mode 100644 index 00000000..da1f2107 --- /dev/null +++ b/docs/reports/duplicate_proxy_host_diagnosis.md @@ -0,0 +1,453 @@ +# Duplicate Proxy Host Diagnosis Report + +**Date:** 2026-01-28 +**Issue:** Charon container unhealthy, all proxy hosts down +**Error:** `validation failed: invalid route 1 in server charon_server: duplicate host matcher: immaculaterr.hatfieldhosted.com` + +--- + +## Executive Summary + +**Finding:** The database contains NO duplicate entries. There is only **one** proxy_host record for domain `Immaculaterr.hatfieldhosted.com` (ID 24). The duplicate host matcher error from Caddy indicates a **code-level bug** in the configuration generation logic, NOT a database integrity issue. + +**Impact:** +- Caddy failed to load configuration at startup +- All proxy hosts are unreachable +- Container health check failing +- Frontend still accessible (direct backend connection) + +**Root Cause:** Unknown bug in Caddy config generation that produces duplicate host matchers for the same domain, despite deduplication logic being present in the code. + +--- + +## Investigation Details + +### 1. Database Analysis + +#### Active Database Location +- **Host path:** `/projects/Charon/data/charon.db` (empty/corrupted - 0 bytes) +- **Container path:** `/app/data/charon.db` (active - 177MB) +- **Backup:** `/projects/Charon/data/charon.db.backup-20260128-065828` (empty - contains schema but no data) + +#### Database Integrity Check + +**Total Proxy Hosts:** 19 +**Query Results:** +```sql +-- Check for the problematic domain +SELECT id, uuid, name, domain_names, enabled, created_at, updated_at +FROM proxy_hosts +WHERE domain_names LIKE '%immaculaterr%'; +``` + +**Result:** Only **ONE** entry found: +``` +ID: 24 +UUID: 4f392485-405b-4a35-b022-e3d16c30bbde +Name: Immaculaterr +Domain: Immaculaterr.hatfieldhosted.com (note: capital 'I') +Forward Host: Immaculaterr +Forward Port: 5454 +Enabled: true +Created: 2026-01-16 20:42:59 +Updated: 2026-01-16 20:42:59 +``` + +#### Duplicate Detection Queries + +**Test 1: Case-insensitive duplicate check** +```sql +SELECT COUNT(*), LOWER(domain_names) +FROM proxy_hosts +GROUP BY LOWER(domain_names) +HAVING COUNT(*) > 1; +``` +**Result:** 0 duplicates found + +**Test 2: Comma-separated domains check** +```sql +SELECT id, name, domain_names +FROM proxy_hosts +WHERE domain_names LIKE '%,%'; +``` +**Result:** No multi-domain entries found + +**Test 3: Locations check (could cause route duplication)** +```sql +SELECT ph.id, ph.name, ph.domain_names, COUNT(l.id) as location_count +FROM proxy_hosts ph +LEFT JOIN locations l ON l.proxy_host_id = ph.id +WHERE ph.enabled = 1 +GROUP BY ph.id; +``` +**Result:** All proxy_hosts have 0 locations, including ID 24 + +**Test 4: Advanced config check** +```sql +SELECT id, name, domain_names, advanced_config +FROM proxy_hosts +WHERE id = 24; +``` +**Result:** No advanced_config set (NULL) + +**Test 5: Soft deletes check** +```sql +.schema proxy_hosts | grep -i deleted +``` +**Result:** No soft delete columns exist + +**Conclusion:** Database is clean. Only ONE entry for this domain exists. + +--- + +### 2. Error Analysis + +#### Error Message from Docker Logs +``` +{"error":"validation failed: invalid route 1 in server charon_server: duplicate host matcher: immaculaterr.hatfieldhosted.com","level":"error","msg":"Failed to apply initial Caddy config","time":"2026-01-28T13:18:53-05:00"} +``` + +#### Key Observations: +1. **"invalid route 1"** - This is the SECOND route (0-indexed), suggesting the first route (index 0) is valid +2. **Lowercase domain** - Caddy error shows `immaculaterr` (lowercase) but database has `Immaculaterr` (capital I) +3. **Timing** - Error occurs at initial startup when `ApplyConfig()` is called +4. **Validation stage** - Error happens in Caddy's validation, not in Charon's generation + +#### Code Review Findings + +**File:** `/projects/Charon/backend/internal/caddy/config.go` +**Function:** `GenerateConfig()` (line 19) + +**Deduplication Logic Present:** +- Line 437: `processedDomains := make(map[string]bool)` - Track processed domains +- Line 469-488: Domain normalization and duplicate detection + ```go + d = strings.TrimSpace(d) + d = strings.ToLower(d) // Normalize to lowercase + if processedDomains[d] { + logger.Log().WithField("domain", d).Warn("Skipping duplicate domain") + continue + } + processedDomains[d] = true + ``` +- Line 461: Reverse iteration to prefer newer hosts + ```go + for i := len(hosts) - 1; i >= 0; i-- + ``` + +**Expected Behavior:** The deduplication logic SHOULD prevent this error. + +**Hypothesis:** One of the following is occurring: +1. **Bug in deduplication logic:** The domain is bypassing the duplicate check +2. **Multiple code paths:** Domain is added through a different path (e.g., frontend route, locations, advanced config) +3. **Database query issue:** GORM joins/preloads causing duplicate records in the Go slice +4. **Race condition:** Config is being generated/applied multiple times simultaneously (unlikely at startup) + +--- + +### 3. All Proxy Hosts in Database + +``` +ID Name Domain +2 FileFlows fileflows.hatfieldhosted.com +4 Profilarr profilarr.hatfieldhosted.com +5 HomePage homepage.hatfieldhosted.com +6 Prowlarr prowlarr.hatfieldhosted.com +7 Tautulli tautulli.hatfieldhosted.com +8 TubeSync tubesync.hatfieldhosted.com +9 Bazarr bazarr.hatfieldhosted.com +11 Mealie mealie.hatfieldhosted.com +12 NZBGet nzbget.hatfieldhosted.com +13 Radarr radarr.hatfieldhosted.com +14 Sonarr sonarr.hatfieldhosted.com +15 Seerr seerr.hatfieldhosted.com +16 Plex plex.hatfieldhosted.com +17 Charon charon.hatfieldhosted.com +18 Wizarr wizarr.hatfieldhosted.com +20 PruneMate prunemate.hatfieldhosted.com +21 GiftManager giftmanager.hatfieldhosted.com +22 Dockhand dockhand.hatfieldhosted.com +24 Immaculaterr Immaculaterr.hatfieldhosted.com ← PROBLEMATIC +``` + +**Note:** ID 24 is the newest proxy_host (most recent updated_at timestamp). + +--- + +### 4. Caddy Configuration State + +**Current Status:** NO configuration loaded (Caddy is running with minimal admin-only config) + +**Query:** `curl localhost:2019/config/` returns empty/default config + +**Last Successful Config:** +- Timestamp: 2026-01-27 19:15:38 +- Config Hash: `a87bd130369d62ab29a1fcf377d855a5b058223c33818eacff6f7312c2c4d6a0` +- Status: Success (before ID 24 was added) + +**Recent Config History (from caddy_configs table):** +``` +ID Hash Applied At Success +299 a87bd130...c2c4d6a0 2026-01-27 19:15:38 true +298 a87bd130...c2c4d6a0 2026-01-27 15:40:56 true +297 a87bd130...c2c4d6a0 2026-01-27 03:34:46 true +296 dbf4c820...d963b234 2026-01-27 02:01:45 true +295 dbf4c820...d963b234 2026-01-27 02:01:45 true +``` + +All recent configs were successful. The failure happened on 2026-01-28 13:18:53 (not recorded in table due to early validation failure). + +--- + +### 5. Database File Status + +**Critical Issue:** The host's `/projects/Charon/data/charon.db` file is **empty** (0 bytes). + +**Timeline:** +- Original file was likely corrupted or truncated +- Container is using an in-memory or separate database file +- Volume mount may be broken or asynchronous + +**Evidence:** +```bash +-rw-r--r-- 1 root root 0 Jan 28 18:24 /projects/Charon/data/charon.db +-rw-r--r-- 1 root root 177M Jan 28 18:26 /projects/Charon/data/charon.db.investigation +``` + +The actual database was copied from the container. + +--- + +## Recommended Remediation Plan + +### Immediate Short-Term Fix (Workaround) + +**Option 1: Disable Problematic Proxy Host** +```sql +-- Run inside container +docker exec charon sqlite3 /app/data/charon.db \ + "UPDATE proxy_hosts SET enabled = 0 WHERE id = 24;" + +-- Restart container to apply +docker restart charon +``` + +**Option 2: Delete Duplicate Entry (if acceptable data loss)** +```sql +docker exec charon sqlite3 /app/data/charon.db \ + "DELETE FROM proxy_hosts WHERE id = 24;" +docker restart charon +``` + +**Option 3: Change Domain to Bypass Duplicate Detection** +```sql +-- Temporarily rename the domain to isolate the issue +docker exec charon sqlite3 /app/data/charon.db \ + "UPDATE proxy_hosts SET domain_names = 'immaculaterr-temp.hatfieldhosted.com' WHERE id = 24;" +docker restart charon +``` + +### Medium-Term Fix (Debug & Patch) + +**Step 1: Enable Debug Logging** +```bash +# Set debug logging in container +docker exec charon sh -c "export CHARON_DEBUG=1; kill -HUP \$(pidof charon)" +``` + +**Step 2: Generate Config Manually** +Create a debug script to generate and inspect the Caddy config: +```go +// In backend/cmd/debug/main.go +package main + +import ( + "encoding/json" + "fmt" + "log" + + "github.com/Wikid82/charon/backend/internal/caddy" + "github.com/Wikid82/charon/backend/internal/database" + "github.com/Wikid82/charon/backend/internal/models" +) + +func main() { + db, _ := database.Connect("data/charon.db") + var hosts []models.ProxyHost + db.Preload("Locations").Preload("DNSProvider").Find(&hosts) + + config, err := caddy.GenerateConfig(hosts, "data/caddy/data", "", "frontend/dist", "", false, false, false, false, false, "", nil, nil, nil, nil, nil) + if err != nil { + log.Fatal(err) + } + + json, _ := json.MarshalIndent(config, "", " ") + fmt.Println(string(json)) +} +``` + +Run and inspect: +```bash +go run backend/cmd/debug/main.go > /tmp/caddy-config-debug.json +jq '.apps.http.servers.charon_server.routes[] | select(.match[0].host[] | contains("immaculaterr"))' /tmp/caddy-config-debug.json +``` + +**Step 3: Add Unit Test** +```go +// In backend/internal/caddy/config_test.go +func TestGenerateConfig_PreventCaseSensitiveDuplicates(t *testing.T) { + hosts := []models.ProxyHost{ + {UUID: "uuid-1", DomainNames: "Example.com", Enabled: true, ForwardHost: "app1", ForwardPort: 8080}, {UUID: "uuid-2", DomainNames: "example.com", Enabled: true, ForwardHost: "app2", ForwardPort: 8081}, + } + + config, err := GenerateConfig(hosts, "/tmp/data", "", "", "", false, false, false, false, false, "", nil, nil, nil, nil, nil) + require.NoError(t, err) + + // Should only have ONE route for this domain (not two) + server := config.Apps.HTTP.Servers["charon_server"] + routes := server.Routes + + domainCount := 0 + for _, route := range routes { + for _, match := range route.Match { + for _, host := range match.Host { + if strings.ToLower(host) == "example.com" { + domainCount++ + } + } + } + } + + assert.Equal(t, 1, domainCount, "Should only have one route for case-insensitive duplicate domain") +} +``` + +### Long-Term Fix (Root Cause Prevention) + +**1. Add Database Constraint** +```sql +-- Create unique index on normalized domain names +CREATE UNIQUE INDEX idx_proxy_hosts_domain_names_lower +ON proxy_hosts(LOWER(domain_names)); +``` + +**2. Add Pre-Save Validation Hook** +```go +// In backend/internal/models/proxy_host.go +func (p *ProxyHost) BeforeSave(tx *gorm.DB) error { + // Normalize domain names to lowercase + p.DomainNames = strings.ToLower(p.DomainNames) + + // Check for existing domain (case-insensitive) + var existing ProxyHost + if err := tx.Where("id != ? AND LOWER(domain_names) = ?", + p.ID, strings.ToLower(p.DomainNames)).First(&existing).Error; err == nil { + return fmt.Errorf("domain %s already exists (ID: %d)", p.DomainNames, existing.ID) + } + + return nil +} +``` + +**3. Add Duplicate Detection to Frontend** +```typescript +// In frontend/src/components/ProxyHostForm.tsx +const checkDomainUnique = async (domain: string) => { + const response = await api.get(`/api/v1/proxy-hosts?domain=${encodeURIComponent(domain.toLowerCase())}`); + if (response.data.length > 0) { + setError(`Domain ${domain} is already in use by "${response.data[0].name}"`); + return false; + } + return true; +}; +``` + +**4. Add Monitoring/Alerting** +- Add Prometheus metric for config generation failures +- Set up alert for repeated validation failures +- Log full generated config to file for debugging + +--- + +## Next Steps + +### Immediate Action Required (Choose ONE): + +**Recommended:** Option 1 (Disable) +- **Pros:** Non-destructive, can re-enable later, allows investigation +- **Cons:** Service unavailable until bug is fixed +- **Command:** + ```bash + docker exec charon sqlite3 /app/data/charon.db \ + "UPDATE proxy_hosts SET enabled = 0 WHERE id = 24;" + docker restart charon + ``` + +### Follow-Up Investigation: + +1. **Check for code-level bug:** Add debug logging to `GenerateConfig()` to print: + - Total hosts processed + - Each domain being added to processedDomains map + - Final route count vs expected count + +2. **Verify GORM query behavior:** Check if `.Preload()` is causing duplicate records in the slice + +3. **Test with minimal reproduction:** Create a fresh database with only ID 24, see if error persists + +4. **Review recent commits:** Check if any recent changes to config.go introduced the bug + +--- + +## Files Involved + +- **Database:** `/app/data/charon.db` (inside container) +- **Backup:** `/projects/Charon/data/charon.db.backup-20260128-065828` +- **Investigation Copy:** `/projects/Charon/data/charon.db.investigation` +- **Code:** `/projects/Charon/backend/internal/caddy/config.go` (GenerateConfig function) +- **Manager:** `/projects/Charon/backend/internal/caddy/manager.go` (ApplyConfig function) + +--- + +## Appendix: SQL Queries Used + +```sql +-- Find all proxy hosts with specific domain +SELECT id, uuid, name, domain_names, forward_host, forward_port, enabled, created_at, updated_at +FROM proxy_hosts +WHERE domain_names LIKE '%immaculaterr.hatfieldhosted.com%' +ORDER BY created_at; + +-- Count total hosts +SELECT COUNT(*) as total FROM proxy_hosts; + +-- Check for duplicate domains (case-insensitive) +SELECT COUNT(*), domain_names +FROM proxy_hosts +GROUP BY LOWER(domain_names) +HAVING COUNT(*) > 1; + +-- Check proxy hosts with locations +SELECT ph.id, ph.name, ph.domain_names, COUNT(l.id) as location_count +FROM proxy_hosts ph +LEFT JOIN locations l ON l.proxy_host_id = ph.id +WHERE ph.enabled = 1 +GROUP BY ph.id +ORDER BY ph.id; + +-- Check recent Caddy config applications +SELECT * FROM caddy_configs +ORDER BY applied_at DESC +LIMIT 5; + +-- Get all enabled proxy hosts +SELECT id, name, domain_names, enabled +FROM proxy_hosts +WHERE enabled = 1 +ORDER BY id; +``` + +--- + +**Report Generated By:** GitHub Copilot +**Investigation Date:** 2026-01-28 +**Status:** Investigation Complete - Awaiting Remediation Decision