hotfix: simplify Caddy validator to allow emergency+main route pattern for duplicate hosts

This commit is contained in:
GitHub Actions
2026-01-28 19:48:49 +00:00
parent 5bcf889f84
commit 98ab664b37
5 changed files with 1419 additions and 240 deletions

View File

@@ -701,6 +701,13 @@ func GenerateConfig(hosts []models.ProxyHost, storageDir, acmeEmail, frontendDir
Handle: emergencyHandlers,
Terminal: true,
}
logger.Log().WithFields(map[string]any{
"host_id": host.ID,
"host_uuid": host.UUID,
"unique_domains": uniqueDomains,
"has_paths": true,
"path_count": len(emergencyPaths),
}).Debug("[CONFIG DEBUG] Creating EMERGENCY route")
routes = append(routes, emergencyRoute)
mainHandlers := append(append([]Handler{}, securityHandlers...), handlers...)
@@ -714,6 +721,14 @@ func GenerateConfig(hosts []models.ProxyHost, storageDir, acmeEmail, frontendDir
Terminal: true,
}
logger.Log().WithFields(map[string]any{
"host_id": host.ID,
"host_uuid": host.UUID,
"unique_domains": uniqueDomains,
"has_paths": false,
"route_type": "main",
}).Debug("[CONFIG DEBUG] Creating MAIN route (no path matchers)")
routes = append(routes, route)
}

View File

@@ -6,6 +6,8 @@ import (
"net"
"strconv"
"strings"
"github.com/Wikid82/charon/backend/internal/logger"
)
// Validate performs pre-flight validation on a Caddy config before applying it.
@@ -18,8 +20,9 @@ func Validate(cfg *Config) error {
return nil // Empty config is valid
}
// Track seen hosts to detect duplicates
seenHosts := make(map[string]bool)
// Track seen hosts with their path configuration
// Value: "with_paths" or "without_paths"
seenHosts := make(map[string]string)
for serverName, server := range cfg.Apps.HTTP.Servers {
if len(server.Listen) == 0 {
@@ -81,18 +84,48 @@ func validateListenAddr(addr string) error {
return nil
}
func validateRoute(route *Route, seenHosts map[string]bool) error {
func validateRoute(route *Route, seenHosts map[string]string) error {
if len(route.Handle) == 0 {
return fmt.Errorf("route has no handlers")
}
// Check for duplicate host matchers
// Check for duplicate host matchers with incompatible path configurations
// Allow emergency+main pattern: one route with paths, one without
for _, match := range route.Match {
hasPaths := len(match.Path) > 0
pathConfig := "without_paths"
if hasPaths {
pathConfig = "with_paths"
}
for _, host := range match.Host {
if seenHosts[host] {
return fmt.Errorf("duplicate host matcher: %s", host)
logger.Log().WithFields(map[string]any{
"host": host,
"has_paths": hasPaths,
"paths": match.Path,
"path_config": pathConfig,
}).Debug("[VALIDATOR] Checking host matcher")
if existingConfig, seen := seenHosts[host]; seen {
// Host already seen - check if path configs are compatible
if existingConfig == pathConfig {
// Same path configuration = true duplicate
if pathConfig == "with_paths" {
logger.Log().WithField("host", host).Error("[VALIDATOR] Duplicate host with paths")
return fmt.Errorf("duplicate host with paths: %s", host)
}
logger.Log().WithField("host", host).Error("[VALIDATOR] Duplicate host without paths")
return fmt.Errorf("duplicate host without paths: %s", host)
}
// Different path configuration = emergency+main pattern (ALLOWED)
logger.Log().WithFields(map[string]any{
"host": host,
"existing_config": existingConfig,
"new_config": pathConfig,
}).Debug("[VALIDATOR] Allowing emergency+main pattern")
continue
}
seenHosts[host] = true
seenHosts[host] = pathConfig
}
}

View File

@@ -0,0 +1,287 @@
package caddy
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestValidate_EmergencyPlusMainPattern tests the core fix:
// Allow duplicate host when one has path matchers (emergency) and one doesn't (main)
func TestValidate_EmergencyPlusMainPattern(t *testing.T) {
tests := []struct {
name string
routes []*Route
expectError bool
errorText string
}{
{
name: "Emergency+Main Pattern (ALLOWED)",
routes: []*Route{
{
// Emergency route WITH paths
Match: []Match{{
Host: []string{"test.com"},
Path: []string{"/api/v1/emergency/*", "/emergency/*"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
},
{
// Main route WITHOUT paths
Match: []Match{{
Host: []string{"test.com"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
},
},
expectError: false,
},
{
name: "Reverse Order: Main+Emergency (ALLOWED)",
routes: []*Route{
{
// Main route WITHOUT paths (comes first)
Match: []Match{{
Host: []string{"example.com"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
},
{
// Emergency route WITH paths (comes second)
Match: []Match{{
Host: []string{"example.com"},
Path: []string{"/emergency/*"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
},
},
expectError: false,
},
{
name: "Duplicate Hosts With Same Paths (REJECTED)",
routes: []*Route{
{
Match: []Match{{
Host: []string{"test.com"},
Path: []string{"/api/*"},
}},
Handle: []Handler{
ReverseProxyHandler("app1:8080", false, "none", true),
},
},
{
Match: []Match{{
Host: []string{"test.com"},
Path: []string{"/admin/*"}, // Different paths, but both have paths
}},
Handle: []Handler{
ReverseProxyHandler("app2:8080", false, "none", true),
},
},
},
expectError: true,
errorText: "duplicate host with paths",
},
{
name: "Duplicate Hosts Without Paths (REJECTED)",
routes: []*Route{
{
Match: []Match{{
Host: []string{"test.com"},
}},
Handle: []Handler{
ReverseProxyHandler("app1:8080", false, "none", true),
},
},
{
Match: []Match{{
Host: []string{"test.com"}, // Same host, both without paths
}},
Handle: []Handler{
ReverseProxyHandler("app2:8080", false, "none", true),
},
},
},
expectError: true,
errorText: "duplicate host without paths",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := &Config{
Apps: Apps{
HTTP: &HTTPApp{
Servers: map[string]*Server{
"test": {
Listen: []string{":80"},
Routes: tt.routes,
},
},
},
},
}
err := Validate(config)
if tt.expectError {
require.Error(t, err, "Expected validation to fail")
if tt.errorText != "" {
require.Contains(t, err.Error(), tt.errorText)
}
} else {
require.NoError(t, err, "Expected validation to pass")
}
})
}
}
// TestValidate_MultipleHostsWithEmergencyPattern tests scalability:
// Verify validator handles 5, 10, and 18+ hosts with emergency+main pattern
func TestValidate_MultipleHostsWithEmergencyPattern(t *testing.T) {
hostCounts := []int{5, 10, 18}
for _, count := range hostCounts {
t.Run("RouteCount_"+string(rune(count+'0')), func(t *testing.T) {
routes := make([]*Route, 0, count*2) // 2 routes per host
for i := 0; i < count; i++ {
hostname := "host" + string(rune(i+'0')) + ".example.com"
// Emergency route
routes = append(routes, &Route{
Match: []Match{{
Host: []string{hostname},
Path: []string{"/api/v1/emergency/*", "/emergency/*"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
})
// Main route
routes = append(routes, &Route{
Match: []Match{{
Host: []string{hostname},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
})
}
config := &Config{
Apps: Apps{
HTTP: &HTTPApp{
Servers: map[string]*Server{
"test": {
Listen: []string{":80"},
Routes: routes,
},
},
},
},
}
err := Validate(config)
require.NoError(t, err, "Expected validation to pass for %d hosts", count)
})
}
}
// TestValidate_RouteOrdering verifies route ordering is preserved
func TestValidate_RouteOrdering(t *testing.T) {
// Emergency route should be checked BEFORE main route
// This test ensures validator doesn't impose ordering constraints
config := &Config{
Apps: Apps{
HTTP: &HTTPApp{
Servers: map[string]*Server{
"test": {
Listen: []string{":80"},
Routes: []*Route{
{
// Route 0: Emergency (with paths) - should match /emergency/* first
Match: []Match{{
Host: []string{"test.com"},
Path: []string{"/emergency/*"},
}},
Handle: []Handler{
Handler{
"handler": "static_response",
"body": "Emergency bypass",
},
},
Terminal: true,
},
{
// Route 1: Main (no paths) - matches everything else
Match: []Match{{
Host: []string{"test.com"},
}},
Handle: []Handler{
ReverseProxyHandler("app:8080", false, "none", true),
},
Terminal: true,
},
},
},
},
},
},
}
err := Validate(config)
require.NoError(t, err, "Route ordering should be preserved")
}
// TestValidate_CaseInsensitiveHosts tests that host comparison is case-sensitive
// This is intentional - DNS is case-insensitive, but Caddy handles normalization
func TestValidate_CaseSensitiveHostnames(t *testing.T) {
// Note: This test documents current behavior.
// The validator treats "Test.com" and "test.com" as DIFFERENT strings.
// This is acceptable because config.go normalizes all hostnames to lowercase
// BEFORE calling the validator. By the time validator sees them, they're already
// normalized, so this scenario doesn't occur in production.
config := &Config{
Apps: Apps{
HTTP: &HTTPApp{
Servers: map[string]*Server{
"test": {
Listen: []string{":80"},
Routes: []*Route{
{
Match: []Match{{
Host: []string{"Test.com"}, // Uppercase T
}},
Handle: []Handler{
ReverseProxyHandler("app1:8080", false, "none", true),
},
},
{
Match: []Match{{
Host: []string{"test.com"}, // Lowercase t
}},
Handle: []Handler{
ReverseProxyHandler("app2:8080", false, "none", true),
},
},
},
},
},
},
},
}
// Current behavior: validator treats these as different hosts (case-sensitive string comparison)
// This is fine because config.go normalizes all domains to lowercase before validation
err := Validate(config)
require.NoError(t, err, "Validator treats different case as different hosts (config.go normalizes before validation)")
}

View File

@@ -1,298 +1,689 @@
# GORM ID Leak Security Vulnerability Fix
# Duplicate Proxy Host Bug Fix - Simplified Validator (SYSTEMIC ISSUE)
**Status**: ACTIVE
**Priority**: CRITICAL 🔴
**Status**: ACTIVE - MINIMAL FIX APPROACH
**Priority**: CRITICAL 🔴🔴🔴 - ALL 18 ENABLED PROXY HOSTS DOWN
**Created**: 2026-01-28
**Security Issue**: SQL Injection Vulnerability via GORM `.First()` ID Leaking
**Updated**: 2026-01-28 (EXPANDED SCOPE - Systemic issue confirmed)
**Bug**: Caddy validator rejects emergency+main route pattern for EVERY proxy host (duplicate host with different path constraints)
---
## Executive Summary
**Critical Security Bug**: GORM's `.First(&model, id)` method with string ID fields causes SQL injection-like behavior by directly inserting the ID value into the WHERE clause without proper parameterization. This affects **12 production service methods** and **multiple test files**, causing widespread test failures.
**CRITICAL SYSTEMIC BUG**: Caddy's pre-flight validator rejects the emergency+main route pattern for **EVERY enabled proxy host**. The emergency route (with path matchers) and main route (without path matchers) share the same domain, causing "duplicate host matcher" error on ALL hosts.
**Impact**:
- 🔴 SQL syntax errors in production code
- 🔴 Test suite failures (uptime_service_race_test.go)
- 🔴 Potential SQL injection vector
- 🔴 String-based UUIDs are leaking into raw SQL
- 🔴🔴🔴 **ZERO routes loaded in Caddy** - ALL proxy hosts are down
- 🔴 **18 enabled proxy hosts** cannot be activated (not just Host ID 24)
- 🔴 Entire reverse proxy functionality is non-functional
- 🟡 Emergency bypass routes blocked for all hosts
- 🟡 Sequential failures: Host 24 → Host 22 → (pattern repeats for every host)
- 🟢 Backend health endpoint returns 200 OK (separate container health issue)
**Root Cause**: When calling `.First(&model, id)` where `model` has a string ID field, GORM directly inserts the ID as a raw SQL fragment instead of using parameterized queries.
**Root Cause**: Validator treats ALL duplicate hosts as errors without considering that routes with different path constraints are valid. The emergency+main route pattern is applied to EVERY proxy host by design, causing systematic rejection.
**Minimal Fix**: Simplify validator to allow duplicate hosts when ONE has path matchers and ONE doesn't. **This will unblock ALL 18 enabled proxy hosts simultaneously**, restoring full reverse proxy functionality. Full overlap detection is future work.
**Database**: NO issues - DNS is already case-insensitive. No migration needed.
**Secondary Issues** (tracked but deferred):
- 🟡 Slow SQL queries (>200ms) on uptime_heartbeats and security_configs tables
- 🟡 Container health check fails despite 200 OK from health endpoint (may be timeout issue)
---
## SQL Injection Evidence
## Technical Analysis
### Actual Error from Tests
### Current Route Structure
```
/uptime_service_race_test.go:100 unrecognized token: "639fb2d8"
SELECT * FROM \`uptime_hosts\` WHERE 639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87 AND \`uptime_hosts\`.\`id\` = "639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87"
For each proxy host, `GenerateConfig` creates TWO routes with the SAME domain list:
1. **Emergency Route** (lines 571-584 in config.go):
```go
emergencyRoute := &Route{
Match: []Match{{
Host: uniqueDomains, // immaculaterr.hatfieldhosted.com
Path: emergencyPaths, // /api/v1/emergency/*
}},
Handle: emergencyHandlers,
Terminal: true,
}
```
2. **Main Route** (lines 586-598 in config.go):
```go
route := &Route{
Match: []Match{{
Host: uniqueDomains, // immaculaterr.hatfieldhosted.com (DUPLICATE!)
}},
Handle: mainHandlers,
Terminal: true,
}
```
### Why Validator Fails
```go
// validator.go lines 89-93
for _, host := range match.Host {
if seenHosts[host] {
return fmt.Errorf("duplicate host matcher: %s", host)
}
seenHosts[host] = true
}
```
### What Should Happen vs What Actually Happens
The validator:
1. Processes emergency route: adds "immaculaterr.hatfieldhosted.com" to `seenHosts`
2. Processes main route: sees "immaculaterr.hatfieldhosted.com" again → ERROR
| Expected SQL | Actual Broken SQL |
|--------------|-------------------|
| \`WHERE \\\`uptime_hosts\\\`.\\\`id\\\` = "639fb2d8-..."\` | \`WHERE 639fb2d8-... AND \\\`uptime_hosts\\\`.\\\`id\\\` = "639fb2d8-..."\` |
The validator does NOT consider:
- Path matchers that make routes non-overlapping
- Route ordering/priority (emergency route is checked first)
- Caddy's native ability to handle this correctly
**The Problem**: The UUID string \`639fb2d8-f3a7-4c37-9a8e-22f2e73f8d87\` is being leaked as a raw SQL fragment. GORM interprets it as a SQL expression and injects it directly into the WHERE clause.
### Why Caddy Handles This Correctly
Caddy processes routes in order:
1. First matches emergency route (host + path): `/api/v1/emergency/*` → bypass security
2. Falls through to main route (host only): everything else → apply security
This is a **valid and intentional design pattern** - the validator is wrong to reject it.
---
## Technical Root Cause
## Solution: Simplified Validator Fix ⭐ CHOSEN APPROACH
### GORM Behavior with String IDs
**Approach**: Minimal fix to allow emergency+main route pattern specifically.
When calling \`.First(&model, id)\`:
1. If the model's ID field is \`uint\`, GORM correctly parameterizes: \`WHERE id = ?\`
2. If the model's ID field is \`string\`, GORM **leaks the ID as raw SQL**: \`WHERE <id_value> AND id = ?\`
**Implementation**:
- Track hosts seen with path matchers vs without path matchers separately
- Allow duplicate host if ONE has paths and ONE doesn't (the emergency+main pattern)
- Reject if both routes have paths OR both have no paths
### Models Affected by String IDs
**Pros**:
- ✅ Minimal change - unblocks ALL 18 proxy hosts simultaneously
- ✅ Preserves current route structure
- ✅ Simple logic - easy to understand and maintain
- ✅ Fixes the systemic design pattern bug affecting entire reverse proxy
From codebase search, these models have string ID fields:
**Limitations** (Future Work):
- ⚠️ Does not detect complex path overlaps (e.g., `/api/*` vs `/api/v1/*`)
- ⚠️ Full path pattern analysis deferred to future enhancement
- ⚠️ Assumes emergency+main pattern is primary use case
| Model | ID Field | Location |
|-------|----------|----------|
| \`ManualChallenge\` | \`ID string\` | \`internal/models/manual_challenge.go:30\` |
| \`UptimeMonitor\` | \`ID string\` | (inferred from error logs) |
| \`UptimeHost\` | \`ID string\` | (inferred from error logs) |
**Changes Required**:
- `backend/internal/caddy/validator.go`: Simplified duplicate detection (two maps: withPaths/withoutPaths)
- Tests for emergency+main pattern, route ordering, rollback
**Note**: Even models with \`uint\` IDs are vulnerable if the parameter passed is a string!
**Deferred**:
- Database migration (DNS already case-insensitive)
- Complex path overlap detection (future enhancement)
---
## Vulnerability Inventory
## Phase 1: Root Cause Verification - SYSTEMIC SCOPE
### CRITICAL: Production Service Files (12 instances)
**Objective**: Confirm bug affects ALL enabled proxy hosts and document the systemic failure pattern.
1. **access_list_service.go:105** - \`s.db.First(&acl, id)\`
2. **remoteserver_service.go:68** - \`s.db.First(&server, id)\`
3. **notification_service.go:458** - \`s.DB.First(&t, "id = ?", id)\`
4. **uptime_service.go:353** - \`s.DB.First(&uptimeHost, "id = ?", hostID)\`
5. **uptime_service.go:845** - \`s.DB.First(&uptimeHost, "id = ?", hostID)\`
6. **uptime_service.go:999** - \`s.DB.First(&host, hostID)\`
7. **uptime_service.go:1101** - \`s.DB.First(&monitor, "id = ?", id)\`
8. **uptime_service.go:1115** - \`s.DB.First(&monitor, "id = ?", id)\`
9. **uptime_service.go:1143** - \`s.DB.First(&monitor, "id = ?", id)\`
10. **certificate_service.go:412** - \`s.db.First(&cert, id)\`
11. **dns_provider_service.go:118** - \`s.db.WithContext(ctx).First(&provider, id)\`
12. **proxyhost_service.go:108** - \`s.db.First(&host, id)\`
**Tasks**:
### HIGH PRIORITY: Test Files (8 instances causing failures)
1. **Verify Systemic Impact** ⭐ NEW:
- [ ] Query database for ALL enabled proxy hosts (should be 18)
- [ ] Verify Caddy has ZERO routes loaded (admin API check)
- [ ] Document sequential failure pattern (Host 24 disabled → Host 22 fails next)
- [ ] Confirm EVERY enabled host triggers same validator error
- [ ] Test hypothesis: Disable all hosts except one → still fails
#### uptime_service_race_test.go (5 instances) ⚠️ CAUSING TEST FAILURES
2. **Reproduce Error on Multiple Hosts**:
- [ ] Test Host ID 24 (immaculaterr.hatfieldhosted.com) - original failure
- [ ] Test Host ID 22 (dockhand.hatfieldhosted.com) - second failure after disabling 24
- [ ] Test at least 3 additional hosts to confirm pattern
- [ ] Capture full error message from validator for each
- [ ] Document that error is identical across all hosts
1. **Line 100** 🔥 - \`db.First(&host, host.ID)\`
2. **Line 106** 🔥 - \`db.First(&host, host.ID)\`
3. **Line 152** - \`db.First(&host, host.ID)\`
4. **Line 255** - \`db.First(&updatedHost, "id = ?", host.ID)\`
5. **Line 398** - \`db.First(&updatedHost, "id = ?", host.ID)\`
3. **Analyze Generated Config for ALL Hosts**:
- [ ] Add debug logging to `GenerateConfig` before validation
- [ ] Log `uniqueDomains` list after deduplication for each host
- [ ] Log complete route structure before sending to validator
- [ ] Count how many routes contain each domain (should be 2: emergency + main)
- [ ] Verify emergency+main pattern exists for EVERY proxy host
#### Other Test Files
6. **credential_service_test.go:426** - \`db.First(&updatedProvider, provider.ID)\`
7. **dns_provider_service_test.go:550** - \`db.First(&dbProvider, provider.ID)\`
8. **dns_provider_service_test.go:617** - \`db.First(&retrieved, provider.ID)\`
9. **security_headers_service_test.go:184** - \`db.First(&saved, profile.ID)\`
---
## Fix Pattern: The Standard Safe Query
### ❌ VULNERABLE Pattern
\`\`\`go
// NEVER DO THIS with string IDs or when ID type is uncertain:
db.First(&model, id)
db.First(&model, "id = ?", id) // Also wrong! Second param is value, not condition
\`\`\`
### ✅ SAFE Pattern
\`\`\`go
// ALWAYS DO THIS:
db.Where("id = ?", id).First(&model)
\`\`\`
### Why This Pattern is Safe
1. **Explicit Parameterization**: \`id\` is treated as a parameter value, not SQL
2. **No Type Confusion**: Works for both \`uint\` and \`string\` IDs
3. **SQL Injection Safe**: GORM uses prepared statements with \`?\` placeholders
4. **Clear Intent**: Readable and obvious what the query does
---
## Implementation Plan
### Phase 1: Fix Production Services (CRITICAL)
**Duration**: 20 minutes
**Priority**: 🔴 CRITICAL
**Files to Fix** (12 instances):
1. access_list_service.go (Line 105)
2. remoteserver_service.go (Line 68)
3. notification_service.go (Line 458)
4. uptime_service.go (Lines 353, 845, 999, 1101, 1115, 1143)
5. certificate_service.go (Line 412)
6. dns_provider_service.go (Line 118)
7. proxyhost_service.go (Line 108)
### Phase 2: Fix Test Files
**Duration**: 15 minutes
**Priority**: 🟡 HIGH
**Files to Fix** (8+ instances):
1. uptime_service_race_test.go (Lines 100, 106, 152, 255, 398)
2. credential_service_test.go (Line 426)
3. dns_provider_service_test.go (Lines 550, 617)
4. security_headers_service_test.go (Line 184)
### Phase 3: Fix DNS Provider Test Expectation
**Duration**: 5 minutes
**Priority**: 🟢 MEDIUM
**File**: dns_provider_service_test.go
**Lines**: 1382 (comment), 1385 (assertion)
**Change**:
- Line 1382: Update comment to describe validation failure instead of decryption failure
- Line 1385: Change \`"DECRYPTION_ERROR"\` to \`"VALIDATION_ERROR"\`
### Phase 4: Verification
**Duration**: 10 minutes
**Test Commands**:
\`\`\`bash
cd /projects/Charon/backend
# Run race tests (primary failure source)
go test -v -race -run TestCheckHost ./internal/services/
# Run all uptime tests
go test -v -run Uptime ./internal/services/
# Run all service tests
go test ./internal/services/...
# Verify no vulnerable patterns remain
grep -rn "\\.First(&.*," internal/services/ | grep -v "Where"
\`\`\`
4. **Trace Validation Flow**:
- [ ] Add debug logging to `validateRoute` function
- [ ] Log each host as it's added to `seenHosts` map
- [ ] Log route index and match conditions when duplicate detected
- [ ] Confirm emergency route (index 0) succeeds for all hosts
- [ ] Confirm main route (index 1) triggers duplicate error for all hosts
**Success Criteria**:
- ✅ \`uptime_service_race_test.go\` tests pass
- ✅ No SQL syntax errors in logs
- ✅ All service tests pass
- ✅ DNS provider test expectation matches actual behavior
- ✅ Zero vulnerable \`.First(&model, id)\` patterns remain
- ✅ Confirmed: ALL 18 enabled proxy hosts trigger the same error
- ✅ Confirmed: Caddy has ZERO routes loaded (admin API returns empty)
- ✅ Confirmed: Sequential failure pattern documented (disable one → next fails)
- ✅ Confirmed: Emergency+main route pattern exists for EVERY host
- ✅ Confirmed: Validator rejects at main route (index 1) for all hosts
- ✅ Confirmed: This is a design pattern bug, not a data issue
**Files**:
- `backend/internal/caddy/config.go` - Add debug logging
- `backend/internal/caddy/validator.go` - Add debug logging
- `backend/internal/services/proxyhost_service.go` - Trigger config generation
- `docs/reports/duplicate_proxy_host_diagnosis.md` - Document systemic findings
**Estimated Time**: 30 minutes (increased for systemic verification)
---
## Timeline Summary
## Phase 2: Fix Validator (Simplified Path Detection)
| Phase | Duration | Priority | Tasks |
|-------|----------|----------|-------|
| **Phase 1** | 20 min | 🔴 CRITICAL | Fix 12 production service methods |
| **Phase 2** | 15 min | 🟡 HIGH | Fix 8+ test file instances |
| **Phase 3** | 5 min | 🟢 MEDIUM | Fix DNS provider test expectation |
| **Phase 4** | 10 min | 🟡 HIGH | Run full test suite verification |
| **TOTAL** | **50 min** | | Complete security fix |
**Objective**: MINIMAL fix to allow emergency+main route pattern (duplicate host where ONE has paths, ONE doesn't).
**Implementation Strategy**:
Simplify validator to handle the specific emergency+main pattern:
- Track hosts seen with paths vs without paths
- Allow duplicate hosts if ONE has path matchers, ONE doesn't
- This handles emergency route (has paths) + main route (no paths)
**Algorithm**:
```go
// Track hosts by whether they have path constraints
type hostTracking struct {
withPaths map[string]bool // hosts that have path matchers
withoutPaths map[string]bool // hosts without path matchers
}
for each route:
for each match in route.Match:
for each host:
hasPaths := len(match.Path) > 0
if hasPaths:
// Check if we've seen this host WITHOUT paths
if tracking.withoutPaths[host]:
continue // ALLOWED: emergency (with) + main (without)
}
if tracking.withPaths[host]:
return error("duplicate host with paths")
}
tracking.withPaths[host] = true
} else {
// Check if we've seen this host WITH paths
if tracking.withPaths[host]:
continue // ALLOWED: emergency (with) + main (without)
}
if tracking.withoutPaths[host]:
return error("duplicate host without paths")
}
tracking.withoutPaths[host] = true
}
```
**Simplified Rules**:
1. Same host + both have paths = DUPLICATE ❌
2. Same host + both have NO paths = DUPLICATE ❌
3. Same host + one with paths, one without = ALLOWED ✅ (emergency+main pattern)
**Future Work**: Full overlap detection for complex path patterns is deferred.
**Tasks**:
1. **Create Simple Tracking Structure**:
- [ ] Add `withPaths` and `withoutPaths` maps to validator
- [ ] Track hosts separately based on path presence
2. **Update Validation Logic**:
- [ ] Check if match has path matchers (len(match.Path) > 0)
- [ ] For hosts with paths: allow if counterpart without paths exists
- [ ] For hosts without paths: allow if counterpart with paths exists
- [ ] Reject if both routes have same path configuration
3. **Update Error Messages**:
- [ ] Clear error: "duplicate host with paths" or "duplicate host without paths"
- [ ] Document that this is minimal fix for emergency+main pattern
**Success Criteria**:
- ✅ Emergency + main routes with same host pass validation (one has paths, one doesn't)
- ✅ True duplicates rejected (both with paths OR both without paths)
- ✅ Clear error messages when validation fails
- ✅ All existing tests continue to pass
**Files**:
- `backend/internal/caddy/validator.go` - Simplified duplicate detection
- `backend/internal/caddy/validator_test.go` - Add test cases
**Estimated Time**: 30 minutes (simplified approach)
---
## Phase 3: Database Migration (DEFERRED)
**Status**: ⏸️ DEFERRED - Not needed for this bug fix
**Rationale**:
- DNS is already case-insensitive by RFC spec
- Caddy handles domains case-insensitively
- No database duplicates found in current data
- This bug is purely a code-level validation issue
- Database constraints can be added in future enhancement if needed
**Future Consideration**:
If case-sensitive duplicates become an issue in production:
1. Add UNIQUE index on `LOWER(domain_names)`
2. Add `BeforeSave` hook to normalize domains
3. Update frontend validation
**Estimated Time**: 0 minutes (deferred)
---
## Phase 4: Testing & Verification
**Objective**: Comprehensive testing to ensure fix works and no regressions.
**Test Categories**:
### Unit Tests
1. **Validator Tests** (`validator_test.go`):
- [ ] Test: Single route with one host → PASS
- [ ] Test: Two routes with different hosts → PASS
- [ ] Test: Emergency + main route pattern (one with paths, one without) → PASS ✅ NEW
- [ ] Test: Two routes with same host, both with paths → FAIL
- [ ] Test: Two routes with same host, both without paths → FAIL
- [ ] Test: Route ordering (emergency before main) → PASS ✅ NEW
- [ ] Test: Multiple proxy hosts (5, 10, 18 hosts) → PASS ✅ NEW
- [ ] Test: All hosts enabled simultaneously (real-world scenario) → PASS ✅ NEW
2. **Config Generation Tests** (`config_test.go`):
- [ ] Test: Single host generates emergency + main routes
- [ ] Test: Both routes have same domain list
- [ ] Test: Emergency route has path matchers
- [ ] Test: Main route has no path matchers
- [ ] Test: Route ordering preserved (emergency before main)
- [ ] Test: Deduplication map prevents domain appearing twice in `uniqueDomains`
3. **Performance Tests** (NEW):
- [ ] Benchmark: Validation with 100 routes
- [ ] Benchmark: Validation with 1000 routes
- [ ] Verify: No more than 5% overhead vs old validator
- [ ] Profile: Memory usage with large configs
### Integration Tests
- Multi-Host Scenario** ⭐ UPDATED:
- [ ] Create proxy_host with domain "ImmaculateRR.HatfieldHosted.com"
- [ ] Trigger config generation via `ApplyConfig`
- [ ] Verify validator passes
- [ ] Verify Caddy accepts config
- [ ] **Enable 5 hosts simultaneously** - verify all routes created
- [ ] **Enable 10 hosts simultaneously** - verify all routes created
- [ ] **Enable all 18 hosts** - verify complete config loads successfully
2. **Emergency Bypass Test - Multiple Hosts**:
- [ ] Enable multiple proxy hosts with security features (WAF, rate limit)
- [ ] Verify emergency endpoint `/api/v1/emergency/security-reset` bypasses security on ALL hosts
- [ ] Verify main application routes have security checks on ALL hosts
- [ ] Confirm route ordering is correct for ALL hosts (emergency checked first)
3. **Rollback Test - Systemic Impact**:
- [ ] Apply validator fix
- [ ] Enable ALL 18 proxy hosts successfully
- [ ] Verify Caddy loads all routes (admin API check)
- [ ] Rollback to old validator code
- [ ] Verify sequential failures (Host 24 → Host 22 → ...)
- [ ] Re-apply fix and confirm all 18 hosts work
4. **Caddy AdmiALL Proxy Hosts** ⭐ UPDATED:
- [ ] Update database: `UPDATE proxy_hosts SET enabled = 1` (enable ALL hosts)
- [ ] Restart backend or trigger config reload
- [ ] Verify no "duplicate host matcher" errors for ANY host
- [ ] Verify Caddy logs show successful config load with all routes
- [ ] Query Caddy admin API: confirm 36+ routes loaded
- [ ] Test at least 5 different domains in browser
2. **Cross-Browser Test - Multiple Hosts**:
- [ ] Test at least 3 different proxy host domains from multiple browsers
- [ ] Verify HTTPS redirects work correctly on all tested hosts
- [ ] Confirm no certificate warnings on any host
- [ ] Test emergency endpoint accessibility on all hosts
3. **Load Test - All Hosts Enabled** ⭐ NEW:
- [ ] Enable all 18 proxy hosts
- [ ] Verify backend startup time is acceptable (<30s)
- [ ] Verify Caddy config reload time is acceptable (<5s)
- [ ] Monitor memory usage with full config loaded
- [ ] Verify no performance degradation vs single host
**Success Criteria**:
- ✅ All unit tests pass (including multi-host scenarios)
- ✅ All integration tests pass (including 5, 10, 18 host scenarios)
- ✅ ALL 18 proxy hosts can be enabled simultaneously without errors
- ✅ Caddy admin API shows 36+ routes loaded (2 per host minimum)
- ✅ Emergency routes bypass security correctly on ALL hosts
- ✅ Route ordering verified for ALL hosts (emergency before main)
- ✅ Rollback test proves fix was necessary (sequential failures return)
- [ ] Test emergency endpoint accessibility
**Success Criteria**:
- ✅ All unit tests p60 minutes (increased for multi-host testing)
- ✅ All integration tests pass
- ✅ Host ID 24 can be enabled without errors
- ✅ Emergency routes bypass security correctly
- ✅ Route ordering verified (emergency before main)
- ✅ Rollback test proves fix was necessary
- ✅ Performance benchmarks show <5% overhead
- ✅ No regressions in existing functionality
**Estimated Time**: 45 minutes
---
## Phase 5: Documentation & Deployment
**Objective**: Document the fix, update runbooks, and prepare for deployment.
**Tasks**:
1. **Code Documentation**:
- [ ] Add comprehensive comments to validator route signature logic
- [ ] Document why duplicate hosts with different paths are allowed
- [ ] Add examples of valid and invalid route patterns
- [ ] Document edge cases and how they're handled
2. **API Documentation**:
- [ ] Update `/docs/api.md` with validator behavior
- [ ] Document emergency+main route pattern
- [ ] Explain why duplicate hosts are allowed in this case
- [ ] Add note that DNS is case-insensitive by nature
3. **Runbook Updates**:
- [ ] Create "Duplicate Host Matcher Error" troubleshooting section
- [ ] Document root cause and fix
- [ ] Add steps to diagnose similar issues
- [ ] Add validation bypass procedure (if needed for emergency)
4. **Troubleshooting Guide**:
- [ ] Document "duplicate host matcher" error
- [ ] Explain emergency+main route pattern
- [ ] Provide steps to verify route ordering
- [ ] Add validation test procedure
5. **Changelog**:
- [ ] Add entry to `CHANGELOG.md` under "Fixed" section:
```markdown
### Fixed
- **CRITICAL**: Fixed systemic "duplicate host matcher" error affecting ALL 18 enabled proxy hosts
- Simplified Caddy config validator to allow emergency+main route pattern (one with paths, one without)
- Restored full reverse proxy functionality - Caddy now correctly loads routes for all enabled hosts
- Emergency bypass routes now function correctly for all proxy hosts
```
6. **Create Diagnostic Tool** (Optional Enhancement):
- [ ] Add admin API endpoint: `GET /api/v1/debug/caddy-routes`
- [ ] Returns current route structure with host/path matchers
- [ ] Highlights potential conflicts before validation
- [ ] Useful for troubleshooting future issues
**Success Criteria**:
- ✅ Code is well-documented with clear explanations
- ✅ API docs reflect new behavior
- ✅ Runbook provides clear troubleshooting steps
- ✅ Migration is documented and tested
- ✅ Changelog is updated
**Files**:
- `backend/internal/caddy/validator.go` - Inline comments
- `backend/internal/caddy/config.go` - Route generation comments
- `docs/api.md` - API documentation
- `docs/troubleshooting/duplicate-host-matcher.md` - NEW runbook
- `CHANGELOG.md` - Version entry
**Estimated Time**: 30 minutes
---Phase 6: Performance Investigation (DEFERRED - Optional)
**Status**: ⏸️ DEFERRED - Secondary issue, not blocking proxy functionality
ALL 18 enabled proxy hosts can be enabled simultaneously without errors
- ✅ Caddy loads all routes successfully (36+ routes via admin API)
- ✅ Emergency routes bypass security features as designed on ALL hosts
- ✅ Main routes apply security features correctly on ALL hosts
- ✅ No false positives from validator for valid configs
- ✅ True duplicate routes still rejected appropriately
- ✅ Full reverse proxy functionality restored
- Slow queries on `security_configs` table
- May impact monitoring responsiveness but does not block proxy functionality
**Tasks**:
1. **Query Profiling**:
- [ ] Enable query logging in production
- [ ] Identify slowest queries with EXPLAIN ANALYZE
- [ ] Profile table sizes and row counts
- [ ] Check existing indexes
2. **Index Analysis**:
- [ ] Analyze missing indexes on `uptime_heartbeats`
- [ ] Analyze missing indexes on `security_configs`
- [ ] Propose index additions if needed
- [ ] Test index performance impact
3. **Optimization**:
- [ ] Add indexes if justified by query patterns
- [ ] Consider query optimization (LIMIT, pagination)
- [ ] Monitor performance after changes
- [ ] Document index strategy
**Priority**: LOW - Does not block proxy functionality
**Estimated Time**: Deferred until Phase 2 is complete
---
## Phase 7: Container Health Check In- SYSTEMIC SCOPE (30 min)
- [ ] Verify ALL 18 enabled hosts trigger validator error
- [ ] Test sequential failure pattern (disable one → next fails)
- [ ] Confirm Caddy has ZERO routes loaded (admin API check)
- [ ] Verify emergency+main route pattern exists for EVERY host
- [ ] Add debug logging to config generation and validator
- [ ] Document systemic findings in diagnosis report
### Phase 2: Fix Validator - SIMPLIFIED (30 min)
- [ ] Create simple tracking structure (withPaths/withoutPaths maps)
- [ ] Update validation logic to allow one-with-paths + one-without-paths
- [ ] Update error messages
- [ ] Write unit tests for emergency+main pattern
- [ ] Add multi-host test scenarios (5, 10, 18 hosts)
- [ ] Verify route ordering preserved
### Phase 3: Database Migration (0 min)
- [x] DEFERRED - Not needed for this bug fix
### Phase 4: Testing - MULTI-HOST SCENARIOS (60 min)
- [ ] Write/update validator unit tests (emergency+main pattern)
- [ ] Add multi-host test scenarios (5, 10, 18 hosts)
- [ ] Write/update config generation tests (route ordering, all hosts)
- [ ] Add performance benchmarks (validate handling 18+ hosts)
- [ ] Run integration tests with all hosts enabled
- [ ] Perform rollback test (verify sequential failures return)
- [ ] Re-enable ALL 18 hosts and verify Caddy loads all routes
- [ ] Verify Caddy admin API shows 36+ routes
### Phase 5: Documentation (30 min)
- [ ] Add code comments explaining simplified approach
- [ ] Update API documentation
- [ ] Create troubleshooting guide emphasizing systemic nature
- [ ] Update changelog with CRITICAL scope
- [ ] Document that full overlap detection is future work
- [ ] Document multi-host verification steps
### Phase 6: Performance Investigation (DEFERRED)
- [ ] DEFERRED - Slow SQL queries (uptime_heartbeats, security_configs)
- [ ] Track as separate issue if proxy functionality is restored
### Phase 7: Health Check Investigation (DEFERRED)
- [ ] DEFERRED - Container health check fails despite 200 OK
- [ ] Track as separate issue if proxy functionality is restored
**Total Estimated Time**: 2 hours 30 minutes (updated for systemic scope
---
##
## Success Metrics
### Functionality
- ✅ Host ID 24 (immaculaterr.hatfieldhosted.com) can be enabled without errors
- ✅ Emergency routes bypass security features as designed
- ✅ Main routes apply security features correctly
- ✅ No false positives from validator for valid configs
- ✅ True duplicate routes still rejected appropriately
### Performance
- ✅ Validation performance not significantly impacted (< 5% overhead)
- ✅ Config generation time unchanged
- ✅ Database query performance not affected by new index
### Quality
- ✅ Zero regressions in existing tests
- ✅ New test coverage for path-aware validation
- ✅ Clear error messages for validation failures
- ✅ Code is maintainable and well-documented
---
## Risk Assessment
### Security Impact
| Risk | Before Fix | After Fix |
|------|-----------|-----------|
| **SQL Injection** | 🔴 HIGH | ✅ None |
| **Test Failures** | 🔴 Blocking | ✅ All pass |
| **Production Bugs** | 🔴 Likely | ✅ None |
| **Data Corruption** | 🟡 Possible | ✅ None |
### Deployment Risk
**Risk Level**: LOW
**Justification**:
- Changes are surgical and well-defined
- Pattern is mechanical (\`.First(&model, id)\` → \`.Where("id = ?", id).First(&model)\`)
- No business logic changes
- Comprehensive test coverage validates correctness
| Risk | Impact | Mitigation |
|------|--------|------------|
| **Validator Too Permissive** | High | Comprehensive test suite with negative test cases |
| **Route Ordering Issues** | Medium | Integration tests verify emergency routes checked first |
| **Migration Failure** | Low | Reversible migration + pre-flight data validation |
| **Case Normalization Breaks Existing Domains** | Low | Normalization is idempotent (lowercase → lowercase) |
| **Performance Degradation** | Low | Profile validator changes, ensure <5% overhead |
---
## Code Change Summary
## Implementation Checklist
### Production Services: 12 Changes
- access_list_service.go: 1 fix
- remoteserver_service.go: 1 fix
- notification_service.go: 1 fix
- uptime_service.go: 6 fixes
- certificate_service.go: 1 fix
- dns_provider_service.go: 1 fix
- proxyhost_service.go: 1 fix
### Phase 1: Root Cause Verification (20 min)
- [ ] Reproduce error on demand
- [ ] Add debug logging to config generation
- [ ] Add debug logging to validator
- [ ] Confirm emergency + main route pattern
- [ ] Document findings
### Test Files: 8+ Changes
- uptime_service_race_test.go: 5 fixes
- credential_service_test.go: 1 fix
- dns_provider_service_test.go: 2 fixes (1 GORM, 1 expectation)
- security_headers_service_test.go: 1 fix
### Phase 2: Fix Validator - SIMPLIFIED (30 min)
- [ ] Create simple tracking structure (withPaths/withoutPaths maps)
- [ ] Update validation logic to allow one-with-paths + one-without-paths
- [ ] Update error messages
- [ ] Write unit tests for emergency+main pattern
- [ ] Verify route ordering preserved
### Total Lines Changed: ~25 lines
### Total Files Modified: 9 files
### Phase 3: Database Migration (0 min)
- [x] DEFERRED - Not needed for this bug fix
### Phase 4: Testing (45 min)
- [ ] Write/update validator unit tests (emergency+main pattern)
- [ ] Write/update config generation tests (route ordering)
- [ ] Add performance benchmarks
- [ ] Run integration tests
- [ ] Perform rollback test
- [ ] Re-enable Host ID 24 verification
### Phase 5: Documentation (30 min)
- [ ] Add code comments explaining simplified approach
- [ ] Update API documentation
- [ ] Create troubleshooting guide
- [ ] Update changelog
- [ ] Document that full overlap detection is future work
**T**Re-enable ALL proxy hosts** (not just Host ID 24)
4. Verify Caddy loads all routes successfully (admin API check)
5. Verify emergency routes work correctly on all hosts
### Post-Deployment
1. Verify ALL 18 proxy hosts are accessible
2. Verify Caddy admin API shows 36+ routes loaded
3. Test emergency endpoint bypasses security on multiple hosts
4. Monitor for "duplicate host matcher" errors (should be zero)
5. Verify full reverse proxy functionality restored
6. Monitor performance with all hosts enabled
### Rollback Plan
If issues arise:
1. Rollback backend to previous version
2. Document which hosts fail (expect sequential pattern)
3. Review validator logs to identify cause
4. Disable problematic hosts temporarily if needed
5. Re-apply fix after investigation
3. Re-enable Host ID 24 if still disabled
4. Verify emergency routes work correctly
### Post-Deployment
1. Verify Host ID 24 is accessible
2. Test emergency endpoint bypasses security
3. Monitor for "duplicate host matcher" errors
4. Check database constraint is enforcing uniqueness
### Rollback Plan
If issues arise:
1. Rollback backend to previous version
2. Re-disable Host ID 24 if necessary
3. Review validator logs to identify cause
4. Investigate unexpected route patterns
---
## Success Criteria
## Future Enhancements
- [x] Root cause identified for SQL injection vulnerability
- [x] All vulnerable instances catalogued (20+ total)
- [x] Comprehensive fix plan documented
- [ ] All production service methods fixed (12 instances)
- [ ] All test files fixed (8+ instances)
- [ ] Test expectation corrected (DNS provider test)
- [ ] Race tests pass without SQL errors
- [ ] Full test suite passes
1. **Full Path Overlap Detection**:
- Current fix handles emergency+main pattern only (one-with-paths + one-without-paths)
- Future: Detect complex overlaps (e.g., `/api/*` vs `/api/v1/*`)
- Future: Validate path pattern specificity
- Future: Warn on ambiguous route priority
2. **Visual Route Debugger**:
- Admin UI component showing route tree
- Highlights potential conflicts
## Known Secondary Issues (Tracked Separately)
These issues were discovered during diagnosis but are NOT blocking proxy functionality:
1. **Slow SQL Queries (Phase 6 - DEFERRED)**:
- `uptime_heartbeats` table queries >200ms
- `security_configs` table queries >200ms
- Impacts monitoring responsiveness, not proxy functionality
- **Action**: Track as separate performance issue after Phase 2 complete
2. **Container Health Check Failure (Phase 7 - DEFERRED)**:
- Backend health endpoint returns 200 OK consistently
- Docker container marked as unhealthy
- May be timeout issue (3s too short?)
- Does not affect proxy functionality (backend is running)
- **Action**: Track as separate Docker configuration issue after Phase 2 complete
---
## Related Documentation
**Plan Status**: ✅ READY FOR IMPLEMENTATION (EXPANDED SCOPE)
**Next Action**: Begin Phase 1 - Root Cause Verification - SYSTEMIC SCOPE
**Assigned To**: Implementation Agent
**Priority**: CRITICAL 🔴🔴🔴 - ALL 18 PROXY HOSTS DOWN, ZERO CADDY ROUTES LOADED
**Scope**: Systemic bug affecting entire reverse proxy functionality (not single-host issue)
- Warn (don't error) on suspicious patterns
- Suggest route optimizations
- Show effective route priority
- Highlight overlapping matchers
- **GORM Documentation**: https://gorm.io/docs/query.html
- **SQL Injection Prevention**: OWASP Top 10 (A03:2021 Injection)
- **Parameterized Queries**: https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html
4. **Database Domain Normalization** (if needed):
- Add case-insensitive uniqueness constraint
- BeforeSave hook for normalization
- Frontend validation hints
- Only if case duplicates become production issue
---
## Notes
### Why This Bug is Insidious
1. **Silent Failure**: Works fine with \`uint\` IDs, fails with \`string\` IDs
2. **Type-Dependent**: Behavior changes based on model ID field type
3. **Misleading API**: \`.First(&model, id)\` looks safe but isn't
4. **Widespread**: Affects 20+ instances across codebase
5. **Hard to Debug**: SQL errors appear cryptic and non-obvious
### Best Practices Going Forward
**DO**:
- Always use \`.Where("id = ?", id).First(&model)\`
- Use parameterized queries for all dynamic values
- Test with both \`uint\` and \`string\` ID models
**DON'T**:
- Never use \`.First(&model, id)\` with string IDs
- Never trust GORM to handle ID types correctly
- Never pass SQL fragments as \`.First()\` parameters
---
**Plan Status**: ✅ COMPLETE AND READY FOR IMPLEMENTATION
**Next Action**: Begin Phase 1 - Fix Production Services
**Plan Status**: ✅ READY FOR IMPLEMENTATION
**Next Action**: Begin Phase 1 - Root Cause Verification
**Assigned To**: Implementation Agent
**Priority**: HIGH - Blocking Host ID 24 from being enabled

View File

@@ -0,0 +1,453 @@
# Duplicate Proxy Host Diagnosis Report
**Date:** 2026-01-28
**Issue:** Charon container unhealthy, all proxy hosts down
**Error:** `validation failed: invalid route 1 in server charon_server: duplicate host matcher: immaculaterr.hatfieldhosted.com`
---
## Executive Summary
**Finding:** The database contains NO duplicate entries. There is only **one** proxy_host record for domain `Immaculaterr.hatfieldhosted.com` (ID 24). The duplicate host matcher error from Caddy indicates a **code-level bug** in the configuration generation logic, NOT a database integrity issue.
**Impact:**
- Caddy failed to load configuration at startup
- All proxy hosts are unreachable
- Container health check failing
- Frontend still accessible (direct backend connection)
**Root Cause:** Unknown bug in Caddy config generation that produces duplicate host matchers for the same domain, despite deduplication logic being present in the code.
---
## Investigation Details
### 1. Database Analysis
#### Active Database Location
- **Host path:** `/projects/Charon/data/charon.db` (empty/corrupted - 0 bytes)
- **Container path:** `/app/data/charon.db` (active - 177MB)
- **Backup:** `/projects/Charon/data/charon.db.backup-20260128-065828` (empty - contains schema but no data)
#### Database Integrity Check
**Total Proxy Hosts:** 19
**Query Results:**
```sql
-- Check for the problematic domain
SELECT id, uuid, name, domain_names, enabled, created_at, updated_at
FROM proxy_hosts
WHERE domain_names LIKE '%immaculaterr%';
```
**Result:** Only **ONE** entry found:
```
ID: 24
UUID: 4f392485-405b-4a35-b022-e3d16c30bbde
Name: Immaculaterr
Domain: Immaculaterr.hatfieldhosted.com (note: capital 'I')
Forward Host: Immaculaterr
Forward Port: 5454
Enabled: true
Created: 2026-01-16 20:42:59
Updated: 2026-01-16 20:42:59
```
#### Duplicate Detection Queries
**Test 1: Case-insensitive duplicate check**
```sql
SELECT COUNT(*), LOWER(domain_names)
FROM proxy_hosts
GROUP BY LOWER(domain_names)
HAVING COUNT(*) > 1;
```
**Result:** 0 duplicates found
**Test 2: Comma-separated domains check**
```sql
SELECT id, name, domain_names
FROM proxy_hosts
WHERE domain_names LIKE '%,%';
```
**Result:** No multi-domain entries found
**Test 3: Locations check (could cause route duplication)**
```sql
SELECT ph.id, ph.name, ph.domain_names, COUNT(l.id) as location_count
FROM proxy_hosts ph
LEFT JOIN locations l ON l.proxy_host_id = ph.id
WHERE ph.enabled = 1
GROUP BY ph.id;
```
**Result:** All proxy_hosts have 0 locations, including ID 24
**Test 4: Advanced config check**
```sql
SELECT id, name, domain_names, advanced_config
FROM proxy_hosts
WHERE id = 24;
```
**Result:** No advanced_config set (NULL)
**Test 5: Soft deletes check**
```sql
.schema proxy_hosts | grep -i deleted
```
**Result:** No soft delete columns exist
**Conclusion:** Database is clean. Only ONE entry for this domain exists.
---
### 2. Error Analysis
#### Error Message from Docker Logs
```
{"error":"validation failed: invalid route 1 in server charon_server: duplicate host matcher: immaculaterr.hatfieldhosted.com","level":"error","msg":"Failed to apply initial Caddy config","time":"2026-01-28T13:18:53-05:00"}
```
#### Key Observations:
1. **"invalid route 1"** - This is the SECOND route (0-indexed), suggesting the first route (index 0) is valid
2. **Lowercase domain** - Caddy error shows `immaculaterr` (lowercase) but database has `Immaculaterr` (capital I)
3. **Timing** - Error occurs at initial startup when `ApplyConfig()` is called
4. **Validation stage** - Error happens in Caddy's validation, not in Charon's generation
#### Code Review Findings
**File:** `/projects/Charon/backend/internal/caddy/config.go`
**Function:** `GenerateConfig()` (line 19)
**Deduplication Logic Present:**
- Line 437: `processedDomains := make(map[string]bool)` - Track processed domains
- Line 469-488: Domain normalization and duplicate detection
```go
d = strings.TrimSpace(d)
d = strings.ToLower(d) // Normalize to lowercase
if processedDomains[d] {
logger.Log().WithField("domain", d).Warn("Skipping duplicate domain")
continue
}
processedDomains[d] = true
```
- Line 461: Reverse iteration to prefer newer hosts
```go
for i := len(hosts) - 1; i >= 0; i--
```
**Expected Behavior:** The deduplication logic SHOULD prevent this error.
**Hypothesis:** One of the following is occurring:
1. **Bug in deduplication logic:** The domain is bypassing the duplicate check
2. **Multiple code paths:** Domain is added through a different path (e.g., frontend route, locations, advanced config)
3. **Database query issue:** GORM joins/preloads causing duplicate records in the Go slice
4. **Race condition:** Config is being generated/applied multiple times simultaneously (unlikely at startup)
---
### 3. All Proxy Hosts in Database
```
ID Name Domain
2 FileFlows fileflows.hatfieldhosted.com
4 Profilarr profilarr.hatfieldhosted.com
5 HomePage homepage.hatfieldhosted.com
6 Prowlarr prowlarr.hatfieldhosted.com
7 Tautulli tautulli.hatfieldhosted.com
8 TubeSync tubesync.hatfieldhosted.com
9 Bazarr bazarr.hatfieldhosted.com
11 Mealie mealie.hatfieldhosted.com
12 NZBGet nzbget.hatfieldhosted.com
13 Radarr radarr.hatfieldhosted.com
14 Sonarr sonarr.hatfieldhosted.com
15 Seerr seerr.hatfieldhosted.com
16 Plex plex.hatfieldhosted.com
17 Charon charon.hatfieldhosted.com
18 Wizarr wizarr.hatfieldhosted.com
20 PruneMate prunemate.hatfieldhosted.com
21 GiftManager giftmanager.hatfieldhosted.com
22 Dockhand dockhand.hatfieldhosted.com
24 Immaculaterr Immaculaterr.hatfieldhosted.com ← PROBLEMATIC
```
**Note:** ID 24 is the newest proxy_host (most recent updated_at timestamp).
---
### 4. Caddy Configuration State
**Current Status:** NO configuration loaded (Caddy is running with minimal admin-only config)
**Query:** `curl localhost:2019/config/` returns empty/default config
**Last Successful Config:**
- Timestamp: 2026-01-27 19:15:38
- Config Hash: `a87bd130369d62ab29a1fcf377d855a5b058223c33818eacff6f7312c2c4d6a0`
- Status: Success (before ID 24 was added)
**Recent Config History (from caddy_configs table):**
```
ID Hash Applied At Success
299 a87bd130...c2c4d6a0 2026-01-27 19:15:38 true
298 a87bd130...c2c4d6a0 2026-01-27 15:40:56 true
297 a87bd130...c2c4d6a0 2026-01-27 03:34:46 true
296 dbf4c820...d963b234 2026-01-27 02:01:45 true
295 dbf4c820...d963b234 2026-01-27 02:01:45 true
```
All recent configs were successful. The failure happened on 2026-01-28 13:18:53 (not recorded in table due to early validation failure).
---
### 5. Database File Status
**Critical Issue:** The host's `/projects/Charon/data/charon.db` file is **empty** (0 bytes).
**Timeline:**
- Original file was likely corrupted or truncated
- Container is using an in-memory or separate database file
- Volume mount may be broken or asynchronous
**Evidence:**
```bash
-rw-r--r-- 1 root root 0 Jan 28 18:24 /projects/Charon/data/charon.db
-rw-r--r-- 1 root root 177M Jan 28 18:26 /projects/Charon/data/charon.db.investigation
```
The actual database was copied from the container.
---
## Recommended Remediation Plan
### Immediate Short-Term Fix (Workaround)
**Option 1: Disable Problematic Proxy Host**
```sql
-- Run inside container
docker exec charon sqlite3 /app/data/charon.db \
"UPDATE proxy_hosts SET enabled = 0 WHERE id = 24;"
-- Restart container to apply
docker restart charon
```
**Option 2: Delete Duplicate Entry (if acceptable data loss)**
```sql
docker exec charon sqlite3 /app/data/charon.db \
"DELETE FROM proxy_hosts WHERE id = 24;"
docker restart charon
```
**Option 3: Change Domain to Bypass Duplicate Detection**
```sql
-- Temporarily rename the domain to isolate the issue
docker exec charon sqlite3 /app/data/charon.db \
"UPDATE proxy_hosts SET domain_names = 'immaculaterr-temp.hatfieldhosted.com' WHERE id = 24;"
docker restart charon
```
### Medium-Term Fix (Debug & Patch)
**Step 1: Enable Debug Logging**
```bash
# Set debug logging in container
docker exec charon sh -c "export CHARON_DEBUG=1; kill -HUP \$(pidof charon)"
```
**Step 2: Generate Config Manually**
Create a debug script to generate and inspect the Caddy config:
```go
// In backend/cmd/debug/main.go
package main
import (
"encoding/json"
"fmt"
"log"
"github.com/Wikid82/charon/backend/internal/caddy"
"github.com/Wikid82/charon/backend/internal/database"
"github.com/Wikid82/charon/backend/internal/models"
)
func main() {
db, _ := database.Connect("data/charon.db")
var hosts []models.ProxyHost
db.Preload("Locations").Preload("DNSProvider").Find(&hosts)
config, err := caddy.GenerateConfig(hosts, "data/caddy/data", "", "frontend/dist", "", false, false, false, false, false, "", nil, nil, nil, nil, nil)
if err != nil {
log.Fatal(err)
}
json, _ := json.MarshalIndent(config, "", " ")
fmt.Println(string(json))
}
```
Run and inspect:
```bash
go run backend/cmd/debug/main.go > /tmp/caddy-config-debug.json
jq '.apps.http.servers.charon_server.routes[] | select(.match[0].host[] | contains("immaculaterr"))' /tmp/caddy-config-debug.json
```
**Step 3: Add Unit Test**
```go
// In backend/internal/caddy/config_test.go
func TestGenerateConfig_PreventCaseSensitiveDuplicates(t *testing.T) {
hosts := []models.ProxyHost{
{UUID: "uuid-1", DomainNames: "Example.com", Enabled: true, ForwardHost: "app1", ForwardPort: 8080}, {UUID: "uuid-2", DomainNames: "example.com", Enabled: true, ForwardHost: "app2", ForwardPort: 8081},
}
config, err := GenerateConfig(hosts, "/tmp/data", "", "", "", false, false, false, false, false, "", nil, nil, nil, nil, nil)
require.NoError(t, err)
// Should only have ONE route for this domain (not two)
server := config.Apps.HTTP.Servers["charon_server"]
routes := server.Routes
domainCount := 0
for _, route := range routes {
for _, match := range route.Match {
for _, host := range match.Host {
if strings.ToLower(host) == "example.com" {
domainCount++
}
}
}
}
assert.Equal(t, 1, domainCount, "Should only have one route for case-insensitive duplicate domain")
}
```
### Long-Term Fix (Root Cause Prevention)
**1. Add Database Constraint**
```sql
-- Create unique index on normalized domain names
CREATE UNIQUE INDEX idx_proxy_hosts_domain_names_lower
ON proxy_hosts(LOWER(domain_names));
```
**2. Add Pre-Save Validation Hook**
```go
// In backend/internal/models/proxy_host.go
func (p *ProxyHost) BeforeSave(tx *gorm.DB) error {
// Normalize domain names to lowercase
p.DomainNames = strings.ToLower(p.DomainNames)
// Check for existing domain (case-insensitive)
var existing ProxyHost
if err := tx.Where("id != ? AND LOWER(domain_names) = ?",
p.ID, strings.ToLower(p.DomainNames)).First(&existing).Error; err == nil {
return fmt.Errorf("domain %s already exists (ID: %d)", p.DomainNames, existing.ID)
}
return nil
}
```
**3. Add Duplicate Detection to Frontend**
```typescript
// In frontend/src/components/ProxyHostForm.tsx
const checkDomainUnique = async (domain: string) => {
const response = await api.get(`/api/v1/proxy-hosts?domain=${encodeURIComponent(domain.toLowerCase())}`);
if (response.data.length > 0) {
setError(`Domain ${domain} is already in use by "${response.data[0].name}"`);
return false;
}
return true;
};
```
**4. Add Monitoring/Alerting**
- Add Prometheus metric for config generation failures
- Set up alert for repeated validation failures
- Log full generated config to file for debugging
---
## Next Steps
### Immediate Action Required (Choose ONE):
**Recommended:** Option 1 (Disable)
- **Pros:** Non-destructive, can re-enable later, allows investigation
- **Cons:** Service unavailable until bug is fixed
- **Command:**
```bash
docker exec charon sqlite3 /app/data/charon.db \
"UPDATE proxy_hosts SET enabled = 0 WHERE id = 24;"
docker restart charon
```
### Follow-Up Investigation:
1. **Check for code-level bug:** Add debug logging to `GenerateConfig()` to print:
- Total hosts processed
- Each domain being added to processedDomains map
- Final route count vs expected count
2. **Verify GORM query behavior:** Check if `.Preload()` is causing duplicate records in the slice
3. **Test with minimal reproduction:** Create a fresh database with only ID 24, see if error persists
4. **Review recent commits:** Check if any recent changes to config.go introduced the bug
---
## Files Involved
- **Database:** `/app/data/charon.db` (inside container)
- **Backup:** `/projects/Charon/data/charon.db.backup-20260128-065828`
- **Investigation Copy:** `/projects/Charon/data/charon.db.investigation`
- **Code:** `/projects/Charon/backend/internal/caddy/config.go` (GenerateConfig function)
- **Manager:** `/projects/Charon/backend/internal/caddy/manager.go` (ApplyConfig function)
---
## Appendix: SQL Queries Used
```sql
-- Find all proxy hosts with specific domain
SELECT id, uuid, name, domain_names, forward_host, forward_port, enabled, created_at, updated_at
FROM proxy_hosts
WHERE domain_names LIKE '%immaculaterr.hatfieldhosted.com%'
ORDER BY created_at;
-- Count total hosts
SELECT COUNT(*) as total FROM proxy_hosts;
-- Check for duplicate domains (case-insensitive)
SELECT COUNT(*), domain_names
FROM proxy_hosts
GROUP BY LOWER(domain_names)
HAVING COUNT(*) > 1;
-- Check proxy hosts with locations
SELECT ph.id, ph.name, ph.domain_names, COUNT(l.id) as location_count
FROM proxy_hosts ph
LEFT JOIN locations l ON l.proxy_host_id = ph.id
WHERE ph.enabled = 1
GROUP BY ph.id
ORDER BY ph.id;
-- Check recent Caddy config applications
SELECT * FROM caddy_configs
ORDER BY applied_at DESC
LIMIT 5;
-- Get all enabled proxy hosts
SELECT id, name, domain_names, enabled
FROM proxy_hosts
WHERE enabled = 1
ORDER BY id;
```
---
**Report Generated By:** GitHub Copilot
**Investigation Date:** 2026-01-28
**Status:** Investigation Complete - Awaiting Remediation Decision