diff --git a/backend/internal/services/crowdsec_startup.go b/backend/internal/services/crowdsec_startup.go index 449da9d8..1ac27d12 100644 --- a/backend/internal/services/crowdsec_startup.go +++ b/backend/internal/services/crowdsec_startup.go @@ -2,6 +2,9 @@ package services import ( "context" + "os" + "path/filepath" + "strings" "time" "github.com/Wikid82/charon/backend/internal/logger" @@ -21,6 +24,11 @@ type CrowdsecProcessManager interface { // and starts it if necessary. This handles container restart scenarios where the // user's preference was to have CrowdSec enabled. func ReconcileCrowdSecOnStartup(db *gorm.DB, executor CrowdsecProcessManager, binPath, dataDir string) { + logger.Log().WithFields(map[string]interface{}{ + "bin_path": binPath, + "data_dir": dataDir, + }).Info("CrowdSec reconciliation: starting startup check") + if db == nil || executor == nil { logger.Log().Debug("CrowdSec reconciliation skipped: nil db or executor") return @@ -42,9 +50,36 @@ func ReconcileCrowdSecOnStartup(db *gorm.DB, executor CrowdsecProcessManager, bi return } - // Only auto-start if CrowdSecMode is "local" - if cfg.CrowdSecMode != "local" { - logger.Log().WithField("mode", cfg.CrowdSecMode).Debug("CrowdSec reconciliation skipped: mode is not 'local'") + // Also check for runtime setting override in settings table + var settingOverride struct{ Value string } + crowdSecEnabled := false + if err := db.Raw("SELECT value FROM settings WHERE key = ? LIMIT 1", "security.crowdsec.enabled").Scan(&settingOverride).Error; err == nil && settingOverride.Value != "" { + crowdSecEnabled = strings.EqualFold(settingOverride.Value, "true") + logger.Log().WithFields(map[string]interface{}{ + "setting_value": settingOverride.Value, + "crowdsec_enabled": crowdSecEnabled, + }).Debug("CrowdSec reconciliation: found runtime setting override") + } + + // Only auto-start if CrowdSecMode is "local" OR runtime setting is enabled + if cfg.CrowdSecMode != "local" && !crowdSecEnabled { + logger.Log().WithFields(map[string]interface{}{ + "db_mode": cfg.CrowdSecMode, + "setting_enabled": crowdSecEnabled, + }).Debug("CrowdSec reconciliation skipped: mode is not 'local' and setting not enabled") + return + } + + // VALIDATE: Ensure binary exists + if _, err := os.Stat(binPath); os.IsNotExist(err) { + logger.Log().WithField("path", binPath).Error("CrowdSec reconciliation: binary not found, cannot start") + return + } + + // VALIDATE: Ensure config directory exists + configPath := filepath.Join(dataDir, "config") + if _, err := os.Stat(configPath); os.IsNotExist(err) { + logger.Log().WithField("path", configPath).Error("CrowdSec reconciliation: config directory not found, cannot start") return } @@ -64,16 +99,46 @@ func ReconcileCrowdSecOnStartup(db *gorm.DB, executor CrowdsecProcessManager, bi } // CrowdSec should be running but isn't - start it - logger.Log().Info("CrowdSec reconciliation: starting CrowdSec (mode=local, not currently running)") + logger.Log().WithFields(map[string]interface{}{ + "bin_path": binPath, + "data_dir": dataDir, + }).Info("CrowdSec reconciliation: starting CrowdSec (mode=local, not currently running)") startCtx, startCancel := context.WithTimeout(context.Background(), 30*time.Second) defer startCancel() newPid, err := executor.Start(startCtx, binPath, dataDir) if err != nil { - logger.Log().WithError(err).Error("CrowdSec reconciliation: failed to start CrowdSec") + logger.Log().WithError(err).WithFields(map[string]interface{}{ + "bin_path": binPath, + "data_dir": dataDir, + }).Error("CrowdSec reconciliation: FAILED to start CrowdSec - check binary and config") return } - logger.Log().WithField("pid", newPid).Info("CrowdSec reconciliation: successfully started CrowdSec") + // VERIFY: Wait briefly and confirm process is actually running + time.Sleep(2 * time.Second) + + verifyCtx, verifyCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer verifyCancel() + + verifyRunning, verifyPid, verifyErr := executor.Status(verifyCtx, dataDir) + if verifyErr != nil { + logger.Log().WithError(verifyErr).WithField("expected_pid", newPid).Warn("CrowdSec reconciliation: started but failed to verify status") + return + } + + if !verifyRunning { + logger.Log().WithFields(map[string]interface{}{ + "expected_pid": newPid, + "actual_pid": verifyPid, + "running": verifyRunning, + }).Error("CrowdSec reconciliation: process started but is no longer running - may have crashed") + return + } + + logger.Log().WithFields(map[string]interface{}{ + "pid": newPid, + "verified": true, + }).Info("CrowdSec reconciliation: successfully started and verified CrowdSec") } diff --git a/backend/internal/services/crowdsec_startup_test.go b/backend/internal/services/crowdsec_startup_test.go index 40151079..742f0742 100644 --- a/backend/internal/services/crowdsec_startup_test.go +++ b/backend/internal/services/crowdsec_startup_test.go @@ -2,6 +2,8 @@ package services import ( "context" + "os" + "path/filepath" "testing" "github.com/Wikid82/charon/backend/internal/models" @@ -37,6 +39,33 @@ func (m *mockCrowdsecExecutor) Status(ctx context.Context, configDir string) (bo return m.running, m.pid, m.statusErr } +// smartMockCrowdsecExecutor returns running=true after Start is called (for post-start verification) +type smartMockCrowdsecExecutor struct { + startCalled bool + startErr error + startPid int + statusCalled bool + statusErr error +} + +func (m *smartMockCrowdsecExecutor) Start(ctx context.Context, binPath, configDir string) (int, error) { + m.startCalled = true + return m.startPid, m.startErr +} + +func (m *smartMockCrowdsecExecutor) Stop(ctx context.Context, configDir string) error { + return nil +} + +func (m *smartMockCrowdsecExecutor) Status(ctx context.Context, configDir string) (bool, int, error) { + m.statusCalled = true + // Return running=true if Start was called (simulates successful start) + if m.startCalled { + return true, m.startPid, m.statusErr + } + return false, 0, m.statusErr +} + func setupCrowdsecTestDB(t *testing.T) *gorm.DB { db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{ Logger: gormlogger.Default.LogMode(gormlogger.Silent), @@ -49,6 +78,36 @@ func setupCrowdsecTestDB(t *testing.T) *gorm.DB { return db } +// setupCrowdsecTestFixtures creates temporary binary and config directory for testing +func setupCrowdsecTestFixtures(t *testing.T) (binPath, dataDir string, cleanup func()) { + t.Helper() + + // Create temp directory + tempDir, err := os.MkdirTemp("", "crowdsec-test-*") + require.NoError(t, err) + + // Create mock binary file + binPath = filepath.Join(tempDir, "crowdsec") + err = os.WriteFile(binPath, []byte("#!/bin/sh\nexit 0\n"), 0o755) + require.NoError(t, err) + + // Create data directory (passed as dataDir to the function) + dataDir = filepath.Join(tempDir, "data") + err = os.MkdirAll(dataDir, 0o755) + require.NoError(t, err) + + // Create config directory inside data dir (validation checks dataDir/config) + configDir := filepath.Join(dataDir, "config") + err = os.MkdirAll(configDir, 0o755) + require.NoError(t, err) + + cleanup = func() { + os.RemoveAll(tempDir) + } + + return binPath, dataDir, cleanup +} + func TestReconcileCrowdSecOnStartup_NilDB(t *testing.T) { exec := &mockCrowdsecExecutor{} @@ -95,6 +154,9 @@ func TestReconcileCrowdSecOnStartup_ModeDisabled(t *testing.T) { func TestReconcileCrowdSecOnStartup_ModeLocal_AlreadyRunning(t *testing.T) { db := setupCrowdsecTestDB(t) + binPath, dataDir, cleanup := setupCrowdsecTestFixtures(t) + defer cleanup() + exec := &mockCrowdsecExecutor{ running: true, pid: 12345, @@ -106,7 +168,7 @@ func TestReconcileCrowdSecOnStartup_ModeLocal_AlreadyRunning(t *testing.T) { } require.NoError(t, db.Create(&cfg).Error) - ReconcileCrowdSecOnStartup(db, exec, "crowdsec", "/tmp/crowdsec") + ReconcileCrowdSecOnStartup(db, exec, binPath, dataDir) assert.True(t, exec.statusCalled) assert.False(t, exec.startCalled, "Should not start if already running") @@ -114,10 +176,19 @@ func TestReconcileCrowdSecOnStartup_ModeLocal_AlreadyRunning(t *testing.T) { func TestReconcileCrowdSecOnStartup_ModeLocal_NotRunning_Starts(t *testing.T) { db := setupCrowdsecTestDB(t) + binPath, configDir, cleanup := setupCrowdsecTestFixtures(t) + defer cleanup() + + // Mock executor returns not running initially, then running after start + statusCallCount := 0 exec := &mockCrowdsecExecutor{ running: false, startPid: 99999, } + // Override Status to return running=true on second call (post-start verification) + originalStatus := exec.Status + _ = originalStatus // silence unused warning + exec.running = false // Create SecurityConfig with mode=local cfg := models.SecurityConfig{ @@ -125,14 +196,23 @@ func TestReconcileCrowdSecOnStartup_ModeLocal_NotRunning_Starts(t *testing.T) { } require.NoError(t, db.Create(&cfg).Error) - ReconcileCrowdSecOnStartup(db, exec, "crowdsec", "/tmp/crowdsec") + // We need a smarter mock that returns running=true after Start is called + smartExec := &smartMockCrowdsecExecutor{ + startPid: 99999, + } - assert.True(t, exec.statusCalled) - assert.True(t, exec.startCalled, "Should start if mode=local and not running") + ReconcileCrowdSecOnStartup(db, smartExec, binPath, configDir) + + assert.True(t, smartExec.statusCalled) + assert.True(t, smartExec.startCalled, "Should start if mode=local and not running") + _ = statusCallCount // silence unused warning } func TestReconcileCrowdSecOnStartup_ModeLocal_StartError(t *testing.T) { db := setupCrowdsecTestDB(t) + binPath, dataDir, cleanup := setupCrowdsecTestFixtures(t) + defer cleanup() + exec := &mockCrowdsecExecutor{ running: false, startErr: assert.AnError, @@ -145,13 +225,16 @@ func TestReconcileCrowdSecOnStartup_ModeLocal_StartError(t *testing.T) { require.NoError(t, db.Create(&cfg).Error) // Should not panic on start error - ReconcileCrowdSecOnStartup(db, exec, "crowdsec", "/tmp/crowdsec") + ReconcileCrowdSecOnStartup(db, exec, binPath, dataDir) assert.True(t, exec.startCalled) } func TestReconcileCrowdSecOnStartup_StatusError(t *testing.T) { db := setupCrowdsecTestDB(t) + binPath, dataDir, cleanup := setupCrowdsecTestFixtures(t) + defer cleanup() + exec := &mockCrowdsecExecutor{ statusErr: assert.AnError, } @@ -163,7 +246,7 @@ func TestReconcileCrowdSecOnStartup_StatusError(t *testing.T) { require.NoError(t, db.Create(&cfg).Error) // Should not panic on status error and should not attempt start - ReconcileCrowdSecOnStartup(db, exec, "crowdsec", "/tmp/crowdsec") + ReconcileCrowdSecOnStartup(db, exec, binPath, dataDir) assert.True(t, exec.statusCalled) assert.False(t, exec.startCalled, "Should not start if status check fails") diff --git a/backend/internal/services/log_watcher.go b/backend/internal/services/log_watcher.go index 62a9f5c0..348b53df 100644 --- a/backend/internal/services/log_watcher.go +++ b/backend/internal/services/log_watcher.go @@ -230,33 +230,54 @@ func (w *LogWatcher) ParseLogEntry(line string) *models.SecurityLogEntry { // detectSecurityEvent analyzes the log entry and sets security-related fields. func (w *LogWatcher) detectSecurityEvent(entry *models.SecurityLogEntry, caddyLog *models.CaddyAccessLog) { - // Check for WAF blocks (typically 403 with specific headers or logger) - if caddyLog.Status == 403 { + loggerLower := strings.ToLower(caddyLog.Logger) + + // Check for WAF/Coraza indicators (highest priority for 403s) + if strings.Contains(loggerLower, "waf") || + strings.Contains(loggerLower, "coraza") || + hasHeader(caddyLog.RespHeaders, "X-Coraza-Id") || + hasHeader(caddyLog.RespHeaders, "X-Coraza-Rule-Id") { entry.Blocked = true + entry.Source = "waf" entry.Level = "warn" + entry.BlockReason = "WAF rule triggered" - // Check for WAF/Coraza indicators - if caddyLog.Logger == "http.handlers.waf" || - hasHeader(caddyLog.RespHeaders, "X-Coraza-Id") || - strings.Contains(caddyLog.Logger, "coraza") { - entry.Source = "waf" - entry.BlockReason = "WAF rule triggered" - - // Try to extract rule ID from headers - if ruleID, ok := caddyLog.RespHeaders["X-Coraza-Id"]; ok && len(ruleID) > 0 { - entry.Details["rule_id"] = ruleID[0] - } - } else if hasHeader(caddyLog.RespHeaders, "X-Crowdsec-Decision") || - strings.Contains(caddyLog.Logger, "crowdsec") { - entry.Source = "crowdsec" - entry.BlockReason = "CrowdSec decision" - } else if hasHeader(caddyLog.Request.Headers, "X-Acl-Denied") { - entry.Source = "acl" - entry.BlockReason = "Access list denied" - } else { - entry.Source = "cerberus" - entry.BlockReason = "Access denied" + // Try to extract rule ID from headers + if ruleID, ok := caddyLog.RespHeaders["X-Coraza-Id"]; ok && len(ruleID) > 0 { + entry.Details["rule_id"] = ruleID[0] } + if ruleID, ok := caddyLog.RespHeaders["X-Coraza-Rule-Id"]; ok && len(ruleID) > 0 { + entry.Details["rule_id"] = ruleID[0] + } + return + } + + // Check for CrowdSec indicators + if strings.Contains(loggerLower, "crowdsec") || + strings.Contains(loggerLower, "bouncer") || + hasHeader(caddyLog.RespHeaders, "X-Crowdsec-Decision") || + hasHeader(caddyLog.RespHeaders, "X-Crowdsec-Origin") { + entry.Blocked = true + entry.Source = "crowdsec" + entry.Level = "warn" + entry.BlockReason = "CrowdSec decision" + + // Extract CrowdSec-specific headers + if origin, ok := caddyLog.RespHeaders["X-Crowdsec-Origin"]; ok && len(origin) > 0 { + entry.Details["crowdsec_origin"] = origin[0] + } + return + } + + // Check for ACL blocks + if strings.Contains(loggerLower, "acl") || + hasHeader(caddyLog.RespHeaders, "X-Acl-Denied") || + hasHeader(caddyLog.RespHeaders, "X-Blocked-By-Acl") { + entry.Blocked = true + entry.Source = "acl" + entry.Level = "warn" + entry.BlockReason = "Access list denied" + return } // Check for rate limiting (429 Too Many Requests) @@ -273,6 +294,19 @@ func (w *LogWatcher) detectSecurityEvent(entry *models.SecurityLogEntry, caddyLo if reset, ok := caddyLog.RespHeaders["X-Ratelimit-Reset"]; ok && len(reset) > 0 { entry.Details["ratelimit_reset"] = reset[0] } + if limit, ok := caddyLog.RespHeaders["X-Ratelimit-Limit"]; ok && len(limit) > 0 { + entry.Details["ratelimit_limit"] = limit[0] + } + return + } + + // Check for other 403s (generic security block) + if caddyLog.Status == 403 { + entry.Blocked = true + entry.Source = "cerberus" + entry.Level = "warn" + entry.BlockReason = "Access denied" + return } // Check for authentication failures @@ -280,11 +314,22 @@ func (w *LogWatcher) detectSecurityEvent(entry *models.SecurityLogEntry, caddyLo entry.Level = "warn" entry.Source = "auth" entry.Details["auth_failure"] = true + return } // Check for server errors if caddyLog.Status >= 500 { entry.Level = "error" + return + } + + // Normal traffic - set appropriate level based on status + entry.Source = "normal" + entry.Blocked = false + if caddyLog.Status >= 400 { + entry.Level = "warn" + } else { + entry.Level = "info" } } diff --git a/frontend/src/pages/Security.tsx b/frontend/src/pages/Security.tsx index 8461b84c..f406ae9e 100644 --- a/frontend/src/pages/Security.tsx +++ b/frontend/src/pages/Security.tsx @@ -84,42 +84,50 @@ export default function Security() { const crowdsecPowerMutation = useMutation({ mutationFn: async (enabled: boolean) => { + // Update setting first await updateSetting('security.crowdsec.enabled', enabled ? 'true' : 'false', 'security', 'bool') + if (enabled) { toast.info('Starting CrowdSec... This may take up to 30 seconds') const result = await startCrowdsec() + + // VERIFY: Check if it actually started + const status = await statusCrowdsec() + if (!status.running) { + // Revert the setting since process didn't start + await updateSetting('security.crowdsec.enabled', 'false', 'security', 'bool') + throw new Error('CrowdSec process failed to start. Check server logs for details.') + } + return result } else { await stopCrowdsec() + + // VERIFY: Check if it actually stopped (with brief delay for cleanup) + await new Promise(resolve => setTimeout(resolve, 500)) + const status = await statusCrowdsec() + if (status.running) { + throw new Error('CrowdSec process still running. Check server logs for details.') + } + return { enabled: false } } }, - onMutate: async (enabled: boolean) => { - await queryClient.cancelQueries({ queryKey: ['security-status'] }) - const previous = queryClient.getQueryData(['security-status']) - queryClient.setQueryData(['security-status'], (old: unknown) => { - if (!old || typeof old !== 'object') return old - const copy = { ...(old as SecurityStatus) } - if (copy.crowdsec && typeof copy.crowdsec === 'object') { - copy.crowdsec = { ...copy.crowdsec, enabled } as never - } - return copy - }) - setCrowdsecStatus(prev => prev ? { ...prev, running: enabled } : prev) - return { previous } - }, - onError: (err: unknown, enabled: boolean, context: unknown) => { - if (context && typeof context === 'object' && 'previous' in context) { - queryClient.setQueryData(['security-status'], context.previous) - } + // NO optimistic updates - wait for actual confirmation + onError: (err: unknown, enabled: boolean) => { const msg = err instanceof Error ? err.message : String(err) toast.error(enabled ? `Failed to start CrowdSec: ${msg}` : `Failed to stop CrowdSec: ${msg}`) + // Force refresh status from backend to ensure UI matches reality + queryClient.invalidateQueries({ queryKey: ['security-status'] }) fetchCrowdsecStatus() }, onSuccess: async (result: { lapi_ready?: boolean; enabled?: boolean } | boolean) => { - await fetchCrowdsecStatus() - queryClient.invalidateQueries({ queryKey: ['security-status'] }) - queryClient.invalidateQueries({ queryKey: ['settings'] }) + // Refresh all related queries to ensure consistency + await Promise.all([ + queryClient.invalidateQueries({ queryKey: ['security-status'] }), + queryClient.invalidateQueries({ queryKey: ['settings'] }), + fetchCrowdsecStatus(), + ]) if (typeof result === 'object' && result.lapi_ready === true) { toast.success('CrowdSec started and LAPI is ready')