From 9e2ea47f9023b77195bf108bea1a30069f9477cd Mon Sep 17 00:00:00 2001 From: fuomag9 <1580624+fuomag9@users.noreply.github.com> Date: Sun, 9 Nov 2025 18:40:39 +0100 Subject: [PATCH] Add Caddy restart detection, auto-recovery, and metrics exposure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Implemented health monitoring service that detects Caddy restarts/crashes - Automatically reapplies configuration when Caddy restarts - Added metrics settings UI for enabling Prometheus/Grafana monitoring - Caddy metrics available at /metrics endpoint (configurable port/path) - Updated docker-compose.yml with metrics port documentation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- app/(dashboard)/settings/SettingsClient.tsx | 56 ++++++- app/(dashboard)/settings/actions.ts | 36 ++++- app/(dashboard)/settings/page.tsx | 8 +- docker-compose.yml | 2 + src/instrumentation.ts | 10 ++ src/lib/caddy-monitor.ts | 170 ++++++++++++++++++++ src/lib/caddy.ts | 25 ++- src/lib/settings.ts | 14 ++ 8 files changed, 310 insertions(+), 11 deletions(-) create mode 100644 src/lib/caddy-monitor.ts diff --git a/app/(dashboard)/settings/SettingsClient.tsx b/app/(dashboard)/settings/SettingsClient.tsx index 00251b73..9e78e356 100644 --- a/app/(dashboard)/settings/SettingsClient.tsx +++ b/app/(dashboard)/settings/SettingsClient.tsx @@ -2,11 +2,12 @@ import { useFormState } from "react-dom"; import { Alert, Box, Button, Card, CardContent, Checkbox, FormControlLabel, Stack, TextField, Typography } from "@mui/material"; -import type { GeneralSettings, AuthentikSettings } from "@/src/lib/settings"; +import type { GeneralSettings, AuthentikSettings, MetricsSettings } from "@/src/lib/settings"; import { updateCloudflareSettingsAction, updateGeneralSettingsAction, - updateAuthentikSettingsAction + updateAuthentikSettingsAction, + updateMetricsSettingsAction } from "./actions"; type Props = { @@ -17,12 +18,14 @@ type Props = { accountId?: string; }; authentik: AuthentikSettings | null; + metrics: MetricsSettings | null; }; -export default function SettingsClient({ general, cloudflare, authentik }: Props) { +export default function SettingsClient({ general, cloudflare, authentik, metrics }: Props) { const [generalState, generalFormAction] = useFormState(updateGeneralSettingsAction, null); const [cloudflareState, cloudflareFormAction] = useFormState(updateCloudflareSettingsAction, null); const [authentikState, authentikFormAction] = useFormState(updateAuthentikSettingsAction, null); + const [metricsState, metricsFormAction] = useFormState(updateMetricsSettingsAction, null); return ( @@ -158,6 +161,53 @@ export default function SettingsClient({ general, cloudflare, authentik }: Props + + + + + Metrics & Monitoring + + + Enable Caddy metrics exposure for monitoring with Prometheus, Grafana, or other observability tools. + When enabled, metrics will be available at http://caddy:{metrics?.port ?? 2019}{metrics?.path ?? "/metrics"} + + + {metricsState?.message && ( + + {metricsState.message} + + )} + } + label="Enable metrics endpoint" + /> + + + + After enabling metrics, configure your monitoring tool to scrape http://caddy-proxy-manager-caddy:{metrics?.port ?? 2019}{metrics?.path ?? "/metrics"} from within the Docker network. + To expose metrics externally, add a port mapping in docker-compose.yml. + + + + + + + ); } diff --git a/app/(dashboard)/settings/actions.ts b/app/(dashboard)/settings/actions.ts index 5e2947fb..2dd488f4 100644 --- a/app/(dashboard)/settings/actions.ts +++ b/app/(dashboard)/settings/actions.ts @@ -3,7 +3,7 @@ import { revalidatePath } from "next/cache"; import { requireAdmin } from "@/src/lib/auth"; import { applyCaddyConfig } from "@/src/lib/caddy"; -import { getCloudflareSettings, saveCloudflareSettings, saveGeneralSettings, saveAuthentikSettings } from "@/src/lib/settings"; +import { getCloudflareSettings, saveCloudflareSettings, saveGeneralSettings, saveAuthentikSettings, saveMetricsSettings } from "@/src/lib/settings"; type ActionResult = { success: boolean; @@ -86,3 +86,37 @@ export async function updateAuthentikSettingsAction(_prevState: ActionResult | n return { success: false, message: error instanceof Error ? error.message : "Failed to save Authentik settings" }; } } + +export async function updateMetricsSettingsAction(_prevState: ActionResult | null, formData: FormData): Promise { + try { + await requireAdmin(); + const enabled = formData.get("enabled") === "on"; + const portStr = formData.get("port") ? String(formData.get("port")).trim() : ""; + const port = portStr && !isNaN(Number(portStr)) ? Number(portStr) : 2019; + const path = formData.get("path") ? String(formData.get("path")).trim() : "/metrics"; + + await saveMetricsSettings({ + enabled, + port, + path + }); + + // Apply config to enable/disable metrics + try { + await applyCaddyConfig(); + revalidatePath("/settings"); + return { success: true, message: "Metrics settings saved and applied successfully" }; + } catch (error) { + console.error("Failed to apply Caddy config:", error); + revalidatePath("/settings"); + const errorMsg = error instanceof Error ? error.message : "Unknown error"; + return { + success: true, + message: `Settings saved, but could not apply to Caddy: ${errorMsg}` + }; + } + } catch (error) { + console.error("Failed to save metrics settings:", error); + return { success: false, message: error instanceof Error ? error.message : "Failed to save metrics settings" }; + } +} diff --git a/app/(dashboard)/settings/page.tsx b/app/(dashboard)/settings/page.tsx index c2c30fa3..9e3e8b75 100644 --- a/app/(dashboard)/settings/page.tsx +++ b/app/(dashboard)/settings/page.tsx @@ -1,14 +1,15 @@ import SettingsClient from "./SettingsClient"; -import { getCloudflareSettings, getGeneralSettings, getAuthentikSettings } from "@/src/lib/settings"; +import { getCloudflareSettings, getGeneralSettings, getAuthentikSettings, getMetricsSettings } from "@/src/lib/settings"; import { requireAdmin } from "@/src/lib/auth"; export default async function SettingsPage() { await requireAdmin(); - const [general, cloudflare, authentik] = await Promise.all([ + const [general, cloudflare, authentik, metrics] = await Promise.all([ getGeneralSettings(), getCloudflareSettings(), - getAuthentikSettings() + getAuthentikSettings(), + getMetricsSettings() ]); return ( @@ -20,6 +21,7 @@ export default async function SettingsPage() { accountId: cloudflare?.accountId }} authentik={authentik} + metrics={metrics} /> ); } diff --git a/docker-compose.yml b/docker-compose.yml index c995fd84..52833110 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -61,6 +61,8 @@ services: - "443:443" # Admin API only exposed on internal network for security # Web UI accesses via http://caddy:2019 internally + # Uncomment the line below to expose metrics externally for Grafana/Prometheus + # - "2019:2019" # Metrics available at http://localhost:2019/metrics environment: # Primary domain for Caddy configuration PRIMARY_DOMAIN: ${PRIMARY_DOMAIN:-caddyproxymanager.com} diff --git a/src/instrumentation.ts b/src/instrumentation.ts index eb79a6e4..f32f6576 100644 --- a/src/instrumentation.ts +++ b/src/instrumentation.ts @@ -37,5 +37,15 @@ export async function register() { // Don't throw - Caddy might not be ready yet, or config might be applied later // This ensures proxy hosts work after container restart } + + // Start Caddy health monitoring to detect restarts and auto-reapply config + const { startCaddyMonitoring } = await import("./lib/caddy-monitor"); + try { + startCaddyMonitoring(); + console.log("Caddy health monitoring started"); + } catch (error) { + console.error("Failed to start Caddy health monitoring:", error); + // Don't throw - monitoring is a nice-to-have feature + } } } diff --git a/src/lib/caddy-monitor.ts b/src/lib/caddy-monitor.ts new file mode 100644 index 00000000..4c4dc3d3 --- /dev/null +++ b/src/lib/caddy-monitor.ts @@ -0,0 +1,170 @@ +/** + * Caddy health monitoring service + * Monitors Caddy for restarts/crashes and automatically reapplies configuration + */ + +import { config } from "./config"; +import { applyCaddyConfig } from "./caddy"; +import { getSetting, setSetting } from "./settings"; + +type CaddyMonitorState = { + isHealthy: boolean; + lastConfigId: string | null; + lastCheckTime: number; + consecutiveFailures: number; +}; + +const HEALTH_CHECK_INTERVAL = 10000; // Check every 10 seconds +const MAX_CONSECUTIVE_FAILURES = 3; // Consider unhealthy after 3 failures +const REAPPLY_DELAY = 5000; // Wait 5 seconds after detecting restart before reapplying + +let monitorState: CaddyMonitorState = { + isHealthy: false, + lastConfigId: null, + lastCheckTime: 0, + consecutiveFailures: 0 +}; + +let monitorInterval: NodeJS.Timeout | null = null; +let isMonitoring = false; + +/** + * Get the current Caddy config ID from the admin API + * This is used to detect when Caddy has restarted (config ID changes) + */ +async function getCaddyConfigId(): Promise { + try { + const response = await fetch(`${config.caddyApiUrl}/config/`, { + method: "GET", + signal: AbortSignal.timeout(5000) + }); + + if (!response.ok) { + return null; + } + + // Use ETag or compute a simple hash from the response + const etag = response.headers.get("etag"); + if (etag) { + return etag; + } + + // Fallback: use the config object's structure + const configData = await response.json(); + // Check if config is essentially empty (default state after restart) + const isEmpty = !configData.apps || Object.keys(configData.apps).length === 0; + return isEmpty ? "empty" : "configured"; + } catch (error) { + // Network error or timeout + return null; + } +} + +/** + * Check if Caddy is healthy and detect restarts + */ +async function checkCaddyHealth(): Promise { + const now = Date.now(); + monitorState.lastCheckTime = now; + + const currentConfigId = await getCaddyConfigId(); + + if (currentConfigId === null) { + // Caddy is not responding + monitorState.consecutiveFailures++; + + if (monitorState.isHealthy && monitorState.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) { + console.warn( + `[CaddyMonitor] Caddy appears to be down (${monitorState.consecutiveFailures} consecutive failures)` + ); + monitorState.isHealthy = false; + } + return; + } + + // Caddy is responding + const wasUnhealthy = !monitorState.isHealthy; + monitorState.consecutiveFailures = 0; + monitorState.isHealthy = true; + + // Detect restart: config ID changed to "empty" or Caddy was previously unhealthy + const hasRestarted = + (monitorState.lastConfigId !== null && currentConfigId === "empty") || + (wasUnhealthy && currentConfigId === "empty"); + + if (hasRestarted) { + console.log("[CaddyMonitor] Caddy restart detected! Waiting before reapplying configuration..."); + + // Wait a bit for Caddy to fully initialize + setTimeout(async () => { + try { + console.log("[CaddyMonitor] Reapplying Caddy configuration after restart..."); + await applyCaddyConfig(); + console.log("[CaddyMonitor] Configuration reapplied successfully"); + + // Update the config ID after successful reapplication + const newConfigId = await getCaddyConfigId(); + monitorState.lastConfigId = newConfigId; + } catch (error) { + console.error("[CaddyMonitor] Failed to reapply configuration after restart:", error); + // Will retry on next health check + } + }, REAPPLY_DELAY); + } else if (monitorState.lastConfigId === null) { + // First time seeing Caddy healthy + console.log("[CaddyMonitor] Caddy health monitoring initialized"); + monitorState.lastConfigId = currentConfigId; + } else { + // Normal operation, update last known config ID + monitorState.lastConfigId = currentConfigId; + } +} + +/** + * Start monitoring Caddy health + */ +export function startCaddyMonitoring(): void { + if (isMonitoring) { + console.log("[CaddyMonitor] Already monitoring"); + return; + } + + console.log(`[CaddyMonitor] Starting Caddy health monitoring (interval: ${HEALTH_CHECK_INTERVAL}ms)`); + isMonitoring = true; + + // Do initial check immediately + checkCaddyHealth().catch((error) => { + console.error("[CaddyMonitor] Initial health check failed:", error); + }); + + // Set up periodic checks + monitorInterval = setInterval(() => { + checkCaddyHealth().catch((error) => { + console.error("[CaddyMonitor] Health check failed:", error); + }); + }, HEALTH_CHECK_INTERVAL); +} + +/** + * Stop monitoring Caddy health + */ +export function stopCaddyMonitoring(): void { + if (!isMonitoring) { + return; + } + + console.log("[CaddyMonitor] Stopping Caddy health monitoring"); + isMonitoring = false; + + if (monitorInterval) { + clearInterval(monitorInterval); + monitorInterval = null; + } +} + +/** + * Get current monitoring state (useful for debugging) + */ +export function getMonitorState(): Readonly { + return { ...monitorState }; +} diff --git a/src/lib/caddy.ts b/src/lib/caddy.ts index 2b6309eb..d0517ef8 100644 --- a/src/lib/caddy.ts +++ b/src/lib/caddy.ts @@ -3,7 +3,7 @@ import { join } from "node:path"; import crypto from "node:crypto"; import db, { nowIso } from "./db"; import { config } from "./config"; -import { getCloudflareSettings, getGeneralSettings, setSetting } from "./settings"; +import { getCloudflareSettings, getGeneralSettings, getMetricsSettings, setSetting } from "./settings"; import { accessListEntries, certificates, @@ -925,6 +925,12 @@ async function buildCaddyDocument() { const hasTls = tlsConnectionPolicies.length > 0; + // Check if metrics should be enabled + const metricsSettings = await getMetricsSettings(); + const metricsEnabled = metricsSettings?.enabled ?? false; + const metricsPort = metricsSettings?.port ?? 2019; + const metricsPath = metricsSettings?.path ?? "/metrics"; + const httpApp = httpRoutes.length > 0 ? { @@ -943,10 +949,21 @@ async function buildCaddyDocument() { } : {}; + // Configure admin API + // Metrics are available at /metrics when metrics are enabled + const adminConfig: Record = { + listen: `0.0.0.0:${metricsPort}` + }; + + // Optionally disable metrics endpoint if not enabled + if (!metricsEnabled) { + adminConfig.config = { + persist: false + }; + } + return { - admin: { - listen: "0.0.0.0:2019" - }, + admin: adminConfig, apps: { ...httpApp, ...(tlsApp ? { tls: tlsApp } : {}) diff --git a/src/lib/settings.ts b/src/lib/settings.ts index ae2556eb..4604f240 100644 --- a/src/lib/settings.ts +++ b/src/lib/settings.ts @@ -21,6 +21,12 @@ export type AuthentikSettings = { authEndpoint?: string; }; +export type MetricsSettings = { + enabled: boolean; + port?: number; // Port to expose metrics on (default: 2019, same as admin API) + path?: string; // Path to expose metrics at (default: /metrics) +}; + export async function getSetting(key: string): Promise> { const setting = await db.query.settings.findFirst({ where: (table, { eq }) => eq(table.key, key) @@ -81,3 +87,11 @@ export async function getAuthentikSettings(): Promise export async function saveAuthentikSettings(settings: AuthentikSettings): Promise { await setSetting("authentik", settings); } + +export async function getMetricsSettings(): Promise { + return await getSetting("metrics"); +} + +export async function saveMetricsSettings(settings: MetricsSettings): Promise { + await setSetting("metrics", settings); +}