Add Caddy restart detection, auto-recovery, and metrics exposure

- Implemented health monitoring service that detects Caddy restarts/crashes
- Automatically reapplies configuration when Caddy restarts
- Added metrics settings UI for enabling Prometheus/Grafana monitoring
- Caddy metrics exposed on a separate port (default: 9090) by reverse-proxying requests to the admin API's /metrics endpoint
- Admin API (port 2019) kept internal-only for security
- Updated docker-compose.yml with metrics port documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
fuomag9
2025-11-09 18:40:39 +01:00
parent 1c09acf515
commit 88b25cee65
8 changed files with 318 additions and 23 deletions
+46 -3
View File
@@ -2,11 +2,12 @@
import { useFormState } from "react-dom";
import { Alert, Box, Button, Card, CardContent, Checkbox, FormControlLabel, Stack, TextField, Typography } from "@mui/material";
import type { GeneralSettings, AuthentikSettings } from "@/src/lib/settings";
import type { GeneralSettings, AuthentikSettings, MetricsSettings } from "@/src/lib/settings";
import {
updateCloudflareSettingsAction,
updateGeneralSettingsAction,
updateAuthentikSettingsAction
updateAuthentikSettingsAction,
updateMetricsSettingsAction
} from "./actions";
type Props = {
@@ -17,12 +18,14 @@ type Props = {
accountId?: string;
};
authentik: AuthentikSettings | null;
metrics: MetricsSettings | null;
};
export default function SettingsClient({ general, cloudflare, authentik }: Props) {
export default function SettingsClient({ general, cloudflare, authentik, metrics }: Props) {
const [generalState, generalFormAction] = useFormState(updateGeneralSettingsAction, null);
const [cloudflareState, cloudflareFormAction] = useFormState(updateCloudflareSettingsAction, null);
const [authentikState, authentikFormAction] = useFormState(updateAuthentikSettingsAction, null);
const [metricsState, metricsFormAction] = useFormState(updateMetricsSettingsAction, null);
return (
<Stack spacing={4} sx={{ width: "100%" }}>
@@ -158,6 +161,46 @@ export default function SettingsClient({ general, cloudflare, authentik }: Props
</Stack>
</CardContent>
</Card>
<Card>
<CardContent>
<Typography variant="h6" fontWeight={600} gutterBottom>
Metrics & Monitoring
</Typography>
<Typography color="text.secondary" variant="body2" sx={{ mb: 2 }}>
Enable Caddy metrics exposure for monitoring with Prometheus, Grafana, or other observability tools.
Metrics will be available at http://caddy:{metrics?.port ?? 9090}/metrics on a separate port (NOT the admin API port for security).
</Typography>
<Stack component="form" action={metricsFormAction} spacing={2}>
{metricsState?.message && (
<Alert severity={metricsState.success ? "success" : "warning"}>
{metricsState.message}
</Alert>
)}
<FormControlLabel
control={<Checkbox name="enabled" defaultChecked={metrics?.enabled ?? false} />}
label="Enable metrics endpoint"
/>
<TextField
name="port"
label="Metrics Port"
type="number"
defaultValue={metrics?.port ?? 9090}
helperText="Port to expose metrics on (default: 9090, separate from admin API on 2019)"
fullWidth
/>
<Alert severity="info">
After enabling metrics, configure your monitoring tool to scrape http://caddy-proxy-manager-caddy:{metrics?.port ?? 9090}/metrics from within the Docker network.
To expose metrics externally, add a port mapping like "{metrics?.port ?? 9090}:{metrics?.port ?? 9090}" in docker-compose.yml.
</Alert>
<Box sx={{ display: "flex", justifyContent: "flex-end" }}>
<Button type="submit" variant="contained">
Save metrics settings
</Button>
</Box>
</Stack>
</CardContent>
</Card>
</Stack>
);
}
+33 -1
View File
@@ -3,7 +3,7 @@
import { revalidatePath } from "next/cache";
import { requireAdmin } from "@/src/lib/auth";
import { applyCaddyConfig } from "@/src/lib/caddy";
import { getCloudflareSettings, saveCloudflareSettings, saveGeneralSettings, saveAuthentikSettings } from "@/src/lib/settings";
import { getCloudflareSettings, saveCloudflareSettings, saveGeneralSettings, saveAuthentikSettings, saveMetricsSettings } from "@/src/lib/settings";
type ActionResult = {
success: boolean;
@@ -86,3 +86,35 @@ export async function updateAuthentikSettingsAction(_prevState: ActionResult | n
return { success: false, message: error instanceof Error ? error.message : "Failed to save Authentik settings" };
}
}
/**
 * Server action for the metrics settings form: validates the submitted
 * values, stores them, then asks Caddy to load the regenerated configuration.
 *
 * @param _prevState - Previous action result supplied by `useFormState`; unused.
 * @param formData - Submitted form. `enabled` is a checkbox ("on" when ticked),
 *   `port` is the metrics listen port; a blank port falls back to 9090.
 * @returns `success: false` when the caller is not an admin, the port is
 *   invalid, or saving fails. `success: true` once the settings are stored,
 *   even if applying them to Caddy failed (the message then says so).
 */
export async function updateMetricsSettingsAction(_prevState: ActionResult | null, formData: FormData): Promise<ActionResult> {
  try {
    await requireAdmin();

    const enabled = formData.get("enabled") === "on";

    const rawPort = formData.get("port");
    const portStr = typeof rawPort === "string" ? rawPort.trim() : "";

    // A blank field means "use the default"; anything else must be a usable TCP port.
    let port = 9090;
    if (portStr !== "") {
      // Digits only: rejects signs, decimals and exponent notation such as "1e3".
      const parsed = /^\d{1,5}$/.test(portStr) ? Number(portStr) : NaN;
      if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
        return { success: false, message: "Metrics port must be a whole number between 1 and 65535" };
      }
      // The generated Caddy config already listens on :80/:443 for proxy hosts and
      // the admin API lives on 2019, so a metrics listener there would collide.
      if (parsed === 80 || parsed === 443 || parsed === 2019) {
        return {
          success: false,
          message: `Port ${parsed} is reserved by Caddy (80/443 for proxy traffic, 2019 for the admin API); choose a different metrics port`
        };
      }
      port = parsed;
    }

    await saveMetricsSettings({
      enabled,
      port
    });

    // Apply config to enable/disable metrics
    try {
      await applyCaddyConfig();
      revalidatePath("/settings");
      return { success: true, message: "Metrics settings saved and applied successfully" };
    } catch (error) {
      // The settings are already persisted, so report a partial success
      // instead of failing the whole action.
      console.error("Failed to apply Caddy config:", error);
      revalidatePath("/settings");
      const errorMsg = error instanceof Error ? error.message : "Unknown error";
      return {
        success: true,
        message: `Settings saved, but could not apply to Caddy: ${errorMsg}`
      };
    }
  } catch (error) {
    console.error("Failed to save metrics settings:", error);
    return { success: false, message: error instanceof Error ? error.message : "Failed to save metrics settings" };
  }
}
+5 -3
View File
@@ -1,14 +1,15 @@
import SettingsClient from "./SettingsClient";
import { getCloudflareSettings, getGeneralSettings, getAuthentikSettings } from "@/src/lib/settings";
import { getCloudflareSettings, getGeneralSettings, getAuthentikSettings, getMetricsSettings } from "@/src/lib/settings";
import { requireAdmin } from "@/src/lib/auth";
export default async function SettingsPage() {
await requireAdmin();
const [general, cloudflare, authentik] = await Promise.all([
const [general, cloudflare, authentik, metrics] = await Promise.all([
getGeneralSettings(),
getCloudflareSettings(),
getAuthentikSettings()
getAuthentikSettings(),
getMetricsSettings()
]);
return (
@@ -20,6 +21,7 @@ export default async function SettingsPage() {
accountId: cloudflare?.accountId
}}
authentik={authentik}
metrics={metrics}
/>
);
}
+3 -1
View File
@@ -59,8 +59,10 @@ services:
ports:
- "80:80"
- "443:443"
# Admin API only exposed on internal network for security
# Admin API (port 2019) is only exposed on internal network for security
# Web UI accesses via http://caddy:2019 internally
# Uncomment the line below to expose metrics externally for Grafana/Prometheus
# - "9090:9090" # Metrics available at http://localhost:9090/metrics (configure in Settings first)
environment:
# Primary domain for Caddy configuration
PRIMARY_DOMAIN: ${PRIMARY_DOMAIN:-caddyproxymanager.com}
+10
View File
@@ -37,5 +37,15 @@ export async function register() {
// Don't throw - Caddy might not be ready yet, or config might be applied later
// This ensures proxy hosts work after container restart
}
// Start Caddy health monitoring to detect restarts and auto-reapply config
const { startCaddyMonitoring } = await import("./lib/caddy-monitor");
try {
startCaddyMonitoring();
console.log("Caddy health monitoring started");
} catch (error) {
console.error("Failed to start Caddy health monitoring:", error);
// Don't throw - monitoring is a nice-to-have feature
}
}
}
+170
View File
@@ -0,0 +1,170 @@
/**
* Caddy health monitoring service
* Monitors Caddy for restarts/crashes and automatically reapplies configuration
*/
import { config } from "./config";
import { applyCaddyConfig } from "./caddy";
import { getSetting, setSetting } from "./settings";
/** Bookkeeping the health monitor carries from one check to the next. */
interface CaddyMonitorState {
  /** Set once a probe gets a usable answer; cleared after repeated failed probes. */
  isHealthy: boolean;
  /** Identifier from the last recorded successful probe, or null before any was recorded. */
  lastConfigId: string | null;
  /** `Date.now()` timestamp of the most recent check. */
  lastCheckTime: number;
  /** Failed probes in a row; reset to 0 whenever a probe succeeds. */
  consecutiveFailures: number;
}

/** Milliseconds between periodic health checks (10 seconds). */
const HEALTH_CHECK_INTERVAL = 10 * 1000;

/** Failed probes in a row before a healthy Caddy is flagged as down. */
const MAX_CONSECUTIVE_FAILURES = 3;

/** Milliseconds to wait after detecting a restart before reapplying config (5 seconds). */
const REAPPLY_DELAY = 5 * 1000;

// The object is only ever mutated in place, never rebound, so `const` suffices.
const monitorState: CaddyMonitorState = {
  isHealthy: false,
  lastConfigId: null,
  lastCheckTime: 0,
  consecutiveFailures: 0
};

/** Handle of the periodic check timer while monitoring is running, otherwise null. */
let monitorInterval: NodeJS.Timeout | null = null;

/** Guards against starting the monitor twice. */
let isMonitoring = false;
/**
 * Probe the Caddy admin API and summarise the currently loaded configuration.
 *
 * The config body is inspected before any ETag is considered: callers detect a
 * restart by comparing the result with the literal "empty", so an ETag must
 * never mask an empty configuration. (Caddy's admin API documentation states
 * that config reads carry an Etag header, in which case an ETag-first check
 * would never report "empty".)
 *
 * @returns
 *   - `null` when Caddy cannot be reached, times out (5 s), answers with a
 *     non-2xx status, or returns a body that is not valid JSON;
 *   - `"empty"` when the config is `null`, is not an object, or has no apps;
 *   - otherwise the response's ETag, or `"configured"` if no ETag was sent.
 */
async function getCaddyConfigId(): Promise<string | null> {
  try {
    const response = await fetch(`${config.caddyApiUrl}/config/`, {
      method: "GET",
      signal: AbortSignal.timeout(5000)
    });

    if (!response.ok) {
      return null;
    }

    // A Caddy instance with no config loaded answers with JSON `null`, so the
    // body has to be treated as untrusted shape rather than as an object.
    const configData: unknown = await response.json();
    const apps =
      typeof configData === "object" && configData !== null
        ? (configData as { apps?: unknown }).apps
        : undefined;
    const isEmpty = typeof apps !== "object" || apps === null || Object.keys(apps).length === 0;
    if (isEmpty) {
      return "empty";
    }

    // Config is present: prefer the ETag as a finer-grained identifier.
    return response.headers.get("etag") || "configured";
  } catch {
    // Network error, timeout, or unparseable body
    return null;
  }
}
/**
 * Run a single health probe and fold the outcome into `monitorState`.
 *
 * - Probe failed: bump the failure counter and, once it reaches
 *   MAX_CONSECUTIVE_FAILURES while Caddy was considered healthy, mark it down.
 * - Probe succeeded with an "empty" config while a config ID was already known
 *   or Caddy was marked unhealthy: treat it as a restart and schedule a
 *   config reapply after REAPPLY_DELAY.
 * - Otherwise: remember the observed config ID.
 */
async function checkCaddyHealth(): Promise<void> {
  monitorState.lastCheckTime = Date.now();

  const observedId = await getCaddyConfigId();

  // No identifier at all means Caddy did not answer the probe.
  if (observedId === null) {
    monitorState.consecutiveFailures += 1;

    const tooManyFailures = monitorState.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES;
    if (monitorState.isHealthy && tooManyFailures) {
      console.warn(
        `[CaddyMonitor] Caddy appears to be down (${monitorState.consecutiveFailures} consecutive failures)`
      );
      monitorState.isHealthy = false;
    }
    return;
  }

  // Capture the pre-probe view before flipping the state back to healthy.
  const wasMarkedDown = !monitorState.isHealthy;
  const hadKnownConfig = monitorState.lastConfigId !== null;
  monitorState.consecutiveFailures = 0;
  monitorState.isHealthy = true;

  const restartDetected = observedId === "empty" && (hadKnownConfig || wasMarkedDown);

  if (!restartDetected) {
    if (!hadKnownConfig) {
      // First successful probe since the monitor started
      console.log("[CaddyMonitor] Caddy health monitoring initialized");
    }
    monitorState.lastConfigId = observedId;
    return;
  }

  console.log("[CaddyMonitor] Caddy restart detected! Waiting before reapplying configuration...");

  // Deferred so Caddy has time to finish initialising before config is pushed.
  const reapplyAfterRestart = async (): Promise<void> => {
    try {
      console.log("[CaddyMonitor] Reapplying Caddy configuration after restart...");
      await applyCaddyConfig();
      console.log("[CaddyMonitor] Configuration reapplied successfully");

      // Record the identifier of the freshly applied config.
      const refreshedId = await getCaddyConfigId();
      monitorState.lastConfigId = refreshedId;
    } catch (error) {
      // lastConfigId is left untouched; later health checks can detect the empty config again.
      console.error("[CaddyMonitor] Failed to reapply configuration after restart:", error);
    }
  };
  setTimeout(reapplyAfterRestart, REAPPLY_DELAY);
}
/**
 * Begin periodic Caddy health checks.
 *
 * Runs one check right away and then one every HEALTH_CHECK_INTERVAL
 * milliseconds. Calling it again while monitoring is active only logs a notice.
 */
export function startCaddyMonitoring(): void {
  if (isMonitoring) {
    console.log("[CaddyMonitor] Already monitoring");
    return;
  }

  console.log(`[CaddyMonitor] Starting Caddy health monitoring (interval: ${HEALTH_CHECK_INTERVAL}ms)`);
  isMonitoring = true;

  // Fire-and-forget wrapper: a rejected check is logged, never propagated.
  const runCheck = (failureLabel: string): void => {
    checkCaddyHealth().catch((error) => {
      console.error(failureLabel, error);
    });
  };

  // Immediate first check, then the recurring schedule.
  runCheck("[CaddyMonitor] Initial health check failed:");
  monitorInterval = setInterval(() => {
    runCheck("[CaddyMonitor] Health check failed:");
  }, HEALTH_CHECK_INTERVAL);
}
/**
 * Halt the periodic health checks started by `startCaddyMonitoring`.
 * Does nothing when monitoring is not running.
 */
export function stopCaddyMonitoring(): void {
  if (isMonitoring) {
    console.log("[CaddyMonitor] Stopping Caddy health monitoring");
    isMonitoring = false;

    // Cancel the recurring timer, if one was installed, and forget its handle.
    if (monitorInterval) {
      clearInterval(monitorInterval);
      monitorInterval = null;
    }
  }
}
/**
 * Return a shallow copy of the monitor's current state, so callers can inspect
 * it (e.g. while debugging) without being able to mutate the live object.
 */
export function getMonitorState(): Readonly<CaddyMonitorState> {
  const snapshot: CaddyMonitorState = { ...monitorState };
  return snapshot;
}
+38 -15
View File
@@ -3,7 +3,7 @@ import { join } from "node:path";
import crypto from "node:crypto";
import db, { nowIso } from "./db";
import { config } from "./config";
import { getCloudflareSettings, getGeneralSettings, setSetting } from "./settings";
import { getCloudflareSettings, getGeneralSettings, getMetricsSettings, setSetting } from "./settings";
import {
accessListEntries,
certificates,
@@ -925,23 +925,46 @@ async function buildCaddyDocument() {
const hasTls = tlsConnectionPolicies.length > 0;
const httpApp =
httpRoutes.length > 0
? {
http: {
servers: {
cpm: {
listen: hasTls ? [":80", ":443"] : [":80"],
routes: httpRoutes,
// Only disable automatic HTTPS if we have TLS automation policies
// This allows Caddy to handle HTTP-01 challenges for managed certificates
...(tlsApp ? {} : { automatic_https: { disable: true } }),
...(hasTls ? { tls_connection_policies: tlsConnectionPolicies } : {})
// Check if metrics should be enabled
const metricsSettings = await getMetricsSettings();
const metricsEnabled = metricsSettings?.enabled ?? false;
const metricsPort = metricsSettings?.port ?? 9090;
const servers: Record<string, unknown> = {};
// Main HTTP/HTTPS server for proxy hosts
if (httpRoutes.length > 0) {
servers.cpm = {
listen: hasTls ? [":80", ":443"] : [":80"],
routes: httpRoutes,
// Only disable automatic HTTPS if we have TLS automation policies
// This allows Caddy to handle HTTP-01 challenges for managed certificates
...(tlsApp ? {} : { automatic_https: { disable: true } }),
...(hasTls ? { tls_connection_policies: tlsConnectionPolicies } : {})
};
}
// Metrics server - exposes /metrics endpoint on separate port
if (metricsEnabled) {
servers.metrics = {
listen: [`:${metricsPort}`],
routes: [
{
handle: [
{
handler: "reverse_proxy",
upstreams: [{ dial: "localhost:2019" }],
rewrite: {
uri: "/metrics"
}
}
}
]
}
: {};
]
};
}
const httpApp = Object.keys(servers).length > 0 ? { http: { servers } } : {};
return {
admin: {
+13
View File
@@ -21,6 +21,11 @@ export type AuthentikSettings = {
authEndpoint?: string;
};
/**
 * Persisted settings for the metrics endpoint. Stored and loaded under the
 * "metrics" settings key by saveMetricsSettings / getMetricsSettings.
 */
export type MetricsSettings = {
// Whether the metrics endpoint should be exposed at all.
enabled: boolean;
port?: number; // Port to expose metrics on (default: 9090, separate from admin API)
};
export async function getSetting<T>(key: string): Promise<SettingValue<T>> {
const setting = await db.query.settings.findFirst({
where: (table, { eq }) => eq(table.key, key)
@@ -81,3 +86,11 @@ export async function getAuthentikSettings(): Promise<AuthentikSettings | null>
export async function saveAuthentikSettings(settings: AuthentikSettings): Promise<void> {
await setSetting("authentik", settings);
}
/**
 * Load the metrics settings stored under the "metrics" key.
 *
 * @returns Whatever `getSetting` yields for that key; per the declared return
 *   type this is the stored settings object or null.
 */
export async function getMetricsSettings(): Promise<MetricsSettings | null> {
  const stored = await getSetting<MetricsSettings>("metrics");
  return stored;
}
/**
 * Persist the metrics settings under the "metrics" key.
 *
 * @param settings - Values to store; passed to `setSetting` unchanged.
 */
export async function saveMetricsSettings(settings: MetricsSettings): Promise<void> {
  const settingKey = "metrics";
  await setSetting(settingKey, settings);
}