Migrate analytics from SQLite to ClickHouse

SQLite was too slow for analytical aggregations on traffic_events and
waf_events (millions of rows, GROUP BY, COUNT DISTINCT). ClickHouse is
a columnar OLAP database purpose-built for this workload.

- Add ClickHouse container to Docker Compose with health check
- Create src/lib/clickhouse/client.ts with singleton client, table DDL,
  insert helpers, and all analytics query functions
- Update log-parser.ts and waf-log-parser.ts to write to ClickHouse
- Remove purgeOldEntries — ClickHouse TTL handles 90-day retention
- Rewrite analytics-db.ts and waf-events.ts to query ClickHouse
- Remove trafficEvents/wafEvents from SQLite schema, add migration
- CLICKHOUSE_PASSWORD is required (no hardcoded default)
- Update .env.example, README, and test infrastructure

API response shapes are unchanged — no frontend modifications needed.
Parse state (file offsets) remains in SQLite.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
fuomag9
2026-04-10 00:05:38 +02:00
parent 833284efb1
commit e1c97038d4
21 changed files with 819 additions and 763 deletions

View File

@@ -107,6 +107,18 @@ OAUTH_ALLOW_AUTO_LINKING=false # Auto-link OAuth to accounts without pas
# LOGIN_BLOCK_MS=900000
# =============================================================================
# CLICKHOUSE ANALYTICS (OPTIONAL)
# =============================================================================
# ClickHouse is used for analytics data (traffic events, WAF events).
# Data is automatically retained for 90 days via ClickHouse TTL.
# CLICKHOUSE_PASSWORD is required — generate with: openssl rand -base64 32
CLICKHOUSE_PASSWORD=your-clickhouse-password-here
# CLICKHOUSE_URL=http://clickhouse:8123
# CLICKHOUSE_USER=cpm
# CLICKHOUSE_DB=analytics
# =============================================================================
# GEOIP UPDATE (OPTIONAL)
# =============================================================================

View File

@@ -12,7 +12,7 @@ Web interface for managing [Caddy Server](https://caddyserver.com/) reverse prox
## Overview
This project provides a web UI for Caddy Server, eliminating the need to manually edit JSON configurations or Caddyfiles. It handles reverse proxies, access lists, and certificate management through a shadcn/ui interface. Built with Next.js 16, React 19, shadcn/ui, Tailwind CSS, Drizzle ORM, and TypeScript.
This project provides a web UI for Caddy Server, eliminating the need to manually edit JSON configurations or Caddyfiles. It handles reverse proxies, access lists, and certificate management through a shadcn/ui interface. Built with Next.js 16, React 19, shadcn/ui, Tailwind CSS, Drizzle ORM, and TypeScript. Analytics data (traffic events, WAF events) is stored in ClickHouse for fast aggregation queries, with automatic 90-day retention via TTL.
---
@@ -93,6 +93,10 @@ Data persists in Docker volumes (caddy-manager-data, caddy-data, caddy-config, c
| `INSTANCE_SLAVES` | JSON array of slave instances for the master to push to | None | No |
| `INSTANCE_SYNC_INTERVAL` | Periodic sync interval in seconds (`0` = disabled) | `0` | No |
| `INSTANCE_SYNC_ALLOW_HTTP` | Allow sync over HTTP (for internal Docker networks) | `false` | No |
| `CLICKHOUSE_URL` | ClickHouse HTTP endpoint for analytics | `http://clickhouse:8123` | No |
| `CLICKHOUSE_USER` | ClickHouse username | `cpm` | No |
| `CLICKHOUSE_PASSWORD` | ClickHouse password (`openssl rand -base64 32`) | None | **Yes** |
| `CLICKHOUSE_DB` | ClickHouse database name | `analytics` | No |
**Production Requirements:**
- `SESSION_SECRET`: 32+ characters (`openssl rand -base64 32`)

View File

@@ -5,6 +5,7 @@
"": {
"name": "caddy-proxy-manager",
"dependencies": {
"@clickhouse/client": "^1.18.2",
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-avatar": "^1.1.11",
"@radix-ui/react-checkbox": "^1.3.3",
@@ -134,6 +135,10 @@
"@babel/types": ["@babel/types@7.29.0", "", { "dependencies": { "@babel/helper-string-parser": "^7.27.1", "@babel/helper-validator-identifier": "^7.28.5" } }, "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A=="],
"@clickhouse/client": ["@clickhouse/client@1.18.2", "", { "dependencies": { "@clickhouse/client-common": "1.18.2" } }, "sha512-fuquQswRSHWM6D079ZeuGqkMOsqtcUPL06UdTnowmoeeYjVrqisfVmvnw8pc3OeKS4kVb91oygb/MfLDiMs0TQ=="],
"@clickhouse/client-common": ["@clickhouse/client-common@1.18.2", "", {}, "sha512-J0SG6q9V31ydxonglpj9xhNRsUxCsF71iEZ784yldqMYwsHixj/9xHFDgBDX3DuMiDx/kPDfXnf+pimp08wIBA=="],
"@date-fns/tz": ["@date-fns/tz@1.4.1", "", {}, "sha512-P5LUNhtbj6YfI3iJjw5EL9eUAG6OitD0W3fWQcpQjDRc/QIsL0tRNuO1PcDvPccWL1fSTXXdE1ds+l95DV/OFA=="],
"@dotenvx/dotenvx": ["@dotenvx/dotenvx@1.57.1", "", { "dependencies": { "commander": "^11.1.0", "dotenv": "^17.2.1", "eciesjs": "^0.4.10", "execa": "^5.1.1", "fdir": "^6.2.0", "ignore": "^5.3.0", "object-treeify": "1.1.33", "picomatch": "^4.0.2", "which": "^4.0.0" }, "bin": { "dotenvx": "src/cli/dotenvx.js" } }, "sha512-iKXuo8Nes9Ft4zF3AZOT4FHkl6OV8bHqn61a67qHokkBzSEurnKZAlOkT0FYrRNVGvE6nCfZMtYswyjfXCR1MQ=="],

View File

@@ -52,6 +52,12 @@ services:
OAUTH_TOKEN_URL: ${OAUTH_TOKEN_URL:-}
OAUTH_USERINFO_URL: ${OAUTH_USERINFO_URL:-}
OAUTH_ALLOW_AUTO_LINKING: ${OAUTH_ALLOW_AUTO_LINKING:-false}
# ClickHouse analytics database
CLICKHOUSE_URL: ${CLICKHOUSE_URL:-http://clickhouse:8123}
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-cpm}
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:?ERROR - CLICKHOUSE_PASSWORD is required}
CLICKHOUSE_DB: ${CLICKHOUSE_DB:-analytics}
group_add:
- "${CADDY_GID:-10000}" # caddy's GID — lets the web user read /logs/access.log
volumes:
@@ -61,6 +67,8 @@ services:
depends_on:
caddy:
condition: service_healthy
clickhouse:
condition: service_healthy
networks:
- caddy-network
healthcheck:
@@ -159,6 +167,30 @@ services:
networks:
- caddy-network
clickhouse:
container_name: caddy-proxy-manager-clickhouse
image: clickhouse/clickhouse-server:latest-alpine
restart: always
environment:
CLICKHOUSE_DB: ${CLICKHOUSE_DB:-analytics}
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-cpm}
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:?ERROR - CLICKHOUSE_PASSWORD is required}
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
volumes:
- clickhouse-data:/var/lib/clickhouse
networks:
- caddy-network
healthcheck:
test: ["CMD-SHELL", "clickhouse-client --user ${CLICKHOUSE_USER:-cpm} --password ${CLICKHOUSE_PASSWORD:?ERROR - CLICKHOUSE_PASSWORD is required} --query 'SELECT 1'"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
ulimits:
nofile:
soft: 262144
hard: 262144
geoipupdate:
container_name: geoipupdate-${HOSTNAME}
image: ghcr.io/maxmind/geoipupdate
@@ -184,3 +216,4 @@ volumes:
caddy-config:
caddy-logs:
geoip-data:
clickhouse-data:

View File

@@ -0,0 +1,4 @@
-- Analytics data (traffic_events, waf_events) has been migrated to ClickHouse.
DROP TABLE IF EXISTS traffic_events;
--> statement-breakpoint
DROP TABLE IF EXISTS waf_events;

View File

@@ -134,6 +134,13 @@
"when": 1775600000000,
"tag": "0018_forward_auth_redirect_intents",
"breakpoints": true
},
{
"idx": 19,
"version": "6",
"when": 1775700000000,
"tag": "0019_drop_analytics_tables",
"breakpoints": true
}
]
}

19
package-lock.json generated
View File

@@ -8,6 +8,7 @@
"name": "caddy-proxy-manager",
"version": "1.0.0",
"dependencies": {
"@clickhouse/client": "^1.18.2",
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-avatar": "^1.1.11",
"@radix-ui/react-checkbox": "^1.3.3",
@@ -579,6 +580,24 @@
"node": ">=6.9.0"
}
},
"node_modules/@clickhouse/client": {
"version": "1.18.2",
"resolved": "https://registry.npmjs.org/@clickhouse/client/-/client-1.18.2.tgz",
"integrity": "sha512-fuquQswRSHWM6D079ZeuGqkMOsqtcUPL06UdTnowmoeeYjVrqisfVmvnw8pc3OeKS4kVb91oygb/MfLDiMs0TQ==",
"license": "Apache-2.0",
"dependencies": {
"@clickhouse/client-common": "1.18.2"
},
"engines": {
"node": ">=16"
}
},
"node_modules/@clickhouse/client-common": {
"version": "1.18.2",
"resolved": "https://registry.npmjs.org/@clickhouse/client-common/-/client-common-1.18.2.tgz",
"integrity": "sha512-J0SG6q9V31ydxonglpj9xhNRsUxCsF71iEZ784yldqMYwsHixj/9xHFDgBDX3DuMiDx/kPDfXnf+pimp08wIBA==",
"license": "Apache-2.0"
},
"node_modules/@date-fns/tz": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/@date-fns/tz/-/tz-1.4.1.tgz",

View File

@@ -20,6 +20,7 @@
"test:e2e:headed": "playwright test --headed --config tests/playwright.config.ts"
},
"dependencies": {
"@clickhouse/client": "^1.18.2",
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-avatar": "^1.1.11",
"@radix-ui/react-checkbox": "^1.3.3",

View File

@@ -48,6 +48,16 @@ export async function register() {
// Don't throw - monitoring is a nice-to-have feature
}
// Initialize ClickHouse analytics database
const { initClickHouse, closeClickHouse } = await import("./lib/clickhouse/client");
try {
await initClickHouse();
console.log("ClickHouse analytics initialized");
} catch (error) {
console.error("Failed to initialize ClickHouse:", error);
// Don't throw - analytics is non-critical
}
// Start log parser for analytics
const { initLogParser, parseNewLogEntries, stopLogParser } = await import("./lib/log-parser");
try {
@@ -62,6 +72,7 @@ export async function register() {
process.on("SIGTERM", () => {
stopLogParser();
clearInterval(logParserInterval);
closeClickHouse();
});
console.log("Log parser started");
} catch (error) {

View File

@@ -1,7 +1,24 @@
import { sql, and, gte, lte, eq, inArray } from 'drizzle-orm';
import db from './db';
import { trafficEvents, proxyHosts, wafEvents } from './db/schema';
import { existsSync } from 'node:fs';
import db from './db';
import { proxyHosts } from './db/schema';
import {
querySummary,
queryTimeline,
queryCountries,
queryProtocols,
queryUserAgents,
queryBlocked,
queryDistinctHosts,
type AnalyticsSummary as CHSummary,
type TimelineBucket,
type CountryStats,
type ProtoStats,
type UAStats,
type BlockedEvent,
type BlockedPage,
} from './clickhouse/client';
export type { TimelineBucket, CountryStats, ProtoStats, UAStats, BlockedEvent, BlockedPage };
export type Interval = '1h' | '12h' | '24h' | '7d' | '30d';
@@ -15,250 +32,46 @@ export const INTERVAL_SECONDS: Record<Interval, number> = {
'30d': 30 * 86400,
};
function buildWhere(from: number, to: number, hosts: string[]) {
const conditions = [gte(trafficEvents.ts, from), lte(trafficEvents.ts, to)];
if (hosts.length === 1) {
conditions.push(eq(trafficEvents.host, hosts[0]));
} else if (hosts.length > 1) {
conditions.push(inArray(trafficEvents.host, hosts));
}
return and(...conditions);
}
// ── Summary ──────────────────────────────────────────────────────────────────
export interface AnalyticsSummary {
totalRequests: number;
uniqueIps: number;
blockedRequests: number;
blockedPercent: number;
bytesServed: number;
export interface AnalyticsSummary extends CHSummary {
loggingDisabled: boolean;
}
export async function getAnalyticsSummary(from: number, to: number, hosts: string[]): Promise<AnalyticsSummary> {
const loggingDisabled = !existsSync(LOG_FILE);
const where = buildWhere(from, to, hosts);
const row = db
.select({
total: sql<number>`count(*)`,
uniqueIps: sql<number>`count(distinct ${trafficEvents.clientIp})`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
bytes: sql<number>`sum(${trafficEvents.bytesSent})`,
})
.from(trafficEvents)
.where(where)
.get();
// Count WAF blocks in the same time window
const wafConditions = [gte(wafEvents.ts, from), lte(wafEvents.ts, to), eq(wafEvents.blocked, true)];
if (hosts.length === 1) {
wafConditions.push(eq(wafEvents.host, hosts[0]));
} else if (hosts.length > 1) {
wafConditions.push(inArray(wafEvents.host, hosts));
}
const wafRow = db
.select({ blocked: sql<number>`count(*)` })
.from(wafEvents)
.where(and(...wafConditions))
.get();
const total = row?.total ?? 0;
const geoBlocked = row?.blocked ?? 0;
const wafBlocked = wafRow?.blocked ?? 0;
const blocked = geoBlocked + wafBlocked;
return {
totalRequests: total,
uniqueIps: row?.uniqueIps ?? 0,
blockedRequests: blocked,
blockedPercent: total > 0 ? Math.round((blocked / total) * 1000) / 10 : 0,
bytesServed: row?.bytes ?? 0,
loggingDisabled,
};
const summary = await querySummary(from, to, hosts);
return { ...summary, loggingDisabled };
}
// ── Timeline ─────────────────────────────────────────────────────────────────
export interface TimelineBucket {
ts: number;
total: number;
blocked: number;
}
function bucketSizeForDuration(seconds: number): number {
if (seconds <= 3600) return 300;
if (seconds <= 43200) return 1800;
if (seconds <= 86400) return 3600;
if (seconds <= 7 * 86400) return 21600;
return 86400;
}
export async function getAnalyticsTimeline(from: number, to: number, hosts: string[]): Promise<TimelineBucket[]> {
const bucketSize = bucketSizeForDuration(to - from);
const where = buildWhere(from, to, hosts);
const rows = db
.select({
bucket: sql<number>`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`,
total: sql<number>`count(*)`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
})
.from(trafficEvents)
.where(where)
.groupBy(sql`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`)
.orderBy(sql`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`)
.all();
return rows.map((r) => ({
ts: r.bucket * bucketSize,
total: r.total,
blocked: r.blocked ?? 0,
}));
return queryTimeline(from, to, hosts);
}
// ── Countries ────────────────────────────────────────────────────────────────
export interface CountryStats {
countryCode: string;
total: number;
blocked: number;
}
export async function getAnalyticsCountries(from: number, to: number, hosts: string[]): Promise<CountryStats[]> {
const where = buildWhere(from, to, hosts);
const rows = db
.select({
countryCode: trafficEvents.countryCode,
total: sql<number>`count(*)`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
})
.from(trafficEvents)
.where(where)
.groupBy(trafficEvents.countryCode)
.orderBy(sql`count(*) desc`)
.all();
return rows.map((r) => ({
countryCode: r.countryCode ?? 'XX',
total: r.total,
blocked: r.blocked ?? 0,
}));
return queryCountries(from, to, hosts);
}
// ── Protocols ────────────────────────────────────────────────────────────────
export interface ProtoStats {
proto: string;
count: number;
percent: number;
}
export async function getAnalyticsProtocols(from: number, to: number, hosts: string[]): Promise<ProtoStats[]> {
const where = buildWhere(from, to, hosts);
const rows = db
.select({
proto: trafficEvents.proto,
count: sql<number>`count(*)`,
})
.from(trafficEvents)
.where(where)
.groupBy(trafficEvents.proto)
.orderBy(sql`count(*) desc`)
.all();
const total = rows.reduce((s, r) => s + r.count, 0);
return rows.map((r) => ({
proto: r.proto || 'Unknown',
count: r.count,
percent: total > 0 ? Math.round((r.count / total) * 1000) / 10 : 0,
}));
return queryProtocols(from, to, hosts);
}
// ── User Agents ──────────────────────────────────────────────────────────────
export interface UAStats {
userAgent: string;
count: number;
percent: number;
}
export async function getAnalyticsUserAgents(from: number, to: number, hosts: string[]): Promise<UAStats[]> {
const where = buildWhere(from, to, hosts);
const rows = db
.select({
userAgent: trafficEvents.userAgent,
count: sql<number>`count(*)`,
})
.from(trafficEvents)
.where(where)
.groupBy(trafficEvents.userAgent)
.orderBy(sql`count(*) desc`)
.limit(10)
.all();
const total = rows.reduce((s, r) => s + r.count, 0);
return rows.map((r) => ({
userAgent: r.userAgent || 'Unknown',
count: r.count,
percent: total > 0 ? Math.round((r.count / total) * 1000) / 10 : 0,
}));
return queryUserAgents(from, to, hosts);
}
// ── Blocked events ───────────────────────────────────────────────────────────
export interface BlockedEvent {
id: number;
ts: number;
clientIp: string;
countryCode: string | null;
method: string;
uri: string;
status: number;
host: string;
}
export interface BlockedPage {
events: BlockedEvent[];
total: number;
page: number;
pages: number;
}
export async function getAnalyticsBlocked(from: number, to: number, hosts: string[], page: number): Promise<BlockedPage> {
const pageSize = 10;
const where = and(buildWhere(from, to, hosts), eq(trafficEvents.isBlocked, true));
const totalRow = db.select({ total: sql<number>`count(*)` }).from(trafficEvents).where(where).get();
const total = totalRow?.total ?? 0;
const pages = Math.max(1, Math.ceil(total / pageSize));
const safePage = Math.min(Math.max(1, page), pages);
const rows = db
.select({
id: trafficEvents.id,
ts: trafficEvents.ts,
clientIp: trafficEvents.clientIp,
countryCode: trafficEvents.countryCode,
method: trafficEvents.method,
uri: trafficEvents.uri,
status: trafficEvents.status,
host: trafficEvents.host,
})
.from(trafficEvents)
.where(where)
.orderBy(sql`${trafficEvents.ts} desc`)
.limit(pageSize)
.offset((safePage - 1) * pageSize)
.all();
return { events: rows, total, page: safePage, pages };
return queryBlocked(from, to, hosts, page);
}
// ── Hosts ────────────────────────────────────────────────────────────────────
@@ -266,11 +79,11 @@ export async function getAnalyticsBlocked(from: number, to: number, hosts: strin
export async function getAnalyticsHosts(): Promise<string[]> {
const hostSet = new Set<string>();
// Hosts that appear in traffic events
const trafficRows = db.selectDistinct({ host: trafficEvents.host }).from(trafficEvents).all();
for (const r of trafficRows) if (r.host) hostSet.add(r.host);
// Hosts from ClickHouse traffic events
const chHosts = await queryDistinctHosts();
for (const h of chHosts) if (h) hostSet.add(h);
// All domains configured on proxy hosts (even those with no traffic yet)
// All domains configured on proxy hosts (SQLite)
const proxyRows = db.select({ domains: proxyHosts.domains }).from(proxyHosts).all();
for (const r of proxyRows) {
try {

View File

@@ -0,0 +1,527 @@
import { createClient, type ClickHouseClient } from '@clickhouse/client';
// ── Configuration ───────────────────────────────────────────────────────────
// Connection settings, resolved once at module load from the environment.
const CH_URL = process.env.CLICKHOUSE_URL ?? 'http://clickhouse:8123';
const CH_USER = process.env.CLICKHOUSE_USER ?? 'cpm';
// NOTE(review): the docs say CLICKHOUSE_PASSWORD is required, but this falls
// back to '' — the connection simply fails later if unset. Confirm intended.
const CH_PASS = process.env.CLICKHOUSE_PASSWORD ?? '';
const CH_DB = process.env.CLICKHOUSE_DB ?? 'analytics';

// ── Singleton client ────────────────────────────────────────────────────────

let client: ClickHouseClient | null = null;

/**
 * Returns the process-wide ClickHouse client, lazily created on first use.
 * async_insert=1 with wait_for_async_insert=0 makes inserts fire-and-forget.
 */
export function getClient(): ClickHouseClient {
  if (client !== null) return client;
  client = createClient({
    url: CH_URL,
    username: CH_USER,
    password: CH_PASS,
    database: CH_DB,
    clickhouse_settings: {
      async_insert: 1,
      wait_for_async_insert: 0,
    },
  });
  return client;
}
// ── Table creation ──────────────────────────────────────────────────────────
// DDL for per-request traffic rows, executed by initClickHouse().
// MergeTree with monthly partitions; ORDER BY (host, ts) so host-scoped
// time-range scans touch few granules. The 90-day TTL deletes old rows,
// replacing the old SQLite purgeOldEntries job.
const TRAFFIC_EVENTS_DDL = `
CREATE TABLE IF NOT EXISTS traffic_events (
ts DateTime,
client_ip String,
country_code Nullable(String),
host String DEFAULT '',
method String DEFAULT '',
uri String DEFAULT '',
status UInt16 DEFAULT 0,
proto String DEFAULT '',
bytes_sent UInt64 DEFAULT 0,
user_agent String DEFAULT '',
is_blocked Bool DEFAULT false
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(ts)
ORDER BY (host, ts)
TTL ts + INTERVAL 90 DAY DELETE
SETTINGS index_granularity = 8192
`;

// DDL for WAF (web application firewall) events; same engine, partitioning
// and 90-day retention scheme as traffic_events.
const WAF_EVENTS_DDL = `
CREATE TABLE IF NOT EXISTS waf_events (
ts DateTime,
host String DEFAULT '',
client_ip String,
country_code Nullable(String),
method String DEFAULT '',
uri String DEFAULT '',
rule_id Nullable(Int32),
rule_message Nullable(String),
severity Nullable(String),
raw_data Nullable(String),
blocked Bool DEFAULT true
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(ts)
ORDER BY (host, ts)
TTL ts + INTERVAL 90 DAY DELETE
SETTINGS index_granularity = 8192
`;
export async function initClickHouse(): Promise<void> {
const ch = getClient();
// Ensure database exists (the default user may need to create it)
await ch.command({ query: `CREATE DATABASE IF NOT EXISTS ${CH_DB}` });
await ch.command({ query: TRAFFIC_EVENTS_DDL });
await ch.command({ query: WAF_EVENTS_DDL });
}
/**
 * Closes and discards the singleton client. Safe to call when no client was
 * ever created; a later getClient() call lazily creates a fresh one.
 */
export async function closeClickHouse(): Promise<void> {
  if (client === null) return;
  await client.close();
  client = null;
}
// ── Insert helpers ──────────────────────────────────────────────────────────
/** One parsed access-log request, in ClickHouse column naming (snake_case). */
export interface TrafficEventRow {
  ts: number; // Unix seconds (insert converts via ts * 1000 to a Date)
  client_ip: string;
  country_code: string | null; // presumably a GeoIP country code; null when unresolved — confirm against parser
  host: string;
  method: string;
  uri: string;
  status: number;
  proto: string;
  bytes_sent: number;
  user_agent: string;
  is_blocked: boolean; // sent to ClickHouse as 0/1 by insertTrafficEvents
}

/** One parsed WAF log event, in ClickHouse column naming (snake_case). */
export interface WafEventRow {
  ts: number; // Unix seconds
  host: string;
  client_ip: string;
  country_code: string | null;
  rule_id: number | null;
  rule_message: string | null;
  severity: string | null;
  raw_data: string | null; // raw log payload — presumably JSON text; verify against waf-log-parser
  blocked: boolean; // sent to ClickHouse as 0/1 by insertWafEvents
  method: string;
  uri: string;
}
/**
 * Bulk-inserts traffic rows; no-op for an empty batch.
 *
 * `ts` is sent as raw Unix seconds instead of a formatted date string:
 * ClickHouse parses integer JSON values for DateTime columns as Unix
 * timestamps, which is timezone-independent, whereas a
 * 'YYYY-MM-DD hh:mm:ss' string is interpreted in the *server's* timezone
 * and would shift every event when that timezone is not UTC. The query
 * helpers already treat ts as Unix seconds (toDateTime(n) / toUInt32(ts)).
 */
export async function insertTrafficEvents(rows: TrafficEventRow[]): Promise<void> {
  if (rows.length === 0) return;
  const ch = getClient();
  const values = rows.map(r => ({
    ...r,
    ts: Math.trunc(r.ts),
    is_blocked: r.is_blocked ? 1 : 0, // Bool column accepts 0/1
  }));
  await ch.insert({ table: 'traffic_events', values, format: 'JSONEachRow' });
}
/**
 * Bulk-inserts WAF rows; no-op for an empty batch.
 *
 * `ts` is sent as raw Unix seconds instead of a formatted date string:
 * ClickHouse parses integer JSON values for DateTime columns as Unix
 * timestamps (timezone-independent), whereas a 'YYYY-MM-DD hh:mm:ss'
 * string is interpreted in the server's timezone and would shift events
 * when that timezone is not UTC.
 */
export async function insertWafEvents(rows: WafEventRow[]): Promise<void> {
  if (rows.length === 0) return;
  const ch = getClient();
  const values = rows.map(r => ({
    ...r,
    ts: Math.trunc(r.ts),
    blocked: r.blocked ? 1 : 0, // Bool column accepts 0/1
  }));
  await ch.insert({ table: 'waf_events', values, format: 'JSONEachRow' });
}
// ── Query helpers ───────────────────────────────────────────────────────────
/**
 * Builds an " AND host IN (...)" clause for the given hosts, or '' when the
 * list is empty. Values are escaped for ClickHouse single-quoted string
 * literals: backslashes first, then quotes — the original escaped only
 * quotes, so a host containing a backslash (e.g. "a\'") could break out of
 * the literal.
 */
function hostFilter(hosts: string[]): string {
  if (hosts.length === 0) return '';
  const literal = (h: string) =>
    `'${h.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
  return ` AND host IN (${hosts.map(literal).join(',')})`;
}
/**
 * Builds the inclusive ts-range predicate for [from, to] (Unix seconds).
 * The bounds are interpolated into SQL, so they are truncated to integers
 * and validated first: a NaN or Infinity (e.g. from parsing a bad query
 * parameter) would otherwise produce malformed SQL.
 * @throws RangeError when either bound is not a finite number.
 */
function timeFilter(from: number, to: number): string {
  const lo = Math.trunc(from);
  const hi = Math.trunc(to);
  if (!Number.isFinite(lo) || !Number.isFinite(hi)) {
    throw new RangeError(`invalid time range: ${from}..${to}`);
  }
  return `ts >= toDateTime(${lo}) AND ts <= toDateTime(${hi})`;
}
// Runs a SELECT and resolves with every result row as a plain object
// (JSONEachRow format). Note: ClickHouse serialises wide numeric types
// (UInt64 counts, sums) as JSON strings — callers Number()-convert them.
async function queryRows<T>(query: string): Promise<T[]> {
  const ch = getClient();
  const result = await ch.query({ query, format: 'JSONEachRow' });
  return result.json<T>();
}
// Runs a SELECT expected to yield at most one row; null when it yields none.
async function queryRow<T>(query: string): Promise<T | null> {
  const [first] = await queryRows<T>(query);
  return first ?? null;
}
// ── Analytics queries (same signatures as old analytics-db.ts) ──────────────
/** Aggregate counters for the analytics dashboard header. */
export interface AnalyticsSummary {
  totalRequests: number;
  uniqueIps: number;
  blockedRequests: number;
  blockedPercent: number;
  bytesServed: number;
}

/**
 * Summary counters for [from, to] (Unix seconds), optionally restricted to
 * the given hosts. blockedRequests combines geo-blocked traffic events with
 * blocked WAF events in the same window; blockedPercent has one decimal.
 * NOTE(review): uniq() is an approximate distinct count — presumably chosen
 * for speed; use uniqExact for exact parity with SQLite's count(distinct).
 */
export async function querySummary(from: number, to: number, hosts: string[]): Promise<AnalyticsSummary> {
  const hf = hostFilter(hosts);
  const [traffic, waf] = await Promise.all([
    queryRow<{ total: string; unique_ips: string; blocked: string; bytes: string }>(`
      SELECT
        count() AS total,
        uniq(client_ip) AS unique_ips,
        countIf(is_blocked) AS blocked,
        sum(bytes_sent) AS bytes
      FROM traffic_events
      WHERE ${timeFilter(from, to)}${hf}
    `),
    queryRow<{ waf_blocked: string }>(`
      SELECT count() AS waf_blocked
      FROM waf_events
      WHERE ${timeFilter(from, to)} AND blocked = true${hf}
    `),
  ]);
  const totalRequests = Number(traffic?.total ?? 0);
  const blockedRequests = Number(traffic?.blocked ?? 0) + Number(waf?.waf_blocked ?? 0);
  return {
    totalRequests,
    uniqueIps: Number(traffic?.unique_ips ?? 0),
    blockedRequests,
    blockedPercent: totalRequests > 0 ? Math.round((blockedRequests / totalRequests) * 1000) / 10 : 0,
    bytesServed: Number(traffic?.bytes ?? 0),
  };
}
/** One point on the requests-over-time chart; ts is the bucket start (Unix seconds). */
export interface TimelineBucket {
  ts: number;
  total: number;
  blocked: number;
}
/**
 * Picks a histogram bucket width (seconds) for a query window so charts get
 * a sensible point count: 5 min up to 1 h, 30 min up to 12 h, 1 h up to
 * 1 day, 6 h up to 7 days, otherwise 1 day.
 */
export function bucketSizeForDuration(seconds: number): number {
  const thresholds: Array<[maxSeconds: number, bucket: number]> = [
    [3600, 300],
    [43200, 1800],
    [86400, 3600],
    [7 * 86400, 21600],
  ];
  for (const [maxSeconds, bucket] of thresholds) {
    if (seconds <= maxSeconds) return bucket;
  }
  return 86400;
}
/**
 * Per-bucket request/blocked histogram for [from, to]. Bucket width adapts
 * to the window length (see bucketSizeForDuration); returned ts is the
 * bucket start in Unix seconds. Buckets with no traffic are omitted.
 */
export async function queryTimeline(from: number, to: number, hosts: string[]): Promise<TimelineBucket[]> {
  const width = bucketSizeForDuration(to - from);
  const rows = await queryRows<{ bucket: string; total: string; blocked: string }>(`
    SELECT
      intDiv(toUInt32(ts), ${width}) AS bucket,
      count() AS total,
      countIf(is_blocked) AS blocked
    FROM traffic_events
    WHERE ${timeFilter(from, to)}${hostFilter(hosts)}
    GROUP BY bucket
    ORDER BY bucket
  `);
  return rows.map(({ bucket, total, blocked }) => ({
    ts: Number(bucket) * width,
    total: Number(total),
    blocked: Number(blocked),
  }));
}
/** Per-country request totals; countryCode 'XX' means unresolved. */
export interface CountryStats {
  countryCode: string;
  total: number;
  blocked: number;
}

/**
 * Counts requests per country code in [from, to], busiest country first.
 * Rows with no resolved country are reported under 'XX'.
 */
export async function queryCountries(from: number, to: number, hosts: string[]): Promise<CountryStats[]> {
  const rows = await queryRows<{ country_code: string | null; total: string; blocked: string }>(`
    SELECT
      country_code,
      count() AS total,
      countIf(is_blocked) AS blocked
    FROM traffic_events
    WHERE ${timeFilter(from, to)}${hostFilter(hosts)}
    GROUP BY country_code
    ORDER BY total DESC
  `);
  return rows.map(({ country_code, total, blocked }) => ({
    countryCode: country_code ?? 'XX',
    total: Number(total),
    blocked: Number(blocked),
  }));
}
/** Request share per HTTP protocol version. */
export interface ProtoStats {
  proto: string;
  count: number;
  percent: number;
}

/**
 * Request counts per protocol in [from, to], most used first. percent is the
 * share of all returned rows, rounded to one decimal; empty proto values are
 * labelled 'Unknown'.
 */
export async function queryProtocols(from: number, to: number, hosts: string[]): Promise<ProtoStats[]> {
  const rows = await queryRows<{ proto: string; count: string }>(`
    SELECT
      proto,
      count() AS count
    FROM traffic_events
    WHERE ${timeFilter(from, to)}${hostFilter(hosts)}
    GROUP BY proto
    ORDER BY count DESC
  `);
  let total = 0;
  for (const r of rows) total += Number(r.count);
  return rows.map(r => {
    const count = Number(r.count);
    return {
      proto: r.proto || 'Unknown',
      count,
      percent: total > 0 ? Math.round((count / total) * 1000) / 10 : 0,
    };
  });
}
/** Request share per user-agent string. */
export interface UAStats {
  userAgent: string;
  count: number;
  percent: number;
}

/**
 * Top 10 user agents by request count in [from, to]. percent is relative to
 * the returned top-10 rows only (not all traffic), rounded to one decimal;
 * empty user agents are labelled 'Unknown'.
 */
export async function queryUserAgents(from: number, to: number, hosts: string[]): Promise<UAStats[]> {
  const rows = await queryRows<{ user_agent: string; count: string }>(`
    SELECT
      user_agent,
      count() AS count
    FROM traffic_events
    WHERE ${timeFilter(from, to)}${hostFilter(hosts)}
    GROUP BY user_agent
    ORDER BY count DESC
    LIMIT 10
  `);
  let total = 0;
  for (const r of rows) total += Number(r.count);
  return rows.map(r => {
    const count = Number(r.count);
    return {
      userAgent: r.user_agent || 'Unknown',
      count,
      percent: total > 0 ? Math.round((count / total) * 1000) / 10 : 0,
    };
  });
}
/** One blocked request row shown in the paginated blocked-events table. */
export interface BlockedEvent {
  id: number; // synthetic: (page - 1) * pageSize + index + 1; ClickHouse rows carry no stored id
  ts: number; // Unix seconds
  clientIp: string;
  countryCode: string | null;
  method: string;
  uri: string;
  status: number;
  host: string;
}

/** A page of blocked events plus pagination metadata. */
export interface BlockedPage {
  events: BlockedEvent[];
  total: number; // matching rows across all pages
  page: number; // the clamped 1-based page actually returned
  pages: number; // total page count, at least 1
}
/**
 * Paginated blocked-traffic events in [from, to], newest first, 10 per page.
 * `page` is clamped to [1, pages]; a non-finite value (e.g. NaN from a bad
 * query parameter) is treated as page 1 instead of being interpolated into
 * SQL, where it would produce a malformed OFFSET. Event ids are synthetic
 * (offset + index) since ClickHouse rows have no stored id.
 */
export async function queryBlocked(from: number, to: number, hosts: string[], page: number): Promise<BlockedPage> {
  const pageSize = 10;
  const where = `${timeFilter(from, to)} AND is_blocked = true${hostFilter(hosts)}`;
  const totalRow = await queryRow<{ total: string }>(`SELECT count() AS total FROM traffic_events WHERE ${where}`);
  const total = Number(totalRow?.total ?? 0);
  const pages = Math.max(1, Math.ceil(total / pageSize));
  // Sanitize before clamping: Math.min/Math.max propagate NaN.
  const requested = Number.isFinite(page) ? Math.trunc(page) : 1;
  const safePage = Math.min(Math.max(1, requested), pages);
  const rows = await queryRows<{
    ts: string; client_ip: string; country_code: string | null;
    method: string; uri: string; status: string; host: string;
  }>(`
    SELECT toUInt32(ts) AS ts, client_ip, country_code, method, uri, status, host
    FROM traffic_events
    WHERE ${where}
    ORDER BY ts DESC
    LIMIT ${pageSize} OFFSET ${(safePage - 1) * pageSize}
  `);
  return {
    events: rows.map((r, i) => ({
      id: (safePage - 1) * pageSize + i + 1,
      ts: Number(r.ts),
      clientIp: r.client_ip,
      countryCode: r.country_code,
      method: r.method,
      uri: r.uri,
      status: Number(r.status),
      host: r.host,
    })),
    total,
    page: safePage,
    pages,
  };
}
/** All hostnames with at least one traffic event; empty hosts excluded. */
export async function queryDistinctHosts(): Promise<string[]> {
  const rows = await queryRows<{ host: string }>(
    `SELECT DISTINCT host FROM traffic_events WHERE host != ''`
  );
  return rows.map(({ host }) => host);
}
// ── WAF analytics queries ───────────────────────────────────────────────────
/** Number of WAF events (blocked or not) in [from, to]. */
export async function queryWafCount(from: number, to: number): Promise<number> {
  const row = await queryRow<{ value: string }>(
    `SELECT count() AS value FROM waf_events WHERE ${timeFilter(from, to)}`
  );
  return Number(row?.value ?? 0);
}

/** Total WAF events, optionally narrowed by a free-text search (see wafSearchFilter). */
export async function queryWafCountWithSearch(search?: string): Promise<number> {
  const where = search ? wafSearchFilter(search) : '1=1';
  const row = await queryRow<{ value: string }>(
    `SELECT count() AS value FROM waf_events WHERE ${where}`
  );
  return Number(row?.value ?? 0);
}
/**
 * Case-insensitive substring filter across host, client IP, URI and rule
 * message. The user-supplied term is escaped for both the SQL string literal
 * (backslash, then quote) and the ILIKE pattern ('%' and '_'), so searches
 * always match literally: previously a term containing '%' matched
 * everything and a trailing backslash could break out of the quoted pattern.
 */
function wafSearchFilter(search: string): string {
  const escaped = search
    .replace(/\\/g, '\\\\\\\\') // literal \ -> \\\\ (string-literal + LIKE escape layers)
    .replace(/'/g, "\\'")
    .replace(/%/g, '\\\\%') // literal % -> \\% (escaped for ILIKE)
    .replace(/_/g, '\\\\_'); // literal _ -> \\_ (escaped for ILIKE)
  const clause = (col: string) => `${col} ILIKE '%${escaped}%'`;
  return `(${['host', 'client_ip', 'uri', 'rule_message'].map(clause).join(' OR ')})`;
}
/** A frequently triggered WAF rule with one representative message. */
export interface TopWafRule {
  ruleId: number;
  count: number;
  message: string | null;
}

/**
 * The most frequently triggered WAF rules in [from, to] (default top 10).
 * any(rule_message) picks one message per rule as a display label.
 */
export async function queryTopWafRules(from: number, to: number, limit = 10): Promise<TopWafRule[]> {
  const rows = await queryRows<{ rule_id: string; count: string; message: string | null }>(`
    SELECT
      rule_id,
      count() AS count,
      any(rule_message) AS message
    FROM waf_events
    WHERE ${timeFilter(from, to)} AND rule_id IS NOT NULL
    GROUP BY rule_id
    ORDER BY count DESC
    LIMIT ${limit}
  `);
  const result: TopWafRule[] = [];
  for (const r of rows) {
    if (r.rule_id == null) continue;
    result.push({ ruleId: Number(r.rule_id), count: Number(r.count), message: r.message ?? null });
  }
  return result;
}
/** TopWafRule plus a per-host breakdown of where the rule fired. */
export interface TopWafRuleWithHosts {
  ruleId: number;
  count: number;
  message: string | null;
  hosts: { host: string; count: number }[];
}

/**
 * Top WAF rules with, for each rule, the hosts it fired on (descending by
 * count). Two queries: one for the top rules, one grouped by (rule, host),
 * with host breakdowns bucketed by rule id afterwards.
 */
export async function queryTopWafRulesWithHosts(from: number, to: number, limit = 10): Promise<TopWafRuleWithHosts[]> {
  const topRules = await queryTopWafRules(from, to, limit);
  if (topRules.length === 0) return [];
  // Rule ids are numbers, so joining them into the IN list is safe.
  const idList = topRules.map(r => r.ruleId).join(',');
  const hostRows = await queryRows<{ rule_id: string; host: string; count: string }>(`
    SELECT rule_id, host, count() AS count
    FROM waf_events
    WHERE ${timeFilter(from, to)} AND rule_id IN (${idList})
    GROUP BY rule_id, host
    ORDER BY count DESC
  `);
  const byRule = new Map<number, { host: string; count: number }[]>();
  for (const row of hostRows) {
    const key = Number(row.rule_id);
    const list = byRule.get(key) ?? [];
    list.push({ host: row.host, count: Number(row.count) });
    byRule.set(key, list);
  }
  return topRules.map(rule => ({ ...rule, hosts: byRule.get(rule.ruleId) ?? [] }));
}
/** WAF events per country in [from, to], busiest first; 'XX' = unresolved. */
export async function queryWafCountries(from: number, to: number): Promise<{ countryCode: string; count: number }[]> {
  const rows = await queryRows<{ country_code: string | null; count: string }>(`
    SELECT country_code, count() AS count
    FROM waf_events
    WHERE ${timeFilter(from, to)}
    GROUP BY country_code
    ORDER BY count DESC
  `);
  return rows.map(({ country_code, count }) => ({
    countryCode: country_code ?? 'XX',
    count: Number(count),
  }));
}
/**
 * Maps each given rule id to one representative message (null when the rule
 * has none). Ids are numbers, so joining them into the IN list is safe.
 */
export async function queryWafRuleMessages(ruleIds: number[]): Promise<Record<number, string | null>> {
  if (ruleIds.length === 0) return {};
  const rows = await queryRows<{ rule_id: string; message: string | null }>(`
    SELECT rule_id, any(rule_message) AS message
    FROM waf_events
    WHERE rule_id IN (${ruleIds.join(',')})
    GROUP BY rule_id
  `);
  const result: Record<number, string | null> = {};
  for (const r of rows) {
    if (r.rule_id == null) continue;
    result[Number(r.rule_id)] = r.message ?? null;
  }
  return result;
}
/** One WAF event row in camelCase API naming (response shape for the UI). */
export interface WafEvent {
  id: number; // synthetic: offset + index + 1; ClickHouse rows carry no stored id
  ts: number; // Unix seconds
  host: string;
  clientIp: string;
  countryCode: string | null;
  method: string;
  uri: string;
  ruleId: number | null;
  ruleMessage: string | null;
  severity: string | null;
  rawData: string | null;
  blocked: boolean;
}
/**
 * Pages through WAF events, newest first, optionally narrowed by a free-text
 * search (see wafSearchFilter). limit/offset are interpolated into SQL, so
 * they are clamped to non-negative integers first — a NaN (e.g. from a
 * parsed query parameter) would otherwise produce malformed SQL. Row ids
 * are synthetic (offset + index).
 */
export async function queryWafEvents(limit = 50, offset = 0, search?: string): Promise<WafEvent[]> {
  const safeLimit = Number.isFinite(limit) ? Math.max(0, Math.trunc(limit)) : 50;
  const safeOffset = Number.isFinite(offset) ? Math.max(0, Math.trunc(offset)) : 0;
  const where = search ? wafSearchFilter(search) : '1=1';
  const rows = await queryRows<{
    ts: string; host: string; client_ip: string; country_code: string | null;
    method: string; uri: string; rule_id: string | null; rule_message: string | null;
    severity: string | null; raw_data: string | null; blocked: string;
  }>(`
    SELECT toUInt32(ts) AS ts, host, client_ip, country_code, method, uri,
      rule_id, rule_message, severity, raw_data, blocked
    FROM waf_events
    WHERE ${where}
    ORDER BY ts DESC
    LIMIT ${safeLimit} OFFSET ${safeOffset}
  `);
  return rows.map((r, i) => ({
    id: safeOffset + i + 1,
    ts: Number(r.ts),
    host: r.host,
    clientIp: r.client_ip,
    countryCode: r.country_code ?? null,
    method: r.method,
    uri: r.uri,
    ruleId: r.rule_id != null ? Number(r.rule_id) : null,
    ruleMessage: r.rule_message ?? null,
    severity: r.severity ?? null,
    rawData: r.raw_data ?? null,
    blocked: Boolean(Number(r.blocked)),
  }));
}

View File

@@ -0,0 +1,39 @@
-- Reference DDL for ClickHouse analytics tables.
-- Tables are created programmatically by client.ts initClickHouse().
-- This file is for documentation only.

-- One row per proxied HTTP request, parsed from the Caddy access log.
CREATE TABLE IF NOT EXISTS traffic_events (
  ts DateTime,                      -- request timestamp
  client_ip String,
  country_code Nullable(String),    -- GeoIP result; NULL when unresolved
  host String DEFAULT '',
  method String DEFAULT '',
  uri String DEFAULT '',
  status UInt16 DEFAULT 0,          -- HTTP response status code
  proto String DEFAULT '',
  bytes_sent UInt64 DEFAULT 0,
  user_agent String DEFAULT '',
  is_blocked Bool DEFAULT false     -- true when a matching "request blocked" entry was seen
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(ts)           -- monthly partitions keep TTL deletes cheap
ORDER BY (host, ts)                 -- matches the dominant host + time-range filters
TTL ts + INTERVAL 90 DAY DELETE    -- 90-day retention (replaces SQLite purgeOldEntries)
SETTINGS index_granularity = 8192;
-- One row per WAF (Coraza) audit event, parsed from the audit log.
CREATE TABLE IF NOT EXISTS waf_events (
  ts DateTime,                      -- event timestamp
  host String DEFAULT '',
  client_ip String,
  country_code Nullable(String),    -- GeoIP result; NULL when unresolved
  method String DEFAULT '',
  uri String DEFAULT '',
  rule_id Nullable(Int32),          -- matched rule id, when recorded
  rule_message Nullable(String),
  severity Nullable(String),
  raw_data Nullable(String),        -- original audit-log line
  blocked Bool DEFAULT true
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(ts)           -- monthly partitions keep TTL deletes cheap
ORDER BY (host, ts)                 -- matches the dominant host + time-range filters
TTL ts + INTERVAL 90 DAY DELETE    -- 90-day retention (replaces SQLite purgeOldEntries)
SETTINGS index_granularity = 8192;

View File

@@ -220,55 +220,14 @@ export const linkingTokens = sqliteTable("linking_tokens", {
expiresAt: text("expires_at").notNull()
});
export const trafficEvents = sqliteTable(
'traffic_events',
{
id: integer('id').primaryKey({ autoIncrement: true }),
ts: integer('ts').notNull(),
clientIp: text('client_ip').notNull(),
countryCode: text('country_code'),
host: text('host').notNull().default(''),
method: text('method').notNull().default(''),
uri: text('uri').notNull().default(''),
status: integer('status').notNull().default(0),
proto: text('proto').notNull().default(''),
bytesSent: integer('bytes_sent').notNull().default(0),
userAgent: text('user_agent').notNull().default(''),
isBlocked: integer('is_blocked', { mode: 'boolean' }).notNull().default(false),
},
(table) => ({
tsIdx: index('idx_traffic_events_ts').on(table.ts),
hostTsIdx: index('idx_traffic_events_host_ts').on(table.host, table.ts),
})
);
// traffic_events and waf_events have been migrated to ClickHouse.
// See src/lib/clickhouse/client.ts for the ClickHouse schema.
// Key/value store for access-log parse state (file offset and size).
// Parse state intentionally remains in SQLite; only analytics event data
// moved to ClickHouse.
export const logParseState = sqliteTable('log_parse_state', {
  key: text('key').primaryKey(),
  value: text('value').notNull(),
});
export const wafEvents = sqliteTable(
'waf_events',
{
id: integer('id').primaryKey({ autoIncrement: true }),
ts: integer('ts').notNull(),
host: text('host').notNull().default(''),
clientIp: text('client_ip').notNull(),
countryCode: text('country_code'),
method: text('method').notNull().default(''),
uri: text('uri').notNull().default(''),
ruleId: integer('rule_id'),
ruleMessage: text('rule_message'),
severity: text('severity'),
rawData: text('raw_data'),
blocked: integer('blocked', { mode: 'boolean' }).notNull().default(true),
},
(table) => ({
tsIdx: index('idx_waf_events_ts').on(table.ts),
hostTsIdx: index('idx_waf_events_host_ts').on(table.host, table.ts),
})
);
export const wafLogParseState = sqliteTable('waf_log_parse_state', {
key: text('key').primaryKey(),
value: text('value').notNull(),

View File

@@ -2,13 +2,13 @@ import { createReadStream, existsSync, statSync } from 'node:fs';
import { createInterface } from 'node:readline';
import maxmind, { CountryResponse } from 'maxmind';
import db from './db';
import { trafficEvents, logParseState } from './db/schema';
import { eq, sql } from 'drizzle-orm';
import { logParseState } from './db/schema';
import { eq } from 'drizzle-orm';
import { insertTrafficEvents, type TrafficEventRow } from './clickhouse/client';
const LOG_FILE = '/logs/access.log';
const GEOIP_DB = '/usr/share/GeoIP/GeoLite2-Country.mmdb';
const BATCH_SIZE = 500;
const RETENTION_DAYS = 90;
// GeoIP reader — null if mmdb not available
let geoReader: Awaited<ReturnType<typeof maxmind.open<CountryResponse>>> | null = null;
@@ -97,7 +97,7 @@ export function collectBlockedSignatures(lines: string[]): Set<string> {
return blocked;
}
export function parseLine(line: string, blocked: Set<string>): typeof trafficEvents.$inferInsert | null {
export function parseLine(line: string, blocked: Set<string>): TrafficEventRow | null {
let entry: CaddyLogEntry;
try {
entry = JSON.parse(line);
@@ -119,16 +119,16 @@ export function parseLine(line: string, blocked: Set<string>): typeof trafficEve
return {
ts,
clientIp,
countryCode: clientIp ? lookupCountry(clientIp) : null,
client_ip: clientIp,
country_code: clientIp ? lookupCountry(clientIp) : null,
host: req.host ?? '',
method,
uri,
status,
proto: req.proto ?? '',
bytesSent: entry.size ?? 0,
userAgent: req.headers?.['User-Agent']?.[0] ?? '',
isBlocked: blocked.has(key),
bytes_sent: entry.size ?? 0,
user_agent: req.headers?.['User-Agent']?.[0] ?? '',
is_blocked: blocked.has(key),
};
}
@@ -153,18 +153,12 @@ async function readLines(startOffset: number): Promise<{ lines: string[]; newOff
});
}
function insertBatch(rows: typeof trafficEvents.$inferInsert[]): void {
async function insertBatch(rows: TrafficEventRow[]): Promise<void> {
for (let i = 0; i < rows.length; i += BATCH_SIZE) {
db.insert(trafficEvents).values(rows.slice(i, i + BATCH_SIZE)).run();
await insertTrafficEvents(rows.slice(i, i + BATCH_SIZE));
}
}
function purgeOldEntries(): void {
const cutoff = Math.floor(Date.now() / 1000) - RETENTION_DAYS * 86400;
// Use parameterized query instead of string interpolation
db.run(sql`DELETE FROM traffic_events WHERE ts < ${cutoff}`);
}
// ── public API ───────────────────────────────────────────────────────────────
export async function initLogParser(): Promise<void> {
@@ -195,15 +189,12 @@ export async function parseNewLogEntries(): Promise<void> {
if (lines.length > 0) {
const blocked = collectBlockedSignatures(lines);
const rows = lines.map(l => parseLine(l, blocked)).filter(r => r !== null);
insertBatch(rows);
await insertBatch(rows);
console.log(`[log-parser] inserted ${rows.length} traffic events (${blocked.size} blocked)`);
}
setState('access_log_offset', String(newOffset));
setState('access_log_size', String(currentSize));
// Purge old entries once per run (cheap since it's indexed)
purgeOldEntries();
} catch (err) {
console.error('[log-parser] error during parse:', err);
}

View File

@@ -1,141 +1,42 @@
import db from "../db";
import { wafEvents } from "../db/schema";
import { desc, like, or, count, and, gte, lte, sql, inArray } from "drizzle-orm";
import {
queryWafCount,
queryWafCountWithSearch,
queryTopWafRules,
queryTopWafRulesWithHosts,
queryWafCountries,
queryWafRuleMessages,
queryWafEvents,
type WafEvent,
type TopWafRule,
type TopWafRuleWithHosts,
} from "../clickhouse/client";
export type WafEvent = {
id: number;
ts: number;
host: string;
clientIp: string;
countryCode: string | null;
method: string;
uri: string;
ruleId: number | null;
ruleMessage: string | null;
severity: string | null;
rawData: string | null;
blocked: boolean;
};
function buildSearch(search?: string) {
if (!search) return undefined;
return or(
like(wafEvents.host, `%${search}%`),
like(wafEvents.clientIp, `%${search}%`),
like(wafEvents.uri, `%${search}%`),
like(wafEvents.ruleMessage, `%${search}%`)
);
}
export type { WafEvent, TopWafRule, TopWafRuleWithHosts };
export async function countWafEvents(search?: string): Promise<number> {
const [row] = await db
.select({ value: count() })
.from(wafEvents)
.where(buildSearch(search));
return row?.value ?? 0;
return queryWafCountWithSearch(search);
}
export async function countWafEventsInRange(from: number, to: number): Promise<number> {
const [row] = await db
.select({ value: count() })
.from(wafEvents)
.where(and(gte(wafEvents.ts, from), lte(wafEvents.ts, to)));
return row?.value ?? 0;
return queryWafCount(from, to);
}
export type TopWafRule = { ruleId: number; count: number; message: string | null };
export async function getTopWafRules(from: number, to: number, limit = 10): Promise<TopWafRule[]> {
const rows = await db
.select({
ruleId: wafEvents.ruleId,
count: count(),
message: sql<string | null>`MAX(${wafEvents.ruleMessage})`,
})
.from(wafEvents)
.where(and(gte(wafEvents.ts, from), lte(wafEvents.ts, to), sql`${wafEvents.ruleId} IS NOT NULL`))
.groupBy(wafEvents.ruleId)
.orderBy(desc(count()))
.limit(limit);
return rows
.filter((r): r is typeof r & { ruleId: number } => r.ruleId != null)
.map((r) => ({ ruleId: r.ruleId, count: r.count, message: r.message ?? null }));
return queryTopWafRules(from, to, limit);
}
export type TopWafRuleWithHosts = {
ruleId: number;
count: number;
message: string | null;
hosts: { host: string; count: number }[];
};
export async function getTopWafRulesWithHosts(from: number, to: number, limit = 10): Promise<TopWafRuleWithHosts[]> {
const topRules = await getTopWafRules(from, to, limit);
if (topRules.length === 0) return [];
const ruleIds = topRules.map(r => r.ruleId);
const hostRows = await db
.select({ ruleId: wafEvents.ruleId, host: wafEvents.host, count: count() })
.from(wafEvents)
.where(and(gte(wafEvents.ts, from), lte(wafEvents.ts, to), inArray(wafEvents.ruleId, ruleIds)))
.groupBy(wafEvents.ruleId, wafEvents.host)
.orderBy(desc(count()));
return topRules.map(rule => ({
...rule,
hosts: hostRows
.filter(r => r.ruleId === rule.ruleId)
.map(r => ({ host: r.host, count: r.count })),
}));
return queryTopWafRulesWithHosts(from, to, limit);
}
export async function getWafEventCountries(from: number, to: number): Promise<{ countryCode: string; count: number }[]> {
const rows = await db
.select({ countryCode: wafEvents.countryCode, count: count() })
.from(wafEvents)
.where(and(gte(wafEvents.ts, from), lte(wafEvents.ts, to)))
.groupBy(wafEvents.countryCode)
.orderBy(desc(count()));
return rows.map(r => ({ countryCode: r.countryCode ?? 'XX', count: r.count }));
return queryWafCountries(from, to);
}
export async function getWafRuleMessages(ruleIds: number[]): Promise<Record<number, string | null>> {
if (ruleIds.length === 0) return {};
const rows = await db
.select({
ruleId: wafEvents.ruleId,
message: sql<string | null>`MAX(${wafEvents.ruleMessage})`,
})
.from(wafEvents)
.where(inArray(wafEvents.ruleId, ruleIds))
.groupBy(wafEvents.ruleId);
return Object.fromEntries(
rows.filter((r): r is typeof r & { ruleId: number } => r.ruleId != null)
.map((r) => [r.ruleId, r.message ?? null])
);
return queryWafRuleMessages(ruleIds);
}
export async function listWafEvents(limit = 50, offset = 0, search?: string): Promise<WafEvent[]> {
const rows = await db
.select()
.from(wafEvents)
.where(buildSearch(search))
.orderBy(desc(wafEvents.ts))
.limit(limit)
.offset(offset);
return rows.map((r) => ({
id: r.id,
ts: r.ts,
host: r.host,
clientIp: r.clientIp,
countryCode: r.countryCode ?? null,
method: r.method,
uri: r.uri,
ruleId: r.ruleId ?? null,
ruleMessage: r.ruleMessage ?? null,
severity: r.severity ?? null,
rawData: r.rawData ?? null,
blocked: r.blocked ?? true,
}));
return queryWafEvents(limit, offset, search);
}

View File

@@ -2,14 +2,14 @@ import { createReadStream, existsSync, statSync } from 'node:fs';
import { createInterface } from 'node:readline';
import maxmind, { CountryResponse } from 'maxmind';
import db from './db';
import { wafEvents, wafLogParseState } from './db/schema';
import { eq, sql } from 'drizzle-orm';
import { wafLogParseState } from './db/schema';
import { eq } from 'drizzle-orm';
import { insertWafEvents, type WafEventRow } from './clickhouse/client';
const AUDIT_LOG = '/logs/waf-audit.log';
const RULES_LOG = '/logs/waf-rules.log';
const GEOIP_DB = '/usr/share/GeoIP/GeoLite2-Country.mmdb';
const BATCH_SIZE = 200;
const RETENTION_DAYS = 90;
let geoReader: Awaited<ReturnType<typeof maxmind.open<CountryResponse>>> | null = null;
const geoCache = new Map<string, string | null>();
@@ -130,7 +130,7 @@ interface CorazaAuditEntry {
};
}
function parseLine(line: string, ruleMap: Map<string, RuleInfo>): typeof wafEvents.$inferInsert | null {
function parseLine(line: string, ruleMap: Map<string, RuleInfo>): WafEventRow | null {
let entry: CorazaAuditEntry;
try {
entry = JSON.parse(line);
@@ -172,14 +172,14 @@ function parseLine(line: string, ruleMap: Map<string, RuleInfo>): typeof wafEven
return {
ts,
host,
clientIp,
countryCode: lookupCountry(clientIp),
client_ip: clientIp,
country_code: lookupCountry(clientIp),
method: req.method ?? '',
uri: req.uri ?? '',
ruleId: ruleInfo?.ruleId ?? null,
ruleMessage: ruleInfo?.ruleMessage ?? null,
rule_id: ruleInfo?.ruleId ?? null,
rule_message: ruleInfo?.ruleMessage ?? null,
severity: ruleInfo?.severity ?? null,
rawData: line,
raw_data: line,
blocked,
};
}
@@ -205,18 +205,12 @@ async function readAuditLog(startOffset: number): Promise<{ lines: string[]; new
});
}
function insertBatch(rows: typeof wafEvents.$inferInsert[]): void {
async function insertBatch(rows: WafEventRow[]): Promise<void> {
for (let i = 0; i < rows.length; i += BATCH_SIZE) {
db.insert(wafEvents).values(rows.slice(i, i + BATCH_SIZE)).run();
await insertWafEvents(rows.slice(i, i + BATCH_SIZE));
}
}
function purgeOldEntries(): void {
const cutoff = Math.floor(Date.now() / 1000) - RETENTION_DAYS * 86400;
// Use parameterized query instead of string interpolation
db.run(sql`DELETE FROM waf_events WHERE ts < ${cutoff}`);
}
// ── public API ────────────────────────────────────────────────────────────────
export async function initWafLogParser(): Promise<void> {
@@ -258,17 +252,15 @@ export async function parseNewWafLogEntries(): Promise<void> {
const { lines, newOffset } = await readAuditLog(startOffset);
if (lines.length > 0) {
const rows = lines.map(l => parseLine(l, ruleMap)).filter((r): r is typeof wafEvents.$inferInsert => r !== null);
const rows = lines.map(l => parseLine(l, ruleMap)).filter((r): r is WafEventRow => r !== null);
if (rows.length > 0) {
insertBatch(rows);
await insertBatch(rows);
console.log(`[waf-log-parser] inserted ${rows.length} WAF events`);
}
}
setState('waf_audit_log_offset', String(newOffset));
setState('waf_audit_log_size', String(currentSize));
purgeOldEntries();
} catch (err) {
console.error('[waf-log-parser] error during parse:', err);
}

View File

@@ -4,6 +4,7 @@ services:
SESSION_SECRET: "test-session-secret-32chars!xxxY"
ADMIN_USERNAME: testadmin
ADMIN_PASSWORD: "TestPassword2026!"
CLICKHOUSE_PASSWORD: "test-clickhouse-password-2026"
BASE_URL: http://localhost:3000
NEXTAUTH_URL: http://localhost:3000
# OAuth via Dex OIDC provider
@@ -16,6 +17,9 @@ services:
OAUTH_TOKEN_URL: http://dex:5556/dex/token
OAUTH_USERINFO_URL: http://dex:5556/dex/userinfo
OAUTH_ALLOW_AUTO_LINKING: "true"
clickhouse:
environment:
CLICKHOUSE_PASSWORD: "test-clickhouse-password-2026"
caddy:
ports:
- "80:80"
@@ -84,3 +88,5 @@ volumes:
name: caddy-logs-test
geoip-data:
name: geoip-data-test
clickhouse-data:
name: clickhouse-data-test

View File

@@ -35,7 +35,7 @@ async function waitForHealth(): Promise<void> {
console.error('[global-setup] Health check timed out. Container logs:');
try {
execFileSync('docker', [...COMPOSE_ARGS, 'logs', '--tail=50'], { stdio: 'inherit', cwd: process.cwd() });
execFileSync('docker', [...COMPOSE_ARGS, 'logs', '--tail=50'], { stdio: 'inherit', cwd: process.cwd(), env: { ...process.env, CLICKHOUSE_PASSWORD: 'test-clickhouse-password-2026' } });
} catch { /* ignore */ }
throw new Error(`App did not become healthy within ${MAX_WAIT_MS}ms`);
@@ -74,6 +74,7 @@ export default async function globalSetup() {
], {
stdio: 'inherit',
cwd: process.cwd(),
env: { ...process.env, CLICKHOUSE_PASSWORD: 'test-clickhouse-password-2026' },
});
console.log('[global-setup] Containers up. Waiting for /api/health...');

View File

@@ -14,6 +14,7 @@ export default async function globalTeardown() {
execFileSync('docker', [...COMPOSE_ARGS, 'down', '-v', '--remove-orphans'], {
stdio: 'inherit',
cwd: process.cwd(),
env: { ...process.env, CLICKHOUSE_PASSWORD: 'test-clickhouse-password-2026' },
});
} catch (err) {
console.warn('[global-teardown] docker compose down failed:', err);

View File

@@ -1,10 +1,6 @@
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { createTestDb, type TestDb } from '../helpers/db';
import { trafficEvents } from '@/src/lib/db/schema';
import { sql, and, gte, lte, eq, inArray } from 'drizzle-orm';
import { describe, it, expect, vi } from 'vitest';
// Mock dependencies so we can import collectBlockedSignatures and parseLine.
// These run in the log-parser module scope on import.
vi.mock('@/src/lib/db', () => ({
default: {
select: vi.fn().mockReturnValue({ from: vi.fn().mockReturnValue({ where: vi.fn().mockReturnValue({ get: vi.fn().mockReturnValue(null) }) }) }),
@@ -19,351 +15,81 @@ vi.mock('node:fs', () => ({
statSync: vi.fn().mockReturnValue({ size: 0 }),
createReadStream: vi.fn(),
}));
vi.mock('@/src/lib/clickhouse/client', () => ({
insertTrafficEvents: vi.fn().mockResolvedValue(undefined),
}));
import { collectBlockedSignatures, parseLine } from '@/src/lib/log-parser';
let db: TestDb;
const NOW = Math.floor(Date.now() / 1000);
beforeEach(() => {
db = createTestDb();
});
/** Insert a traffic event row with sensible defaults. */
function insertEvent(overrides: Partial<typeof trafficEvents.$inferInsert> = {}) {
db.insert(trafficEvents).values({
ts: NOW,
clientIp: '1.2.3.4',
countryCode: 'DE',
host: 'example.com',
method: 'GET',
uri: '/',
status: 200,
proto: 'HTTP/2.0',
bytesSent: 1024,
userAgent: 'Mozilla/5.0',
isBlocked: false,
...overrides,
}).run();
}
// ── Helpers that mirror analytics-db.ts queries ─────────────────────────────
// We duplicate the SQL here intentionally — if the production queries ever
// drift from what the schema supports, these tests will catch it.
function buildWhere(from: number, to: number, hosts: string[]) {
const conditions = [gte(trafficEvents.ts, from), lte(trafficEvents.ts, to)];
if (hosts.length === 1) {
conditions.push(eq(trafficEvents.host, hosts[0]));
} else if (hosts.length > 1) {
conditions.push(inArray(trafficEvents.host, hosts));
}
return and(...conditions);
}
function getSummary(from: number, to: number, hosts: string[] = []) {
const where = buildWhere(from, to, hosts);
return db
.select({
total: sql<number>`count(*)`,
uniqueIps: sql<number>`count(distinct ${trafficEvents.clientIp})`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
bytes: sql<number>`sum(${trafficEvents.bytesSent})`,
})
.from(trafficEvents)
.where(where)
.get();
}
function getCountries(from: number, to: number, hosts: string[] = []) {
const where = buildWhere(from, to, hosts);
return db
.select({
countryCode: trafficEvents.countryCode,
total: sql<number>`count(*)`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
})
.from(trafficEvents)
.where(where)
.groupBy(trafficEvents.countryCode)
.orderBy(sql`count(*) desc`)
.all();
}
function getTimeline(from: number, to: number, bucketSize: number, hosts: string[] = []) {
const where = buildWhere(from, to, hosts);
return db
.select({
bucket: sql<number>`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`,
total: sql<number>`count(*)`,
blocked: sql<number>`sum(case when ${trafficEvents.isBlocked} then 1 else 0 end)`,
})
.from(trafficEvents)
.where(where)
.groupBy(sql`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`)
.orderBy(sql`(${trafficEvents.ts} / ${sql.raw(String(bucketSize))})`)
.all();
}
function getBlockedEvents(from: number, to: number, hosts: string[] = []) {
const where = and(buildWhere(from, to, hosts), eq(trafficEvents.isBlocked, true));
return db
.select({
id: trafficEvents.id,
ts: trafficEvents.ts,
clientIp: trafficEvents.clientIp,
countryCode: trafficEvents.countryCode,
host: trafficEvents.host,
status: trafficEvents.status,
})
.from(trafficEvents)
.where(where)
.orderBy(sql`${trafficEvents.ts} desc`)
.all();
}
// ── Tests ────────────────────────────────────────────────────────────────────
describe('analytics blocked counting', () => {
const from = NOW - 3600;
const to = NOW + 3600;
describe('summary', () => {
it('counts zero blocked when no events are blocked', () => {
insertEvent({ isBlocked: false });
insertEvent({ isBlocked: false });
const row = getSummary(from, to);
expect(row!.total).toBe(2);
expect(row!.blocked).toBe(0);
describe('log-parser blocked detection', () => {
describe('collectBlockedSignatures', () => {
it('collects signatures from caddy-blocker "request blocked" entries', () => {
const lines = [
JSON.stringify({ ts: NOW + 0.01, msg: 'request blocked', plugin: 'caddy-blocker', client_ip: '1.2.3.4', method: 'GET', uri: '/secret' }),
JSON.stringify({ ts: NOW + 0.5, msg: 'handled request', status: 200, request: { client_ip: '5.6.7.8' } }),
];
const set = collectBlockedSignatures(lines);
expect(set.size).toBe(1);
});
it('counts geo-blocked requests correctly', () => {
insertEvent({ isBlocked: true, status: 403, clientIp: '5.6.7.8', countryCode: 'CN' });
insertEvent({ isBlocked: true, status: 403, clientIp: '9.10.11.12', countryCode: 'RU' });
insertEvent({ isBlocked: false, status: 200 });
const row = getSummary(from, to);
expect(row!.total).toBe(3);
expect(row!.blocked).toBe(2);
it('returns empty set when no blocked entries', () => {
const lines = [
JSON.stringify({ ts: NOW, msg: 'handled request', status: 200, request: { client_ip: '1.2.3.4' } }),
];
expect(collectBlockedSignatures(lines).size).toBe(0);
});
it('filters by host', () => {
insertEvent({ isBlocked: true, host: 'blocked.com' });
insertEvent({ isBlocked: false, host: 'blocked.com' });
insertEvent({ isBlocked: true, host: 'other.com' });
const row = getSummary(from, to, ['blocked.com']);
expect(row!.total).toBe(2);
expect(row!.blocked).toBe(1);
it('ignores non-caddy-blocker entries', () => {
const lines = [
JSON.stringify({ ts: NOW, msg: 'request blocked', plugin: 'other-plugin', client_ip: '1.2.3.4', method: 'GET', uri: '/' }),
];
expect(collectBlockedSignatures(lines).size).toBe(0);
});
});
describe('countries', () => {
it('shows blocked count per country', () => {
insertEvent({ isBlocked: true, countryCode: 'CN' });
insertEvent({ isBlocked: true, countryCode: 'CN' });
insertEvent({ isBlocked: false, countryCode: 'CN' });
insertEvent({ isBlocked: true, countryCode: 'RU' });
insertEvent({ isBlocked: false, countryCode: 'US' });
const rows = getCountries(from, to);
const cn = rows.find(r => r.countryCode === 'CN');
const ru = rows.find(r => r.countryCode === 'RU');
const us = rows.find(r => r.countryCode === 'US');
expect(cn!.total).toBe(3);
expect(cn!.blocked).toBe(2);
expect(ru!.total).toBe(1);
expect(ru!.blocked).toBe(1);
expect(us!.total).toBe(1);
expect(us!.blocked).toBe(0);
});
});
describe('timeline', () => {
it('shows blocked count per time bucket', () => {
const bucketSize = 3600;
const bucket1Ts = NOW;
const bucket2Ts = NOW + 3600;
insertEvent({ ts: bucket1Ts, isBlocked: true });
insertEvent({ ts: bucket1Ts, isBlocked: true });
insertEvent({ ts: bucket1Ts, isBlocked: false });
insertEvent({ ts: bucket2Ts, isBlocked: true });
insertEvent({ ts: bucket2Ts, isBlocked: false });
insertEvent({ ts: bucket2Ts, isBlocked: false });
const rows = getTimeline(from, to + 7200, bucketSize);
expect(rows.length).toBe(2);
const b1 = rows[0];
expect(b1.total).toBe(3);
expect(b1.blocked).toBe(2);
const b2 = rows[1];
expect(b2.total).toBe(3);
expect(b2.blocked).toBe(1);
});
});
describe('blocked events list', () => {
it('returns only blocked events', () => {
insertEvent({ isBlocked: true, clientIp: '5.6.7.8', countryCode: 'CN', status: 403 });
insertEvent({ isBlocked: false, clientIp: '1.2.3.4', countryCode: 'US', status: 200 });
insertEvent({ isBlocked: true, clientIp: '9.10.11.12', countryCode: 'RU', status: 403 });
const rows = getBlockedEvents(from, to);
expect(rows.length).toBe(2);
expect(rows.every(r => r.status === 403)).toBe(true);
const ips = rows.map(r => r.clientIp).sort();
expect(ips).toEqual(['5.6.7.8', '9.10.11.12']);
});
it('returns empty list when nothing is blocked', () => {
insertEvent({ isBlocked: false });
insertEvent({ isBlocked: false });
const rows = getBlockedEvents(from, to);
expect(rows.length).toBe(0);
});
it('filters blocked events by host', () => {
insertEvent({ isBlocked: true, host: 'target.com' });
insertEvent({ isBlocked: true, host: 'other.com' });
const rows = getBlockedEvents(from, to, ['target.com']);
expect(rows.length).toBe(1);
expect(rows[0].host).toBe('target.com');
});
});
describe('full pipeline: raw log lines → parseLine → DB → analytics queries', () => {
it('geo-blocked request flows through the entire pipeline', () => {
const ts = NOW;
// Simulate the two log entries that Caddy writes to access.log for a
// geo-blocked request: the blocker's "request blocked" entry followed
// by the standard "handled request" entry.
describe('parseLine', () => {
it('marks blocked request as is_blocked=true', () => {
const blockedLogLine = JSON.stringify({
ts: ts + 0.01,
msg: 'request blocked',
plugin: 'caddy-blocker',
client_ip: '203.0.113.5',
method: 'GET',
uri: '/secret',
ts: NOW + 0.01, msg: 'request blocked', plugin: 'caddy-blocker',
client_ip: '203.0.113.5', method: 'GET', uri: '/secret',
});
const handledBlockedLine = JSON.stringify({
ts: ts + 0.99,
msg: 'handled request',
status: 403,
size: 9,
request: {
client_ip: '203.0.113.5',
host: 'secure.example.com',
method: 'GET',
uri: '/secret',
proto: 'HTTP/2.0',
headers: { 'User-Agent': ['BlockedBot/1.0'] },
},
const handledLine = JSON.stringify({
ts: NOW + 0.99, msg: 'handled request', status: 403, size: 9,
request: { client_ip: '203.0.113.5', host: 'example.com', method: 'GET', uri: '/secret', proto: 'HTTP/2.0' },
});
// A normal allowed request in the same log batch.
const allowedLine = JSON.stringify({
ts: ts + 1.5,
msg: 'handled request',
status: 200,
size: 4096,
request: {
client_ip: '198.51.100.1',
host: 'secure.example.com',
method: 'GET',
uri: '/',
proto: 'HTTP/2.0',
headers: { 'User-Agent': ['GoodBot/2.0'] },
},
});
// Step 1: collectBlockedSignatures builds the blocked set from all lines
const lines = [blockedLogLine, handledBlockedLine, allowedLine];
const blockedSet = collectBlockedSignatures(lines);
expect(blockedSet.size).toBe(1);
// Step 2: parseLine processes each "handled request" line
const blockedRow = parseLine(handledBlockedLine, blockedSet);
const allowedRow = parseLine(allowedLine, blockedSet);
expect(blockedRow).not.toBeNull();
expect(allowedRow).not.toBeNull();
expect(blockedRow!.isBlocked).toBe(true);
expect(allowedRow!.isBlocked).toBe(false);
// Step 3: Insert into DB (as the real log parser does)
db.insert(trafficEvents).values(blockedRow!).run();
db.insert(trafficEvents).values(allowedRow!).run();
// Step 4: Verify all analytics queries reflect the blocked request
// Summary
const summary = getSummary(from, to);
expect(summary!.total).toBe(2);
expect(summary!.blocked).toBe(1);
// Countries (GeoIP is mocked so countryCode is null → grouped together)
const countries = getCountries(from, to);
const group = countries[0];
expect(group.total).toBe(2);
expect(group.blocked).toBe(1);
// Timeline
const timeline = getTimeline(from, to, 3600);
expect(timeline.length).toBe(1);
expect(timeline[0].total).toBe(2);
expect(timeline[0].blocked).toBe(1);
// Blocked events list
const blocked = getBlockedEvents(from, to);
expect(blocked.length).toBe(1);
expect(blocked[0].clientIp).toBe('203.0.113.5');
expect(blocked[0].status).toBe(403);
// Filtered by host
const filteredSummary = getSummary(from, to, ['secure.example.com']);
expect(filteredSummary!.blocked).toBe(1);
const wrongHost = getSummary(from, to, ['other.com']);
expect(wrongHost!.total).toBe(0);
const blockedSet = collectBlockedSignatures([blockedLogLine, handledLine]);
const row = parseLine(handledLine, blockedSet);
expect(row).not.toBeNull();
expect(row!.is_blocked).toBe(true);
expect(row!.client_ip).toBe('203.0.113.5');
expect(row!.host).toBe('example.com');
});
it('non-blocked request does not appear in blocked stats', () => {
const ts = NOW;
// Only a normal "handled request" — no "request blocked" entry
const normalLine = JSON.stringify({
ts: ts + 0.5,
msg: 'handled request',
status: 200,
size: 2048,
request: {
client_ip: '198.51.100.1',
host: 'open.example.com',
method: 'GET',
uri: '/public',
proto: 'HTTP/2.0',
},
it('marks normal request as is_blocked=false', () => {
const line = JSON.stringify({
ts: NOW, msg: 'handled request', status: 200, size: 1024,
request: { client_ip: '1.2.3.4', host: 'example.com', method: 'GET', uri: '/', proto: 'HTTP/2.0' },
});
const row = parseLine(line, new Set());
expect(row).not.toBeNull();
expect(row!.is_blocked).toBe(false);
});
const lines = [normalLine];
const blockedSet = collectBlockedSignatures(lines);
expect(blockedSet.size).toBe(0);
it('skips non-handled-request entries', () => {
const line = JSON.stringify({ ts: NOW, msg: 'request blocked', plugin: 'caddy-blocker' });
expect(parseLine(line, new Set())).toBeNull();
});
const row = parseLine(normalLine, blockedSet);
expect(row!.isBlocked).toBe(false);
db.insert(trafficEvents).values(row!).run();
const summary = getSummary(from, to);
expect(summary!.total).toBe(1);
expect(summary!.blocked).toBe(0);
const blocked = getBlockedEvents(from, to);
expect(blocked.length).toBe(0);
it('extracts user agent from headers', () => {
const line = JSON.stringify({
ts: NOW, msg: 'handled request', status: 200, size: 0,
request: { client_ip: '1.2.3.4', host: 'example.com', method: 'GET', uri: '/', proto: 'HTTP/1.1', headers: { 'User-Agent': ['TestBot/1.0'] } },
});
const row = parseLine(line, new Set());
expect(row!.user_agent).toBe('TestBot/1.0');
});
});
});

View File

@@ -24,6 +24,10 @@ vi.mock('node:fs', () => ({
createReadStream: vi.fn(),
}));
vi.mock('@/src/lib/clickhouse/client', () => ({
insertTrafficEvents: vi.fn().mockResolvedValue(undefined),
}));
import { parseLine, collectBlockedSignatures } from '@/src/lib/log-parser';
describe('log-parser', () => {
@@ -112,15 +116,15 @@ describe('log-parser', () => {
const result = parseLine(entry, emptyBlocked);
expect(result).not.toBeNull();
expect(result!.ts).toBe(1700000100);
expect(result!.clientIp).toBe('10.0.0.1');
expect(result!.client_ip).toBe('10.0.0.1');
expect(result!.host).toBe('example.com');
expect(result!.method).toBe('GET');
expect(result!.uri).toBe('/path');
expect(result!.status).toBe(200);
expect(result!.proto).toBe('HTTP/1.1');
expect(result!.bytesSent).toBe(1234);
expect(result!.userAgent).toBe('Mozilla/5.0');
expect(result!.isBlocked).toBe(false);
expect(result!.bytes_sent).toBe(1234);
expect(result!.user_agent).toBe('Mozilla/5.0');
expect(result!.is_blocked).toBe(false);
});
it('returns null for entries with wrong msg field', () => {
@@ -136,11 +140,11 @@ describe('log-parser', () => {
const entry = JSON.stringify({ ts: 1700000100, msg: 'handled request', status: 200 });
const result = parseLine(entry, emptyBlocked);
expect(result).not.toBeNull();
expect(result!.clientIp).toBe('');
expect(result!.client_ip).toBe('');
expect(result!.host).toBe('');
expect(result!.method).toBe('');
expect(result!.uri).toBe('');
expect(result!.userAgent).toBe('');
expect(result!.user_agent).toBe('');
});
it('marks isBlocked true when signature matches blocked set', () => {
@@ -153,7 +157,7 @@ describe('log-parser', () => {
});
const blocked = new Set([`${ts}|1.2.3.4|GET|/evil`]);
const result = parseLine(entry, blocked);
expect(result!.isBlocked).toBe(true);
expect(result!.is_blocked).toBe(true);
});
it('uses remote_ip as fallback when client_ip is missing', () => {
@@ -164,7 +168,7 @@ describe('log-parser', () => {
request: { remote_ip: '9.8.7.6', host: 'test.com', method: 'GET', uri: '/' },
});
const result = parseLine(entry, emptyBlocked);
expect(result!.clientIp).toBe('9.8.7.6');
expect(result!.client_ip).toBe('9.8.7.6');
});
it('countryCode is null when GeoIP reader is not initialized', () => {
@@ -175,7 +179,7 @@ describe('log-parser', () => {
request: { client_ip: '8.8.8.8', host: 'test.com', method: 'GET', uri: '/' },
});
const result = parseLine(entry, emptyBlocked);
expect(result!.countryCode).toBeNull();
expect(result!.country_code).toBeNull();
});
});
});