Migrate analytics from SQLite to ClickHouse
SQLite was too slow for analytical aggregations on traffic_events and waf_events (millions of rows, GROUP BY, COUNT DISTINCT). ClickHouse is a columnar OLAP database purpose-built for this workload.

- Add ClickHouse container to Docker Compose with health check
- Create src/lib/clickhouse/client.ts with singleton client, table DDL, insert helpers, and all analytics query functions
- Update log-parser.ts and waf-log-parser.ts to write to ClickHouse
- Remove purgeOldEntries — ClickHouse TTL handles 90-day retention
- Rewrite analytics-db.ts and waf-events.ts to query ClickHouse
- Remove trafficEvents/wafEvents from SQLite schema, add migration
- CLICKHOUSE_PASSWORD is required (no hardcoded default)
- Update .env.example, README, and test infrastructure

API response shapes are unchanged — no frontend modifications needed. Parse state (file offsets) remains in SQLite.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -52,6 +52,12 @@ services:
|
||||
OAUTH_TOKEN_URL: ${OAUTH_TOKEN_URL:-}
|
||||
OAUTH_USERINFO_URL: ${OAUTH_USERINFO_URL:-}
|
||||
OAUTH_ALLOW_AUTO_LINKING: ${OAUTH_ALLOW_AUTO_LINKING:-false}
|
||||
|
||||
# ClickHouse analytics database
|
||||
CLICKHOUSE_URL: ${CLICKHOUSE_URL:-http://clickhouse:8123}
|
||||
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-cpm}
|
||||
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:?ERROR - CLICKHOUSE_PASSWORD is required}
|
||||
CLICKHOUSE_DB: ${CLICKHOUSE_DB:-analytics}
|
||||
group_add:
|
||||
- "${CADDY_GID:-10000}" # caddy's GID — lets the web user read /logs/access.log
|
||||
volumes:
|
||||
@@ -61,6 +67,8 @@ services:
|
||||
depends_on:
|
||||
caddy:
|
||||
condition: service_healthy
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- caddy-network
|
||||
healthcheck:
|
||||
@@ -159,6 +167,30 @@ services:
|
||||
networks:
|
||||
- caddy-network
|
||||
|
||||
clickhouse:
  # ClickHouse server used as the analytics store. Another service in this
  # file waits on this one via `depends_on: clickhouse: service_healthy`,
  # so the healthcheck below gates that service's startup.
  container_name: caddy-proxy-manager-clickhouse
  # NOTE(review): `latest-alpine` is a floating tag — consider pinning a
  # specific release so server upgrades are deliberate rather than implicit.
  image: clickhouse/clickhouse-server:latest-alpine
  restart: always
  environment:
    CLICKHOUSE_DB: ${CLICKHOUSE_DB:-analytics}
    CLICKHOUSE_USER: ${CLICKHOUSE_USER:-cpm}
    # No default on purpose: Compose aborts with this message when unset.
    CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:?ERROR - CLICKHOUSE_PASSWORD is required}
    # Quoted so the value is passed as the string "1", not a YAML integer.
    CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
  volumes:
    - clickhouse-data:/var/lib/clickhouse
  networks:
    - caddy-network
  healthcheck:
    # `$$` escapes Compose interpolation, so the variables are expanded by the
    # shell inside the container from the `environment` values set above.
    # Double-quoting keeps passwords containing spaces or shell
    # metacharacters from breaking (or injecting into) the probe command, and
    # keeps the secret out of the rendered Compose configuration.
    test:
      - CMD-SHELL
      - >-
        clickhouse-client
        --user "$$CLICKHOUSE_USER"
        --password "$$CLICKHOUSE_PASSWORD"
        --query 'SELECT 1'
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 30s
  ulimits:
    # Same soft and hard limit for open file descriptors.
    nofile:
      soft: 262144
      hard: 262144
|
||||
|
||||
geoipupdate:
|
||||
container_name: geoipupdate-${HOSTNAME}
|
||||
image: ghcr.io/maxmind/geoipupdate
|
||||
@@ -184,3 +216,4 @@ volumes:
|
||||
caddy-config:
|
||||
caddy-logs:
|
||||
geoip-data:
|
||||
clickhouse-data:
|
||||
|
||||
Reference in New Issue
Block a user