fix(uptime): implement initial uptime bootstrap logic and related tests

This commit is contained in:
GitHub Actions
2026-03-02 03:40:08 +00:00
parent 8cbd907d82
commit 10259146df
6 changed files with 333 additions and 12 deletions
@@ -9,6 +9,7 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
@@ -68,6 +69,33 @@ func setupTestRouterWithReferenceTables(t *testing.T) (*gin.Engine, *gorm.DB) {
return r, db
}
// setupTestRouterWithUptime builds a gin router backed by a shared in-memory
// SQLite database, with the uptime service wired into the proxy-host handler.
// The DSN embeds t.Name() so parallel tests never share a database.
func setupTestRouterWithUptime(t *testing.T) (*gin.Engine, *gorm.DB) {
	t.Helper()
	// cache=shared keeps the in-memory DB alive across the multiple
	// connections gorm may open for the same test.
	dsn := "file:" + t.Name() + "?mode=memory&cache=shared"
	db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{})
	require.NoError(t, err)
	// Migrate every model the proxy-host + uptime paths touch.
	require.NoError(t, db.AutoMigrate(
		&models.ProxyHost{},
		&models.Location{},
		&models.Notification{},
		&models.NotificationProvider{},
		&models.UptimeMonitor{},
		&models.UptimeHeartbeat{},
		&models.UptimeHost{},
		&models.Setting{},
	))
	ns := services.NewNotificationService(db)
	us := services.NewUptimeService(db, ns)
	// nil caddy manager: these tests never reload proxy config.
	h := NewProxyHostHandler(db, nil, ns, us)
	r := gin.New()
	api := r.Group("/api/v1")
	h.RegisterRoutes(api)
	return r, db
}
func TestProxyHostHandler_ResolveAccessListReference_TargetedBranches(t *testing.T) {
t.Parallel()
@@ -201,6 +229,35 @@ func TestProxyHostCreate_ReferenceResolution_TargetedBranches(t *testing.T) {
})
}
// TestProxyHostCreate_TriggersAsyncUptimeSyncWhenServiceConfigured verifies
// that creating a proxy host via the API eventually produces an uptime
// monitor for that host (the sync runs asynchronously after create).
func TestProxyHostCreate_TriggersAsyncUptimeSyncWhenServiceConfigured(t *testing.T) {
	t.Parallel()
	router, db := setupTestRouterWithUptime(t)
	// Upstream that always answers 200 so the monitor's first check succeeds.
	upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))
	t.Cleanup(upstream.Close)
	domain := strings.TrimPrefix(upstream.URL, "http://")
	body := fmt.Sprintf(`{"name":"Uptime Hook","domain_names":"%s","forward_scheme":"http","forward_host":"app-service","forward_port":8080,"enabled":true}`, domain)
	req := httptest.NewRequest(http.MethodPost, "/api/v1/proxy-hosts", strings.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	resp := httptest.NewRecorder()
	router.ServeHTTP(resp, req)
	require.Equal(t, http.StatusCreated, resp.Code)
	var created models.ProxyHost
	require.NoError(t, db.Where("domain_names = ?", domain).First(&created).Error)
	// Poll for the monitor. The counter is scoped INSIDE the closure:
	// testify's Eventually invokes the condition from a fresh goroutine each
	// tick, so writing to a captured outer variable would be a data race
	// under -race. The query error is also checked instead of being dropped.
	require.Eventually(t, func() bool {
		var count int64
		if err := db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", created.ID).Count(&count).Error; err != nil {
			return false
		}
		return count > 0
	}, 3*time.Second, 50*time.Millisecond)
}
func TestProxyHostLifecycle(t *testing.T) {
t.Parallel()
router, _ := setupTestRouter(t)
+29 -12
View File
@@ -29,6 +29,29 @@ import (
_ "github.com/Wikid82/charon/backend/pkg/dnsprovider/custom"
)
// uptimeBootstrapService is the minimal surface of the uptime service needed
// by the startup bootstrap. Declared at the consumer so tests can substitute
// a stub without constructing the real service.
type uptimeBootstrapService interface {
	// CleanupStaleFailureCounts resets failure counters left over from
	// historical bugs; returns an error when the reset query fails.
	CleanupStaleFailureCounts() error
	// SyncMonitors reconciles monitors with the current set of proxy hosts.
	SyncMonitors() error
	// CheckAll runs a check of every monitor immediately.
	CheckAll()
}
// runInitialUptimeBootstrap performs the one-time uptime startup sequence:
// stale-failure-count cleanup, a monitor sync, and an immediate first check.
// It is a no-op when the feature is disabled. Cleanup/sync errors are reported
// through the supplied loggers (when non-nil) but never abort the sequence.
func runInitialUptimeBootstrap(enabled bool, uptimeService uptimeBootstrapService, logWarn func(error, string), logError func(error, string)) {
	if !enabled {
		return
	}
	cleanupErr := uptimeService.CleanupStaleFailureCounts()
	if cleanupErr != nil && logWarn != nil {
		logWarn(cleanupErr, "Failed to cleanup stale failure counts")
	}
	syncErr := uptimeService.SyncMonitors()
	if syncErr != nil && logError != nil {
		logError(syncErr, "Failed to sync monitors")
	}
	// Kick off a check right away so there is no 90s blind window after boot.
	uptimeService.CheckAll()
}
// Register wires up API routes and performs automatic migrations.
func Register(router *gin.Engine, db *gorm.DB, cfg config.Config) error {
// Caddy Manager - created early so it can be used by settings handlers for config reload
@@ -464,18 +487,12 @@ func RegisterWithDeps(router *gin.Engine, db *gorm.DB, cfg config.Config, caddyM
enabled = s.Value == "true"
}
if enabled {
// Clean up stale failure counts from historical bugs before first sync
if err := uptimeService.CleanupStaleFailureCounts(); err != nil {
logger.Log().WithError(err).Warn("Failed to cleanup stale failure counts")
}
if err := uptimeService.SyncMonitors(); err != nil {
logger.Log().WithError(err).Error("Failed to sync monitors")
}
// Run initial check immediately after sync to avoid the 90s blind window
uptimeService.CheckAll()
}
runInitialUptimeBootstrap(
enabled,
uptimeService,
func(err error, msg string) { logger.Log().WithError(err).Warn(msg) },
func(err error, msg string) { logger.Log().WithError(err).Error(msg) },
)
ticker := time.NewTicker(1 * time.Minute)
for range ticker.C {
@@ -73,3 +73,55 @@ func TestRegister_LegacyMigrationErrorIsNonFatal(t *testing.T) {
}
require.True(t, hasHealth)
}
// TestRegister_UptimeFeatureFlagDefaultErrorIsNonFatal verifies that Register
// still succeeds when reading the uptime feature-flag setting fails: the
// failure must be logged/ignored, not propagated as a registration error.
func TestRegister_UptimeFeatureFlagDefaultErrorIsNonFatal(t *testing.T) {
	gin.SetMode(gin.TestMode)
	router := gin.New()
	db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_uptime_flag_warn"), &gorm.Config{
		Logger: logger.Default.LogMode(logger.Silent),
	})
	require.NoError(t, err)
	// Force every query against the settings table to error, simulating a
	// broken settings read during Register.
	const cbName = "routes:test_force_settings_query_error"
	err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) {
		if tx.Statement != nil && tx.Statement.Table == "settings" {
			_ = tx.AddError(errors.New("forced settings query failure"))
		}
	})
	require.NoError(t, err)
	t.Cleanup(func() {
		// Remove the callback so the shared in-memory DB is clean for others.
		_ = db.Callback().Query().Remove(cbName)
	})
	cfg := config.Config{JWTSecret: "test-secret"}
	err = Register(router, db, cfg)
	require.NoError(t, err)
}
// TestRegister_SecurityHeaderPresetInitErrorIsNonFatal verifies that a
// failure while initializing security-header presets does not make Register
// return an error — preset init is best-effort.
func TestRegister_SecurityHeaderPresetInitErrorIsNonFatal(t *testing.T) {
	gin.SetMode(gin.TestMode)
	router := gin.New()
	db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_sec_header_presets_warn"), &gorm.Config{
		Logger: logger.Default.LogMode(logger.Silent),
	})
	require.NoError(t, err)
	// Force every query against security_header_profiles to error, simulating
	// a broken preset lookup during Register.
	const cbName = "routes:test_force_security_header_profile_query_error"
	err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) {
		if tx.Statement != nil && tx.Statement.Table == "security_header_profiles" {
			_ = tx.AddError(errors.New("forced security_header_profiles query failure"))
		}
	})
	require.NoError(t, err)
	t.Cleanup(func() {
		// Remove the callback so the shared in-memory DB is clean for others.
		_ = db.Callback().Query().Remove(cbName)
	})
	cfg := config.Config{JWTSecret: "test-secret"}
	err = Register(router, db, cfg)
	require.NoError(t, err)
}
@@ -0,0 +1,107 @@
package routes
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
)
// testUptimeBootstrapService is a stub implementation of
// uptimeBootstrapService that records call counts and returns configurable
// errors, so bootstrap behavior can be asserted without a real service.
type testUptimeBootstrapService struct {
	cleanupErr   error // returned by CleanupStaleFailureCounts
	syncErr      error // returned by SyncMonitors
	cleanupCalls int   // number of CleanupStaleFailureCounts invocations
	syncCalls    int   // number of SyncMonitors invocations
	checkAllCalls int  // number of CheckAll invocations
}

// CleanupStaleFailureCounts records the call and returns the configured error.
func (s *testUptimeBootstrapService) CleanupStaleFailureCounts() error {
	s.cleanupCalls++
	return s.cleanupErr
}

// SyncMonitors records the call and returns the configured error.
func (s *testUptimeBootstrapService) SyncMonitors() error {
	s.syncCalls++
	return s.syncErr
}

// CheckAll records the call.
func (s *testUptimeBootstrapService) CheckAll() {
	s.checkAllCalls++
}
// TestRunInitialUptimeBootstrap_Disabled_DoesNothing verifies that a disabled
// feature flag short-circuits the bootstrap: no service calls, no logging.
func TestRunInitialUptimeBootstrap_Disabled_DoesNothing(t *testing.T) {
	var warnCount, errorCount int
	stub := &testUptimeBootstrapService{}
	warn := func(_ error, _ string) { warnCount++ }
	fail := func(_ error, _ string) { errorCount++ }

	runInitialUptimeBootstrap(false, stub, warn, fail)

	assert.Equal(t, 0, stub.cleanupCalls)
	assert.Equal(t, 0, stub.syncCalls)
	assert.Equal(t, 0, stub.checkAllCalls)
	assert.Equal(t, 0, warnCount)
	assert.Equal(t, 0, errorCount)
}
// TestRunInitialUptimeBootstrap_Enabled_HappyPath verifies that with the
// feature enabled and no errors, each phase runs exactly once and nothing
// is logged.
func TestRunInitialUptimeBootstrap_Enabled_HappyPath(t *testing.T) {
	var warnCount, errorCount int
	stub := &testUptimeBootstrapService{}
	warn := func(_ error, _ string) { warnCount++ }
	fail := func(_ error, _ string) { errorCount++ }

	runInitialUptimeBootstrap(true, stub, warn, fail)

	assert.Equal(t, 1, stub.cleanupCalls)
	assert.Equal(t, 1, stub.syncCalls)
	assert.Equal(t, 1, stub.checkAllCalls)
	assert.Equal(t, 0, warnCount)
	assert.Equal(t, 0, errorCount)
}
// TestRunInitialUptimeBootstrap_Enabled_CleanupError_StillProceeds verifies
// that a cleanup failure is logged as a warning but does not stop the sync
// or the initial check.
func TestRunInitialUptimeBootstrap_Enabled_CleanupError_StillProceeds(t *testing.T) {
	var warnCount, errorCount int
	stub := &testUptimeBootstrapService{cleanupErr: errors.New("cleanup failed")}
	warn := func(_ error, _ string) { warnCount++ }
	fail := func(_ error, _ string) { errorCount++ }

	runInitialUptimeBootstrap(true, stub, warn, fail)

	assert.Equal(t, 1, stub.cleanupCalls)
	assert.Equal(t, 1, stub.syncCalls)
	assert.Equal(t, 1, stub.checkAllCalls)
	assert.Equal(t, 1, warnCount)
	assert.Equal(t, 0, errorCount)
}
// TestRunInitialUptimeBootstrap_Enabled_SyncError_StillChecksAll verifies
// that a sync failure is logged as an error but the initial CheckAll still
// runs.
func TestRunInitialUptimeBootstrap_Enabled_SyncError_StillChecksAll(t *testing.T) {
	var warnCount, errorCount int
	stub := &testUptimeBootstrapService{syncErr: errors.New("sync failed")}
	warn := func(_ error, _ string) { warnCount++ }
	fail := func(_ error, _ string) { errorCount++ }

	runInitialUptimeBootstrap(true, stub, warn, fail)

	assert.Equal(t, 1, stub.cleanupCalls)
	assert.Equal(t, 1, stub.syncCalls)
	assert.Equal(t, 1, stub.checkAllCalls)
	assert.Equal(t, 0, warnCount)
	assert.Equal(t, 1, errorCount)
}
@@ -1,6 +1,7 @@
package services
import (
"errors"
"fmt"
"net/http"
"net/http/httptest"
@@ -246,6 +247,63 @@ func TestSyncAndCheckForHost_MissingSetting_StillCreates(t *testing.T) {
assert.Greater(t, count, int64(0), "monitor should be created when setting is missing (default: enabled)")
}
// TestSyncAndCheckForHost_UsesDomainWhenHostNameMissing checks that when a
// proxy host has an empty name, the created monitor falls back to using the
// host's domain as its name.
func TestSyncAndCheckForHost_UsesDomainWhenHostNameMissing(t *testing.T) {
	db := setupPR1TestDB(t)
	enableUptimeFeature(t, db)
	server := createAlwaysOKServer(t)
	domain := hostPortFromServerURL(server.URL)
	// Empty name: the monitor name must come from the domain instead.
	host := createTestProxyHost(t, db, "", domain, "10.10.10.10")

	NewUptimeService(db, nil).SyncAndCheckForHost(host.ID)

	var monitor models.UptimeMonitor
	require.NoError(t, db.Where("proxy_host_id = ?", host.ID).First(&monitor).Error)
	assert.Equal(t, domain, monitor.Name)
}
// TestSyncAndCheckForHost_CreateMonitorError_ReturnsWithoutPanic verifies
// that a failure while inserting the UptimeMonitor row is handled gracefully:
// the service must not panic and no monitor row may be left behind.
func TestSyncAndCheckForHost_CreateMonitorError_ReturnsWithoutPanic(t *testing.T) {
	db := setupPR1TestDB(t)
	enableUptimeFeature(t, db)
	svc := NewUptimeService(db, nil)
	server := createAlwaysOKServer(t)
	domain := hostPortFromServerURL(server.URL)
	host := createTestProxyHost(t, db, "create-error-host", domain, "10.10.10.11")
	// Force only UptimeMonitor inserts to fail; other creates (e.g. the
	// proxy host above) must keep working.
	callbackName := "test:force_uptime_monitor_create_error"
	require.NoError(t, db.Callback().Create().Before("gorm:create").Register(callbackName, func(tx *gorm.DB) {
		if tx.Statement != nil && tx.Statement.Schema != nil && tx.Statement.Schema.Name == "UptimeMonitor" {
			_ = tx.AddError(errors.New("forced uptime monitor create error"))
		}
	}))
	t.Cleanup(func() {
		// Unregister so later tests on the shared DB create normally.
		_ = db.Callback().Create().Remove(callbackName)
	})
	assert.NotPanics(t, func() {
		svc.SyncAndCheckForHost(host.ID)
	})
	// The failed insert must not have produced a monitor row.
	var count int64
	db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count)
	assert.Equal(t, int64(0), count)
}
// TestSyncAndCheckForHost_QueryMonitorError_ReturnsWithoutPanic verifies the
// service tolerates a failing monitor lookup instead of panicking.
func TestSyncAndCheckForHost_QueryMonitorError_ReturnsWithoutPanic(t *testing.T) {
	db := setupPR1TestDB(t)
	enableUptimeFeature(t, db)
	host := createTestProxyHost(t, db, "query-error-host", "query-error.example.com", "10.10.10.12")
	// Dropping the table makes the monitor query fail inside the service.
	require.NoError(t, db.Migrator().DropTable(&models.UptimeMonitor{}))
	svc := NewUptimeService(db, nil)
	assert.NotPanics(t, func() { svc.SyncAndCheckForHost(host.ID) })
}
// --- Fix 4: CleanupStaleFailureCounts ---
func TestCleanupStaleFailureCounts_ResetsStuckMonitors(t *testing.T) {
@@ -360,6 +418,19 @@ func TestCleanupStaleFailureCounts_DoesNotResetDownHosts(t *testing.T) {
assert.Equal(t, "down", h.Status, "cleanup must not reset host status")
}
// TestCleanupStaleFailureCounts_ReturnsErrorWhenDatabaseUnavailable verifies
// that cleanup surfaces a wrapped error (mentioning the operation) when the
// underlying database connection is gone.
func TestCleanupStaleFailureCounts_ReturnsErrorWhenDatabaseUnavailable(t *testing.T) {
	db := setupPR1TestDB(t)
	svc := NewUptimeService(db, nil)
	// Close the raw connection so every subsequent query fails.
	raw, err := db.DB()
	require.NoError(t, err)
	require.NoError(t, raw.Close())

	cleanupErr := svc.CleanupStaleFailureCounts()
	require.Error(t, cleanupErr)
	assert.Contains(t, cleanupErr.Error(), "cleanup stale failure counts")
}
// setupPR1ConcurrentDB creates a file-based SQLite database with WAL mode and
// busy_timeout to handle concurrent writes without "database table is locked".
func setupPR1ConcurrentDB(t *testing.T) *gorm.DB {
@@ -139,6 +139,23 @@ describe('Uptime page', () => {
expect(screen.getByText('Loading monitors...')).toBeInTheDocument()
})
// An unrecognized monitor status must not break rendering: the status badge
// falls back to the DOWN presentation for any unknown value.
it('falls back to DOWN status when monitor status is unknown', async () => {
  const { getMonitors, getMonitorHistory } = await import('../../api/uptime')
  // Monitor fixture with a status string the UI does not recognize.
  const monitor = {
    id: 'm-unknown-status', name: 'UnknownStatusMonitor', url: 'http://example.com', type: 'http', interval: 60, enabled: true,
    status: 'mystery', last_check: new Date().toISOString(), latency: 10, max_retries: 3,
  }
  vi.mocked(getMonitors).mockResolvedValue([monitor])
  vi.mocked(getMonitorHistory).mockResolvedValue([])
  renderWithQueryClient(<Uptime />)
  // Wait for the monitor list to render before inspecting the badge.
  await waitFor(() => expect(screen.getByText('UnknownStatusMonitor')).toBeInTheDocument())
  const badge = screen.getByTestId('status-badge')
  expect(badge).toHaveAttribute('data-status', 'down')
  expect(badge).toHaveTextContent('DOWN')
})
it('renders empty state when no monitors exist', async () => {
const { getMonitors } = await import('../../api/uptime')
vi.mocked(getMonitors).mockResolvedValue([])