fix(uptime): implement initial uptime bootstrap logic and related tests
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
@@ -68,6 +69,33 @@ func setupTestRouterWithReferenceTables(t *testing.T) (*gin.Engine, *gorm.DB) {
|
||||
return r, db
|
||||
}
|
||||
|
||||
func setupTestRouterWithUptime(t *testing.T) (*gin.Engine, *gorm.DB) {
|
||||
t.Helper()
|
||||
|
||||
dsn := "file:" + t.Name() + "?mode=memory&cache=shared"
|
||||
db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{})
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, db.AutoMigrate(
|
||||
&models.ProxyHost{},
|
||||
&models.Location{},
|
||||
&models.Notification{},
|
||||
&models.NotificationProvider{},
|
||||
&models.UptimeMonitor{},
|
||||
&models.UptimeHeartbeat{},
|
||||
&models.UptimeHost{},
|
||||
&models.Setting{},
|
||||
))
|
||||
|
||||
ns := services.NewNotificationService(db)
|
||||
us := services.NewUptimeService(db, ns)
|
||||
h := NewProxyHostHandler(db, nil, ns, us)
|
||||
r := gin.New()
|
||||
api := r.Group("/api/v1")
|
||||
h.RegisterRoutes(api)
|
||||
|
||||
return r, db
|
||||
}
|
||||
|
||||
func TestProxyHostHandler_ResolveAccessListReference_TargetedBranches(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@@ -201,6 +229,35 @@ func TestProxyHostCreate_ReferenceResolution_TargetedBranches(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestProxyHostCreate_TriggersAsyncUptimeSyncWhenServiceConfigured(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
router, db := setupTestRouterWithUptime(t)
|
||||
|
||||
upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
t.Cleanup(upstream.Close)
|
||||
|
||||
domain := strings.TrimPrefix(upstream.URL, "http://")
|
||||
body := fmt.Sprintf(`{"name":"Uptime Hook","domain_names":"%s","forward_scheme":"http","forward_host":"app-service","forward_port":8080,"enabled":true}`, domain)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/proxy-hosts", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
require.Equal(t, http.StatusCreated, resp.Code)
|
||||
|
||||
var created models.ProxyHost
|
||||
require.NoError(t, db.Where("domain_names = ?", domain).First(&created).Error)
|
||||
|
||||
var count int64
|
||||
require.Eventually(t, func() bool {
|
||||
db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", created.ID).Count(&count)
|
||||
return count > 0
|
||||
}, 3*time.Second, 50*time.Millisecond)
|
||||
}
|
||||
|
||||
func TestProxyHostLifecycle(t *testing.T) {
|
||||
t.Parallel()
|
||||
router, _ := setupTestRouter(t)
|
||||
|
||||
@@ -29,6 +29,29 @@ import (
|
||||
_ "github.com/Wikid82/charon/backend/pkg/dnsprovider/custom"
|
||||
)
|
||||
|
||||
// uptimeBootstrapService is the minimal surface of the uptime service that the
// startup bootstrap needs. It is declared at the consumer so tests can
// substitute a lightweight fake.
type uptimeBootstrapService interface {
	CleanupStaleFailureCounts() error
	SyncMonitors() error
	CheckAll()
}

// runInitialUptimeBootstrap performs the one-time uptime bootstrap at startup:
// it clears stale failure counters, syncs monitors against the current proxy
// hosts, and runs an immediate first check. It is a no-op when the feature is
// disabled or no service is configured. Errors are reported through the
// optional logWarn/logError callbacks and never abort the sequence.
func runInitialUptimeBootstrap(enabled bool, uptimeService uptimeBootstrapService, logWarn func(error, string), logError func(error, string)) {
	// Guard against a nil service as well, so a misconfigured caller cannot
	// panic the whole registration path.
	if !enabled || uptimeService == nil {
		return
	}

	// Clean up stale failure counts from historical bugs before the first
	// sync; a failure here is only worth a warning.
	if err := uptimeService.CleanupStaleFailureCounts(); err != nil && logWarn != nil {
		logWarn(err, "Failed to cleanup stale failure counts")
	}

	// A sync failure is logged but must not prevent the initial check below.
	if err := uptimeService.SyncMonitors(); err != nil && logError != nil {
		logError(err, "Failed to sync monitors")
	}

	// Run initial check immediately after sync to avoid the 90s blind window.
	uptimeService.CheckAll()
}
|
||||
|
||||
// Register wires up API routes and performs automatic migrations.
|
||||
func Register(router *gin.Engine, db *gorm.DB, cfg config.Config) error {
|
||||
// Caddy Manager - created early so it can be used by settings handlers for config reload
|
||||
@@ -464,18 +487,12 @@ func RegisterWithDeps(router *gin.Engine, db *gorm.DB, cfg config.Config, caddyM
|
||||
enabled = s.Value == "true"
|
||||
}
|
||||
|
||||
if enabled {
|
||||
// Clean up stale failure counts from historical bugs before first sync
|
||||
if err := uptimeService.CleanupStaleFailureCounts(); err != nil {
|
||||
logger.Log().WithError(err).Warn("Failed to cleanup stale failure counts")
|
||||
}
|
||||
|
||||
if err := uptimeService.SyncMonitors(); err != nil {
|
||||
logger.Log().WithError(err).Error("Failed to sync monitors")
|
||||
}
|
||||
// Run initial check immediately after sync to avoid the 90s blind window
|
||||
uptimeService.CheckAll()
|
||||
}
|
||||
runInitialUptimeBootstrap(
|
||||
enabled,
|
||||
uptimeService,
|
||||
func(err error, msg string) { logger.Log().WithError(err).Warn(msg) },
|
||||
func(err error, msg string) { logger.Log().WithError(err).Error(msg) },
|
||||
)
|
||||
|
||||
ticker := time.NewTicker(1 * time.Minute)
|
||||
for range ticker.C {
|
||||
|
||||
@@ -73,3 +73,55 @@ func TestRegister_LegacyMigrationErrorIsNonFatal(t *testing.T) {
|
||||
}
|
||||
require.True(t, hasHealth)
|
||||
}
|
||||
|
||||
func TestRegister_UptimeFeatureFlagDefaultErrorIsNonFatal(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
router := gin.New()
|
||||
|
||||
db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_uptime_flag_warn"), &gorm.Config{
|
||||
Logger: logger.Default.LogMode(logger.Silent),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
const cbName = "routes:test_force_settings_query_error"
|
||||
err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) {
|
||||
if tx.Statement != nil && tx.Statement.Table == "settings" {
|
||||
_ = tx.AddError(errors.New("forced settings query failure"))
|
||||
}
|
||||
})
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
_ = db.Callback().Query().Remove(cbName)
|
||||
})
|
||||
|
||||
cfg := config.Config{JWTSecret: "test-secret"}
|
||||
|
||||
err = Register(router, db, cfg)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestRegister_SecurityHeaderPresetInitErrorIsNonFatal(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
router := gin.New()
|
||||
|
||||
db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_sec_header_presets_warn"), &gorm.Config{
|
||||
Logger: logger.Default.LogMode(logger.Silent),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
const cbName = "routes:test_force_security_header_profile_query_error"
|
||||
err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) {
|
||||
if tx.Statement != nil && tx.Statement.Table == "security_header_profiles" {
|
||||
_ = tx.AddError(errors.New("forced security_header_profiles query failure"))
|
||||
}
|
||||
})
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
_ = db.Callback().Query().Remove(cbName)
|
||||
})
|
||||
|
||||
cfg := config.Config{JWTSecret: "test-secret"}
|
||||
|
||||
err = Register(router, db, cfg)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// testUptimeBootstrapService is an in-process fake of the
// uptimeBootstrapService interface. It counts invocations of each method and
// returns the pre-configured errors, letting tests exercise every branch of
// runInitialUptimeBootstrap without a real uptime service.
type testUptimeBootstrapService struct {
	// Errors returned by the corresponding methods (nil means success).
	cleanupErr error
	syncErr    error

	// Invocation counters inspected by the tests.
	cleanupCalls  int
	syncCalls     int
	checkAllCalls int
}

// CleanupStaleFailureCounts records the call and returns the stubbed error.
func (s *testUptimeBootstrapService) CleanupStaleFailureCounts() error {
	s.cleanupCalls++
	return s.cleanupErr
}

// SyncMonitors records the call and returns the stubbed error.
func (s *testUptimeBootstrapService) SyncMonitors() error {
	s.syncCalls++
	return s.syncErr
}

// CheckAll records the call; it has no failure mode.
func (s *testUptimeBootstrapService) CheckAll() {
	s.checkAllCalls++
}
|
||||
|
||||
func TestRunInitialUptimeBootstrap_Disabled_DoesNothing(t *testing.T) {
|
||||
svc := &testUptimeBootstrapService{}
|
||||
|
||||
warnLogs := 0
|
||||
errorLogs := 0
|
||||
runInitialUptimeBootstrap(
|
||||
false,
|
||||
svc,
|
||||
func(err error, msg string) { warnLogs++ },
|
||||
func(err error, msg string) { errorLogs++ },
|
||||
)
|
||||
|
||||
assert.Equal(t, 0, svc.cleanupCalls)
|
||||
assert.Equal(t, 0, svc.syncCalls)
|
||||
assert.Equal(t, 0, svc.checkAllCalls)
|
||||
assert.Equal(t, 0, warnLogs)
|
||||
assert.Equal(t, 0, errorLogs)
|
||||
}
|
||||
|
||||
func TestRunInitialUptimeBootstrap_Enabled_HappyPath(t *testing.T) {
|
||||
svc := &testUptimeBootstrapService{}
|
||||
|
||||
warnLogs := 0
|
||||
errorLogs := 0
|
||||
runInitialUptimeBootstrap(
|
||||
true,
|
||||
svc,
|
||||
func(err error, msg string) { warnLogs++ },
|
||||
func(err error, msg string) { errorLogs++ },
|
||||
)
|
||||
|
||||
assert.Equal(t, 1, svc.cleanupCalls)
|
||||
assert.Equal(t, 1, svc.syncCalls)
|
||||
assert.Equal(t, 1, svc.checkAllCalls)
|
||||
assert.Equal(t, 0, warnLogs)
|
||||
assert.Equal(t, 0, errorLogs)
|
||||
}
|
||||
|
||||
func TestRunInitialUptimeBootstrap_Enabled_CleanupError_StillProceeds(t *testing.T) {
|
||||
svc := &testUptimeBootstrapService{cleanupErr: errors.New("cleanup failed")}
|
||||
|
||||
warnLogs := 0
|
||||
errorLogs := 0
|
||||
runInitialUptimeBootstrap(
|
||||
true,
|
||||
svc,
|
||||
func(err error, msg string) { warnLogs++ },
|
||||
func(err error, msg string) { errorLogs++ },
|
||||
)
|
||||
|
||||
assert.Equal(t, 1, svc.cleanupCalls)
|
||||
assert.Equal(t, 1, svc.syncCalls)
|
||||
assert.Equal(t, 1, svc.checkAllCalls)
|
||||
assert.Equal(t, 1, warnLogs)
|
||||
assert.Equal(t, 0, errorLogs)
|
||||
}
|
||||
|
||||
func TestRunInitialUptimeBootstrap_Enabled_SyncError_StillChecksAll(t *testing.T) {
|
||||
svc := &testUptimeBootstrapService{syncErr: errors.New("sync failed")}
|
||||
|
||||
warnLogs := 0
|
||||
errorLogs := 0
|
||||
runInitialUptimeBootstrap(
|
||||
true,
|
||||
svc,
|
||||
func(err error, msg string) { warnLogs++ },
|
||||
func(err error, msg string) { errorLogs++ },
|
||||
)
|
||||
|
||||
assert.Equal(t, 1, svc.cleanupCalls)
|
||||
assert.Equal(t, 1, svc.syncCalls)
|
||||
assert.Equal(t, 1, svc.checkAllCalls)
|
||||
assert.Equal(t, 0, warnLogs)
|
||||
assert.Equal(t, 1, errorLogs)
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -246,6 +247,63 @@ func TestSyncAndCheckForHost_MissingSetting_StillCreates(t *testing.T) {
|
||||
assert.Greater(t, count, int64(0), "monitor should be created when setting is missing (default: enabled)")
|
||||
}
|
||||
|
||||
func TestSyncAndCheckForHost_UsesDomainWhenHostNameMissing(t *testing.T) {
|
||||
db := setupPR1TestDB(t)
|
||||
enableUptimeFeature(t, db)
|
||||
svc := NewUptimeService(db, nil)
|
||||
server := createAlwaysOKServer(t)
|
||||
domain := hostPortFromServerURL(server.URL)
|
||||
|
||||
host := createTestProxyHost(t, db, "", domain, "10.10.10.10")
|
||||
|
||||
svc.SyncAndCheckForHost(host.ID)
|
||||
|
||||
var monitor models.UptimeMonitor
|
||||
require.NoError(t, db.Where("proxy_host_id = ?", host.ID).First(&monitor).Error)
|
||||
assert.Equal(t, domain, monitor.Name)
|
||||
}
|
||||
|
||||
func TestSyncAndCheckForHost_CreateMonitorError_ReturnsWithoutPanic(t *testing.T) {
|
||||
db := setupPR1TestDB(t)
|
||||
enableUptimeFeature(t, db)
|
||||
svc := NewUptimeService(db, nil)
|
||||
server := createAlwaysOKServer(t)
|
||||
domain := hostPortFromServerURL(server.URL)
|
||||
|
||||
host := createTestProxyHost(t, db, "create-error-host", domain, "10.10.10.11")
|
||||
|
||||
callbackName := "test:force_uptime_monitor_create_error"
|
||||
require.NoError(t, db.Callback().Create().Before("gorm:create").Register(callbackName, func(tx *gorm.DB) {
|
||||
if tx.Statement != nil && tx.Statement.Schema != nil && tx.Statement.Schema.Name == "UptimeMonitor" {
|
||||
_ = tx.AddError(errors.New("forced uptime monitor create error"))
|
||||
}
|
||||
}))
|
||||
t.Cleanup(func() {
|
||||
_ = db.Callback().Create().Remove(callbackName)
|
||||
})
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
svc.SyncAndCheckForHost(host.ID)
|
||||
})
|
||||
|
||||
var count int64
|
||||
db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count)
|
||||
assert.Equal(t, int64(0), count)
|
||||
}
|
||||
|
||||
func TestSyncAndCheckForHost_QueryMonitorError_ReturnsWithoutPanic(t *testing.T) {
|
||||
db := setupPR1TestDB(t)
|
||||
enableUptimeFeature(t, db)
|
||||
svc := NewUptimeService(db, nil)
|
||||
host := createTestProxyHost(t, db, "query-error-host", "query-error.example.com", "10.10.10.12")
|
||||
|
||||
require.NoError(t, db.Migrator().DropTable(&models.UptimeMonitor{}))
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
svc.SyncAndCheckForHost(host.ID)
|
||||
})
|
||||
}
|
||||
|
||||
// --- Fix 4: CleanupStaleFailureCounts ---
|
||||
|
||||
func TestCleanupStaleFailureCounts_ResetsStuckMonitors(t *testing.T) {
|
||||
@@ -360,6 +418,19 @@ func TestCleanupStaleFailureCounts_DoesNotResetDownHosts(t *testing.T) {
|
||||
assert.Equal(t, "down", h.Status, "cleanup must not reset host status")
|
||||
}
|
||||
|
||||
func TestCleanupStaleFailureCounts_ReturnsErrorWhenDatabaseUnavailable(t *testing.T) {
|
||||
db := setupPR1TestDB(t)
|
||||
svc := NewUptimeService(db, nil)
|
||||
|
||||
sqlDB, err := db.DB()
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, sqlDB.Close())
|
||||
|
||||
err = svc.CleanupStaleFailureCounts()
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "cleanup stale failure counts")
|
||||
}
|
||||
|
||||
// setupPR1ConcurrentDB creates a file-based SQLite database with WAL mode and
|
||||
// busy_timeout to handle concurrent writes without "database table is locked".
|
||||
func setupPR1ConcurrentDB(t *testing.T) *gorm.DB {
|
||||
|
||||
@@ -139,6 +139,23 @@ describe('Uptime page', () => {
|
||||
expect(screen.getByText('Loading monitors...')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('falls back to DOWN status when monitor status is unknown', async () => {
  const { getMonitors, getMonitorHistory } = await import('../../api/uptime')

  // A status value outside the known set must render as DOWN.
  const unknownStatusMonitor = {
    id: 'm-unknown-status',
    name: 'UnknownStatusMonitor',
    url: 'http://example.com',
    type: 'http',
    interval: 60,
    enabled: true,
    status: 'mystery',
    last_check: new Date().toISOString(),
    latency: 10,
    max_retries: 3,
  }
  vi.mocked(getMonitors).mockResolvedValue([unknownStatusMonitor])
  vi.mocked(getMonitorHistory).mockResolvedValue([])

  renderWithQueryClient(<Uptime />)
  await waitFor(() => expect(screen.getByText('UnknownStatusMonitor')).toBeInTheDocument())

  const badge = screen.getByTestId('status-badge')
  expect(badge).toHaveAttribute('data-status', 'down')
  expect(badge).toHaveTextContent('DOWN')
})
|
||||
|
||||
it('renders empty state when no monitors exist', async () => {
|
||||
const { getMonitors } = await import('../../api/uptime')
|
||||
vi.mocked(getMonitors).mockResolvedValue([])
|
||||
|
||||
Reference in New Issue
Block a user