diff --git a/.github/workflows/renovate.yml b/.github/workflows/renovate.yml index dd73e2cd..6d17aa86 100644 --- a/.github/workflows/renovate.yml +++ b/.github/workflows/renovate.yml @@ -25,7 +25,7 @@ jobs: fetch-depth: 1 - name: Run Renovate - uses: renovatebot/github-action@8d75b92f43899d483728e9a8a7fd44238020f6e6 # v46.1.2 + uses: renovatebot/github-action@7b4b65bf31e07d4e3e51708d07700fb41bc03166 # v46.1.3 with: configurationFile: .github/renovate.json token: ${{ secrets.RENOVATE_TOKEN || secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/security-pr.yml b/.github/workflows/security-pr.yml index c02e9da2..8eeb9569 100644 --- a/.github/workflows/security-pr.yml +++ b/.github/workflows/security-pr.yml @@ -362,7 +362,7 @@ jobs: - name: Run Trivy filesystem scan (SARIF output) if: steps.check-artifact.outputs.artifact_exists == 'true' || github.event_name == 'push' || github.event_name == 'pull_request' # aquasecurity/trivy-action v0.33.1 - uses: aquasecurity/trivy-action@1bd062560b422f5944df1de50abd05162bea079e + uses: aquasecurity/trivy-action@4c61e6329bab9be735ca35291551614bc663dff3 with: scan-type: 'fs' scan-ref: ${{ steps.extract.outputs.binary_path }} @@ -394,7 +394,7 @@ jobs: - name: Run Trivy filesystem scan (fail on CRITICAL/HIGH) if: steps.check-artifact.outputs.artifact_exists == 'true' || github.event_name == 'push' || github.event_name == 'pull_request' # aquasecurity/trivy-action v0.33.1 - uses: aquasecurity/trivy-action@1bd062560b422f5944df1de50abd05162bea079e + uses: aquasecurity/trivy-action@4c61e6329bab9be735ca35291551614bc663dff3 with: scan-type: 'fs' scan-ref: ${{ steps.extract.outputs.binary_path }} diff --git a/.github/workflows/security-weekly-rebuild.yml b/.github/workflows/security-weekly-rebuild.yml index 62e76a6c..db2916f5 100644 --- a/.github/workflows/security-weekly-rebuild.yml +++ b/.github/workflows/security-weekly-rebuild.yml @@ -6,7 +6,7 @@ name: Weekly Security Rebuild on: schedule: - - cron: '0 2 * * 0' # Sundays at 02:00 UTC 
+ - cron: '0 12 * * 2' # Tuesdays at 12:00 UTC workflow_dispatch: inputs: force_rebuild: diff --git a/.github/workflows/weekly-nightly-promotion.yml b/.github/workflows/weekly-nightly-promotion.yml index d0f57ae4..47ad9fd6 100644 --- a/.github/workflows/weekly-nightly-promotion.yml +++ b/.github/workflows/weekly-nightly-promotion.yml @@ -5,9 +5,9 @@ name: Weekly Nightly to Main Promotion on: schedule: - # Every Monday at 10:30 UTC (5:30am EST / 6:30am EDT) + # Every Monday at 12:00 UTC (7:00am EST / 8:00am EDT) # Offset from nightly sync (09:00 UTC) to avoid schedule race and allow validation completion. - - cron: '30 10 * * 1' + - cron: '0 12 * * 1' workflow_dispatch: inputs: reason: diff --git a/backend/go.mod b/backend/go.mod index b6b8267c..5e60f1f7 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -42,9 +42,9 @@ require ( github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/gabriel-vasile/mimetype v1.4.12 // indirect + github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gin-contrib/sse v1.1.0 // indirect - github.com/glebarez/go-sqlite v1.21.2 // indirect + github.com/glebarez/go-sqlite v1.22.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-playground/locales v0.14.1 // indirect @@ -66,6 +66,7 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/oschwald/maxminddb-golang/v2 v2.1.1 // indirect @@ -73,8 +74,8 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect - 
github.com/prometheus/common v0.66.1 // indirect - github.com/prometheus/procfs v0.16.1 // indirect + github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/procfs v0.20.1 // indirect github.com/quic-go/qpack v0.6.0 // indirect github.com/quic-go/quic-go v0.59.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect @@ -82,20 +83,20 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.3.1 // indirect go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect - go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect + go.opentelemetry.io/otel v1.40.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect - go.opentelemetry.io/otel/metric v1.38.0 // indirect - go.opentelemetry.io/otel/trace v1.38.0 // indirect - go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/arch v0.22.0 // indirect + go.opentelemetry.io/otel/metric v1.40.0 // indirect + go.opentelemetry.io/otel/trace v1.40.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/arch v0.24.0 // indirect golang.org/x/sys v0.41.0 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools/v3 v3.5.2 // indirect - modernc.org/libc v1.22.5 // indirect - modernc.org/mathutil v1.5.0 // indirect - modernc.org/memory v1.5.0 // indirect - modernc.org/sqlite v1.23.1 // indirect + modernc.org/libc v1.69.0 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect + modernc.org/sqlite v1.46.1 // indirect ) diff --git a/backend/go.sum b/backend/go.sum index db8c59b6..489d36a5 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -6,12 +6,8 @@ 
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M= github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= -github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w= -github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc= github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE= github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k= -github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA= -github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE= github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= @@ -41,18 +37,16 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw= -github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= +github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM= +github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= github.com/gin-contrib/gzip v1.2.5 h1:fIZs0S+l17pIu1P5XRJOo/YNqfIuPCrZZ3TWB7pjckI= 
github.com/gin-contrib/gzip v1.2.5/go.mod h1:aomRgR7ftdZV3uWY0gW/m8rChfxau0n8YVvwlOHONzw= github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= -github.com/gin-gonic/gin v1.11.0 h1:OW/6PLjyusp2PPXtyxKHU0RbX6I/l28FTdDlae5ueWk= -github.com/gin-gonic/gin v1.11.0/go.mod h1:+iq/FyxlGzII0KHiBGjuNn4UNENUlKbGlNmc+W50Dls= github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8= github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc= -github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= -github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= +github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ= +github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc= github.com/glebarez/sqlite v1.11.0 h1:wSG0irqzP6VurnMEpFGer5Li19RpIRi2qvQz++w0GMw= github.com/glebarez/sqlite v1.11.0/go.mod h1:h8/o8j5wiAsqSPoWELDUdJXhjAhsVliSn7bWZjOhrgQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -70,8 +64,6 @@ github.com/go-playground/validator/v10 v10.30.1 h1:f3zDSN/zOma+w6+1Wswgd9fLkdwy0 github.com/go-playground/validator/v10 v10.30.1/go.mod h1:oSuBIQzuJxL//3MelwSLD5hc2Tu889bF0Idm9Dg26cM= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= -github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/golang-jwt/jwt/v5 v5.3.1 
h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= @@ -79,14 +71,16 @@ github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArs github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= -github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= @@ -126,6 +120,8 @@ github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod 
h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= @@ -144,21 +140,20 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= -github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= -github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= -github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc= +github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo= github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8= github.com/quic-go/qpack v0.6.0/go.mod 
h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII= github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw= github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -169,53 +164,52 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.10.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= -github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY= github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE= go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= -go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= -go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= +go.opentelemetry.io/otel v1.40.0 
h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= -go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= -go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= -go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= -go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= -go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= -go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= -go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= -go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= 
+go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= -go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= -go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= -golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI= -golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +golang.org/x/arch v0.24.0 h1:qlJ3M9upxvFfwRM51tTg3Yl+8CP9vCC1E7vlFpgv99Y= +golang.org/x/arch v0.24.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= -golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= -golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= +golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= +golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= 
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= @@ -223,6 +217,8 @@ golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= +golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= @@ -245,11 +241,31 @@ gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg= gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= -modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= -modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= -modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= -modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= -modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= -modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= -modernc.org/sqlite 
v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= +modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= +modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= +modernc.org/ccgo/v4 v4.31.0 h1:/bsaxqdgX3gy/0DboxcvWrc3NpzH+6wpFfI/ZaA/hrg= +modernc.org/ccgo/v4 v4.31.0/go.mod h1:jKe8kPBjIN/VdGTVqARTQ8N1gAziBmiISY8j5HoKwjg= +modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= +modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo= +modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.69.0 h1:YQJ5QMSReTgQ3QFmI0dudfjXIjCcYTUxcH8/9P9f0D8= +modernc.org/libc v1.69.0/go.mod h1:YfLLduUEbodNV2xLU5JOnRHBTAHVHsVW3bVYGw0ZCV4= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= +modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= +modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= +modernc.org/strutil v1.2.1 
h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/backend/internal/api/handlers/proxy_host_handler.go b/backend/internal/api/handlers/proxy_host_handler.go index 31750731..5ab90db2 100644 --- a/backend/internal/api/handlers/proxy_host_handler.go +++ b/backend/internal/api/handlers/proxy_host_handler.go @@ -413,6 +413,11 @@ func (h *ProxyHostHandler) Create(c *gin.Context) { ) } + // Trigger immediate uptime monitor creation + health check (non-blocking) + if h.uptimeService != nil { + go h.uptimeService.SyncAndCheckForHost(host.ID) + } + // Generate advisory warnings for private/Docker IPs warnings := generateForwardHostWarnings(host.ForwardHost) @@ -645,11 +650,10 @@ func (h *ProxyHostHandler) Delete(c *gin.Context) { return } - // check if we should also delete associated uptime monitors (query param: delete_uptime=true) - deleteUptime := c.DefaultQuery("delete_uptime", "false") == "true" - - if deleteUptime && h.uptimeService != nil { - // Find all monitors referencing this proxy host and delete each + // Always clean up associated uptime monitors when deleting a proxy host. + // The query param delete_uptime=true is kept for backward compatibility but + // cleanup now runs unconditionally to prevent orphaned monitors. 
+ if h.uptimeService != nil { var monitors []models.UptimeMonitor if err := h.uptimeService.DB.Where("proxy_host_id = ?", host.ID).Find(&monitors).Error; err == nil { for _, m := range monitors { diff --git a/backend/internal/api/handlers/proxy_host_handler_test.go b/backend/internal/api/handlers/proxy_host_handler_test.go index 022f1141..cb2f984f 100644 --- a/backend/internal/api/handlers/proxy_host_handler_test.go +++ b/backend/internal/api/handlers/proxy_host_handler_test.go @@ -9,6 +9,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" "github.com/gin-gonic/gin" "github.com/google/uuid" @@ -68,6 +69,33 @@ func setupTestRouterWithReferenceTables(t *testing.T) (*gin.Engine, *gorm.DB) { return r, db } +func setupTestRouterWithUptime(t *testing.T) (*gin.Engine, *gorm.DB) { + t.Helper() + + dsn := "file:" + t.Name() + "?mode=memory&cache=shared" + db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{}) + require.NoError(t, err) + require.NoError(t, db.AutoMigrate( + &models.ProxyHost{}, + &models.Location{}, + &models.Notification{}, + &models.NotificationProvider{}, + &models.UptimeMonitor{}, + &models.UptimeHeartbeat{}, + &models.UptimeHost{}, + &models.Setting{}, + )) + + ns := services.NewNotificationService(db) + us := services.NewUptimeService(db, ns) + h := NewProxyHostHandler(db, nil, ns, us) + r := gin.New() + api := r.Group("/api/v1") + h.RegisterRoutes(api) + + return r, db +} + func TestProxyHostHandler_ResolveAccessListReference_TargetedBranches(t *testing.T) { t.Parallel() @@ -201,6 +229,35 @@ func TestProxyHostCreate_ReferenceResolution_TargetedBranches(t *testing.T) { }) } +func TestProxyHostCreate_TriggersAsyncUptimeSyncWhenServiceConfigured(t *testing.T) { + t.Parallel() + + router, db := setupTestRouterWithUptime(t) + + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + t.Cleanup(upstream.Close) + + domain := strings.TrimPrefix(upstream.URL, "http://") + 
body := fmt.Sprintf(`{"name":"Uptime Hook","domain_names":"%s","forward_scheme":"http","forward_host":"app-service","forward_port":8080,"enabled":true}`, domain) + req := httptest.NewRequest(http.MethodPost, "/api/v1/proxy-hosts", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + require.Equal(t, http.StatusCreated, resp.Code) + + var created models.ProxyHost + require.NoError(t, db.Where("domain_names = ?", domain).First(&created).Error) + + var count int64 + require.Eventually(t, func() bool { + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", created.ID).Count(&count) + return count > 0 + }, 3*time.Second, 50*time.Millisecond) +} + func TestProxyHostLifecycle(t *testing.T) { t.Parallel() router, _ := setupTestRouter(t) diff --git a/backend/internal/api/routes/routes.go b/backend/internal/api/routes/routes.go index cbd9881d..2533036d 100644 --- a/backend/internal/api/routes/routes.go +++ b/backend/internal/api/routes/routes.go @@ -29,6 +29,29 @@ import ( _ "github.com/Wikid82/charon/backend/pkg/dnsprovider/custom" ) +type uptimeBootstrapService interface { + CleanupStaleFailureCounts() error + SyncMonitors() error + CheckAll() +} + +func runInitialUptimeBootstrap(enabled bool, uptimeService uptimeBootstrapService, logWarn func(error, string), logError func(error, string)) { + if !enabled { + return + } + + if err := uptimeService.CleanupStaleFailureCounts(); err != nil && logWarn != nil { + logWarn(err, "Failed to cleanup stale failure counts") + } + + if err := uptimeService.SyncMonitors(); err != nil && logError != nil { + logError(err, "Failed to sync monitors") + } + + // Run initial check immediately after sync to avoid the 90s blind window. + uptimeService.CheckAll() +} + // Register wires up API routes and performs automatic migrations. 
func Register(router *gin.Engine, db *gorm.DB, cfg config.Config) error { // Caddy Manager - created early so it can be used by settings handlers for config reload @@ -410,9 +433,10 @@ func RegisterWithDeps(router *gin.Engine, db *gorm.DB, cfg config.Config, caddyM dockerHandler := handlers.NewDockerHandler(dockerService, remoteServerService) dockerHandler.RegisterRoutes(protected) - // Uptime Service - uptimeSvc := services.NewUptimeService(db, notificationService) - uptimeHandler := handlers.NewUptimeHandler(uptimeSvc) + // Uptime Service — reuse the single uptimeService instance (defined above) + // to share in-memory state (mutexes, notification batching) between + // background checker, ProxyHostHandler, and API handlers. + uptimeHandler := handlers.NewUptimeHandler(uptimeService) protected.GET("/uptime/monitors", uptimeHandler.List) protected.POST("/uptime/monitors", uptimeHandler.Create) protected.GET("/uptime/monitors/:id/history", uptimeHandler.GetHistory) @@ -463,11 +487,12 @@ func RegisterWithDeps(router *gin.Engine, db *gorm.DB, cfg config.Config, caddyM enabled = s.Value == "true" } - if enabled { - if err := uptimeService.SyncMonitors(); err != nil { - logger.Log().WithError(err).Error("Failed to sync monitors") - } - } + runInitialUptimeBootstrap( + enabled, + uptimeService, + func(err error, msg string) { logger.Log().WithError(err).Warn(msg) }, + func(err error, msg string) { logger.Log().WithError(err).Error(msg) }, + ) ticker := time.NewTicker(1 * time.Minute) for range ticker.C { diff --git a/backend/internal/api/routes/routes_coverage_test.go b/backend/internal/api/routes/routes_coverage_test.go index e5e11d82..57939ce7 100644 --- a/backend/internal/api/routes/routes_coverage_test.go +++ b/backend/internal/api/routes/routes_coverage_test.go @@ -73,3 +73,55 @@ func TestRegister_LegacyMigrationErrorIsNonFatal(t *testing.T) { } require.True(t, hasHealth) } + +func TestRegister_UptimeFeatureFlagDefaultErrorIsNonFatal(t *testing.T) { + 
gin.SetMode(gin.TestMode) + router := gin.New() + + db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_uptime_flag_warn"), &gorm.Config{ + Logger: logger.Default.LogMode(logger.Silent), + }) + require.NoError(t, err) + + const cbName = "routes:test_force_settings_query_error" + err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) { + if tx.Statement != nil && tx.Statement.Table == "settings" { + _ = tx.AddError(errors.New("forced settings query failure")) + } + }) + require.NoError(t, err) + t.Cleanup(func() { + _ = db.Callback().Query().Remove(cbName) + }) + + cfg := config.Config{JWTSecret: "test-secret"} + + err = Register(router, db, cfg) + require.NoError(t, err) +} + +func TestRegister_SecurityHeaderPresetInitErrorIsNonFatal(t *testing.T) { + gin.SetMode(gin.TestMode) + router := gin.New() + + db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared&_test_sec_header_presets_warn"), &gorm.Config{ + Logger: logger.Default.LogMode(logger.Silent), + }) + require.NoError(t, err) + + const cbName = "routes:test_force_security_header_profile_query_error" + err = db.Callback().Query().Before("gorm:query").Register(cbName, func(tx *gorm.DB) { + if tx.Statement != nil && tx.Statement.Table == "security_header_profiles" { + _ = tx.AddError(errors.New("forced security_header_profiles query failure")) + } + }) + require.NoError(t, err) + t.Cleanup(func() { + _ = db.Callback().Query().Remove(cbName) + }) + + cfg := config.Config{JWTSecret: "test-secret"} + + err = Register(router, db, cfg) + require.NoError(t, err) +} diff --git a/backend/internal/api/routes/routes_uptime_bootstrap_test.go b/backend/internal/api/routes/routes_uptime_bootstrap_test.go new file mode 100644 index 00000000..ac03c221 --- /dev/null +++ b/backend/internal/api/routes/routes_uptime_bootstrap_test.go @@ -0,0 +1,107 @@ +package routes + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +type testUptimeBootstrapService 
struct { + cleanupErr error + syncErr error + + cleanupCalls int + syncCalls int + checkAllCalls int +} + +func (s *testUptimeBootstrapService) CleanupStaleFailureCounts() error { + s.cleanupCalls++ + return s.cleanupErr +} + +func (s *testUptimeBootstrapService) SyncMonitors() error { + s.syncCalls++ + return s.syncErr +} + +func (s *testUptimeBootstrapService) CheckAll() { + s.checkAllCalls++ +} + +func TestRunInitialUptimeBootstrap_Disabled_DoesNothing(t *testing.T) { + svc := &testUptimeBootstrapService{} + + warnLogs := 0 + errorLogs := 0 + runInitialUptimeBootstrap( + false, + svc, + func(err error, msg string) { warnLogs++ }, + func(err error, msg string) { errorLogs++ }, + ) + + assert.Equal(t, 0, svc.cleanupCalls) + assert.Equal(t, 0, svc.syncCalls) + assert.Equal(t, 0, svc.checkAllCalls) + assert.Equal(t, 0, warnLogs) + assert.Equal(t, 0, errorLogs) +} + +func TestRunInitialUptimeBootstrap_Enabled_HappyPath(t *testing.T) { + svc := &testUptimeBootstrapService{} + + warnLogs := 0 + errorLogs := 0 + runInitialUptimeBootstrap( + true, + svc, + func(err error, msg string) { warnLogs++ }, + func(err error, msg string) { errorLogs++ }, + ) + + assert.Equal(t, 1, svc.cleanupCalls) + assert.Equal(t, 1, svc.syncCalls) + assert.Equal(t, 1, svc.checkAllCalls) + assert.Equal(t, 0, warnLogs) + assert.Equal(t, 0, errorLogs) +} + +func TestRunInitialUptimeBootstrap_Enabled_CleanupError_StillProceeds(t *testing.T) { + svc := &testUptimeBootstrapService{cleanupErr: errors.New("cleanup failed")} + + warnLogs := 0 + errorLogs := 0 + runInitialUptimeBootstrap( + true, + svc, + func(err error, msg string) { warnLogs++ }, + func(err error, msg string) { errorLogs++ }, + ) + + assert.Equal(t, 1, svc.cleanupCalls) + assert.Equal(t, 1, svc.syncCalls) + assert.Equal(t, 1, svc.checkAllCalls) + assert.Equal(t, 1, warnLogs) + assert.Equal(t, 0, errorLogs) +} + +func TestRunInitialUptimeBootstrap_Enabled_SyncError_StillChecksAll(t *testing.T) { + svc := 
&testUptimeBootstrapService{syncErr: errors.New("sync failed")} + + warnLogs := 0 + errorLogs := 0 + runInitialUptimeBootstrap( + true, + svc, + func(err error, msg string) { warnLogs++ }, + func(err error, msg string) { errorLogs++ }, + ) + + assert.Equal(t, 1, svc.cleanupCalls) + assert.Equal(t, 1, svc.syncCalls) + assert.Equal(t, 1, svc.checkAllCalls) + assert.Equal(t, 0, warnLogs) + assert.Equal(t, 1, errorLogs) +} diff --git a/backend/internal/models/notification_provider.go b/backend/internal/models/notification_provider.go index d31cf5c2..9d6427ec 100644 --- a/backend/internal/models/notification_provider.go +++ b/backend/internal/models/notification_provider.go @@ -14,7 +14,7 @@ type NotificationProvider struct { Type string `json:"type" gorm:"index"` // discord (only supported type in current rollout) URL string `json:"url"` // Discord webhook URL (HTTPS format required) Token string `json:"-"` // Auth token for providers (e.g., Gotify) - never exposed in API - HasToken bool `json:"has_token" gorm:"-"` // Computed: indicates whether a token is set (never exposes raw value) + HasToken bool `json:"has_token" gorm:"-"` // Computed: indicates whether a token is set (never exposes raw value) Engine string `json:"engine,omitempty" gorm:"index"` // notify_v1 (notify-only runtime) Config string `json:"config"` // JSON payload template for custom webhooks ServiceConfig string `json:"service_config,omitempty" gorm:"type:text"` // JSON blob for typed service config diff --git a/backend/internal/services/uptime_service.go b/backend/internal/services/uptime_service.go index 6da26b83..68c5628b 100644 --- a/backend/internal/services/uptime_service.go +++ b/backend/internal/services/uptime_service.go @@ -8,6 +8,7 @@ import ( "net" "net/http" "net/url" + "strconv" "strings" "sync" "time" @@ -372,12 +373,32 @@ func (s *UptimeService) CheckAll() { // Check each host's monitors for hostID, monitors := range hostMonitors { - // If host is down, mark all monitors as down 
without individual checks + // If host is down, only short-circuit TCP monitors. + // HTTP/HTTPS monitors remain URL-truth authoritative and must still run checkMonitor. if hostID != "" { var uptimeHost models.UptimeHost if err := s.DB.Where("id = ?", hostID).First(&uptimeHost).Error; err == nil { if uptimeHost.Status == "down" { - s.markHostMonitorsDown(monitors, &uptimeHost) + tcpMonitors := make([]models.UptimeMonitor, 0, len(monitors)) + nonTCPMonitors := make([]models.UptimeMonitor, 0, len(monitors)) + + for _, monitor := range monitors { + normalizedType := strings.ToLower(strings.TrimSpace(monitor.Type)) + if normalizedType == "tcp" { + tcpMonitors = append(tcpMonitors, monitor) + continue + } + nonTCPMonitors = append(nonTCPMonitors, monitor) + } + + if len(tcpMonitors) > 0 { + s.markHostMonitorsDown(tcpMonitors, &uptimeHost) + } + + for _, monitor := range nonTCPMonitors { + go s.checkMonitor(monitor) + } + continue } } @@ -1184,3 +1205,112 @@ func (s *UptimeService) DeleteMonitor(id string) error { return nil } + +// SyncAndCheckForHost creates a monitor for the given proxy host (if one +// doesn't already exist) and immediately triggers a health check in a +// background goroutine. It is safe to call from any goroutine. +// +// Designed to be called as `go svc.SyncAndCheckForHost(hostID)` so it +// does not block the API response. +func (s *UptimeService) SyncAndCheckForHost(hostID uint) { + // Check feature flag — bail if uptime is disabled + var setting models.Setting + if err := s.DB.Where("key = ?", "feature.uptime.enabled").First(&setting).Error; err == nil { + if setting.Value != "true" { + return + } + } + + // Per-host lock prevents duplicate monitors when multiple goroutines + // call SyncAndCheckForHost for the same hostID concurrently. 
+ hostKey := fmt.Sprintf("proxy-%d", hostID) + s.hostMutexLock.Lock() + if s.hostMutexes[hostKey] == nil { + s.hostMutexes[hostKey] = &sync.Mutex{} + } + mu := s.hostMutexes[hostKey] + s.hostMutexLock.Unlock() + + mu.Lock() + defer mu.Unlock() + + // Look up the proxy host; it may have been deleted between the API + // response and this goroutine executing. + var host models.ProxyHost + if err := s.DB.Where("id = ?", hostID).First(&host).Error; err != nil { + hostIDStr := strconv.FormatUint(uint64(hostID), 10) + logger.Log().WithField("host_id", hostIDStr).Debug("SyncAndCheckForHost: proxy host not found (may have been deleted)") + return + } + + // Ensure a monitor exists for this host + var monitor models.UptimeMonitor + err := s.DB.Where("proxy_host_id = ?", host.ID).First(&monitor).Error + if errors.Is(err, gorm.ErrRecordNotFound) { + domains := strings.Split(host.DomainNames, ",") + firstDomain := "" + if len(domains) > 0 { + firstDomain = strings.TrimSpace(domains[0]) + } + + scheme := "http" + if host.SSLForced { + scheme = "https" + } + publicURL := fmt.Sprintf("%s://%s", scheme, firstDomain) + upstreamHost := host.ForwardHost + + name := host.Name + if name == "" { + name = firstDomain + } + + uptimeHostID := s.ensureUptimeHost(upstreamHost, name) + + monitor = models.UptimeMonitor{ + ProxyHostID: &host.ID, + UptimeHostID: &uptimeHostID, + Name: name, + Type: "http", + URL: publicURL, + UpstreamHost: upstreamHost, + Interval: 60, + Enabled: true, + Status: "pending", + } + if createErr := s.DB.Create(&monitor).Error; createErr != nil { + logger.Log().WithError(createErr).WithField("host_id", host.ID).Error("SyncAndCheckForHost: failed to create monitor") + return + } + } else if err != nil { + logger.Log().WithError(err).WithField("host_id", host.ID).Error("SyncAndCheckForHost: failed to query monitor") + return + } + + // Run health check immediately + s.checkMonitor(monitor) +} + +// CleanupStaleFailureCounts resets monitors that are stuck in "down" 
status +// with elevated failure counts from historical bugs (e.g., port mismatch era). +// Only resets monitors with no recent successful heartbeat in the last 24 hours. +func (s *UptimeService) CleanupStaleFailureCounts() error { + result := s.DB.Exec(` + UPDATE uptime_monitors SET failure_count = 0, status = 'pending' + WHERE status = 'down' + AND failure_count > 5 + AND id NOT IN ( + SELECT DISTINCT monitor_id FROM uptime_heartbeats + WHERE status = 'up' AND created_at > datetime('now', '-24 hours') + ) + `) + if result.Error != nil { + return fmt.Errorf("cleanup stale failure counts: %w", result.Error) + } + + if result.RowsAffected > 0 { + logger.Log().WithField("reset_count", result.RowsAffected).Info("Reset stale monitor failure counts") + } + + return nil +} diff --git a/backend/internal/services/uptime_service_pr1_test.go b/backend/internal/services/uptime_service_pr1_test.go new file mode 100644 index 00000000..162077ff --- /dev/null +++ b/backend/internal/services/uptime_service_pr1_test.go @@ -0,0 +1,522 @@ +package services + +import ( + "errors" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gorm.io/driver/sqlite" + "gorm.io/gorm" + + "github.com/Wikid82/charon/backend/internal/models" +) + +// setupPR1TestDB creates an in-memory SQLite database with all models needed +// for PR-1 uptime bug fix tests. 
+func setupPR1TestDB(t *testing.T) *gorm.DB { + t.Helper() + dir := t.TempDir() + dbPath := filepath.Join(dir, "pr1test.db") + dsn := dbPath + "?_journal_mode=WAL&_busy_timeout=5000" + db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{}) + require.NoError(t, err) + require.NoError(t, db.AutoMigrate( + &models.UptimeMonitor{}, + &models.UptimeHeartbeat{}, + &models.UptimeHost{}, + &models.ProxyHost{}, + &models.Setting{}, + )) + + t.Cleanup(func() { + sqlDB, _ := db.DB() + if sqlDB != nil { + _ = sqlDB.Close() + } + }) + + return db +} + +// enableUptimeFeature sets the feature.uptime.enabled setting to "true". +func enableUptimeFeature(t *testing.T, db *gorm.DB) { + t.Helper() + require.NoError(t, db.Create(&models.Setting{ + Key: "feature.uptime.enabled", + Value: "true", + Type: "bool", + Category: "feature", + }).Error) +} + +// createTestProxyHost creates a minimal proxy host for testing. +func createTestProxyHost(t *testing.T, db *gorm.DB, name, domain, forwardHost string) models.ProxyHost { + t.Helper() + host := models.ProxyHost{ + UUID: uuid.New().String(), + Name: name, + DomainNames: domain, + ForwardScheme: "http", + ForwardHost: forwardHost, + ForwardPort: 80, + Enabled: true, + } + require.NoError(t, db.Create(&host).Error) + return host +} + +func createAlwaysOKServer(t *testing.T) *httptest.Server { + t.Helper() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + t.Cleanup(server.Close) + return server +} + +func hostPortFromServerURL(serverURL string) string { + return strings.TrimPrefix(serverURL, "http://") +} + +// --- Fix 1: Singleton UptimeService --- + +func TestSingletonUptimeService_SharedState(t *testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + // Verify both pendingNotifications and hostMutexes are the same instance + // by writing to the maps from the shared reference. 
+ svc.pendingNotifications["test-key"] = &pendingHostNotification{} + assert.Contains(t, svc.pendingNotifications, "test-key", + "pendingNotifications should be shared on the same instance") + + // A second reference to the same service should see the same map state. + svc2 := svc // simulate routes.go passing the same pointer + assert.Contains(t, svc2.pendingNotifications, "test-key", + "second reference must share the same pendingNotifications map") +} + +// --- Fix 2: SyncAndCheckForHost --- + +func TestSyncAndCheckForHost_CreatesMonitorAndHeartbeat(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "test-host", domain, "192.168.1.100") + + // Execute synchronously (normally called as goroutine) + svc.SyncAndCheckForHost(host.ID) + + // Verify monitor was created + var monitor models.UptimeMonitor + err := db.Where("proxy_host_id = ?", host.ID).First(&monitor).Error + require.NoError(t, err, "monitor should be created for the proxy host") + assert.Equal(t, "http://"+domain, monitor.URL) + assert.Equal(t, "192.168.1.100", monitor.UpstreamHost) + assert.Contains(t, []string{"up", "down", "pending"}, monitor.Status, "status should be set by checkMonitor") + + // Verify at least one heartbeat was created (from the immediate check) + var hbCount int64 + db.Model(&models.UptimeHeartbeat{}).Where("monitor_id = ?", monitor.ID).Count(&hbCount) + assert.Greater(t, hbCount, int64(0), "at least one heartbeat should exist after SyncAndCheckForHost") +} + +func TestSyncAndCheckForHost_SSLForcedUsesHTTPS(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := models.ProxyHost{ + UUID: uuid.New().String(), + Name: "ssl-host", + DomainNames: domain, + 
ForwardScheme: "https", + ForwardHost: "192.168.1.200", + ForwardPort: 443, + SSLForced: true, + Enabled: true, + } + require.NoError(t, db.Create(&host).Error) + + svc.SyncAndCheckForHost(host.ID) + + var monitor models.UptimeMonitor + require.NoError(t, db.Where("proxy_host_id = ?", host.ID).First(&monitor).Error) + assert.Equal(t, "https://"+domain, monitor.URL) +} + +func TestSyncAndCheckForHost_DeletedHostNoPanic(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + + // Call with a host ID that doesn't exist — should log and return, not panic + assert.NotPanics(t, func() { + svc.SyncAndCheckForHost(99999) + }) + + // No monitor should be created + var count int64 + db.Model(&models.UptimeMonitor{}).Count(&count) + assert.Equal(t, int64(0), count) +} + +func TestSyncAndCheckForHost_ExistingMonitorSkipsCreate(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "existing-mon", domain, "10.0.0.1") + + // Pre-create a monitor + existingMonitor := models.UptimeMonitor{ + ID: uuid.New().String(), + ProxyHostID: &host.ID, + Name: "pre-existing", + Type: "http", + URL: "http://" + domain, + Interval: 60, + Enabled: true, + Status: "up", + } + require.NoError(t, db.Create(&existingMonitor).Error) + + svc.SyncAndCheckForHost(host.ID) + + // Should still be exactly 1 monitor + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count) + assert.Equal(t, int64(1), count, "should not create a duplicate monitor") +} + +// --- Fix 2 continued: Feature flag test --- + +func TestSyncAndCheckForHost_DisabledFeatureNoop(t *testing.T) { + db := setupPR1TestDB(t) + // Explicitly set feature to disabled + require.NoError(t, db.Create(&models.Setting{ + Key: "feature.uptime.enabled", + Value: "false", + Type: "bool", 
+ Category: "feature", + }).Error) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "disabled-host", domain, "10.0.0.2") + + svc.SyncAndCheckForHost(host.ID) + + // No monitor should be created when feature is disabled + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count) + assert.Equal(t, int64(0), count, "no monitor should be created when feature is disabled") +} + +func TestSyncAndCheckForHost_MissingSetting_StillCreates(t *testing.T) { + db := setupPR1TestDB(t) + // No setting at all — the method should proceed (default: enabled behavior) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "no-setting", domain, "10.0.0.3") + + svc.SyncAndCheckForHost(host.ID) + + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count) + assert.Greater(t, count, int64(0), "monitor should be created when setting is missing (default: enabled)") +} + +func TestSyncAndCheckForHost_UsesDomainWhenHostNameMissing(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "", domain, "10.10.10.10") + + svc.SyncAndCheckForHost(host.ID) + + var monitor models.UptimeMonitor + require.NoError(t, db.Where("proxy_host_id = ?", host.ID).First(&monitor).Error) + assert.Equal(t, domain, monitor.Name) +} + +func TestSyncAndCheckForHost_CreateMonitorError_ReturnsWithoutPanic(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "create-error-host", domain, 
"10.10.10.11") + + callbackName := "test:force_uptime_monitor_create_error" + require.NoError(t, db.Callback().Create().Before("gorm:create").Register(callbackName, func(tx *gorm.DB) { + if tx.Statement != nil && tx.Statement.Schema != nil && tx.Statement.Schema.Name == "UptimeMonitor" { + _ = tx.AddError(errors.New("forced uptime monitor create error")) + } + })) + t.Cleanup(func() { + _ = db.Callback().Create().Remove(callbackName) + }) + + assert.NotPanics(t, func() { + svc.SyncAndCheckForHost(host.ID) + }) + + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count) + assert.Equal(t, int64(0), count) +} + +func TestSyncAndCheckForHost_QueryMonitorError_ReturnsWithoutPanic(t *testing.T) { + db := setupPR1TestDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + host := createTestProxyHost(t, db, "query-error-host", "query-error.example.com", "10.10.10.12") + + require.NoError(t, db.Migrator().DropTable(&models.UptimeMonitor{})) + + assert.NotPanics(t, func() { + svc.SyncAndCheckForHost(host.ID) + }) +} + +// --- Fix 4: CleanupStaleFailureCounts --- + +func TestCleanupStaleFailureCounts_ResetsStuckMonitors(t *testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + // Create a "stuck" monitor: down, failure_count > 5, no recent UP heartbeat + stuckMonitor := models.UptimeMonitor{ + ID: uuid.New().String(), + Name: "stuck-monitor", + Type: "http", + URL: "http://stuck.example.com", + Interval: 60, + Enabled: true, + Status: "down", + FailureCount: 10, + } + require.NoError(t, db.Create(&stuckMonitor).Error) + + err := svc.CleanupStaleFailureCounts() + require.NoError(t, err) + + // Verify the monitor was reset + var m models.UptimeMonitor + require.NoError(t, db.First(&m, "id = ?", stuckMonitor.ID).Error) + assert.Equal(t, 0, m.FailureCount, "failure_count should be reset to 0") + assert.Equal(t, "pending", m.Status, "status should be reset to pending") +} + +func 
TestCleanupStaleFailureCounts_SkipsMonitorsWithRecentUpHeartbeat(t *testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + // Create a monitor that is "down" with high failure_count BUT has a recent UP heartbeat + healthyMonitor := models.UptimeMonitor{ + ID: uuid.New().String(), + Name: "healthy-monitor", + Type: "http", + URL: "http://healthy.example.com", + Interval: 60, + Enabled: true, + Status: "down", + FailureCount: 10, + } + require.NoError(t, db.Create(&healthyMonitor).Error) + + // Add a recent UP heartbeat + hb := models.UptimeHeartbeat{ + MonitorID: healthyMonitor.ID, + Status: "up", + Latency: 50, + CreatedAt: time.Now().Add(-1 * time.Hour), // 1 hour ago — within 24h window + } + require.NoError(t, db.Create(&hb).Error) + + err := svc.CleanupStaleFailureCounts() + require.NoError(t, err) + + // Monitor should NOT be reset because it has a recent UP heartbeat + var m models.UptimeMonitor + require.NoError(t, db.First(&m, "id = ?", healthyMonitor.ID).Error) + assert.Equal(t, 10, m.FailureCount, "failure_count should NOT be reset since there's a recent UP heartbeat") + assert.Equal(t, "down", m.Status, "status should remain down") +} + +func TestCleanupStaleFailureCounts_SkipsLowFailureCount(t *testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + // Monitor with failure_count <= 5 — should not be touched + monitor := models.UptimeMonitor{ + ID: uuid.New().String(), + Name: "low-failure-monitor", + Type: "http", + URL: "http://low.example.com", + Interval: 60, + Enabled: true, + Status: "down", + FailureCount: 3, + } + require.NoError(t, db.Create(&monitor).Error) + + err := svc.CleanupStaleFailureCounts() + require.NoError(t, err) + + var m models.UptimeMonitor + require.NoError(t, db.First(&m, "id = ?", monitor.ID).Error) + assert.Equal(t, 3, m.FailureCount, "low failure_count should not be reset") + assert.Equal(t, "down", m.Status) +} + +func TestCleanupStaleFailureCounts_DoesNotResetDownHosts(t 
*testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + // Create a host that is currently down. + host := models.UptimeHost{ + ID: uuid.New().String(), + Host: "stuck-host.local", + Name: "stuck-host", + Status: "down", + FailureCount: 10, + } + require.NoError(t, db.Create(&host).Error) + + err := svc.CleanupStaleFailureCounts() + require.NoError(t, err) + + var h models.UptimeHost + require.NoError(t, db.First(&h, "id = ?", host.ID).Error) + assert.Equal(t, 10, h.FailureCount, "cleanup must not reset host failure_count") + assert.Equal(t, "down", h.Status, "cleanup must not reset host status") +} + +func TestCleanupStaleFailureCounts_ReturnsErrorWhenDatabaseUnavailable(t *testing.T) { + db := setupPR1TestDB(t) + svc := NewUptimeService(db, nil) + + sqlDB, err := db.DB() + require.NoError(t, err) + require.NoError(t, sqlDB.Close()) + + err = svc.CleanupStaleFailureCounts() + require.Error(t, err) + assert.Contains(t, err.Error(), "cleanup stale failure counts") +} + +// setupPR1ConcurrentDB creates a file-based SQLite database with WAL mode and +// busy_timeout to handle concurrent writes without "database table is locked". 
+func setupPR1ConcurrentDB(t *testing.T) *gorm.DB { + t.Helper() + dir := t.TempDir() + dbPath := filepath.Join(dir, "test.db") + dsn := dbPath + "?_journal_mode=WAL&_busy_timeout=5000" + db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{}) + require.NoError(t, err) + require.NoError(t, db.AutoMigrate( + &models.UptimeMonitor{}, + &models.UptimeHeartbeat{}, + &models.UptimeHost{}, + &models.ProxyHost{}, + &models.Setting{}, + )) + + t.Cleanup(func() { + sqlDB, _ := db.DB() + if sqlDB != nil { + _ = sqlDB.Close() + } + _ = os.Remove(dbPath) + }) + + return db +} + +// --- Concurrent access tests --- + +func TestSyncAndCheckForHost_ConcurrentCreates_NoDuplicates(t *testing.T) { + db := setupPR1ConcurrentDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + // Create multiple proxy hosts with unique domains + hosts := make([]models.ProxyHost, 5) + for i := range hosts { + hosts[i] = createTestProxyHost(t, db, + fmt.Sprintf("concurrent-host-%d", i), + domain, + fmt.Sprintf("10.0.0.%d", 100+i), + ) + } + + var wg sync.WaitGroup + for _, h := range hosts { + wg.Add(1) + go func(hostID uint) { + defer wg.Done() + svc.SyncAndCheckForHost(hostID) + }(h.ID) + } + wg.Wait() + + // Each host should have exactly 1 monitor + for _, h := range hosts { + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", h.ID).Count(&count) + assert.Equal(t, int64(1), count, "each proxy host should have exactly 1 monitor") + } +} + +func TestSyncAndCheckForHost_ConcurrentSameHost_NoDuplicates(t *testing.T) { + db := setupPR1ConcurrentDB(t) + enableUptimeFeature(t, db) + svc := NewUptimeService(db, nil) + server := createAlwaysOKServer(t) + domain := hostPortFromServerURL(server.URL) + + host := createTestProxyHost(t, db, "race-host", domain, "10.0.0.200") + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + 
svc.SyncAndCheckForHost(host.ID) + }() + } + wg.Wait() + + // Should still be exactly 1 monitor even after 10 concurrent calls + var count int64 + db.Model(&models.UptimeMonitor{}).Where("proxy_host_id = ?", host.ID).Count(&count) + assert.Equal(t, int64(1), count, "concurrent SyncAndCheckForHost should not create duplicates") +} diff --git a/backend/internal/services/uptime_service_test.go b/backend/internal/services/uptime_service_test.go index d9fc526a..e5480ce1 100644 --- a/backend/internal/services/uptime_service_test.go +++ b/backend/internal/services/uptime_service_test.go @@ -820,6 +820,277 @@ func TestUptimeService_CheckAll_Errors(t *testing.T) { }) } +func TestUptimeService_CheckAll_HostDown_PartitionsByMonitorType(t *testing.T) { + db := setupUptimeTestDB(t) + ns := NewNotificationService(db) + us := newTestUptimeService(t, db, ns) + + us.config.TCPTimeout = 50 * time.Millisecond + us.config.MaxRetries = 0 + us.config.FailureThreshold = 1 + us.config.CheckTimeout = 2 * time.Second + + listener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err) + addr := listener.Addr().(*net.TCPAddr) + + server := &http.Server{ + Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }), + ReadHeaderTimeout: 10 * time.Second, + } + go func() { _ = server.Serve(listener) }() + t.Cleanup(func() { + _ = server.Close() + _ = listener.Close() + }) + + closedListener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err) + closedPort := closedListener.Addr().(*net.TCPAddr).Port + _ = closedListener.Close() + + uptimeHost := models.UptimeHost{ + Host: "127.0.0.2", + Name: "Down Host", + Status: "pending", + } + err = db.Create(&uptimeHost).Error + assert.NoError(t, err) + + hostID := uptimeHost.ID + httpMonitor := models.UptimeMonitor{ + ID: "hostdown-http-monitor", + Name: "HTTP Monitor", + Type: "http", + URL: fmt.Sprintf("http://127.0.0.1:%d", addr.Port), + Enabled: true, + Status: "pending", + 
UptimeHostID: &hostID, + MaxRetries: 1, + } + tcpMonitor := models.UptimeMonitor{ + ID: "hostdown-tcp-monitor", + Name: "TCP Monitor", + Type: "tcp", + URL: fmt.Sprintf("127.0.0.2:%d", closedPort), + Enabled: true, + Status: "up", + UptimeHostID: &hostID, + MaxRetries: 1, + } + err = db.Create(&httpMonitor).Error + assert.NoError(t, err) + err = db.Create(&tcpMonitor).Error + assert.NoError(t, err) + + us.CheckAll() + + assert.Eventually(t, func() bool { + var refreshed models.UptimeHost + if db.Where("id = ?", uptimeHost.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "down" + }, 3*time.Second, 25*time.Millisecond) + + assert.Eventually(t, func() bool { + var refreshed models.UptimeMonitor + if db.Where("id = ?", httpMonitor.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "up" + }, 3*time.Second, 25*time.Millisecond) + + assert.Eventually(t, func() bool { + var refreshed models.UptimeMonitor + if db.Where("id = ?", tcpMonitor.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "down" + }, 3*time.Second, 25*time.Millisecond) + + var httpHeartbeat models.UptimeHeartbeat + err = db.Where("monitor_id = ?", httpMonitor.ID).Order("created_at desc").First(&httpHeartbeat).Error + assert.NoError(t, err) + assert.Equal(t, "up", httpHeartbeat.Status) + assert.Contains(t, httpHeartbeat.Message, "HTTP 200") + assert.NotContains(t, httpHeartbeat.Message, "Host unreachable") + + var tcpHeartbeat models.UptimeHeartbeat + err = db.Where("monitor_id = ?", tcpMonitor.ID).Order("created_at desc").First(&tcpHeartbeat).Error + assert.NoError(t, err) + assert.Equal(t, "down", tcpHeartbeat.Status) + assert.Equal(t, "Host unreachable", tcpHeartbeat.Message) +} + +func TestUptimeService_CheckAll_ManualScheduledParity_ForHTTPOnHostDown(t *testing.T) { + db := setupUptimeTestDB(t) + ns := NewNotificationService(db) + us := newTestUptimeService(t, db, ns) + + us.config.TCPTimeout = 50 * 
time.Millisecond + us.config.MaxRetries = 0 + us.config.FailureThreshold = 1 + us.config.CheckTimeout = 2 * time.Second + + listener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err) + addr := listener.Addr().(*net.TCPAddr) + + server := &http.Server{ + Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }), + ReadHeaderTimeout: 10 * time.Second, + } + go func() { _ = server.Serve(listener) }() + t.Cleanup(func() { + _ = server.Close() + _ = listener.Close() + }) + + uptimeHost := models.UptimeHost{ + Host: "127.0.0.2", + Name: "Parity Host", + Status: "pending", + } + err = db.Create(&uptimeHost).Error + assert.NoError(t, err) + + hostID := uptimeHost.ID + manualMonitor := models.UptimeMonitor{ + ID: "manual-http-parity", + Name: "Manual HTTP", + Type: "http", + URL: fmt.Sprintf("http://127.0.0.1:%d", addr.Port), + Enabled: true, + Status: "pending", + UptimeHostID: &hostID, + MaxRetries: 1, + } + scheduledMonitor := models.UptimeMonitor{ + ID: "scheduled-http-parity", + Name: "Scheduled HTTP", + Type: "http", + URL: fmt.Sprintf("http://127.0.0.1:%d", addr.Port), + Enabled: true, + Status: "pending", + UptimeHostID: &hostID, + MaxRetries: 1, + } + err = db.Create(&manualMonitor).Error + assert.NoError(t, err) + err = db.Create(&scheduledMonitor).Error + assert.NoError(t, err) + + us.CheckMonitor(manualMonitor) + + assert.Eventually(t, func() bool { + var refreshed models.UptimeMonitor + if db.Where("id = ?", manualMonitor.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "up" + }, 2*time.Second, 25*time.Millisecond) + + us.CheckAll() + + assert.Eventually(t, func() bool { + var refreshed models.UptimeMonitor + if db.Where("id = ?", scheduledMonitor.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "up" + }, 3*time.Second, 25*time.Millisecond) + + var manualResult models.UptimeMonitor + err = db.Where("id = ?", 
manualMonitor.ID).First(&manualResult).Error + assert.NoError(t, err) + + var scheduledResult models.UptimeMonitor + err = db.Where("id = ?", scheduledMonitor.ID).First(&scheduledResult).Error + assert.NoError(t, err) + + assert.Equal(t, "up", manualResult.Status) + assert.Equal(t, manualResult.Status, scheduledResult.Status) +} + +func TestUptimeService_CheckAll_ReachableHost_StillUsesHTTPResult(t *testing.T) { + db := setupUptimeTestDB(t) + ns := NewNotificationService(db) + us := newTestUptimeService(t, db, ns) + + us.config.TCPTimeout = 50 * time.Millisecond + us.config.MaxRetries = 0 + us.config.FailureThreshold = 1 + us.config.CheckTimeout = 2 * time.Second + + listener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err) + addr := listener.Addr().(*net.TCPAddr) + + server := &http.Server{ + Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + }), + ReadHeaderTimeout: 10 * time.Second, + } + go func() { _ = server.Serve(listener) }() + t.Cleanup(func() { + _ = server.Close() + _ = listener.Close() + }) + + uptimeHost := models.UptimeHost{ + Host: "127.0.0.1", + Name: "Reachable Host", + Status: "pending", + } + err = db.Create(&uptimeHost).Error + assert.NoError(t, err) + + hostID := uptimeHost.ID + httpMonitor := models.UptimeMonitor{ + ID: "reachable-host-http-fail", + Name: "Reachable Host HTTP Failure", + Type: "http", + URL: fmt.Sprintf("http://127.0.0.1:%d", addr.Port), + Enabled: true, + Status: "pending", + UptimeHostID: &hostID, + MaxRetries: 1, + } + err = db.Create(&httpMonitor).Error + assert.NoError(t, err) + + us.CheckAll() + + assert.Eventually(t, func() bool { + var refreshedHost models.UptimeHost + if db.Where("id = ?", uptimeHost.ID).First(&refreshedHost).Error != nil { + return false + } + return refreshedHost.Status == "up" + }, 3*time.Second, 25*time.Millisecond) + + assert.Eventually(t, func() bool { + var refreshed models.UptimeMonitor + if db.Where("id = 
?", httpMonitor.ID).First(&refreshed).Error != nil { + return false + } + return refreshed.Status == "down" + }, 3*time.Second, 25*time.Millisecond) + + var heartbeat models.UptimeHeartbeat + err = db.Where("monitor_id = ?", httpMonitor.ID).Order("created_at desc").First(&heartbeat).Error + assert.NoError(t, err) + assert.Equal(t, "down", heartbeat.Status) + assert.Contains(t, heartbeat.Message, "HTTP 500") + assert.NotContains(t, heartbeat.Message, "Host unreachable") +} + func TestUptimeService_CheckMonitor_EdgeCases(t *testing.T) { t.Run("invalid URL format", func(t *testing.T) { db := setupUptimeTestDB(t) diff --git a/docs/plans/archive/acl_security_headers_hotfix_plan.md b/docs/plans/archive/acl_security_headers_hotfix_plan.md new file mode 100644 index 00000000..81fc1c46 --- /dev/null +++ b/docs/plans/archive/acl_security_headers_hotfix_plan.md @@ -0,0 +1,270 @@ +# ACL + Security Headers Hotfix Plan (Proxy Host Create/Edit) + +## 1. Introduction + +### Overview +Hotfix request: Proxy Host form dropdown selections for Access Control List (ACL) and Security Headers are not being applied/persisted for new or edited hosts. + +Reported behavior: +1. Existing hosts with previously assigned ACL/Security Header profile retain old values. +2. Users cannot reliably remove or change those values in UI. +3. Newly created hosts cannot reliably apply ACL/Security Header profile. + +### Objective +Deliver an urgent but correct root-cause fix across frontend binding and backend persistence flow, with minimum user interruption and full validation gates. + +## 2. Research Findings (Current Architecture + Touchpoints) + +### Frontend Entry Points +1. `frontend/src/pages/ProxyHosts.tsx` + - `handleSubmit(data)` calls `updateHost(editingHost.uuid, data)` or `createHost(data)`. + - Renders `ProxyHostForm` modal for create/edit flows. +2. `frontend/src/components/ProxyHostForm.tsx` + - Local form state initializes `access_list_id` and `security_header_profile_id`. 
+ - ACL control uses `AccessListSelector`. + - Security Headers control uses `Select` with `security_header_profile_id` mapping. + - Submission path: `handleSubmit` -> `onSubmit(payloadWithoutUptime)`. +3. `frontend/src/components/AccessListSelector.tsx` + - Converts select values between `string` and `number | null`. + +### Frontend API/Hooks +1. `frontend/src/hooks/useProxyHosts.ts` + - `createHost` -> `createProxyHost`. + - `updateHost` -> `updateProxyHost`. +2. `frontend/src/api/proxyHosts.ts` + - `createProxyHost(host: Partial)` -> `POST /api/v1/proxy-hosts`. + - `updateProxyHost(uuid, host)` -> `PUT /api/v1/proxy-hosts/:uuid`. + - Contract fields: `access_list_id`, `security_header_profile_id`. + +### Backend Entry/Transformation/Persistence +1. Route registration + - `backend/internal/api/routes/routes.go`: `proxyHostHandler.RegisterRoutes(protected)`. +2. Handler + - `backend/internal/api/handlers/proxy_host_handler.go` + - `Create(c)` uses `ShouldBindJSON(&models.ProxyHost{})`. + - `Update(c)` uses `map[string]any` partial update parsing. + - Target fields: + - `payload["access_list_id"]` -> `parseNullableUintField` -> `host.AccessListID` + - `payload["security_header_profile_id"]` -> typed conversion -> `host.SecurityHeaderProfileID` +3. Service + - `backend/internal/services/proxyhost_service.go` + - `Create(host)` validates + `db.Create(host)`. + - `Update(host)` validates + `db.Model(...).Select("*").Updates(host)`. +4. Model + - `backend/internal/models/proxy_host.go` + - `AccessListID *uint \`json:"access_list_id"\`` + - `SecurityHeaderProfileID *uint \`json:"security_header_profile_id"\`` + +### Existing Tests Relevant to Incident +1. Frontend unit regression coverage already exists: + - `frontend/src/components/__tests__/ProxyHostForm-dropdown-changes.test.tsx` +2. E2E regression spec exists: + - `tests/security-enforcement/acl-dropdown-regression.spec.ts` +3. 
Backend update and security-header tests exist: + - `backend/internal/api/handlers/proxy_host_handler_update_test.go` + - `backend/internal/api/handlers/proxy_host_handler_security_headers_test.go` + +## 3. Root-Cause-First Trace + +### Trace Model (Mandatory) +1. Entry Point: + - UI dropdown interactions in `ProxyHostForm` and `AccessListSelector`. +2. Transformation: + - Form state conversion (`string` <-> `number | null`) and payload construction in `ProxyHostForm`. + - API serialization via `frontend/src/api/proxyHosts.ts`. +3. Persistence: + - Backend `Update` parser (`proxy_host_handler.go`) and `ProxyHostService.Update` persistence. +4. Exit Point: + - Response body consumed by React Query invalidation/refetch in `useProxyHosts`. + - UI reflects updated values in table/form. + +### Most Likely Failure Zones +1. Frontend select binding/conversion drift (top candidate) + - Shared symptom across ACL and Security Headers points to form/select layer. + - Candidate files: + - `frontend/src/components/ProxyHostForm.tsx` + - `frontend/src/components/AccessListSelector.tsx` + - `frontend/src/components/ui/Select.tsx` +2. Payload mutation or stale form object behavior + - Ensure payload carries updated `access_list_id` / `security_header_profile_id` values at submit time. +3. Backend partial-update parser edge behavior + - Ensure `nil`, numeric string, and number conversions are consistent between ACL and security header profile paths. + +### Investigation Decision +Root-cause verification will be instrumented through failing-first Playwright scenario and targeted handler tests before applying code changes. + +## 4. EARS Requirements + +1. WHEN a user selects an ACL in the Proxy Host create/edit form, THE SYSTEM SHALL persist `access_list_id` and return it in API response. +2. WHEN a user changes ACL from one value to another, THE SYSTEM SHALL replace prior `access_list_id` with the new value. +3. 
WHEN a user selects "No Access Control", THE SYSTEM SHALL persist `access_list_id = null`. +4. WHEN a user selects a Security Headers profile in the Proxy Host create/edit form, THE SYSTEM SHALL persist `security_header_profile_id` and return it in API response. +5. WHEN a user changes Security Headers profile from one value to another, THE SYSTEM SHALL replace prior `security_header_profile_id` with the new value. +6. WHEN a user selects "None" for Security Headers, THE SYSTEM SHALL persist `security_header_profile_id = null`. +7. IF dropdown interaction fails to update internal form state, THEN THE SYSTEM SHALL prevent stale values from being persisted. +8. WHILE updating Proxy Host settings, THE SYSTEM SHALL maintain existing behavior for unrelated fields and not regress certificate, DNS challenge, or uptime-linked updates. + +Note: User-visible blocking error behavior is deferred unless required by confirmed root cause. + +## 5. Technical Specification (Hotfix Scope) + +### API Contract (No Breaking Change) +1. `POST /api/v1/proxy-hosts` + - Request fields include `access_list_id`, `security_header_profile_id` as nullable numeric fields. +2. `PUT /api/v1/proxy-hosts/:uuid` + - Partial payload accepts nullable updates for both fields. +3. Response must echo persisted values in snake_case: + - `access_list_id` + - `security_header_profile_id` + +### Data Model/DB +No schema migration expected. Existing nullable FK fields in `backend/internal/models/proxy_host.go` are sufficient. + +### Targeted Code Areas for Fix +1. Frontend + - `frontend/src/components/ProxyHostForm.tsx` + - `frontend/src/components/AccessListSelector.tsx` + - `frontend/src/components/ui/Select.tsx` (only if click/select propagation issue confirmed) + - `frontend/src/api/proxyHosts.ts` (only if serialization issue confirmed) +2. 
Backend + - `backend/internal/api/handlers/proxy_host_handler.go` (only if parsing/persistence mismatch confirmed) + - `backend/internal/services/proxyhost_service.go` (only if update write path proves incorrect) + +## 6. Edge Cases + +1. Edit host with existing ACL/profile and switch to another value. +2. Edit host with existing ACL/profile and clear to null. +3. Create new host with ACL/profile set before first save. +4. Submit with stringified numeric values (defensive compatibility). +5. Submit with null values for both fields simultaneously. +6. Missing/deleted profile or ACL IDs in backend (validation errors). +7. Multiple rapid dropdown changes before save (last selection wins). + +## 7. Risk Analysis + +### High Risk +1. Silent stale-state submission from form controls. +2. Regressing other Proxy Host settings due to broad payload mutation. + +### Medium Risk +1. Partial-update parser divergence between ACL and security profile behavior. +2. UI select portal/z-index interaction causing non-deterministic click handling. + +### Mitigations +1. Reproduce with Playwright first and capture exact failing action path. +2. Add/strengthen focused frontend tests around create/edit/clear flows. +3. Add/strengthen backend tests for nullable + conversion paths. +4. Keep hotfix minimal and avoid unrelated refactors. + +## 8. Implementation Plan (Urgent, Minimal Interruption) + +### Phase 1: Reproduction + Guardrails (Playwright First) +1. Execute targeted E2E spec for dropdown flow and create/edit persistence behavior. +2. Capture exact failure step and confirm whether failure is click binding, payload value, or backend persistence. +3. Add/adjust failing-first test if current suite does not capture observed production regression. + +### Phase 2: Frontend Fix +1. Patch select binding/state mapping for ACL and Security Headers in `ProxyHostForm`/`AccessListSelector`. +2. If needed, patch `ui/Select` interaction layering. +3. 
Ensure payload contains correct final `access_list_id` and `security_header_profile_id` values at submit. +4. Extend `ProxyHostForm` tests for create/edit/change/remove flows. + +### Phase 3: Backend Hardening (Conditional) +1. Only if frontend payload is correct but persistence is wrong: + - Backend fix MUST use field-scoped partial-update semantics for `access_list_id` and `security_header_profile_id` only (unless separately justified). + - Ensure write path persists null transitions reliably. +2. Add/adjust handler/service regression tests proving no unintended mutation of unrelated proxy host fields during these targeted updates. + +### Phase 4: Integration + Regression +1. Run complete targeted Proxy Host UI flow tests. +2. Validate list refresh and modal reopen reflect persisted values. +3. Validate no regressions in bulk ACL / bulk security-header operations. + +### Phase 5: Documentation + Handoff +1. Update changelog/release notes only for hotfix behavior. +2. Keep architecture docs unchanged unless root cause requires architectural note. +3. Handoff to Supervisor agent for review after plan approval and implementation. + +## 9. Acceptance Criteria + +1. ACL dropdown selection persists on create and edit. +2. Security Headers dropdown selection persists on create and edit. +3. Clearing ACL persists `null` and is reflected after reload. +4. Clearing Security Headers persists `null` and is reflected after reload. +5. Existing hosts can change from one ACL/profile to another without stale value retention. +6. New hosts can apply ACL/profile at creation time. +7. No regressions in unrelated proxy host fields. +8. All validation gates in Section 11 pass. +9. API create response returns persisted `access_list_id` and `security_header_profile_id` matching submitted values (including `null`). +10. API update response returns persisted `access_list_id` and `security_header_profile_id` after `value->value`, `value->null`, and `null->value` transitions. +11. 
Backend persistence verification confirms unrelated proxy host fields remain unchanged for targeted updates. + +## 10. PR Slicing Strategy + +### Decision +Single PR (hotfix-first), with contingency split only if backend root cause is confirmed late. + +### Rationale +1. Incident impact is immediate user-facing and concentrated in one feature path. +2. Frontend + targeted backend/test changes are tightly coupled for verification. +3. Single PR minimizes release coordination and user interruption. + +### Contingency (Only if split becomes necessary) +1. PR-1: Frontend binding + tests + - Scope: `ProxyHostForm`, `AccessListSelector`, `ui/Select` (if required), related tests. + - Dependency: none. + - Acceptance: UI submit payload verified correct in unit + Playwright. +2. PR-2: Backend parser/persistence + tests (conditional) + - Scope: `proxy_host_handler.go`, `proxyhost_service.go`, handler/service tests. + - Dependency: PR-1 merged or rebased for aligned contract. + - Acceptance: API update/create persist both nullable IDs correctly. +3. PR-3: Regression hardening + docs + - Scope: extra regression coverage, release-note hotfix entry. + - Dependency: PR-1/PR-2. + - Acceptance: full DoD validation sequence passes. + +## 11. Validation Plan (Mandatory Sequence) + +0. E2E environment prerequisite + - Determine rebuild necessity per testing policy: if application/runtime or Docker input changes are present, rebuild is required. + - If rebuild is required or the container is unhealthy, run `.github/skills/scripts/skill-runner.sh docker-rebuild-e2e`. + - Record container health outcome before executing tests. +1. Playwright first + - Run targeted Proxy Host dropdown and create/edit persistence scenarios. +2. Local patch coverage preflight + - Generate `test-results/local-patch-report.md` and `test-results/local-patch-report.json`. +3. Unit and coverage + - Backend coverage run (threshold >= 85%). + - Frontend coverage run (threshold >= 85%). +4. 
Type checks + - Frontend TypeScript check. +5. Pre-commit + - `pre-commit run --all-files` with zero blocking failures. +6. Security scans + - CodeQL Go + JS (security-and-quality). + - Findings check gate. + - Trivy scan. + - Conditional GORM security scan if model/DB-layer changes are made. +7. Build verification + - Backend build + frontend build pass. + +## 12. File Review: `.gitignore`, `codecov.yml`, `.dockerignore`, `Dockerfile` + +Assessment for this hotfix: +1. `.gitignore`: no required change for ACL/Security Headers hotfix. +2. `codecov.yml`: no required change; current exclusions/thresholds are compatible. +3. `.dockerignore`: no required change unless new hotfix-only artifact paths are introduced. +4. `Dockerfile`: no required change; incident is application logic/UI binding, not image build pipeline. + +If implementation introduces new persistent test artifacts, update ignore files in the same PR. + +## 13. Rollback and Contingency + +1. If hotfix causes regression in proxy host save flow, revert hotfix commit and redeploy prior stable build. +2. If frontend-only fix is insufficient, activate conditional backend phase immediately. +3. If validation gates fail on security/coverage, hold merge until fixed; no partial exception for this incident. +4. Post-rollback smoke checks: + - Create host with ACL/profile. + - Edit to different ACL/profile values. + - Clear both values to `null`. + - Verify persisted values in API response and after UI reload. diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md index 81fc1c46..a69a91c1 100644 --- a/docs/plans/current_spec.md +++ b/docs/plans/current_spec.md @@ -1,270 +1,362 @@ -# ACL + Security Headers Hotfix Plan (Proxy Host Create/Edit) +# Uptime Monitoring Regression Investigation (Scheduled vs Manual) -## 1. 
Introduction +Date: 2026-03-01 +Owner: Planning Agent +Status: Investigation Complete, Fix Plan Proposed +Severity: High (false DOWN states on automated monitoring) -### Overview -Hotfix request: Proxy Host form dropdown selections for Access Control List (ACL) and Security Headers are not being applied/persisted for new or edited hosts. +## 1. Executive Summary -Reported behavior: -1. Existing hosts with previously assigned ACL/Security Header profile retain old values. -2. Users cannot reliably remove or change those values in UI. -3. Newly created hosts cannot reliably apply ACL/Security Header profile. +Two services (Wizarr and Charon) can flip to `DOWN` during scheduled cycles while manual checks immediately return `UP` because scheduled checks use a host-level TCP gate that can short-circuit monitor-level HTTP checks. -### Objective -Deliver an urgent but correct root-cause fix across frontend binding and backend persistence flow, with minimum user interruption and full validation gates. +The scheduled path is: +- `ticker -> CheckAll -> checkAllHosts -> (host status down) -> markHostMonitorsDown` -## 2. Research Findings (Current Architecture + Touchpoints) +The manual path is: +- `POST /api/v1/uptime/monitors/:id/check -> CheckMonitor -> checkMonitor` -### Frontend Entry Points -1. `frontend/src/pages/ProxyHosts.tsx` - - `handleSubmit(data)` calls `updateHost(editingHost.uuid, data)` or `createHost(data)`. - - Renders `ProxyHostForm` modal for create/edit flows. -2. `frontend/src/components/ProxyHostForm.tsx` - - Local form state initializes `access_list_id` and `security_header_profile_id`. - - ACL control uses `AccessListSelector`. - - Security Headers control uses `Select` with `security_header_profile_id` mapping. - - Submission path: `handleSubmit` -> `onSubmit(payloadWithoutUptime)`. -3. `frontend/src/components/AccessListSelector.tsx` - - Converts select values between `string` and `number | null`. +Only the scheduled path runs host precheck gating. 
If host precheck fails (TCP to upstream host/port), `CheckAll` skips HTTP checks and forcibly writes monitor status to `down` with heartbeat message `Host unreachable`. -### Frontend API/Hooks -1. `frontend/src/hooks/useProxyHosts.ts` - - `createHost` -> `createProxyHost`. - - `updateHost` -> `updateProxyHost`. -2. `frontend/src/api/proxyHosts.ts` - - `createProxyHost(host: Partial)` -> `POST /api/v1/proxy-hosts`. - - `updateProxyHost(uuid, host)` -> `PUT /api/v1/proxy-hosts/:uuid`. - - Contract fields: `access_list_id`, `security_header_profile_id`. +This is a backend state mutation problem (not only UI rendering). -### Backend Entry/Transformation/Persistence -1. Route registration - - `backend/internal/api/routes/routes.go`: `proxyHostHandler.RegisterRoutes(protected)`. -2. Handler - - `backend/internal/api/handlers/proxy_host_handler.go` - - `Create(c)` uses `ShouldBindJSON(&models.ProxyHost{})`. - - `Update(c)` uses `map[string]any` partial update parsing. - - Target fields: - - `payload["access_list_id"]` -> `parseNullableUintField` -> `host.AccessListID` - - `payload["security_header_profile_id"]` -> typed conversion -> `host.SecurityHeaderProfileID` -3. Service - - `backend/internal/services/proxyhost_service.go` - - `Create(host)` validates + `db.Create(host)`. - - `Update(host)` validates + `db.Model(...).Select("*").Updates(host)`. -4. Model - - `backend/internal/models/proxy_host.go` - - `AccessListID *uint \`json:"access_list_id"\`` - - `SecurityHeaderProfileID *uint \`json:"security_header_profile_id"\`` +## 1.1 Monitoring Policy (Authoritative Behavior) -### Existing Tests Relevant to Incident -1. Frontend unit regression coverage already exists: - - `frontend/src/components/__tests__/ProxyHostForm-dropdown-changes.test.tsx` -2. E2E regression spec exists: - - `tests/security-enforcement/acl-dropdown-regression.spec.ts` -3. 
Backend update and security-header tests exist: - - `backend/internal/api/handlers/proxy_host_handler_update_test.go` - - `backend/internal/api/handlers/proxy_host_handler_security_headers_test.go` +Charon uptime monitoring SHALL follow URL-truth semantics for HTTP/HTTPS monitors, +matching third-party external monitor behavior (Uptime Kuma style) without requiring +any additional service. -## 3. Root-Cause-First Trace +Policy: +- HTTP/HTTPS monitors are URL-truth based. The monitor result is authoritative based + on the configured URL check outcome (status code/timeout/TLS/connectivity from URL + perspective). +- Internal TCP reachability precheck (`ForwardHost:ForwardPort`) is + non-authoritative for HTTP/HTTPS monitor status. +- TCP monitors remain endpoint-socket checks and may rely on direct socket + reachability semantics. +- Host precheck may still be used for optimization, grouping telemetry, and operator + diagnostics, but SHALL NOT force HTTP/HTTPS monitors to DOWN. -### Trace Model (Mandatory) -1. Entry Point: - - UI dropdown interactions in `ProxyHostForm` and `AccessListSelector`. -2. Transformation: - - Form state conversion (`string` <-> `number | null`) and payload construction in `ProxyHostForm`. - - API serialization via `frontend/src/api/proxyHosts.ts`. -3. Persistence: - - Backend `Update` parser (`proxy_host_handler.go`) and `ProxyHostService.Update` persistence. -4. Exit Point: - - Response body consumed by React Query invalidation/refetch in `useProxyHosts`. - - UI reflects updated values in table/form. +## 2. Research Findings -### Most Likely Failure Zones -1. Frontend select binding/conversion drift (top candidate) - - Shared symptom across ACL and Security Headers points to form/select layer. - - Candidate files: - - `frontend/src/components/ProxyHostForm.tsx` - - `frontend/src/components/AccessListSelector.tsx` - - `frontend/src/components/ui/Select.tsx` -2. 
Payload mutation or stale form object behavior - - Ensure payload carries updated `access_list_id` / `security_header_profile_id` values at submit time. -3. Backend partial-update parser edge behavior - - Ensure `nil`, numeric string, and number conversions are consistent between ACL and security header profile paths. +### 2.1 Execution Path Comparison (Required) -### Investigation Decision -Root-cause verification will be instrumented through failing-first Playwright scenario and targeted handler tests before applying code changes. +### Scheduled path behavior +- Entry: `backend/internal/api/routes/routes.go` (background ticker, calls `uptimeService.CheckAll()`) +- `CheckAll()` calls `checkAllHosts()` first. + - File: `backend/internal/services/uptime_service.go:354` +- `checkAllHosts()` updates each `UptimeHost.Status` via TCP checks in `checkHost()`. + - File: `backend/internal/services/uptime_service.go:395` +- `checkHost()` dials `UptimeHost.Host` + monitor port (prefer `ProxyHost.ForwardPort`, fallback to URL port). + - File: `backend/internal/services/uptime_service.go:437` +- Back in `CheckAll()`, monitors are grouped by `UptimeHostID`. + - File: `backend/internal/services/uptime_service.go:367` +- If `UptimeHost.Status == "down"`, `markHostMonitorsDown()` is called and individual monitor checks are skipped. + - File: `backend/internal/services/uptime_service.go:381` + - File: `backend/internal/services/uptime_service.go:593` -## 4. EARS Requirements +### Manual path behavior +- Entry: `POST /api/v1/uptime/monitors/:id/check`. + - Handler: `backend/internal/api/handlers/uptime_handler.go:107` +- Calls `service.CheckMonitor(*monitor)` asynchronously. + - File: `backend/internal/services/uptime_service.go:707` +- `checkMonitor()` performs direct HTTP/TCP monitor check and updates monitor + heartbeat. + - File: `backend/internal/services/uptime_service.go:711` -1. 
WHEN a user selects an ACL in the Proxy Host create/edit form, THE SYSTEM SHALL persist `access_list_id` and return it in API response. -2. WHEN a user changes ACL from one value to another, THE SYSTEM SHALL replace prior `access_list_id` with the new value. -3. WHEN a user selects "No Access Control", THE SYSTEM SHALL persist `access_list_id = null`. -4. WHEN a user selects a Security Headers profile in the Proxy Host create/edit form, THE SYSTEM SHALL persist `security_header_profile_id` and return it in API response. -5. WHEN a user changes Security Headers profile from one value to another, THE SYSTEM SHALL replace prior `security_header_profile_id` with the new value. -6. WHEN a user selects "None" for Security Headers, THE SYSTEM SHALL persist `security_header_profile_id = null`. -7. IF dropdown interaction fails to update internal form state, THEN THE SYSTEM SHALL prevent stale values from being persisted. -8. WHILE updating Proxy Host settings, THE SYSTEM SHALL maintain existing behavior for unrelated fields and not regress certificate, DNS challenge, or uptime-linked updates. +### Key divergence +- Scheduled: host-gated (precheck can override monitor) +- Manual: direct monitor check (no host gate) -Note: User-visible blocking error behavior is deferred unless required by confirmed root cause. +## 3. Root Cause With Evidence -## 5. Technical Specification (Hotfix Scope) +## 3.1 Primary Root Cause: Host Precheck Overrides HTTP Success in Scheduled Cycles -### API Contract (No Breaking Change) -1. `POST /api/v1/proxy-hosts` - - Request fields include `access_list_id`, `security_header_profile_id` as nullable numeric fields. -2. `PUT /api/v1/proxy-hosts/:uuid` - - Partial payload accepts nullable updates for both fields. -3. Response must echo persisted values in snake_case: - - `access_list_id` - - `security_header_profile_id` +When `UptimeHost` is marked `down`, scheduled checks do not run `checkMonitor()` for that host's monitors. 
Instead they call `markHostMonitorsDown()` which: +- sets each monitor `Status = "down"` +- writes `UptimeHeartbeat{Status: "down", Message: "Host unreachable"}` +- maxes failure count (`FailureCount = MaxRetries`) -### Data Model/DB -No schema migration expected. Existing nullable FK fields in `backend/internal/models/proxy_host.go` are sufficient. +Evidence: +- Short-circuit: `backend/internal/services/uptime_service.go:381` +- Forced down write: `backend/internal/services/uptime_service.go:610` +- Forced heartbeat message: `backend/internal/services/uptime_service.go:624` -### Targeted Code Areas for Fix -1. Frontend - - `frontend/src/components/ProxyHostForm.tsx` - - `frontend/src/components/AccessListSelector.tsx` - - `frontend/src/components/ui/Select.tsx` (only if click/select propagation issue confirmed) - - `frontend/src/api/proxyHosts.ts` (only if serialization issue confirmed) -2. Backend - - `backend/internal/api/handlers/proxy_host_handler.go` (only if parsing/persistence mismatch confirmed) - - `backend/internal/services/proxyhost_service.go` (only if update write path proves incorrect) +This exactly matches symptom pattern: +1. Manual refresh sets monitor `UP` via direct HTTP check. +2. Next scheduler cycle can force it back to `DOWN` from host precheck path. -## 6. Edge Cases +## 3.2 Hypothesis Check: TCP precheck can fail while public URL HTTP check succeeds -1. Edit host with existing ACL/profile and switch to another value. -2. Edit host with existing ACL/profile and clear to null. -3. Create new host with ACL/profile set before first save. -4. Submit with stringified numeric values (defensive compatibility). -5. Submit with null values for both fields simultaneously. -6. Missing/deleted profile or ACL IDs in backend (validation errors). -7. Multiple rapid dropdown changes before save (last selection wins). +Confirmed as plausible by design: +- `checkHost()` tests upstream reachability (`ForwardHost:ForwardPort`) from Charon runtime. 
+- `checkMonitor()` tests monitor URL (public domain URL, often via Caddy/public routing). -## 7. Risk Analysis +A service can be publicly reachable by monitor URL while upstream TCP precheck fails due to network namespace/routing/DNS/hairpin differences. -### High Risk -1. Silent stale-state submission from form controls. -2. Regressing other Proxy Host settings due to broad payload mutation. +This is especially likely for: +- self-referential routes (Charon monitoring Charon via public hostname) +- host/container networking asymmetry +- services reachable through proxy path but not directly on upstream socket from current runtime context -### Medium Risk -1. Partial-update parser divergence between ACL and security profile behavior. -2. UI select portal/z-index interaction causing non-deterministic click handling. +## 3.3 Recent Change Correlation (Required) -### Mitigations -1. Reproduce with Playwright first and capture exact failing action path. -2. Add/strengthen focused frontend tests around create/edit/clear flows. -3. Add/strengthen backend tests for nullable + conversion paths. -4. Keep hotfix minimal and avoid unrelated refactors. +### `SyncAndCheckForHost` (regression amplifier) +- Introduced in commit `2cd19d89` and called from proxy host create path. +- Files: + - `backend/internal/services/uptime_service.go:1195` + - `backend/internal/api/handlers/proxy_host_handler.go:418` +- Behavior: creates/syncs monitor and immediately runs `checkMonitor()`. -## 8. Implementation Plan (Urgent, Minimal Interruption) +Impact: makes monitors quickly show `UP` after create/manual, then scheduler can flip to `DOWN` if host precheck fails. This increased visibility of scheduled/manual inconsistency. -### Phase 1: Reproduction + Guardrails (Playwright First) -1. Execute targeted E2E spec for dropdown flow and create/edit persistence behavior. -2. Capture exact failure step and confirm whether failure is click binding, payload value, or backend persistence. -3. 
Add/adjust failing-first test if current suite does not capture observed production regression. +### `CleanupStaleFailureCounts` +- Introduced in `2cd19d89`, refined in `7a12ab79`. +- File: `backend/internal/services/uptime_service.go:1277` +- It runs at startup and resets stale monitor states only; not per-cycle override logic. +- Not root cause of recurring per-cycle flip. -### Phase 2: Frontend Fix -1. Patch select binding/state mapping for ACL and Security Headers in `ProxyHostForm`/`AccessListSelector`. -2. If needed, patch `ui/Select` interaction layering. -3. Ensure payload contains correct final `access_list_id` and `security_header_profile_id` values at submit. -4. Extend `ProxyHostForm` tests for create/edit/change/remove flows. +### Frontend effective status changes +- Latest commit `0241de69` refactors `effectiveStatus` handling. +- File: `frontend/src/pages/Uptime.tsx`. +- Backend evidence proves this is not visual-only: scheduler writes `down` heartbeats/messages directly in DB. -### Phase 3: Backend Hardening (Conditional) -1. Only if frontend payload is correct but persistence is wrong: - - Backend fix MUST use field-scoped partial-update semantics for `access_list_id` and `security_header_profile_id` only (unless separately justified). - - Ensure write path persists null transitions reliably. -2. Add/adjust handler/service regression tests proving no unintended mutation of unrelated proxy host fields during these targeted updates. +## 3.4 Grouping Logic Analysis (`UptimeHost`/`UpstreamHost`) -### Phase 4: Integration + Regression -1. Run complete targeted Proxy Host UI flow tests. -2. Validate list refresh and modal reopen reflect persisted values. -3. Validate no regressions in bulk ACL / bulk security-header operations. +Monitors are grouped by `UptimeHostID` in `CheckAll()`. `UptimeHost` is derived from `ProxyHost.ForwardHost` in sync flows. -### Phase 5: Documentation + Handoff -1. Update changelog/release notes only for hotfix behavior. -2. 
Keep architecture docs unchanged unless root cause requires architectural note. -3. Handoff to Supervisor agent for review after plan approval and implementation. +Relevant code: +- group map by `UptimeHostID`: `backend/internal/services/uptime_service.go:367` +- host linkage in sync: `backend/internal/services/uptime_service.go:189`, `backend/internal/services/uptime_service.go:226` +- sync single-host update path: `backend/internal/services/uptime_service.go:1023` -## 9. Acceptance Criteria +Risk: one host precheck failure can mark all grouped monitors down without URL-level validation. -1. ACL dropdown selection persists on create and edit. -2. Security Headers dropdown selection persists on create and edit. -3. Clearing ACL persists `null` and is reflected after reload. -4. Clearing Security Headers persists `null` and is reflected after reload. -5. Existing hosts can change from one ACL/profile to another without stale value retention. -6. New hosts can apply ACL/profile at creation time. -7. No regressions in unrelated proxy host fields. -8. All validation gates in Section 11 pass. -9. API create response returns persisted `access_list_id` and `security_header_profile_id` matching submitted values (including `null`). -10. API update response returns persisted `access_list_id` and `security_header_profile_id` after `value->value`, `value->null`, and `null->value` transitions. -11. Backend persistence verification confirms unrelated proxy host fields remain unchanged for targeted updates. +## 4. Technical Specification (Fix Plan) + +## 4.1 Minimal Proper Fix (First) + +Goal: eliminate false DOWN while preserving existing behavior as much as possible. + +Change `CheckAll()` host-down branch to avoid hard override for HTTP/HTTPS monitors. + +Mandatory hotfix rule: +- WHEN a host precheck is `down`, THE SYSTEM SHALL partition host monitors by type inside `CheckAll()`. +- `markHostMonitorsDown` MUST be invoked only for `tcp` monitors. 
+- `http`/`https` monitors MUST still run through `checkMonitor()` and MUST NOT be force-written `down` by the host precheck path. +- Host precheck outcomes MAY be recorded for optimization/telemetry/grouping, but MUST NOT be treated as final status for `http`/`https` monitors. + +Proposed rule: +1. If host is down: + - For `http`/`https` monitors: still run `checkMonitor()` (do not force down). + - For `tcp` monitors: keep current host-down fast-path (`markHostMonitorsDown`) or direct tcp check. +2. If host is not down: + - Keep existing behavior (run `checkMonitor()` for all monitors). + +Rationale: +- Aligns scheduled behavior with manual for URL-based monitors. +- Preserves reverse proxy product semantics where public URL availability is the source of truth. +- Minimal code delta in `CheckAll()` decision branch. +- Preserves optimization for true TCP-only monitors. + +### Exact file/function targets +- `backend/internal/services/uptime_service.go` + - `CheckAll()` + - add small helper (optional): `partitionMonitorsByType(...)` + +## 4.2 Long-Term Robust Fix (Deferred) + +Introduce host precheck as advisory signal, not authoritative override. + +Design: +1. Add `HostReachability` result to run context (not persisted as forced monitor status). +2. Always execute per-monitor checks, but use host precheck to: + - tune retries/backoff + - annotate failure reason + - optimize notification batching +3. Optionally add feature flag: + - `feature.uptime.strict_host_precheck` (default `false`) + - allows legacy strict gating in environments that want it. + +Benefits: +- Removes false DOWN caused by precheck mismatch. +- Keeps performance and batching controls. +- More explicit semantics for operators. + +## 5. API/Schema Impact + +No API contract change required for minimal fix. +No database migration required for minimal fix. + +Long-term fix may add one feature flag setting only. + +## 6. 
EARS Requirements + +### Ubiquitous +- THE SYSTEM SHALL evaluate HTTP/HTTPS monitor availability using URL-level checks as the authoritative signal. + +### Event-driven +- WHEN the scheduled uptime cycle runs, THE SYSTEM SHALL execute HTTP/HTTPS monitor checks regardless of internal host precheck state. +- WHEN the scheduled uptime cycle runs and host precheck is down, THE SYSTEM SHALL apply host-level forced-down logic only to TCP monitors. + +### State-driven +- WHILE a monitor type is `http` or `https`, THE SYSTEM SHALL NOT force monitor status to `down` solely from internal host precheck failure. +- WHILE a monitor type is `tcp`, THE SYSTEM SHALL evaluate status using endpoint socket reachability semantics. + +### Unwanted behavior +- IF internal host precheck is unreachable AND URL-level HTTP/HTTPS check returns success, THEN THE SYSTEM SHALL set monitor status to `up`. +- IF internal host precheck is reachable AND URL-level HTTP/HTTPS check fails, THEN THE SYSTEM SHALL set monitor status to `down`. + +### Optional +- WHERE host precheck telemetry is enabled, THE SYSTEM SHALL record host-level reachability for diagnostics and grouping without overriding HTTP/HTTPS monitor final state. + +## 7. Implementation Plan + +### Phase 1: Reproduction Lock-In (Tests First) +- Add backend service test proving current regression: + - host precheck fails + - monitor URL check would succeed + - scheduled `CheckAll()` currently writes down (existing behavior) +- File: `backend/internal/services/uptime_service_test.go` (new test block) + +### Phase 2: Minimal Backend Fix +- Update `CheckAll()` branch logic to run HTTP/HTTPS monitors even when host is down. +- Make monitor partitioning explicit and mandatory in `CheckAll()` host-down branch. +- Add an implementation guard before partitioning: normalize monitor type using + `strings.TrimSpace` + `strings.ToLower` to prevent `HTTP`/`HTTPS` case + regressions and whitespace-related misclassification. 
+- Ensure `markHostMonitorsDown` is called only for TCP monitor partitions. +- File: `backend/internal/services/uptime_service.go` + +### Phase 3: Backend Validation +- Add/adjust tests: + - scheduled path no longer forces down when HTTP succeeds + - manual and scheduled reach same final state for HTTP monitors + - internal host unreachable + public URL HTTP 200 => monitor is `UP` + - internal host reachable + public URL failure => monitor is `DOWN` + - TCP monitor behavior unchanged under host-down conditions +- Files: + - `backend/internal/services/uptime_service_test.go` + - `backend/internal/services/uptime_service_race_test.go` (if needed for concurrency side-effects) + +### Phase 4: Integration/E2E Coverage +- Add targeted API-level integration test for scheduler vs manual parity. +- Add Playwright scenario for: + - monitor set UP by manual check + - remains UP after scheduled cycle when URL is reachable +- Add parity scenario for: + - internal TCP precheck unreachable + URL returns 200 => `UP` + - internal TCP precheck reachable + URL failure => `DOWN` +- Files: + - `backend/internal/api/routes/routes_test.go` (or uptime handler integration suite) + - `tests/monitoring/uptime-monitoring.spec.ts` (or equivalent uptime spec file) + +Scope note: +- This hotfix plan is intentionally limited to backend behavior correction and + regression tests (unit/integration/E2E). +- Dedicated documentation-phase work is deferred and out of scope for this + hotfix PR. + +## 8. Test Plan (Unit / Integration / E2E) + +Duplicate notification definition (hotfix acceptance/testing): +- A duplicate notification means the same `(monitor_id, status, + scheduler_tick_id)` is emitted more than once within a single scheduler run. + +## Unit Tests +1. `CheckAll_HostDown_DoesNotForceDown_HTTPMonitor_WhenHTTPCheckSucceeds` +2. `CheckAll_HostDown_StillHandles_TCPMonitor_Conservatively` +3. `CheckAll_ManualAndScheduledParity_HTTPMonitor` +4. 
`CheckAll_InternalHostUnreachable_PublicURL200_HTTPMonitorEndsUp` (blocking) +5. `CheckAll_InternalHostReachable_PublicURLFail_HTTPMonitorEndsDown` (blocking) + +## Integration Tests +1. Scheduler endpoint (`/api/v1/system/uptime/check`) parity with monitor check endpoint. +2. Verify DB heartbeat message is real HTTP result (not `Host unreachable`) for HTTP monitors where URL is reachable. +3. Verify when host precheck is down, HTTP monitor heartbeat/notification output is derived from `checkMonitor()` (not synthetic host-path `Host unreachable`). +4. Verify no duplicate notifications are emitted from host+monitor paths for the same scheduler run, where duplicate is defined as repeated `(monitor_id, status, scheduler_tick_id)`. +5. Verify internal host precheck unreachable + public URL 200 still resolves monitor `UP`. +6. Verify internal host precheck reachable + public URL failure resolves monitor `DOWN`. + +## E2E Tests +1. Create/sync monitor scenario where manual refresh returns `UP`. +2. Wait one scheduler interval. +3. Assert monitor remains `UP` and latest heartbeat is not forced `Host unreachable` for reachable URL. +4. Assert scenario: internal host precheck unreachable + public URL 200 => monitor remains `UP`. +5. Assert scenario: internal host precheck reachable + public URL failure => monitor is `DOWN`. + +## Regression Guardrails +- Add a test explicitly asserting that host precheck must not unconditionally override HTTP monitor checks. +- Add explicit assertions that HTTP monitors under host-down precheck emit + check-derived heartbeat messages and do not produce duplicate notifications + under the `(monitor_id, status, scheduler_tick_id)` rule within a single + scheduler run. + +## 9. Risks and Rollback + +## Risks +1. More HTTP checks under true host outage may increase check volume. +2. Notification patterns may shift from single host-level event to monitor-level batched events. +3. 
Edge cases for mixed-type monitor groups (HTTP + TCP) need deterministic behavior. + +## Mitigations +1. Preserve batching (`queueDownNotification`) and existing retry thresholds. +2. Keep TCP strict path unchanged in minimal fix. +3. Add explicit log fields and targeted tests for mixed groups. + +## Rollback Plan +1. Revert the `CheckAll()` branch change only (single-file rollback). +2. Keep added tests; mark expected behavior as legacy if temporary rollback needed. +3. If necessary, introduce temporary feature toggle to switch between strict and tolerant host gating. ## 10. PR Slicing Strategy -### Decision -Single PR (hotfix-first), with contingency split only if backend root cause is confirmed late. +Decision: Single focused PR (hotfix + tests) -### Rationale -1. Incident impact is immediate user-facing and concentrated in one feature path. -2. Frontend + targeted backend/test changes are tightly coupled for verification. -3. Single PR minimizes release coordination and user interruption. +Trigger reasons: +- High-severity runtime behavior fix requiring minimal blast radius +- Fast review/rollback with behavior-only delta plus regression coverage +- Avoid scope creep into optional hardening/feature-flag work -### Contingency (Only if split becomes necessary) -1. PR-1: Frontend binding + tests - - Scope: `ProxyHostForm`, `AccessListSelector`, `ui/Select` (if required), related tests. - - Dependency: none. - - Acceptance: UI submit payload verified correct in unit + Playwright. -2. PR-2: Backend parser/persistence + tests (conditional) - - Scope: `proxy_host_handler.go`, `proxyhost_service.go`, handler/service tests. - - Dependency: PR-1 merged or rebased for aligned contract. - - Acceptance: API update/create persist both nullable IDs correctly. -3. PR-3: Regression hardening + docs - - Scope: extra regression coverage, release-note hotfix entry. - - Dependency: PR-1/PR-2. - - Acceptance: full DoD validation sequence passes. 
+### PR-1 (Hotfix + Tests) +Scope: +- `CheckAll()` host-down branch adjustment for HTTP/HTTPS +- Unit/integration/E2E regression tests for URL-truth semantics -## 11. Validation Plan (Mandatory Sequence) +Files: +- `backend/internal/services/uptime_service.go` +- `backend/internal/services/uptime_service_test.go` +- `backend/internal/api/routes/routes_test.go` (or equivalent) +- `tests/monitoring/uptime-monitoring.spec.ts` (or equivalent) -0. E2E environment prerequisite - - Determine rebuild necessity per testing policy: if application/runtime or Docker input changes are present, rebuild is required. - - If rebuild is required or the container is unhealthy, run `.github/skills/scripts/skill-runner.sh docker-rebuild-e2e`. - - Record container health outcome before executing tests. -1. Playwright first - - Run targeted Proxy Host dropdown and create/edit persistence scenarios. -2. Local patch coverage preflight - - Generate `test-results/local-patch-report.md` and `test-results/local-patch-report.json`. -3. Unit and coverage - - Backend coverage run (threshold >= 85%). - - Frontend coverage run (threshold >= 85%). -4. Type checks - - Frontend TypeScript check. -5. Pre-commit - - `pre-commit run --all-files` with zero blocking failures. -6. Security scans - - CodeQL Go + JS (security-and-quality). - - Findings check gate. - - Trivy scan. - - Conditional GORM security scan if model/DB-layer changes are made. -7. Build verification - - Backend build + frontend build pass. +Validation gates: +- backend unit tests pass +- targeted uptime integration tests pass +- targeted uptime E2E tests pass +- no behavior regression in existing `CheckAll` tests -## 12. File Review: `.gitignore`, `codecov.yml`, `.dockerignore`, `Dockerfile` +Rollback: +- single revert of PR-1 commit -Assessment for this hotfix: -1. `.gitignore`: no required change for ACL/Security Headers hotfix. -2. `codecov.yml`: no required change; current exclusions/thresholds are compatible. -3. 
`.dockerignore`: no required change unless new hotfix-only artifact paths are introduced. -4. `Dockerfile`: no required change; incident is application logic/UI binding, not image build pipeline. +## 11. Acceptance Criteria (DoD) -If implementation introduces new persistent test artifacts, update ignore files in the same PR. +1. Scheduled and manual checks produce consistent status for HTTP/HTTPS monitors. +2. A reachable monitor URL is not forced to `DOWN` solely by host precheck failure. +3. New regression tests fail before fix and pass after fix. +4. No break in TCP monitor behavior expectations. +5. No new critical/high security findings in touched paths. +6. Blocking parity case passes: internal host precheck unreachable + public URL 200 => scheduled result is `UP`. +7. Blocking parity case passes: internal host precheck reachable + public URL failure => scheduled result is `DOWN`. +8. Under host-down precheck, HTTP monitors produce check-derived heartbeat messages (not synthetic `Host unreachable` from host path). +9. No duplicate notifications are produced by host+monitor paths within a + single scheduler run, where duplicate is defined as repeated + `(monitor_id, status, scheduler_tick_id)`. -## 13. Rollback and Contingency +## 12. Implementation Risks -1. If hotfix causes regression in proxy host save flow, revert hotfix commit and redeploy prior stable build. -2. If frontend-only fix is insufficient, activate conditional backend phase immediately. -3. If validation gates fail on security/coverage, hold merge until fixed; no partial exception for this incident. -4. Post-rollback smoke checks: - - Create host with ACL/profile. - - Edit to different ACL/profile values. - - Clear both values to `null`. - - Verify persisted values in API response and after UI reload. +1. Increased scheduler workload during host-precheck failures because HTTP/HTTPS checks continue to run. +2. 
Notification cadence may change due to check-derived monitor outcomes replacing host-forced synthetic downs. +3. Mixed monitor groups (TCP + HTTP/HTTPS) require strict ordering/partitioning to avoid regression. + +Mitigations: +- Keep change localized to `CheckAll()` host-down branch decisioning. +- Add explicit regression tests for both parity directions and mixed monitor types. +- Keep rollback path as single-commit revert. diff --git a/docs/reports/qa_report.md b/docs/reports/qa_report.md index 77915271..b2dc9a57 100644 --- a/docs/reports/qa_report.md +++ b/docs/reports/qa_report.md @@ -1,85 +1,80 @@ -double check our caddy version# QA Report: Nightly Workflow Fix Audit +## QA Report - PR #779 -- Date: 2026-02-27 -- Scope: - - `.github/workflows/nightly-build.yml` - 1. `pr_number` failure avoidance in nightly dispatch path - 2. Deterministic Syft SBOM generation with fallback - - `.github/workflows/security-pr.yml` contract check (`pr_number` required) +- Date: 2026-03-01 +- Scope: Post-remediation merge-readiness gates after Caddy Import E2E fix -## Findings (Ordered by Severity) +## E2E Status -### ✅ No blocking findings in audited scope +- Command status provided by current PR context: + `npx playwright test --project=chromium --project=firefox --project=webkit tests/core/caddy-import` +- Result: `106 passed, 0 failed, 0 skipped` +- Gate: PASS -1. `actionlint` validation passed for modified workflow. - - Command: `actionlint .github/workflows/nightly-build.yml` - - Result: PASS (no diagnostics) +## Patch Report Status -2. `pr_number` nightly dispatch failure path is avoided by excluding PR-only workflow from nightly fan-out. 
- - `security-pr.yml` removed from dispatch list in `.github/workflows/nightly-build.yml:103` - - Explicit log note added at `.github/workflows/nightly-build.yml:110` +- Command: `bash scripts/local-patch-report.sh` +- Artifacts: + - `test-results/local-patch-report.md` (present) + - `test-results/local-patch-report.json` (present) +- Result: PASS (artifacts generated) +- Notes: + - Warning: overall patch coverage `81.7%` below advisory threshold `90.0%` + - Warning: backend patch coverage `81.6%` below advisory threshold `85.0%` -3. SBOM generation is now deterministic with explicit primary pin and verified fallback. - - Primary action pins Syft version at `.github/workflows/nightly-build.yml:231` - - Fallback installs pinned `v1.42.1` with checksum verification at `.github/workflows/nightly-build.yml:245` - - Mandatory artifact verification added at `.github/workflows/nightly-build.yml:268` +## Backend Coverage -4. No permission broadening in modified sections. - - Dispatch job permissions remain `actions: write`, `contents: read` at `.github/workflows/nightly-build.yml:84` - - Build job permissions remain `contents: read`, `packages: write`, `id-token: write` at `.github/workflows/nightly-build.yml:145` - - Diff review confirms no `permissions` changes in the modified hunk. +- Command: `.github/skills/scripts/skill-runner.sh test-backend-coverage` +- Result: PASS +- Metrics: + - Statement coverage: `87.5%` + - Line coverage: `87.7%` + - Gate threshold observed in run: `87%` -5. Action pinning remains SHA-based in modified sections. - - `actions/github-script` pinned SHA at `.github/workflows/nightly-build.yml:89` - - `anchore/sbom-action` pinned SHA at `.github/workflows/nightly-build.yml:226` - - `actions/upload-artifact` pinned SHA at `.github/workflows/nightly-build.yml:283` +## Frontend Coverage -6. `security-pr.yml` contract still requires `pr_number`. 
- - `workflow_dispatch.inputs.pr_number.required: true` at `.github/workflows/security-pr.yml:14` +- Command: `.github/skills/scripts/skill-runner.sh test-frontend-coverage` +- Result: FAIL +- Failure root cause: + - Test timeout at `frontend/src/components/__tests__/ProxyHostForm.test.tsx:1419` + - Failing test: `maps remote docker container to remote host and public port` + - Error: `Test timed out in 5000ms` +- Coverage snapshot produced before failure: + - Statements: `88.95%` + - Lines: `89.62%` + - Functions: `86.05%` + - Branches: `81.3%` -## Pass/Fail Decision +## Typecheck -- QA Status: **PASS with caveats** -- Reason: All requested static validations pass and the scoped workflow logic changes satisfy the audit requirements. +- Command: `npm --prefix frontend run type-check` +- Result: PASS -## Residual Risks +## Pre-commit -1. Fallback integrity uses checksum file from the same release origin as the tarball. - - Impact: If release origin is compromised, checksum verification alone may not detect tampering. - - Suggested hardening: verify signed release metadata or verify Syft artifact signature (Cosign/GitHub attestations) in fallback path. +- Command: `pre-commit run --all-files` +- Result: PASS +- Notable hooks: `golangci-lint (Fast Linters - BLOCKING)`, `Frontend TypeScript Check`, `Frontend Lint (Fix)` all passed -2. Runtime behavior is not fully proven by local static checks. - - Impact: Dispatch and SBOM behavior still require a real GitHub Actions run to prove end-to-end execution. +## Security Scans -## Remote Execution Limitation and Manual Verification +- Trivy filesystem scan: + - Command: `.github/skills/scripts/skill-runner.sh security-scan-trivy` + - Result: PASS + - Critical/High findings: `0/0` -I did not execute remote nightly runs for this exact local diff in this audit. Local `actionlint` and source inspection were performed. 
To validate end-to-end behavior on GitHub Actions, run: +- Docker image scan: + - Command: `.github/skills/scripts/skill-runner.sh security-scan-docker-image` + - Result: PASS + - Critical/High findings: `0/0` + - Additional findings: `10 medium`, `3 low` (non-blocking) -```bash -cd /projects/Charon +## Remediation Required Before Merge -# 1) Syntax/lint (already run locally) -actionlint .github/workflows/nightly-build.yml +1. Stabilize the timed-out frontend test at `frontend/src/components/__tests__/ProxyHostForm.test.tsx:1419`. +2. Re-run `.github/skills/scripts/skill-runner.sh test-frontend-coverage` until the suite is fully green. +3. Optional quality improvement: resolve the patch coverage warnings (`81.7%` overall, `81.6%` backend) by adding targeted tests for the uncovered changed lines listed in `test-results/local-patch-report.md`. -# 2) Trigger nightly workflow (manual) -gh workflow run nightly-build.yml --ref nightly -f reason="qa-nightly-audit" -f skip_tests=true +## Final Merge Recommendation -# 3) Inspect latest nightly run -gh run list --workflow "Nightly Build & Package" --branch nightly --limit 1 -gh run view --log - -# 4) Confirm no security-pr dispatch error in nightly logs -# Expectation: no "Missing required input 'pr_number' not provided" - -# 5) Confirm security-pr contract still enforced -gh workflow run security-pr.yml --ref nightly -# Expectation: dispatch rejected due to required missing input pr_number - -# 6) Positive contract check with explicit pr_number -gh workflow run security-pr.yml --ref nightly -f pr_number= -``` - -Expected outcomes: -- Nightly run completes dispatch phase without `pr_number` input failure. -- SBOM generation succeeds via primary or fallback path and uploads `sbom-nightly.json`. -- `security-pr.yml` continues enforcing required `pr_number` for manual dispatch. +- Recommendation: **NO-GO** +- Reason: Required frontend coverage gate did not pass due to a deterministic test timeout.
diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 505b725f..b8d9823b 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -41,7 +41,7 @@ "@testing-library/jest-dom": "^6.9.1", "@testing-library/react": "^16.3.2", "@testing-library/user-event": "^14.6.1", - "@types/node": "^25.3.2", + "@types/node": "^25.3.3", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@typescript-eslint/eslint-plugin": "^8.56.1", @@ -3565,9 +3565,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "25.3.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.2.tgz", - "integrity": "sha512-RpV6r/ij22zRRdyBPcxDeKAzH43phWVKEjL2iksqo1Vz3CuBUrgmPpPhALKiRfU7OMCmeeO9vECBMsV0hMTG8Q==", + "version": "25.3.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.3.tgz", + "integrity": "sha512-DpzbrH7wIcBaJibpKo9nnSQL0MTRdnWttGyE5haGwK86xgMOkFLp7vEyfQPGLOJh5wNYiJ3V9PmUMDhV9u8kkQ==", "dev": true, "license": "MIT", "dependencies": { @@ -4350,9 +4350,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001774", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001774.tgz", - "integrity": "sha512-DDdwPGz99nmIEv216hKSgLD+D4ikHQHjBC/seF98N9CPqRX4M5mSxT9eTV6oyisnJcuzxtZy4n17yKKQYmYQOA==", + "version": "1.0.30001775", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001775.tgz", + "integrity": "sha512-s3Qv7Lht9zbVKE9XoTyRG6wVDCKdtOFIjBGg3+Yhn6JaytuNKPIjBMTMIY1AnOH3seL5mvF+x33oGAyK3hVt3A==", "dev": true, "funding": [ { diff --git a/frontend/package.json b/frontend/package.json index ccafb968..79ec151e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -60,7 +60,7 @@ "@testing-library/jest-dom": "^6.9.1", "@testing-library/react": "^16.3.2", "@testing-library/user-event": "^14.6.1", - "@types/node": "^25.3.2", + "@types/node": "^25.3.3", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@typescript-eslint/eslint-plugin": 
"^8.56.1", diff --git a/frontend/src/components/__tests__/ProxyHostForm.test.tsx b/frontend/src/components/__tests__/ProxyHostForm.test.tsx index 9e7f57b8..c579f072 100644 --- a/frontend/src/components/__tests__/ProxyHostForm.test.tsx +++ b/frontend/src/components/__tests__/ProxyHostForm.test.tsx @@ -1440,12 +1440,17 @@ describe('ProxyHostForm', () => { ) - await userEvent.type(screen.getByLabelText(/^Name/), 'Remote Mapping') - await userEvent.type(screen.getByPlaceholderText('example.com, www.example.com'), 'remote.existing.com') + fireEvent.change(screen.getByLabelText(/^Name/), { target: { value: 'Remote Mapping' } }) + fireEvent.change(screen.getByPlaceholderText('example.com, www.example.com'), { target: { value: 'remote.existing.com' } }) await selectComboboxOption('Source', 'Local Docker Registry (localhost)') await selectComboboxOption('Containers', 'remote-app (nginx:latest)') + await waitFor(() => { + expect(screen.getByLabelText(/^Host$/)).toHaveValue('localhost') + expect(screen.getByLabelText(/^Port$/)).toHaveValue(18080) + }) + await userEvent.click(screen.getByText('Save')) await waitFor(() => { @@ -1454,7 +1459,7 @@ describe('ProxyHostForm', () => { forward_port: 18080, })) }) - }) + }, 15000) it('updates domain using selected container when base domain changes', async () => { const { useDocker } = await import('../../hooks/useDocker') diff --git a/frontend/src/locales/de/translation.json b/frontend/src/locales/de/translation.json index e8610749..e40b3da1 100644 --- a/frontend/src/locales/de/translation.json +++ b/frontend/src/locales/de/translation.json @@ -423,7 +423,9 @@ "triggerCheck": "Sofortige Gesundheitsprüfung auslösen", "healthCheckTriggered": "Gesundheitsprüfung ausgelöst", "monitorDeleted": "Monitor gelöscht", - "deleteConfirm": "Diesen Monitor löschen? Dies kann nicht rückgängig gemacht werden." + "deleteConfirm": "Diesen Monitor löschen? 
Dies kann nicht rückgängig gemacht werden.", + "pending": "PRÜFUNG...", + "pendingFirstCheck": "Warten auf erste Prüfung..." }, "domains": { "title": "Domänen", diff --git a/frontend/src/locales/en/translation.json b/frontend/src/locales/en/translation.json index f90c22c3..04eca004 100644 --- a/frontend/src/locales/en/translation.json +++ b/frontend/src/locales/en/translation.json @@ -498,7 +498,9 @@ "monitorUrl": "URL", "monitorTypeHttp": "HTTP", "monitorTypeTcp": "TCP", - "urlPlaceholder": "https://example.com or tcp://host:port" + "urlPlaceholder": "https://example.com or tcp://host:port", + "pending": "CHECKING...", + "pendingFirstCheck": "Waiting for first check..." }, "domains": { "title": "Domains", diff --git a/frontend/src/locales/es/translation.json b/frontend/src/locales/es/translation.json index 07593570..a9067bbe 100644 --- a/frontend/src/locales/es/translation.json +++ b/frontend/src/locales/es/translation.json @@ -423,7 +423,9 @@ "triggerCheck": "Activar verificación de salud inmediata", "healthCheckTriggered": "Verificación de salud activada", "monitorDeleted": "Monitor eliminado", - "deleteConfirm": "¿Eliminar este monitor? Esto no se puede deshacer." + "deleteConfirm": "¿Eliminar este monitor? Esto no se puede deshacer.", + "pending": "VERIFICANDO...", + "pendingFirstCheck": "Esperando primera verificación..." }, "domains": { "title": "Dominios", diff --git a/frontend/src/locales/fr/translation.json b/frontend/src/locales/fr/translation.json index 9853dffc..525cec3f 100644 --- a/frontend/src/locales/fr/translation.json +++ b/frontend/src/locales/fr/translation.json @@ -423,7 +423,9 @@ "triggerCheck": "Déclencher une vérification de santé immédiate", "healthCheckTriggered": "Vérification de santé déclenchée", "monitorDeleted": "Moniteur supprimé", - "deleteConfirm": "Supprimer ce moniteur? Cette action est irréversible." + "deleteConfirm": "Supprimer ce moniteur? 
Cette action est irréversible.", + "pending": "VÉRIFICATION...", + "pendingFirstCheck": "En attente de la première vérification..." }, "domains": { "title": "Domaines", diff --git a/frontend/src/locales/zh/translation.json b/frontend/src/locales/zh/translation.json index 09e96cdd..885d64b9 100644 --- a/frontend/src/locales/zh/translation.json +++ b/frontend/src/locales/zh/translation.json @@ -423,7 +423,9 @@ "triggerCheck": "触发即时健康检查", "healthCheckTriggered": "健康检查已触发", "monitorDeleted": "监控器已删除", - "deleteConfirm": "删除此监控器?此操作无法撤销。" + "deleteConfirm": "删除此监控器?此操作无法撤销。", + "pending": "检查中...", + "pendingFirstCheck": "等待首次检查..." }, "domains": { "title": "域名", diff --git a/frontend/src/pages/Uptime.tsx b/frontend/src/pages/Uptime.tsx index 25cd4871..8bbcfada 100644 --- a/frontend/src/pages/Uptime.tsx +++ b/frontend/src/pages/Uptime.tsx @@ -2,10 +2,22 @@ import { useMemo, useState, type FC, type FormEvent } from 'react'; import { useTranslation } from 'react-i18next'; import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; import { getMonitors, getMonitorHistory, updateMonitor, deleteMonitor, checkMonitor, createMonitor, syncMonitors, UptimeMonitor } from '../api/uptime'; -import { Activity, ArrowUp, ArrowDown, Settings, X, Pause, RefreshCw, Plus } from 'lucide-react'; +import { Activity, ArrowUp, ArrowDown, Settings, X, Pause, RefreshCw, Plus, Loader } from 'lucide-react'; import { toast } from 'react-hot-toast' import { formatDistanceToNow } from 'date-fns'; +type BaseMonitorStatus = 'up' | 'down' | 'pending'; +type EffectiveMonitorStatus = BaseMonitorStatus | 'paused'; + +const normalizeMonitorStatus = (status: string | undefined): BaseMonitorStatus => { + const normalized = status?.toLowerCase(); + if (normalized === 'up' || normalized === 'down' || normalized === 'pending') { + return normalized; + } + + return 'down'; +}; + const MonitorCard: FC<{ monitor: UptimeMonitor; onEdit: (monitor: UptimeMonitor) => void; t: (key: string, options?: 
Record) => string }> = ({ monitor, onEdit, t }) => { const { data: history } = useQuery({ queryKey: ['uptimeHistory', monitor.id], @@ -64,24 +76,33 @@ const MonitorCard: FC<{ monitor: UptimeMonitor; onEdit: (monitor: UptimeMonitor) ? history.reduce((a, b) => new Date(a.created_at) > new Date(b.created_at) ? a : b) : null - const isUp = latestBeat ? latestBeat.status === 'up' : monitor.status === 'up'; + const hasHistory = Boolean(history && history.length > 0); const isPaused = monitor.enabled === false; + const effectiveStatus: EffectiveMonitorStatus = isPaused + ? 'paused' + : latestBeat + ? (latestBeat.status === 'up' ? 'up' : 'down') + : monitor.status === 'pending' && !hasHistory + ? 'pending' + : normalizeMonitorStatus(monitor.status); return ( -
+
{/* Top Row: Name (left), Badge (center-right), Settings (right) */}

{monitor.name}

- {isPaused ? : isUp ? : } - {isPaused ? t('uptime.paused') : monitor.status.toUpperCase()} + : effectiveStatus === 'pending' + ? 'bg-amber-100 text-amber-800 dark:bg-amber-900 dark:text-amber-200 animate-pulse motion-reduce:animate-none' + : effectiveStatus === 'up' + ? 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200' + : 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200' + }`} data-testid="status-badge" data-status={effectiveStatus} role="status" aria-label={effectiveStatus === 'paused' ? t('uptime.paused') : effectiveStatus === 'pending' ? t('uptime.pending') : effectiveStatus === 'up' ? 'UP' : 'DOWN'}> + {effectiveStatus === 'paused' ? : effectiveStatus === 'pending' ?
diff --git a/frontend/src/pages/__tests__/Uptime.spec.tsx b/frontend/src/pages/__tests__/Uptime.spec.tsx index b86ed566..924fb785 100644 --- a/frontend/src/pages/__tests__/Uptime.spec.tsx +++ b/frontend/src/pages/__tests__/Uptime.spec.tsx @@ -230,4 +230,59 @@ describe('Uptime page', () => { expect(screen.getByText('RemoteMon')).toBeInTheDocument() expect(screen.getByText('OtherMon')).toBeInTheDocument() }) + + it('shows CHECKING... state for pending monitor with no history', async () => { + const monitor = { + id: 'm13', name: 'PendingMonitor', url: 'http://example.com', type: 'http', interval: 60, enabled: true, + status: 'pending', last_check: null, latency: 0, max_retries: 3, + } + vi.mocked(uptimeApi.getMonitors).mockResolvedValue([monitor]) + vi.mocked(uptimeApi.getMonitorHistory).mockResolvedValue([]) + + renderWithProviders() + await waitFor(() => expect(screen.getByText('PendingMonitor')).toBeInTheDocument()) + const badge = screen.getByTestId('status-badge') + expect(badge).toHaveAttribute('data-status', 'pending') + expect(badge).toHaveAttribute('role', 'status') + expect(badge.textContent).toContain('CHECKING...') + expect(badge.className).toContain('bg-amber-100') + expect(badge.className).toContain('animate-pulse') + expect(screen.getByText('Waiting for first check...')).toBeInTheDocument() + }) + + it('treats pending monitor with heartbeat history as normal (not pending)', async () => { + const monitor = { + id: 'm14', name: 'PendingWithHistory', url: 'http://example.com', type: 'http', interval: 60, enabled: true, + status: 'pending', last_check: new Date().toISOString(), latency: 10, max_retries: 3, + } + const history = [ + { id: 1, monitor_id: 'm14', status: 'up', latency: 10, message: 'OK', created_at: new Date().toISOString() }, + ] + vi.mocked(uptimeApi.getMonitors).mockResolvedValue([monitor]) + vi.mocked(uptimeApi.getMonitorHistory).mockResolvedValue(history) + + renderWithProviders() + await waitFor(() => 
expect(screen.getByText('PendingWithHistory')).toBeInTheDocument()) + await waitFor(() => { + const badge = screen.getByTestId('status-badge') + expect(badge.textContent).not.toContain('CHECKING...') + expect(badge.className).toContain('bg-green-100') + }) + }) + + it('shows DOWN indicator for down monitor (no regression)', async () => { + const monitor = { + id: 'm15', name: 'DownMonitor', url: 'http://example.com', type: 'http', interval: 60, enabled: true, + status: 'down', last_check: new Date().toISOString(), latency: 0, max_retries: 3, + } + vi.mocked(uptimeApi.getMonitors).mockResolvedValue([monitor]) + vi.mocked(uptimeApi.getMonitorHistory).mockResolvedValue([]) + + renderWithProviders() + await waitFor(() => expect(screen.getByText('DownMonitor')).toBeInTheDocument()) + const badge = screen.getByTestId('status-badge') + expect(badge).toHaveAttribute('data-status', 'down') + expect(badge.textContent).toContain('DOWN') + expect(badge.className).toContain('bg-red-100') + }) }) diff --git a/frontend/src/pages/__tests__/Uptime.test.tsx b/frontend/src/pages/__tests__/Uptime.test.tsx index 53776e7b..96b0e93d 100644 --- a/frontend/src/pages/__tests__/Uptime.test.tsx +++ b/frontend/src/pages/__tests__/Uptime.test.tsx @@ -139,6 +139,23 @@ describe('Uptime page', () => { expect(screen.getByText('Loading monitors...')).toBeInTheDocument() }) + it('falls back to DOWN status when monitor status is unknown', async () => { + const { getMonitors, getMonitorHistory } = await import('../../api/uptime') + const monitor = { + id: 'm-unknown-status', name: 'UnknownStatusMonitor', url: 'http://example.com', type: 'http', interval: 60, enabled: true, + status: 'mystery', last_check: new Date().toISOString(), latency: 10, max_retries: 3, + } + vi.mocked(getMonitors).mockResolvedValue([monitor]) + vi.mocked(getMonitorHistory).mockResolvedValue([]) + + renderWithQueryClient() + await waitFor(() => expect(screen.getByText('UnknownStatusMonitor')).toBeInTheDocument()) + + const badge 
= screen.getByTestId('status-badge') + expect(badge).toHaveAttribute('data-status', 'down') + expect(badge).toHaveTextContent('DOWN') + }) + it('renders empty state when no monitors exist', async () => { const { getMonitors } = await import('../../api/uptime') vi.mocked(getMonitors).mockResolvedValue([]) diff --git a/go.work.sum b/go.work.sum index 7e4b3b20..468746d5 100644 --- a/go.work.sum +++ b/go.work.sum @@ -6,6 +6,7 @@ github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjH github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/containerd/typeurl/v2 v2.2.0 h1:6NBDbQzr7I5LHgp34xAXYF5DOTQDn05X58lsPEmzLso= @@ -70,28 +71,31 @@ github.com/spf13/viper v1.15.0/go.mod h1:fFcTBJxvhhzSJiZy8n+PeW6t8l+KeT/uTARa0jH github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.2.0/go.mod h1:3dlrS0iBaWKYVt2ZfA4cj48umJZ+cAEbR6/SjLA88I8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod 
h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= -golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/oauth2 v0.6.0 h1:Lh8GPgSKBfWSwFvtuWOfeI3aAAnbXTSutYxJiOJFgIw= golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.21.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= @@ -107,10 +111,10 @@ golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= -golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= @@ -121,8 +125,11 @@ gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +lukechampine.com/uint128 v1.3.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= modernc.org/cc/v3 v3.40.0/go.mod h1:/bTg4dnWkSXowUO6ssQKnOV0yMVxDYNIsIrzqTFDGH0= +modernc.org/cc/v3 v3.41.0/go.mod h1:Ni4zjJYJ04CDOhG7dn640WGfwBzfE0ecX8TyMB0Fv0Y= modernc.org/ccgo/v3 v3.16.13/go.mod 
h1:2Quk+5YgpImhPjv2Qsob1DnZ/4som1lJTodubIcoUkY= +modernc.org/ccgo/v3 v3.16.15/go.mod h1:yT7B+/E2m43tmMOT51GMoM98/MtHIcQQSleGnddkUNI= modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= modernc.org/strutil v1.1.3/go.mod h1:MEHNA7PdEnEwLvspRMtWTNnp2nnyvMfkimT1NKNAGbw= diff --git a/package-lock.json b/package-lock.json index 23f89488..3143f390 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "devDependencies": { "@bgotink/playwright-coverage": "^0.3.2", "@playwright/test": "^1.58.2", - "@types/node": "^25.3.2", + "@types/node": "^25.3.3", "dotenv": "^17.3.1", "markdownlint-cli2": "^0.21.0", "prettier": "^3.8.1", @@ -937,9 +937,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "25.3.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.2.tgz", - "integrity": "sha512-RpV6r/ij22zRRdyBPcxDeKAzH43phWVKEjL2iksqo1Vz3CuBUrgmPpPhALKiRfU7OMCmeeO9vECBMsV0hMTG8Q==", + "version": "25.3.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.3.tgz", + "integrity": "sha512-DpzbrH7wIcBaJibpKo9nnSQL0MTRdnWttGyE5haGwK86xgMOkFLp7vEyfQPGLOJh5wNYiJ3V9PmUMDhV9u8kkQ==", "devOptional": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 7c640572..b46bfeb2 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "devDependencies": { "@bgotink/playwright-coverage": "^0.3.2", "@playwright/test": "^1.58.2", - "@types/node": "^25.3.2", + "@types/node": "^25.3.3", "dotenv": "^17.3.1", "markdownlint-cli2": "^0.21.0", "prettier": "^3.8.1", diff --git a/playwright.config.js b/playwright.config.js index 1c6cd9ee..aa82818d 100644 --- a/playwright.config.js +++ b/playwright.config.js @@ -270,7 +270,13 @@ export default defineConfig({ }, dependencies: browserDependencies, testMatch: /.*\.spec\.(ts|js)$/, - testIgnore: ['**/frontend/**', '**/node_modules/**', '**/backend/**', 
'**/security-enforcement/**', '**/security/**'], + testIgnore: [ + '**/frontend/**', + '**/node_modules/**', + '**/backend/**', + '**/security-enforcement/**', + '**/security/**', + ], }, { @@ -281,7 +287,13 @@ export default defineConfig({ }, dependencies: browserDependencies, testMatch: /.*\.spec\.(ts|js)$/, - testIgnore: ['**/frontend/**', '**/node_modules/**', '**/backend/**', '**/security-enforcement/**', '**/security/**'], + testIgnore: [ + '**/frontend/**', + '**/node_modules/**', + '**/backend/**', + '**/security-enforcement/**', + '**/security/**', + ], }, { @@ -292,7 +304,13 @@ export default defineConfig({ }, dependencies: browserDependencies, testMatch: /.*\.spec\.(ts|js)$/, - testIgnore: ['**/frontend/**', '**/node_modules/**', '**/backend/**', '**/security-enforcement/**', '**/security/**'], + testIgnore: [ + '**/frontend/**', + '**/node_modules/**', + '**/backend/**', + '**/security-enforcement/**', + '**/security/**', + ], }, /* Test against mobile viewports. */ diff --git a/scripts/go_update.sh b/scripts/go_update.sh new file mode 100755 index 00000000..8df50953 --- /dev/null +++ b/scripts/go_update.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# This script updates Go module dependencies for the project. + +cd /projects/Charon/backend || exit + +echo "Updating Go module dependencies..." + +go get -u ./... +go mod tidy +go mod verify +go vet ./... +go list -m -u all +go build ./... + +echo "Go module dependencies updated successfully." 
diff --git a/tests/core/caddy-import/caddy-import-cross-browser.spec.ts b/tests/core/caddy-import/caddy-import-cross-browser.spec.ts index 0afa8346..703bbbd6 100644 --- a/tests/core/caddy-import/caddy-import-cross-browser.spec.ts +++ b/tests/core/caddy-import/caddy-import-cross-browser.spec.ts @@ -184,7 +184,9 @@ async function setupImportMocks( } async function gotoImportPageWithAuthRecovery(page: Page, adminUser: TestUser): Promise { - await ensureImportUiPreconditions(page, adminUser); + await expect(async () => { + await ensureImportUiPreconditions(page, adminUser); + }).toPass({ timeout: 15000 }); } test.describe('Caddy Import - Cross-Browser @cross-browser', () => { diff --git a/tests/core/caddy-import/caddy-import-firefox.spec.ts b/tests/core/caddy-import/caddy-import-firefox.spec.ts index b1df798f..1885da0f 100644 --- a/tests/core/caddy-import/caddy-import-firefox.spec.ts +++ b/tests/core/caddy-import/caddy-import-firefox.spec.ts @@ -20,11 +20,11 @@ import { test, expect } from '../../fixtures/auth-fixtures'; import { Page } from '@playwright/test'; -import { ensureImportUiPreconditions, resetImportSession, waitForSuccessfulImportResponse } from './import-page-helpers'; - -function firefoxOnly(browserName: string) { - test.skip(browserName !== 'firefox', 'This suite only runs on Firefox'); -} +import { + ensureImportUiPreconditions, + resetImportSession, + waitForSuccessfulImportResponse, +} from './import-page-helpers'; /** * Helper to set up import API mocks @@ -91,10 +91,6 @@ async function setupImportMocks(page: Page, success: boolean = true) { } test.describe('Caddy Import - Firefox-Specific @firefox-only', () => { - test.beforeEach(async ({ browserName }) => { - firefoxOnly(browserName); - }); - /** * TEST 1: Event listener attachment verification * Ensures the Parse button has proper click handlers in Firefox @@ -213,10 +209,12 @@ test.describe('Caddy Import - Firefox-Specific @firefox-only', () => { await textarea.fill('cors-test.example.com { 
reverse_proxy localhost:3000 }'); const parseButton = page.getByRole('button', { name: /parse|review/i }); - await parseButton.click(); - - // Wait for response - await page.waitForResponse((r) => r.url().includes('/api/v1/import/upload'), { timeout: 5000 }); + await waitForSuccessfulImportResponse( + page, + () => parseButton.click(), + 'firefox-cors-same-origin', + /\/api\/v1\/import\/upload/i + ); // Verify no CORS issues expect(corsIssues).toHaveLength(0); @@ -247,21 +245,26 @@ test.describe('Caddy Import - Firefox-Specific @firefox-only', () => { await textarea.fill('auth-test.example.com { reverse_proxy localhost:3000 }'); const parseButton = page.getByRole('button', { name: /parse|review/i }); - await parseButton.click(); - - // Wait for request to complete - await page.waitForResponse((r) => r.url().includes('/api/v1/import/upload'), { timeout: 5000 }); + const uploadResponse = await waitForSuccessfulImportResponse( + page, + () => parseButton.click(), + 'firefox-auth-headers', + /\/api\/v1\/import\/upload/i + ); // Verify headers were captured - expect(Object.keys(requestHeaders).length).toBeGreaterThan(0); + const sentHeaders = Object.keys(requestHeaders).length > 0 + ? 
requestHeaders + : uploadResponse.request().headers(); + expect(Object.keys(sentHeaders).length).toBeGreaterThan(0); // Verify cookie or authorization header present - const hasCookie = !!requestHeaders['cookie']; - const hasAuth = !!requestHeaders['authorization']; + const hasCookie = !!sentHeaders['cookie']; + const hasAuth = !!sentHeaders['authorization']; expect(hasCookie || hasAuth).toBeTruthy(); // Verify content-type is correct - expect(requestHeaders['content-type']).toContain('application/json'); + expect(sentHeaders['content-type']).toContain('application/json'); }); }); diff --git a/tests/core/caddy-import/caddy-import-gaps.spec.ts b/tests/core/caddy-import/caddy-import-gaps.spec.ts index 79fa8c52..ac64fe9c 100644 --- a/tests/core/caddy-import/caddy-import-gaps.spec.ts +++ b/tests/core/caddy-import/caddy-import-gaps.spec.ts @@ -64,6 +64,14 @@ async function clickParseAndWaitForUpload(page: Page, context: string): Promise< } } +async function resetImportSessionWithRetry(page: Page): Promise { + // WebKit can occasionally throw a transient internal navigation error during + // route transitions; a bounded retry keeps hooks deterministic. + await expect(async () => { + await resetImportSession(page); + }).toPass({ timeout: 20000 }); +} + /** * Helper: Complete the full import flow from paste to success modal * Reusable across multiple tests to reduce duplication @@ -106,11 +114,13 @@ async function completeImportFlow( test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { test.beforeEach(async ({ page }) => { - await resetImportSession(page); + await resetImportSessionWithRetry(page); }); test.afterEach(async ({ page }) => { - await resetImportSession(page); + await resetImportSessionWithRetry(page).catch(() => { + // Best-effort cleanup only; preserve primary test failure signal. 
+ }); }); // ========================================================================= @@ -318,7 +328,7 @@ test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { // Gap 3: Overwrite Resolution Flow // ========================================================================= test.describe('Overwrite Resolution Flow', () => { - test('3.1: should update existing host when selecting Replace with Imported resolution', async ({ page, request, testData }) => { + test('3.1: should update existing host when selecting Replace with Imported resolution', async ({ page, request, testData, browserName, adminUser }) => { // Create existing host with initial config const result = await testData.createProxyHost({ domain: 'overwrite-test.example.com', @@ -331,6 +341,11 @@ test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { await test.step('Navigate to import page and parse conflicting Caddyfile', async () => { await page.goto('/tasks/import/caddyfile'); + if (browserName === 'webkit') { + await ensureAuthenticatedImportFormReady(page, adminUser); + } else { + await ensureImportFormReady(page); + } // Import with different config (new-server:9000) const caddyfile = `${namespacedDomain} { reverse_proxy new-server:9000 }`; await fillCaddyfileTextarea(page, caddyfile); @@ -391,7 +406,7 @@ test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { // Gap 4: Session Resume via Banner // ========================================================================= test.describe('Session Resume via Banner', () => { - test('4.1: should show pending session banner when returning to import page', async ({ page, testData }) => { + test('4.1: should show pending session banner when returning to import page', async ({ page, testData, browserName, adminUser }) => { const domain = generateDomain(testData, 'session-resume-test'); const caddyfile = `${domain} { reverse_proxy localhost:4000 }`; let resumeSessionId = ''; @@ -419,7 +434,12 @@ test.describe('Caddy 
Import Gap Coverage @caddy-import-gaps', () => { }); await test.step('Create import session by parsing content', async () => { - await page.goto('/tasks/import/caddyfile'); + await page.goto('/tasks/import/caddyfile', { waitUntil: 'domcontentloaded' }); + if (browserName === 'webkit') { + await ensureAuthenticatedImportFormReady(page, adminUser); + } else { + await ensureImportFormReady(page); + } await fillCaddyfileTextarea(page, caddyfile); const uploadPromise = page.waitForResponse( @@ -470,32 +490,112 @@ test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { // Review table should NOT be visible initially (until clicking Review Changes) await expect(page.getByTestId('import-review-table')).not.toBeVisible(); }); + + await test.step('Cleanup mocked routes', async () => { + await page.unroute('**/api/v1/import/status'); + }); }); - test('4.2: should restore review table with previous content when clicking Review Changes', async ({ page, testData }) => { - // SKIP: Browser-uploaded import sessions are transient (file-based only) and not persisted - // to the database. Session resume only works for Docker-mounted Caddyfiles. - // See test 4.1 skip reason for details. 
+ test('4.2: should restore review table with previous content when clicking Review Changes', async ({ page, testData, browserName, adminUser }) => { const domain = generateDomain(testData, 'review-changes-test'); const caddyfile = `${domain} { reverse_proxy localhost:5000 }`; + let resumeSessionId = ''; + let shouldMockPendingStatus = false; + + await page.route('**/api/v1/import/status', async (route) => { + if (!shouldMockPendingStatus || !resumeSessionId) { + await route.continue(); + return; + } + + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + has_pending: true, + session: { + id: resumeSessionId, + state: 'reviewing', + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }, + }), + }); + }); + + await page.route('**/api/v1/import/preview**', async (route) => { + if (!shouldMockPendingStatus || !resumeSessionId) { + await route.continue(); + return; + } + + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + session: { + id: resumeSessionId, + state: 'reviewing', + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }, + preview: { + hosts: [ + { + domain_names: domain, + forward_scheme: 'http', + forward_host: 'localhost', + forward_port: 5000, + name: domain, + }, + ], + conflicts: [], + warnings: [], + }, + caddyfile_content: caddyfile, + conflict_details: {}, + }), + }); + }); await test.step('Create import session', async () => { - await page.goto('/tasks/import/caddyfile'); + await page.goto('/tasks/import/caddyfile', { waitUntil: 'domcontentloaded' }); + if (browserName === 'webkit') { + await ensureAuthenticatedImportFormReady(page, adminUser); + } else { + await ensureImportFormReady(page); + } await fillCaddyfileTextarea(page, caddyfile); - await clickParseAndWaitForUpload(page, 'session-review-changes'); + const uploadPromise = page.waitForResponse( + r => 
r.url().includes('/api/v1/import/upload') && r.status() === 200, + { timeout: 15000 } + ); + await page.getByRole('button', { name: /parse|review/i }).click(); + const uploadResponse = await uploadPromise; + const uploadBody = (await uploadResponse.json().catch(() => ({}))) as { + session?: { id?: string }; + }; + resumeSessionId = uploadBody?.session?.id || ''; + expect(resumeSessionId).toBeTruthy(); await expect(page.getByTestId('import-review-table')).toBeVisible(); }); await test.step('Navigate away and back', async () => { await page.goto('/proxy-hosts'); - // Wait for status API to be called after navigation - const statusPromise = page.waitForResponse(r => - r.url().includes('/api/v1/import/status') && r.status() === 200 - ); - await page.goto('/tasks/import/caddyfile'); - await statusPromise; + shouldMockPendingStatus = true; + + // WebKit can throw a transient internal navigation error; retry deterministically. + await expect(async () => { + const statusPromise = page.waitForResponse( + r => r.url().includes('/api/v1/import/status') && r.status() === 200, + { timeout: 10000 } + ); + await page.goto('/tasks/import/caddyfile', { waitUntil: 'domcontentloaded' }); + await statusPromise; + }).toPass({ timeout: 15000 }); + await expect(page.getByTestId('import-banner')).toBeVisible({ timeout: 10000 }); }); @@ -515,6 +615,11 @@ test.describe('Caddy Import Gap Coverage @caddy-import-gaps', () => { // Note: Some implementations keep banner visible but change its content // If banner remains, it should show different text }); + + await test.step('Cleanup mocked routes', async () => { + await page.unroute('**/api/v1/import/status'); + await page.unroute('**/api/v1/import/preview**'); + }); }); }); diff --git a/tests/core/caddy-import/caddy-import-webkit.spec.ts b/tests/core/caddy-import/caddy-import-webkit.spec.ts index 860dab95..2b6dfe8e 100644 --- a/tests/core/caddy-import/caddy-import-webkit.spec.ts +++ b/tests/core/caddy-import/caddy-import-webkit.spec.ts @@ 
-27,10 +27,6 @@ import { waitForSuccessfulImportResponse, } from './import-page-helpers'; -function webkitOnly(browserName: string) { - test.skip(browserName !== 'webkit', 'This suite only runs on WebKit'); -} - const WEBKIT_TEST_EMAIL = process.env.E2E_TEST_EMAIL || 'e2e-test@example.com'; const WEBKIT_TEST_PASSWORD = process.env.E2E_TEST_PASSWORD || 'TestPassword123!'; @@ -151,8 +147,7 @@ async function setupImportMocks(page: Page, success: boolean = true) { test.describe('Caddy Import - WebKit-Specific @webkit-only', () => { const diagnosticsByPage = new WeakMap void>(); - test.beforeEach(async ({ browserName, page, adminUser }) => { - webkitOnly(browserName); + test.beforeEach(async ({ page, adminUser }) => { diagnosticsByPage.set(page, attachImportDiagnostics(page, 'caddy-import-webkit')); await setupImportMocks(page); await ensureWebkitAuthSession(page); diff --git a/tests/monitoring/uptime-monitoring.spec.ts b/tests/monitoring/uptime-monitoring.spec.ts index 10c8c0cd..34a26361 100644 --- a/tests/monitoring/uptime-monitoring.spec.ts +++ b/tests/monitoring/uptime-monitoring.spec.ts @@ -93,11 +93,16 @@ const mockMonitors: UptimeMonitor[] = [ /** * Generate mock heartbeat history */ -const generateMockHistory = (monitorId: string, count: number = 60): UptimeHeartbeat[] => { +const generateMockHistory = ( + monitorId: string, + count: number = 60, + latestStatus: 'up' | 'down' = 'up' +): UptimeHeartbeat[] => { return Array.from({ length: count }, (_, i) => ({ id: i, monitor_id: monitorId, - status: i % 5 === 0 ? 'down' : 'up', + // Keep the newest heartbeat aligned with the monitor's expected current state. + status: i === 0 ? latestStatus : i % 5 === 0 ? 
'down' : 'up', latency: Math.floor(Math.random() * 100), message: 'OK', created_at: new Date(Date.now() - i * 60000).toISOString(), @@ -180,7 +185,8 @@ async function setupMonitorsWithHistory( await setupMonitorsAPI(page, monitors); for (const monitor of monitors) { - const history = generateMockHistory(monitor.id, 60); + const latestStatus = monitor.status === 'down' ? 'down' : 'up'; + const history = generateMockHistory(monitor.id, 60, latestStatus); await setupHistoryAPI(page, monitor.id, history); } } diff --git a/tests/proxy-host-dropdown-fix.spec.ts b/tests/proxy-host-dropdown-fix.spec.ts deleted file mode 100644 index 65fa857d..00000000 --- a/tests/proxy-host-dropdown-fix.spec.ts +++ /dev/null @@ -1,186 +0,0 @@ -import { test, expect } from '@playwright/test' - -type SelectionPair = { - aclLabel: string - securityHeadersLabel: string -} - -async function dismissDomainDialog(page: import('@playwright/test').Page): Promise { - const noThanksButton = page.getByRole('button', { name: /no, thanks/i }) - if (await noThanksButton.isVisible({ timeout: 1200 }).catch(() => false)) { - await noThanksButton.click() - } -} - -async function openCreateModal(page: import('@playwright/test').Page): Promise { - const addButton = page.getByRole('button', { name: /add.*proxy.*host|create/i }).first() - await expect(addButton).toBeEnabled() - await addButton.click() - await expect(page.getByRole('dialog')).toBeVisible() -} - -async function selectFirstUsableOption( - page: import('@playwright/test').Page, - trigger: import('@playwright/test').Locator, - skipPattern: RegExp -): Promise { - await trigger.click() - const listbox = page.getByRole('listbox') - await expect(listbox).toBeVisible() - - const options = listbox.getByRole('option') - const optionCount = await options.count() - expect(optionCount).toBeGreaterThan(0) - - for (let i = 0; i < optionCount; i++) { - const option = options.nth(i) - const rawLabel = (await option.textContent())?.trim() || '' - const isDisabled = 
(await option.getAttribute('aria-disabled')) === 'true' - - if (isDisabled || !rawLabel || skipPattern.test(rawLabel)) { - continue - } - - await option.click() - return rawLabel - } - - throw new Error('No selectable non-default option found in dropdown') -} - -async function selectOptionByName( - page: import('@playwright/test').Page, - trigger: import('@playwright/test').Locator, - optionName: RegExp -): Promise { - await trigger.click() - const listbox = page.getByRole('listbox') - await expect(listbox).toBeVisible() - - const option = listbox.getByRole('option', { name: optionName }).first() - await expect(option).toBeVisible() - const label = ((await option.textContent()) || '').trim() - await option.click() - return label -} - -async function saveProxyHost(page: import('@playwright/test').Page): Promise { - await dismissDomainDialog(page) - - const saveButton = page - .getByTestId('proxy-host-save') - .or(page.getByRole('button', { name: /^save$/i })) - .first() - await expect(saveButton).toBeEnabled() - await saveButton.click() - - const confirmSave = page.getByRole('button', { name: /yes.*save/i }).first() - if (await confirmSave.isVisible({ timeout: 1200 }).catch(() => false)) { - await confirmSave.click() - } - - await expect(page.getByRole('dialog')).not.toBeVisible({ timeout: 10000 }) -} - -async function openEditModalForDomain(page: import('@playwright/test').Page, domain: string): Promise { - const row = page.locator('tbody tr').filter({ hasText: domain }).first() - await expect(row).toBeVisible({ timeout: 10000 }) - - const editButton = row.getByRole('button', { name: /edit proxy host|edit/i }).first() - await expect(editButton).toBeVisible() - await editButton.click() - await expect(page.getByRole('dialog')).toBeVisible() -} - -async function selectNonDefaultPair( - page: import('@playwright/test').Page, - dialog: import('@playwright/test').Locator -): Promise { - const aclTrigger = dialog.getByRole('combobox', { name: /access control list/i }) - 
const securityHeadersTrigger = dialog.getByRole('combobox', { name: /security headers/i }) - - const aclLabel = await selectFirstUsableOption(page, aclTrigger, /no access control|public/i) - await expect(aclTrigger).toContainText(aclLabel) - - const securityHeadersLabel = await selectFirstUsableOption(page, securityHeadersTrigger, /none \(no security headers\)/i) - await expect(securityHeadersTrigger).toContainText(securityHeadersLabel) - - return { aclLabel, securityHeadersLabel } -} - -test.describe.skip('ProxyHostForm ACL/Security Headers Regression (moved to security shard)', () => { - test('should keep ACL and Security Headers behavior equivalent across create/edit flows', async ({ page }) => { - const suffix = Date.now() - const proxyName = `Dropdown Regression ${suffix}` - const proxyDomain = `dropdown-${suffix}.test.local` - - await test.step('Navigate to Proxy Hosts', async () => { - await page.goto('/proxy-hosts') - await page.waitForLoadState('networkidle') - await expect(page.getByRole('heading', { name: /proxy hosts/i })).toBeVisible() - }) - - await test.step('Create flow: select ACL + Security Headers and verify immediate form state', async () => { - await openCreateModal(page) - const dialog = page.getByRole('dialog') - - await dialog.locator('#proxy-name').fill(proxyName) - await dialog.locator('#domain-names').click() - await page.keyboard.type(proxyDomain) - await page.keyboard.press('Tab') - await dismissDomainDialog(page) - - await dialog.locator('#forward-host').fill('127.0.0.1') - await dialog.locator('#forward-port').fill('8080') - - const initialSelection = await selectNonDefaultPair(page, dialog) - - await saveProxyHost(page) - - await openEditModalForDomain(page, proxyDomain) - const reopenDialog = page.getByRole('dialog') - await expect(reopenDialog.getByRole('combobox', { name: /access control list/i })).toContainText(initialSelection.aclLabel) - await expect(reopenDialog.getByRole('combobox', { name: /security headers/i 
})).toContainText(initialSelection.securityHeadersLabel) - await reopenDialog.getByRole('button', { name: /cancel/i }).click() - await expect(reopenDialog).not.toBeVisible({ timeout: 5000 }) - }) - - await test.step('Edit flow: change ACL + Security Headers and verify persisted updates', async () => { - await openEditModalForDomain(page, proxyDomain) - const dialog = page.getByRole('dialog') - - const updatedSelection = await selectNonDefaultPair(page, dialog) - await saveProxyHost(page) - - await openEditModalForDomain(page, proxyDomain) - const reopenDialog = page.getByRole('dialog') - await expect(reopenDialog.getByRole('combobox', { name: /access control list/i })).toContainText(updatedSelection.aclLabel) - await expect(reopenDialog.getByRole('combobox', { name: /security headers/i })).toContainText(updatedSelection.securityHeadersLabel) - await reopenDialog.getByRole('button', { name: /cancel/i }).click() - await expect(reopenDialog).not.toBeVisible({ timeout: 5000 }) - }) - - await test.step('Edit flow: clear both to none/null and verify persisted clearing', async () => { - await openEditModalForDomain(page, proxyDomain) - const dialog = page.getByRole('dialog') - - const aclTrigger = dialog.getByRole('combobox', { name: /access control list/i }) - const securityHeadersTrigger = dialog.getByRole('combobox', { name: /security headers/i }) - - const aclNoneLabel = await selectOptionByName(page, aclTrigger, /no access control \(public\)/i) - await expect(aclTrigger).toContainText(aclNoneLabel) - - const securityNoneLabel = await selectOptionByName(page, securityHeadersTrigger, /none \(no security headers\)/i) - await expect(securityHeadersTrigger).toContainText(securityNoneLabel) - - await saveProxyHost(page) - - await openEditModalForDomain(page, proxyDomain) - const reopenDialog = page.getByRole('dialog') - await expect(reopenDialog.getByRole('combobox', { name: /access control list/i })).toContainText(/no access control \(public\)/i) - await 
expect(reopenDialog.getByRole('combobox', { name: /security headers/i })).toContainText(/none \(no security headers\)/i) - await reopenDialog.getByRole('button', { name: /cancel/i }).click() - await expect(reopenDialog).not.toBeVisible({ timeout: 5000 }) - }) - }) -})