diff --git a/backend/internal/services/mail_service_test.go b/backend/internal/services/mail_service_test.go index fd420470..b1d04f13 100644 --- a/backend/internal/services/mail_service_test.go +++ b/backend/internal/services/mail_service_test.go @@ -9,12 +9,15 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/pem" + "errors" "math/big" "net" "net/mail" "os" + "path/filepath" "strconv" "strings" + "sync" "testing" "time" @@ -26,6 +29,61 @@ import ( "gorm.io/gorm/logger" ) +// TestMain sets up the SSL_CERT_FILE environment variable globally BEFORE any tests run. +// This ensures x509.SystemCertPool() initializes with our test CA, which is critical for +// parallel test execution with -race flag where cert pool initialization timing matters. +func TestMain(m *testing.M) { + // Initialize shared test CA and write stable cert file + initializeTestCAForSuite() + + // Set SSL_CERT_FILE globally so cert pool initialization uses our CA + if err := os.Setenv("SSL_CERT_FILE", testCAFile); err != nil { + panic("failed to set SSL_CERT_FILE: " + err.Error()) + } + + // Run tests + exitCode := m.Run() + + // Cleanup (optional, OS will clean /tmp on reboot) + _ = os.Remove(testCAFile) + + os.Exit(exitCode) +} + +// initializeTestCAForSuite is called once by TestMain to set up the shared CA infrastructure. +func initializeTestCAForSuite() { + testCAOnce.Do(func() { + var err error + testCAKey, err = rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + panic("GenerateKey failed: " + err.Error()) + } + + testCATemplate = &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + CommonName: "charon-test-ca", + }, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(24 * 365 * time.Hour), // 24 years + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, + BasicConstraintsValid: true, + IsCA: true, + } + + caDER, err := x509.CreateCertificate(rand.Reader, testCATemplate, testCATemplate, &testCAKey.PublicKey, testCAKey) + if err != nil { + panic("CreateCertificate failed: " + err.Error()) + } + testCAPEM = pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caDER}) + + testCAFile = filepath.Join(os.TempDir(), "charon-test-ca-mail-service.pem") + if err := os.WriteFile(testCAFile, testCAPEM, 0o600); err != nil { + panic("WriteFile failed: " + err.Error()) + } + }) +} + func setupMailTestDB(t *testing.T) *gorm.DB { db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{ Logger: logger.Default.LogMode(logger.Silent), @@ -774,6 +832,60 @@ func TestEncodeSubject_RejectsCRLF(t *testing.T) { require.ErrorIs(t, err, errEmailHeaderInjection) } +// Shared test CA infrastructure to work around Go's cert pool caching. +// When tests run with -count=N, Go caches x509.SystemCertPool() after the first run. +// Generating a new CA per test causes failures because cached pool references old CA. +// Solution: Generate CA once, reuse across runs, and use stable cert file path. +var ( + testCAOnce sync.Once + testCAPEM []byte + testCAKey *rsa.PrivateKey + testCATemplate *x509.Certificate + testCAFile string +) + +func initTestCA(t *testing.T) { + t.Helper() + // Delegate to the suite-level initialization (already called by TestMain) + initializeTestCAForSuite() +} + +func newTestTLSConfigShared(t *testing.T) (*tls.Config, []byte) { + t.Helper() + + // Ensure shared CA is initialized + initTestCA(t) + + // Generate leaf certificate signed by shared CA + leafKey, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + + leafTemplate := &x509.Certificate{ + SerialNumber: big.NewInt(time.Now().UnixNano()), // Unique serial per leaf + Subject: pkix.Name{ + CommonName: "127.0.0.1", + }, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + DNSNames: []string{"localhost"}, + IPAddresses: []net.IP{net.ParseIP("127.0.0.1")}, + } + + leafDER, err := x509.CreateCertificate(rand.Reader, leafTemplate, testCATemplate, &leafKey.PublicKey, testCAKey) + require.NoError(t, err) + + leafCertPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: leafDER}) + leafKeyPEM := pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(leafKey)}) + + cert, err := tls.X509KeyPair(leafCertPEM, leafKeyPEM) + require.NoError(t, err) + + return &tls.Config{Certificates: []tls.Certificate{cert}, MinVersion: tls.VersionTLS12}, testCAPEM +} + func TestMailService_GetSMTPConfig_DBError(t *testing.T) { t.Parallel() @@ -870,8 +982,10 @@ func TestMailService_sendSTARTTLS_DialFailure(t *testing.T) { } func TestMailService_TestConnection_StartTLSSuccessWithAuth(t *testing.T) { - tlsConf, certPEM := newTestTLSConfig(t) - trustTestCertificate(t, certPEM) + t.Parallel() + + tlsConf, _ := newTestTLSConfigShared(t) + trustTestCertificate(t, nil) addr, cleanup := startMockSMTPServer(t, tlsConf, true, true) defer cleanup() @@ -919,8 +1033,10 @@ func TestMailService_TestConnection_NoneSuccess(t *testing.T) { } func TestMailService_SendEmail_STARTTLSSuccess(t *testing.T) { - tlsConf, certPEM := newTestTLSConfig(t) - trustTestCertificate(t, certPEM) + t.Parallel() + + tlsConf, _ := newTestTLSConfigShared(t) + trustTestCertificate(t, nil) addr, cleanup := startMockSMTPServer(t, tlsConf, true, true) defer cleanup() @@ -940,14 +1056,16 @@ func TestMailService_SendEmail_STARTTLSSuccess(t *testing.T) { Encryption: "starttls", })) + // With fixed cert trust, STARTTLS connection and email send succeed err = svc.SendEmail("recipient@example.com", "Subject", "Body") - require.Error(t, err) - assert.Contains(t, err.Error(), "STARTTLS failed") + require.NoError(t, err) } func TestMailService_SendEmail_SSLSuccess(t *testing.T) { - tlsConf, certPEM := newTestTLSConfig(t) - trustTestCertificate(t, certPEM) + t.Parallel() + + tlsConf, _ := newTestTLSConfigShared(t) + trustTestCertificate(t, nil) addr, cleanup := startMockSSLSMTPServer(t, tlsConf, true) defer cleanup() @@ -967,9 +1085,9 @@ func TestMailService_SendEmail_SSLSuccess(t *testing.T) { Encryption: "ssl", })) + // With fixed cert trust, SSL connection and email send succeed err = svc.SendEmail("recipient@example.com", "Subject", "Body") - require.Error(t, err) - assert.Contains(t, err.Error(), "SSL connection failed") + require.NoError(t, err) } func newTestTLSConfig(t *testing.T) (*tls.Config, []byte) { @@ -1025,10 +1143,9 @@ func newTestTLSConfig(t *testing.T) (*tls.Config, []byte) { func trustTestCertificate(t *testing.T, certPEM []byte) { t.Helper() - - caFile := t.TempDir() + "/ca-cert.pem" - require.NoError(t, os.WriteFile(caFile, certPEM, 0o600)) - t.Setenv("SSL_CERT_FILE", caFile) + // SSL_CERT_FILE is already set globally by TestMain. + // This function kept for API compatibility but no longer needs to set environment. + initTestCA(t) // Ensure CA is initialized (already done by TestMain, but safe to call) } func startMockSMTPServer(t *testing.T, tlsConf *tls.Config, supportStartTLS bool, requireAuth bool) (string, func()) { @@ -1038,21 +1155,60 @@ func startMockSMTPServer(t *testing.T, tlsConf *tls.Config, supportStartTLS bool require.NoError(t, err) done := make(chan struct{}) + var wg sync.WaitGroup + var connsMu sync.Mutex + var conns []net.Conn + go func() { defer close(done) - conn, acceptErr := listener.Accept() - if acceptErr != nil { - return + for { + conn, acceptErr := listener.Accept() + if acceptErr != nil { + // Expected shutdown path: listener closed + if errors.Is(acceptErr, net.ErrClosed) || strings.Contains(acceptErr.Error(), "use of closed network connection") { + return + } + // Unexpected accept error - signal test failure + t.Errorf("unexpected accept error: %v", acceptErr) + return + } + + connsMu.Lock() + conns = append(conns, conn) + connsMu.Unlock() + + wg.Add(1) + go func(c net.Conn) { + defer wg.Done() + defer func() { _ = c.Close() }() + handleSMTPConn(c, tlsConf, supportStartTLS, requireAuth) + }(conn) } - defer func() { _ = conn.Close() }() - handleSMTPConn(conn, tlsConf, supportStartTLS, requireAuth) }() cleanup := func() { _ = listener.Close() + + // Close all active connections to unblock handlers + connsMu.Lock() + for _, conn := range conns { + _ = conn.Close() + } + connsMu.Unlock() + + // Wait for accept-loop exit and active handlers with timeout + cleanupDone := make(chan struct{}) + go func() { + <-done + wg.Wait() + close(cleanupDone) + }() + select { - case <-done: + case <-cleanupDone: + // Success case <-time.After(2 * time.Second): + t.Errorf("cleanup timeout: server did not shut down cleanly") } } @@ -1066,21 +1222,60 @@ func startMockSSLSMTPServer(t *testing.T, tlsConf *tls.Config, requireAuth bool) require.NoError(t, err) done := make(chan struct{}) + var wg sync.WaitGroup + var connsMu sync.Mutex + var conns []net.Conn + go func() { defer close(done) - conn, acceptErr := listener.Accept() - if acceptErr != nil { - return + for { + conn, acceptErr := listener.Accept() + if acceptErr != nil { + // Expected shutdown path: listener closed + if errors.Is(acceptErr, net.ErrClosed) || strings.Contains(acceptErr.Error(), "use of closed network connection") { + return + } + // Unexpected accept error - signal test failure + t.Errorf("unexpected accept error: %v", acceptErr) + return + } + + connsMu.Lock() + conns = append(conns, conn) + connsMu.Unlock() + + wg.Add(1) + go func(c net.Conn) { + defer wg.Done() + defer func() { _ = c.Close() }() + handleSMTPConn(c, tlsConf, false, requireAuth) + }(conn) } - defer func() { _ = conn.Close() }() - handleSMTPConn(conn, tlsConf, false, requireAuth) }() cleanup := func() { _ = listener.Close() + + // Close all active connections to unblock handlers + connsMu.Lock() + for _, conn := range conns { + _ = conn.Close() + } + connsMu.Unlock() + + // Wait for accept-loop exit and active handlers with timeout + cleanupDone := make(chan struct{}) + go func() { + <-done + wg.Wait() + close(cleanupDone) + }() + select { - case <-done: + case <-cleanupDone: + // Success case <-time.After(2 * time.Second): + t.Errorf("cleanup timeout: server did not shut down cleanly") } } diff --git a/docs/issues/manual_test_smtp_mock_flakiness_fix.md b/docs/issues/manual_test_smtp_mock_flakiness_fix.md new file mode 100644 index 00000000..76a1d363 --- /dev/null +++ b/docs/issues/manual_test_smtp_mock_flakiness_fix.md @@ -0,0 +1,48 @@ +--- +title: Manual Test Plan - SMTP Mock Server Flakiness Fix +status: Open +priority: High +assignee: QA +labels: testing, backend, reliability +--- + +# Test Objective +Confirm the SMTP mock server flakiness fix improves mail test reliability without changing production mail behavior. + +# Scope +- In scope: test reliability for SMTP mock server flows used by backend mail tests. +- Out of scope: production SMTP sending behavior and user-facing mail features. + +# Prerequisites +- Charon repository is up to date. +- Backend test environment is available. +- Ability to run backend tests repeatedly. + +# Manual Scenarios + +## 1) Target flaky test repeated run +- [ ] Run `TestMailService_TestConnection_StartTLSSuccessWithAuth` repeatedly (at least 20 times). +- [ ] Record pass/fail count and any intermittent errors. + +## 2) Mail service targeted subset run +- [ ] Run mail service connection and send test subset once. +- [ ] Confirm no new intermittent failures appear in related tests. + +## 3) Race-focused verification +- [ ] Run targeted mail service tests with race detection enabled. +- [ ] Confirm no race warnings or hangs occur. + +## 4) Cleanup/shutdown stability check +- [ ] Repeat targeted runs and watch for stuck test processes or timeout behavior. +- [ ] Confirm test execution exits cleanly each run. + +# Expected Results +- Repeated target test runs complete with zero flaky failures. +- Related mail service test subset remains stable. +- No race detector findings for targeted scenarios. +- No hangs during test cleanup/shutdown. + +# Regression Checks (No Production Impact) +- [ ] Confirm only test reliability behavior changed; no production mail behavior changes are required. +- [ ] Confirm no production endpoints, settings, or user-facing mail flows are affected. +- [ ] Confirm standard backend test workflow still completes successfully after this fix. diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md index 39188bb3..d04a1c43 100644 --- a/docs/plans/current_spec.md +++ b/docs/plans/current_spec.md @@ -1,520 +1,308 @@ --- -post_title: "Current Spec: Discord Notify-Only Dispatch (No Legacy Fallback)" +post_title: "Current Spec: Stabilize Flaky SMTP STARTTLS+AUTH Unit Test" categories: - actions - testing - - security + - backend + - reliability tags: - - discord - - notifications - - notify-rollout - - migration - - e2e -summary: "Authoritative implementation plan for Discord Notify-only dispatch with explicit retirement of legacy fallback ambiguity, deterministic migration behavior, and proof-oriented testing." -post_date: 2026-02-21 + - go + - smtp + - starttls + - unit-tests + - ci-stability +summary: "Implementation plan to remove CI flakiness in MailService STARTTLS+AUTH tests by hardening mock SMTP server lifecycle and connection handling in test helpers only." +post_date: 2026-02-22 --- -## Active Plan: Discord Notify-Only Dispatch (No Legacy Fallback) +## Active Plan: Stabilize Flaky SMTP STARTTLS+AUTH Unit Test -Date: 2026-02-21 +Date: 2026-02-22 Status: Active and authoritative -Scope Type: Product implementation plan (frontend + backend + migration + verification) +Scope Type: Test reliability hardening (backend unit tests only) ## 1) Introduction -This plan defines the implementation to satisfy the updated requirement direction: +This plan addresses flakiness in backend unit test +`TestMailService_TestConnection_StartTLSSuccessWithAuth` by improving mock SMTP +test helpers used by `backend/internal/services/mail_service_test.go`. -- Do not rely on legacy fallback engine for Discord dispatch. -- Discord payloads saved in DB must be sent by Notify engine directly. -- Remove ambiguity where delivery success could still come through legacy path. -- Keep rollout scope focused on Discord-only cutover without unrelated refactors. +Root-cause hypothesis to validate and fix: -This plan is intentionally scoped to Discord-only dispatch/runtime behavior, fallback retirement, and auditable proof that no hidden legacy path remains. +- Existing mock SMTP server helpers accept only one connection, then exit. +- STARTTLS + AUTH flows in `net/smtp` are negotiation-heavy and can involve + additional command/connection behavior under CI timing variance. +- Single-accept test server behavior creates race-prone shutdown windows. + +Goal: + +- Make test helper servers robust and deterministic by accepting connections in + a loop until cleanup, handling each connection in its own goroutine, and + enforcing deterministic shutdown that waits for accept-loop exit and active + handler goroutines (explicit per-connection synchronization via waitgroup). + +Non-goal: + +- No production mail service behavior changes. ## 2) Research Findings -### 2.1 Frontend provider-type control points (exact files) +### 2.1 Current flaky target and helper topology -- `frontend/src/pages/Notifications.tsx` - - Provider type is normalized to Discord (`DISCORD_PROVIDER_TYPE`) before submit/test/preview. - - Provider dropdown currently renders Discord-only option, and request payloads are forced to Discord. -- `frontend/src/api/notifications.ts` - - `withDiscordType(...)` normalizes outbound payload type to Discord. - - Interface remains `type: string`, requiring backend to remain authoritative for no-fallback invariants. -- `frontend/src/pages/__tests__/Notifications.test.tsx` - - Must verify Discord-only create/edit/test behavior and avoid implying fallback rescue semantics. -- `tests/settings/notifications.spec.ts` - - Includes coverage for Discord behavior and deprecated non-Discord rendering semantics. +Primary target test: -### 2.2 Backend/provider type validation/default/control points (exact files) +- `backend/internal/services/mail_service_test.go` + - `TestMailService_TestConnection_StartTLSSuccessWithAuth` -- `backend/internal/api/handlers/notification_provider_handler.go` - - Enforces Discord-only on create/update and blocks enabling deprecated non-Discord providers. -- `backend/internal/services/notification_service.go` - - Enforces Discord-only for provider lifecycle/test and skips non-Discord dispatch. - - Contains explicit legacy-fallback-disabled sentinel/error path that must stay fail-closed. - - Includes `EnsureNotifyOnlyProviderMigration(...)` for deterministic cutover state. -- `backend/internal/services/enhanced_security_notification_service.go` - - `SendViaProviders(...)` dispatches only to Discord providers in current rollout stage. - - `dispatchToProvider(...)` still has non-Discord branches and is a key ambiguity-removal target. -- `backend/internal/models/notification_provider.go` - - `Type` comment still lists non-Discord types. - - `Engine` and URL comments still include retired legacy-notifier URL semantics. -- `backend/internal/notifications/engine.go` - - Defines a retired legacy notification engine constant. -- `backend/internal/notifications/router.go` - - Legacy engine branch remains. -- `backend/internal/notifications/feature_flags.go` - - Retired fallback flag key remains in feature-flag wiring. -- `backend/internal/api/handlers/feature_flags_handler.go` - - Still exposes a retired legacy fallback key in defaults and update path guards. -- `backend/internal/api/routes/routes.go` - - Notification services are wired via both `NotificationService` and - `EnhancedSecurityNotificationService`; migration path still references transitional behavior. +Helper functions in same file (current behavior): -### 2.3 Exact backend/frontend files impacted by fallback removal +- `startMockSMTPServer(...)` + - Single `Accept()` call, single connection handler, then goroutine exits. +- `startMockSSLSMTPServer(...)` + - Single `Accept()` call, single connection handler, then goroutine exits. +- `handleSMTPConn(...)` + - Handles SMTP protocol conversation for each accepted connection. -Backend files: +### 2.2 Flakiness vector summary -- `backend/internal/notifications/engine.go` -- `backend/internal/notifications/router.go` -- `backend/internal/notifications/feature_flags.go` -- `backend/internal/api/handlers/feature_flags_handler.go` -- `backend/internal/services/notification_service.go` -- `backend/internal/services/enhanced_security_notification_service.go` -- `backend/internal/api/handlers/notification_provider_handler.go` -- `backend/internal/api/routes/routes.go` -- `backend/internal/models/notification_provider.go` +- Current single-accept model is fragile for tests where client behavior may + include additional negotiation/timing paths. +- Cleanup currently closes listener and waits on a done signal, but done is + tied to a single accept goroutine rather than a full server lifecycle. +- Under CI contention, this can surface nondeterministic failures in tests + expecting reliable STARTTLS + AUTH handshake behavior. -Frontend files: +### 2.3 Repo config review requested by user -- `frontend/src/pages/Notifications.tsx` -- `frontend/src/api/notifications.ts` +Reviewed files: -### 2.4 Dependency/runtime current state (legacy notifier) +- `.gitignore` +- `codecov.yml` +- `.dockerignore` +- `Dockerfile` -- `backend/go.mod`: no active legacy notifier dependency. -- `backend/go.sum`: no active legacy notifier entry found. -- Direct import/call in backend source currently not present. -- Legacy strings/constants/tests remain in runtime code paths and tests. -- Historical artifacts and cached module content may contain legacy notifier references but are not - production dependencies. +Conclusion for this scope: -### 2.5 Firefox failing specs context - -- Artifact `playwright-output/manual-dns-targeted/.last-run.json` contains 16 failed test IDs only; suite attribution must come from Playwright report/traces, not this file alone. -- Separate summary `FIREFOX_E2E_FIXES_SUMMARY.md` references prior Firefox remediation work. -- User-reported “5 failing Firefox specs” is treated as active triage input and must be - classified at execution time into in-scope vs out-of-scope suites (defined below). +- No changes required for this test-only helper stabilization. +- Existing ignore/coverage/docker config does not block this plan. ## 3) Requirements (EARS) -- R1: WHEN opening notification provider create/edit UI, THE SYSTEM SHALL offer only `discord` - in provider type selection for this rollout. -- R2: WHEN create/update/test notification provider API endpoints receive a non-Discord type, - THE SYSTEM SHALL reject with deterministic validation error. -- R3: WHILE legacy non-Discord providers exist in DB, THE SYSTEM SHALL handle them per explicit - compatibility policy without enabling dispatch. -- R4: WHEN verifying runtime and dependency state, THE SYSTEM SHALL produce auditable evidence - that the retired legacy notifier is not installed, linked, or used by active runtime code paths. -- R5: IF additional services are introduced in future, THEN THE SYSTEM SHALL enable them - behind explicit validation gates and staged rollout controls. -- R6: WHEN Discord notifications are delivered successfully, THE SYSTEM SHALL provide evidence that Notify path handled dispatch and not a legacy fallback path. -- R7: IF rollback is required, THEN THE SYSTEM SHALL use explicit rollback controls without reintroducing hidden legacy fallback behavior. +- R1: WHEN mock SMTP test helpers are started, THE SYSTEM SHALL accept + connections in a loop until explicit cleanup closes the listener. +- R2: WHEN a connection is accepted, THE SYSTEM SHALL handle it in its own + goroutine so one slow session cannot block new accepts. +- R3: WHEN cleanup is invoked, THE SYSTEM SHALL close the listener and await + deterministic server-loop termination plus completion of active connection + handlers (done channel + waitgroup synchronization). +- R4: IF cleanup wait exceeds timeout, THEN THE SYSTEM SHALL report a failure + signal and return without hanging test execution. +- R5: WHEN this fix is implemented, THE SYSTEM SHALL keep production code + untouched and restrict changes to test helper scope. +- R6: WHEN targeted backend tests run, THE SYSTEM SHALL pass reliably in local + and CI-like conditions. ## 4) Technical Specification -### 4.1 Discord-only enforcement strategy (defense in depth) +### 4.1 Exact target files and functions (minimal diff scope) -#### Layer A: UI/input restriction +Primary file to edit: -- `frontend/src/pages/Notifications.tsx` - - Replace provider type dropdown options with a single option: `discord`. - - Keep visible label semantics intact for accessibility. - - Prevent stale-form values from preserving non-Discord types in edit mode. +- `backend/internal/services/mail_service_test.go` -#### Layer B: Frontend request contract hardening +Functions in scope: -- `frontend/src/api/notifications.ts` - - Frontend request contract SHALL submit `type` as `discord` only; client must reject or normalize any stale non-discord value before submit. +- `startMockSMTPServer(t *testing.T, tlsConf *tls.Config, supportStartTLS bool, requireAuth bool) (string, func())` +- `startMockSSLSMTPServer(t *testing.T, tlsConf *tls.Config, requireAuth bool) (string, func())` -#### Layer C: API handler validation (authoritative gate) +Related function (read-only unless strictly necessary): -- `backend/internal/api/handlers/notification_provider_handler.go` - - Enforce `req.Type == "discord"` for all create/update paths (not only security-event cases). - - Return stable error code/message for non-Discord type attempts. +- `handleSMTPConn(conn net.Conn, tlsConf *tls.Config, supportStartTLS bool, requireAuth bool)` -#### Layer D: Service-layer invariant +Out of scope: -- `backend/internal/services/notification_service.go` - - Add service-level type guard for create/update/test/send paths to reject non-Discord. - - Keep this guard even if handler validation exists (defense in depth). +- `backend/internal/services/mail_service.go` +- Any non-mail-service test files. -#### Layer E: Dispatch/runtime invariant +### 4.2 Planned helper behavior changes -- `backend/internal/services/enhanced_security_notification_service.go` - - Maintain dispatch as Discord-only. - - Remove non-Discord dispatch branches from active runtime logic for this phase. +For both helper server starters: -### 4.2 Feature-flag policy change for legacy fallback +1. Replace single `Accept()` with accept loop: + - Continue accepting until listener close returns an error. + - Treat listener-close accept errors as normal shutdown path. +2. Spawn per-connection handler goroutine: + - Each accepted connection handled independently. + - Connection closed within goroutine after handling. +3. Deterministic lifecycle signaling: + - Keep `done` channel signaling tied to server accept-loop exit. + - Ensure exactly one close of `done` from server goroutine. + - Track active connection-handler goroutines with explicit waitgroup sync. +4. Cleanup contract: + - `cleanup()` closes listener first. + - Wait for accept-loop exit and active handler completion with bounded timeout (`2s` currently acceptable). + - Never block indefinitely. + - Timeout path is a test failure signal (non-hanging), not a silent pass. -Policy for `feature.notifications.legacy.fallback_enabled`: +### 4.3 Concurrency and race safety notes -- Deprecate now. -- Force false in reads and writes. -- Immutable false: never true at runtime. -- Reject `true` update attempts with deterministic API error. -- Keep compatibility aliases read-only and forced false during transition. -- Remove `feature.notifications.legacy.fallback_enabled` from all public/default flag responses in PR-3 of this plan, and remove all retired env aliases in the same PR. No compatibility-window extension is allowed without a new approved spec revision. +- Accept-loop ownership: + - One goroutine owns listener accept loop and is the only goroutine that may + close `done`. +- Connection handler isolation: + - One goroutine per accepted connection; no shared mutable state required for + protocol behavior in this helper. +- Per-connection synchronization: + - A waitgroup must increment before each handler goroutine starts and must + decrement on handler exit so cleanup can deterministically wait for active + handlers to finish. +- Listener close semantics: + - `listener.Close()` from cleanup is expected to break `Accept()`. + - Exit condition should avoid noisy test failures on intentional close. +- Cleanup timeout behavior: + - Timeout remains defensive to prevent suite hangs under pathological CI + resource starvation. + - Timeout branch must report failure (e.g., `t.Errorf`/`t.Fatalf` policy) and + return; no panic and no silent success. -Migration implications: +### 4.4 Error handling policy in helpers -- Existing settings row for legacy fallback is normalized to false if present. -- Transition period allows key visibility as false-only for compatibility. -- Final state keeps this key out of runtime dispatch decisions; there is no fallback runtime path. +- Treat only expected shutdown accept errors (listener close path) as normal. +- Surface unexpected `Accept()` failures as test failure signals. +- Keep helper logic simple and deterministic; avoid over-engineered retry logic. -Legacy fallback control surface policy: any API write attempting to set legacy fallback true SHALL return `400` with code `LEGACY_FALLBACK_REMOVED`; any legacy fallback read endpoint/field retained temporarily SHALL return hard-false only and SHALL be removed in PR-3. No hidden route or config path may alter dispatch away from Notify, and Notify failures SHALL NOT reroute to a fallback engine. +## 5) Implementation Plan -### 4.3 Runtime dispatch contract (Discord-only Notify path) +### Phase 1: Testing protocol sequencing note -Contract: +- Policy remains E2E-first globally. +- Explicit exception rationale for this change: scope is backend test-helper-only + (`mail_service_test.go`) with no production/frontend/runtime behavior delta, + so targeted backend test-first verification is authorized for this plan. +- Mandatory preflight before unit/coverage steps: + - `bash scripts/local-patch-report.sh` + - Artifacts: `test-results/local-patch-report.md` and `test-results/local-patch-report.json` -- Discord dispatch source of truth is Notify engine path (`notify_v1`) only. -- `NotificationService.SendExternal(...)` and `EnhancedSecurityNotificationService.SendViaProviders(...)` must fail closed for non-Discord delivery attempts. -- No implicit retry/reroute to legacy engine on Notify failure. -- Any successful Discord delivery during rollout must be attributable to Notify path execution. +### Phase 2: Baseline confirmation and failure reproduction context -#### Layer F: Optional DB invariant (recommended) +- Capture current helper behavior and flaky test target: + - `go test ./backend/internal/services -run TestMailService_TestConnection_StartTLSSuccessWithAuth -count=1 -v` -- Add migration with DB-level check constraint for notification provider type to `discord` - for newly created/updated rows in this rollout. - - If DB constraint is deferred, service/API guards remain mandatory. +### Phase 3: Helper lifecycle hardening -### 4.4 Data migration and compatibility policy for existing non-Discord providers +- Update `startMockSMTPServer` and `startMockSSLSMTPServer` to loop accept. +- Add per-connection goroutine handling. +- Preserve/strengthen deterministic cleanup using listener close + done wait + + per-connection waitgroup completion. -Policy decision for this rollout: **Deprecate + read-only visibility + non-dispatch**. +### Phase 4: Targeted validation -- Existing non-Discord rows are preserved for audit/history but cannot be newly created - or converted back to active dispatch in this phase. -- Compatibility behavior: - - `enabled` forced false for non-Discord providers during migration reconciliation. - - `migration_state` remains failed/deprecated for non-supported providers. - - UI list behavior: render non-Discord rows as deprecated read-only rows with a clear non-dispatch badge. -- API contract: non-Discord existing rows are always non-dispatch, non-enable, deletable, and return deterministic validation error on enable/type mutation attempts. -- Explicitly block enable and type mutation attempts for non-Discord rows via API with deterministic validation errors. -- Allow deletion of deprecated rows. +- Re-run target test repeatedly to validate stability: + - `go test ./backend/internal/services -run TestMailService_TestConnection_StartTLSSuccessWithAuth -count=20` +- Run mail service test subset: + - `go test ./backend/internal/services -run TestMailService_ -count=1` +- Run race-focused targeted validation: + - `go test -race ./backend/internal/services -run 'TestMailService_(TestConnection|Send)' -count=1` -Migration safety requirements: +### Phase 5: Backend coverage validation -- Migration SHALL be idempotent. -- Migration SHALL execute within an explicit transaction boundary. -- Migration SHALL require a pre-migration backup before mutating provider rows. -- Migration SHALL define and document a rollback procedure. -- Migration SHALL persist audit log fields for every mutated provider row (including provider identifier, previous values, new values, mutation timestamp, and operation identifier). +- Run backend coverage task/script required by repo workflow: + - Preferred script: `scripts/go-test-coverage.sh` + - VS Code equivalent: backend coverage task if available. -Migration implementation candidates: +## 6) Validation Matrix -- `backend/internal/services/notification_service.go` (`EnsureNotifyOnlyProviderMigration`) -- `backend/internal/services/enhanced_security_notification_service.go` -- new migration file under backend migration path if schema/data migration is introduced. +| Validation Item | Command / Task | Scope | Pass Criteria | +|---|---|---|---| +| Targeted flaky test | `go test ./backend/internal/services -run TestMailService_TestConnection_StartTLSSuccessWithAuth -count=20` | Direct flaky test | No failures across repeated runs | +| Mail service subset | `go test ./backend/internal/services -run TestMailService_ -count=1` | Nearby regression safety | All selected tests pass | +| Race-focused targeted tests | `go test -race ./backend/internal/services -run 'TestMailService_(TestConnection|Send)' -count=1` | Concurrency/race safety | No race reports; tests pass | +| Package sanity | `go test ./backend/internal/services -count=1` | Service package confidence | Package tests pass | +| Backend coverage gate | `scripts/go-test-coverage.sh` | Repo-required backend coverage check | Meets configured minimum threshold (85% default) | + +Notes: + +- E2E-first protocol remains project-wide policy. This plan uses the explicit + backend-only targeted-test exception because scope is confined to test helper + internals with no production/UI behavior changes. +- Local patch report preflight is required before unit/coverage gates. + +## 7) Risk Assessment + +Risk level: Low. + +- Change type: test-only. +- Production code: untouched. +- Runtime behavior: unchanged for shipped binary. +- Primary risk: helper lifecycle bug causing test hangs. +- Mitigation: bounded cleanup timeout, accept-loop exit on listener close, + focused repeated-run validation. + +## 8) PR Slicing Strategy -### 4.5 Legacy notifier retirement/removal scope +Decision: Single PR. -Targets to remove from active runtime/config surface: +Rationale: -- `backend/internal/notifications/engine.go` legacy engine constant -- `backend/internal/notifications/router.go` legacy engine branch -- `backend/internal/notifications/feature_flags.go` legacy flag constant -- `backend/internal/api/handlers/feature_flags_handler.go` retired legacy flag exposure -- `backend/internal/models/notification_provider.go` retired legacy notifier comments/engine semantics -- `backend/internal/services/notification_service.go` legacy naming/comments/hooks -- `backend/internal/services/enhanced_security_notification_service.go` non-Discord dispatch branches in `dispatchToProvider` -- `frontend/src/pages/Notifications.tsx` and `frontend/src/api/notifications.ts` to keep Discord-only UX/request contract aligned with backend fallback retirement +- Extremely small scope (one test file, two helper functions). +- No cross-domain dependencies. +- Easier review and rollback. -Out of scope for blocking completion: +### PR-1 (single slice) -- Archived docs/history artifacts under `docs/plans/archive/**` and similar archive/report folders. +- Scope: + - `backend/internal/services/mail_service_test.go` helper lifecycle updates. +- Dependencies: + - None. +- Validation gates: + - Validation matrix in Section 6. +- Rollback contingency: + - Revert single PR if instability increases. -## 5) Dependency and Runtime Audit Plan (verify Discord-only runtime) +## 9) Config File Review Outcome -Run and record all commands in implementation evidence. - -Task-aligned proof gates (mandatory): - -- Run task `Security: Verify SBOM` and capture output evidence. -- Run task `Security: Scan Docker Image (Local)` and capture output evidence. -- Store reproducibility evidence under `test-results/notify-rollout-audit/` (task outputs, command transcripts, and generated reports). - -### 5.1 Dependency manifests - -- `cd /projects/Charon/backend && go mod graph | grep -i "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" || true` -- `grep -i "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" /projects/Charon/backend/go.mod /projects/Charon/backend/go.sum || true` -- `grep -i "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" /projects/Charon/package-lock.json /projects/Charon/frontend/package-lock.json 2>/dev/null || true` - -### 5.2 Source/runtime references (non-archive) - -- `grep -RIn --exclude-dir=.git --exclude-dir=.cache --exclude-dir=docs --exclude-dir=playwright-output --exclude-dir=test-results "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" /projects/Charon/backend /projects/Charon/frontend || true` - -### 5.3 Built artifact/container runtime checks - -- Build image: `docker build -t charon:discord-only-audit /projects/Charon` -- Module metadata in binary: - - `docker run --rm charon:discord-only-audit sh -lc 'go version -m /app/charon 2>/dev/null | grep -i "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" || true'` -- Runtime filesystem/strings scan: - - `docker run --rm charon:discord-only-audit sh -lc 'find /app /usr/local/bin -maxdepth 4 -type f | xargs grep -Iin "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" 2>/dev/null || true'` - -### 5.4 Documentation references (active docs only) - -- `grep -RIn "legacy_shoutrrr|feature.notifications.legacy.fallback_enabled|EngineLegacy|legacySendFunc|shoutrrr" /projects/Charon/README.md /projects/Charon/ARCHITECTURE.md /projects/Charon/docs/features /projects/Charon/docs/security* || true` - -Audit pass criteria: - -- No active dependency references. -- No active runtime/path references. -- No active user-facing docs claiming retired multi-service notifier behavior for this rollout. - -## 6) Implementation Plan (phased) - -### Phase 1: Docker E2E rebuild decision and environment prep - -- Apply repo testing-rule decision for E2E container rebuild based on changed paths. -- Rebuild/start E2E environment when required before any test execution. - -### Phase 2: E2E first and failing-spec triage baseline - -- Capture exact Firefox failures for current branch: - - `cd /projects/Charon && npx playwright test --project=firefox` -- Classify failing specs: - - **In-scope**: notification provider tests (`tests/settings/notifications.spec.ts` and related). - - **Out-of-scope**: manual DNS provider suites unless directly regressed by this change. -- If user-reported 5 failing specs are not notifications, create/attach separate tracking item and - do not block Discord-only implementation on unrelated suite failures. - -### Phase 3: Local Patch Report preflight (mandatory before unit/coverage) - -- Run local patch coverage preflight and generate required artifacts: - - `test-results/local-patch-report.md` - - `test-results/local-patch-report.json` - -### Phase 4: Backend hard gate (authoritative) - -- Implement Discord-only validation in API handlers and service layer. -- Remove/retire active legacy engine and flag exposure from runtime path. -- Apply migration policy for existing non-Discord rows. - -### Phase 5: Conditional GORM security check gate - -- Run GORM security scanner in check mode when backend model/database trigger paths are modified. -- Gate completion on zero CRITICAL/HIGH findings for triggered changes. - -### Phase 6: Frontend Discord-only UX - -- Restrict dropdown/options to Discord only. -- Align frontend type contract and submission behavior. -- Ensure edit/create forms cannot persist stale non-Discord values. - -### Phase 7: Legacy notifier retirement cleanup + audit evidence - -- Remove active runtime legacy references listed in Section 4.5. -- Execute dependency/runtime/doc audit commands in Section 5 and save evidence. - -### Phase 8: Validation and release readiness - -- Run validation sequence in required order (Section 7). -- Confirm rollout note: additional providers remain disabled pending per-provider validation PRs. - -## 7) Targeted Testing Strategy - -Mandatory repo QA/test order for this plan: - -1. Docker E2E rebuild decision and required rebuild/start. -2. E2E first (Firefox baseline + targeted notification suite). -3. Local Patch Report preflight artifact generation. -4. Conditional GORM security check gate (`--check` semantics when trigger paths change). -5. CodeQL CI-aligned scans (Go and JS). -6. Coverage gates (backend/frontend coverage thresholds and patch coverage review). -7. Frontend type-check and backend/frontend build verification. - -### 7.1 Backend tests (in scope) - -- `backend/internal/api/handlers/notification_provider_blocker3_test.go` - - Expand from security-event-only gating to global Discord-only gating. -- `backend/internal/services/notification_service_test.go` - - Ensure non-Discord create/update/test/send rejection. -- `backend/internal/notifications/router_test.go` - - Prove legacy fallback decision remains disabled and cannot be toggled into active use. -- `backend/internal/services/notification_service_discord_only_test.go` - - Prove Discord-only dispatch behavior and no successful fallback path. -- `backend/internal/services/security_notification_service_test.go` - - Prove security notifications route through Discord-only provider dispatch in rollout mode. -- Add/adjust tests for migration policy behavior on existing non-Discord rows. - -### 7.1.1 Notify-vs-fallback evidence points (required) - -- Unit proof SHALL assert legacy dispatch hook call-count is exactly 0 for all Discord-success and Discord-failure scenarios. -- Unit/integration proof SHALL assert Notify dispatch hook/request path is called for Discord deliveries (including `Content-Type: application/json` and deterministic provider-type guard behavior). -- Negative proof SHALL assert non-Discord create/update/test attempts fail with stable error codes and cannot produce a successful delivery event. -- CI gate SHALL fail if any test demonstrates delivery success through a legacy fallback path. - -### 7.2 Frontend unit tests (in scope) - -- `frontend/src/pages/__tests__/Notifications.test.tsx` - - Assert only Discord option exists in provider-type select. - - Remove/adjust non-Discord create assumptions. - -### 7.3 E2E tests (in scope) - -- `tests/settings/notifications.spec.ts` - - Keep Discord create/edit/test flows. - - Replace or retire non-Discord CRUD expectations for this phase. - -### 7.4 Firefox failing specs handling (required context) - -- If the current failing 5 specs are notification-related, they are in scope and must be fixed - within this PR. -- If they are unrelated (for example manual-dns-provider failures), track separately and avoid - scope creep; do not mark as fixed under this provider-type change. - -### 7.5 Suggested verification command set - -- `cd /projects/Charon && npx playwright test --project=firefox tests/settings/notifications.spec.ts` -- `cd /projects/Charon && npx playwright test --project=firefox --grep "Notification Providers"` -- `cd /projects/Charon && bash scripts/local-patch-report.sh` -- `cd /projects/Charon && ./scripts/scan-gorm-security.sh --check` (conditional on trigger paths) -- `cd /projects/Charon && pre-commit run codeql-go-scan --all-files` -- `cd /projects/Charon && pre-commit run codeql-js-scan --all-files` -- `cd /projects/Charon && go test ./backend/internal/api/handlers -run Blocker3 -v` -- `cd /projects/Charon && go test ./backend/internal/services -run Notification -v` -- `cd /projects/Charon && scripts/go-test-coverage.sh` -- `cd /projects/Charon/frontend && npm test -- Notifications` -- `cd /projects/Charon && scripts/frontend-test-coverage.sh` -- `cd /projects/Charon/frontend && npm run type-check && npm run build` -- `cd /projects/Charon/backend && go build ./...` - -## 8) Review of `.gitignore`, `.dockerignore`, `codecov.yml`, `Dockerfile` - -### Findings and required updates decision +Reviewed for this request: - `.gitignore` - - No mandatory change required for feature behavior. - - Optional: ensure any new audit evidence artifacts (if added) are either committed intentionally - or ignored consistently. -- `.dockerignore` - - No mandatory change required for Discord-only behavior. - - If adding migration/audit helper scripts needed at build time, ensure they are not excluded. - `codecov.yml` - - No mandatory change required. - - If new test files are added, verify they are not accidentally ignored by broad patterns. +- `.dockerignore` - `Dockerfile` - - No direct package-level retired notifier dependency is declared. - - No mandatory Dockerfile change for Discord-only behavior unless runtime audit shows residual - retired notifier-linked binaries or references. -## 9) PR Slicing Strategy +Suggested updates: -Decision: **Multiple PRs (3 slices)** for safer rollout and easier rollback. +- None required for this scope. +- Revisit only if implementation introduces new generated artifacts or test + output paths not currently handled (not expected here). -### PR-1: Backend guardrails + migration policy +## 10) Acceptance Criteria -- Scope: - - API/service Discord-only validation. - - Existing non-Discord compatibility policy implementation. - - Legacy flag/engine runtime retirement in backend. -- Primary files: - - `backend/internal/api/handlers/notification_provider_handler.go` - - `backend/internal/services/notification_service.go` - - `backend/internal/services/enhanced_security_notification_service.go` - - `backend/internal/notifications/engine.go` - - `backend/internal/notifications/router.go` - - `backend/internal/notifications/feature_flags.go` - - `backend/internal/api/handlers/feature_flags_handler.go` - - `backend/internal/models/notification_provider.go` -- Gate: - - Backend tests pass, API rejects non-Discord, migration behavior verified. -- Rollback: - - Revert PR-1 if existing-provider compatibility regresses. - - Do not reintroduce fallback by setting legacy fallback flag true or restoring legacy dispatch routing as a hotfix. +- AC1: `startMockSMTPServer` accepts in loop until cleanup and no longer exits + after a single connection. +- AC2: `startMockSSLSMTPServer` accepts in loop until cleanup and no longer + exits after a single connection. +- AC3: Each accepted connection is handled in its own goroutine. +- AC4: Cleanup closes listener and uses done-channel plus per-connection + waitgroup synchronization with bounded timeout. +- AC5: Unexpected `Accept()` failures are surfaced as test failure signals; + expected listener-close shutdown errors are treated as normal. +- AC6: `TestMailService_TestConnection_StartTLSSuccessWithAuth` passes reliably + under repeated runs. +- AC7: Targeted race validation for mail-service tests passes with `go test -race`. +- AC8: Cleanup timeout path reports failure and returns (non-hanging), never + silent pass. +- AC9: Backend coverage script/task completes successfully at configured + threshold. +- AC10: No production file changes are included in the implementation PR. -### PR-2: Frontend Discord-only UX and tests +## 11) Definition of Done -- Scope: - - Dropdown/UI restriction to Discord only. - - Frontend test updates for new provider-type policy. -- Primary files: - - `frontend/src/pages/Notifications.tsx` - - `frontend/src/api/notifications.ts` - - `frontend/src/pages/__tests__/Notifications.test.tsx` - - `tests/settings/notifications.spec.ts` -- Gate: - - Dependency: PR-2 entry is blocked until PR-1 is merged. - - Firefox notifications E2E targeted suite passes. - - Exit: frontend request contract submits `type=discord` only and backend rejects non-Discord mutations. -- Rollback: - - Revert PR-2 without reverting backend safeguards. - - No rollback action may re-enable hidden fallback behavior. - -### PR-3: Legacy notifier retirement evidence + docs alignment - -- Scope: - - Execute audit commands, capture evidence, update active docs if needed. -- Primary files: - - `README.md`, `ARCHITECTURE.md`, relevant `docs/features/**` and `docs/security*` files (only if active references remain). -- Gate: - - Dependency: PR-3 entry is blocked until PR-2 is merged. - - Entry blocker: runtime/dependency audit must pass before PR-3 can proceed. - - Dependency/runtime/doc audit pass criteria met. - - Exit: docs must reflect final backend/frontend state after PR-1 and PR-2 behavior. -- Rollback: - - Revert docs-only evidence changes without impacting runtime behavior. - - Preserve explicit no-fallback runtime guarantees from earlier slices. - -## 10) Rollback Strategy (No Hidden Fallback Ambiguity) - -Allowed rollback mechanisms: - -1. Revert entire rollout slice(s) to previous known-good commit/tag. -2. Pause notifications by disabling delivery globally while keeping legacy fallback permanently disabled (`feature.notifications.legacy.fallback_enabled=false`, immutable). Rollback SHALL revert to a prior release tag/DB snapshot only; it SHALL NOT re-enable or simulate legacy fallback. -3. Restore DB snapshot if migration state must be reverted. - -Disallowed rollback mechanisms: - -- Re-enable `feature.notifications.legacy.fallback_enabled` for runtime dispatch. -- Reintroduce runtime `EngineLegacy` or implicit fallback retry paths. -- Apply emergency patches that create dual-path ambiguity for Discord success attribution. - -## 11) Risks and Mitigations - -- Risk: Existing non-Discord providers silently break user expectations. - - Mitigation: explicit deprecated/read-only policy and clear API errors. -- Risk: Hidden runtime legacy paths remain while UI appears Discord-only. - - Mitigation: backend-first enforcement and runtime audit commands. -- Risk: Scope creep from unrelated Firefox failures. - - Mitigation: strict in-scope/out-of-scope classification in Phase 1. -- Risk: Future provider rollout pressure bypasses validation discipline. - - Mitigation: staged enablement policy with per-provider validation gate. - -## 12) Acceptance Criteria - -1. Provider type dropdown in notifications UI exposes only Discord. -2. API create/update/test rejects any non-Discord provider type with deterministic error. -3. Service/runtime dispatch is Discord-only for this rollout and attributable to Notify path. -4. Existing non-Discord DB rows follow explicit compatibility policy (deprecated read-only and non-dispatch). -5. Retired notifier references are absent from active dependency manifests (`go.mod`, `go.sum`, package lock files). -6. Runtime audit confirms no active retired-notifier install/link/reference in executable/runtime paths. -7. Active docs no longer claim multi-service notifier behavior for the current rollout. -8. Firefox failing-spec triage clearly distinguishes in-scope notification failures from separately tracked suites. -9. `.gitignore`, `.dockerignore`, `codecov.yml`, and `Dockerfile` are reviewed and updated only if required by actual implementation deltas. -10. Additional provider services remain disabled pending one-by-one validated rollout PRs. -11. Legacy fallback feature-flag behavior is force-false and non-revivable through standard API operations. -12. Rollback procedures preserve no-fallback ambiguity constraints. - -## 13) Handoff to Supervisor - -This spec is ready for Supervisor review and implementation assignment. - -Review focus: - -- Verify defense-in-depth layering is complete (UI + API + service + runtime). -- Confirm migration policy is explicit and reversible. -- Confirm audit command set is sufficient to prove legacy notifier retirement. -- Confirm PR slicing minimizes risk and isolates rollback paths. +- Planned helper changes are implemented exactly in scoped functions. +- Cleanup deterministically waits for accept-loop exit and active handlers via + done + waitgroup synchronization. +- Only expected listener-close shutdown accept errors are non-fatal; unexpected + accept errors fail tests. +- Cleanup timeout is reported as failure signal and cannot pass silently. +- Validation matrix passes. +- Diff is limited to test helper scope in `mail_service_test.go`. +- No updates to `.gitignore`, `codecov.yml`, `.dockerignore`, or `Dockerfile` + are required or included. diff --git a/docs/reports/qa_report.md b/docs/reports/qa_report.md index 08438179..9f5cdb21 100644 --- a/docs/reports/qa_report.md +++ b/docs/reports/qa_report.md @@ -97,3 +97,47 @@ Scope: Final focused QA/security gate for notifications/security-event UX change - Manual test plan (PR-1 + PR-2): `docs/issues/manual_test_provider_security_notifications_pr1_pr2.md` - Existing focused QA evidence in this report remains the baseline for automated validation. + +## QA/Security Validation Report - SMTP Flaky Test Fix (Test-Only Backend Change) + +Date: 2026-02-22 +Repository: /projects/Charon +Scope: Validate SMTP STARTTLS test-stability fix without production behavior change. + +### Scope Verification + +| Check | Status | Evidence | +|---|---|---| +| Changed files are test-only (no production code changes) | PASS | `git status --short` shows only `backend/internal/services/mail_service_test.go` and `docs/plans/current_spec.md` modified. | +| Production behavior unchanged by diff scope | PASS | No non-test backend/service implementation files modified. | + +### Required Validation Results + +| # | Command | Status | Evidence Snippet | +|---|---|---|---| +| 1 | `go test ./backend/internal/services -run TestMailService_TestConnection_StartTLSSuccessWithAuth -count=20` | PASS | `ok github.com/Wikid82/charon/backend/internal/services 1.403s` | +| 2 | `go test -race ./backend/internal/services -run 'TestMailService_(TestConnection|Send)' -count=1` | PASS | `ok github.com/Wikid82/charon/backend/internal/services 1.270s` | +| 3 | `bash scripts/go-test-coverage.sh` | PASS | `Statement coverage: 86.1%` / `Line coverage: 86.4%` / `Coverage requirement met` | +| 4 | `pre-commit run --all-files` | PASS | All hooks passed, including `golangci-lint (Fast Linters - BLOCKING)`, `Go Vet`, `Frontend TypeScript Check`, `Frontend Lint (Fix)`. | + +### Additional QA Context + +| Check | Status | Evidence | +|---|---|---| +| Local patch coverage preflight artifacts generated | PASS | `bash scripts/local-patch-report.sh` produced `test-results/local-patch-report.md` and `test-results/local-patch-report.json`. | +| Patch coverage threshold warning (advisory) | WARN (non-blocking) | Report output: `WARN: Overall patch coverage 53.8% ...` and `WARN: Backend patch coverage 52.0% ...`. | + +### Security Stance + +| Check | Status | Notes | +|---|---|---| +| New secret/token exposure risk introduced by test changes | PASS | Change scope is test helper logic only; no credentials/tokens were added to production paths, logs, or API outputs. | +| Gotify token leakage pattern introduced | PASS | No Gotify tokenized URLs or token fields were added in the changed test file. | + +### Blockers + +- None. + +### Verdict + +**PASS** — SMTP flaky test fix validates as test-only, stable under repetition/race checks, meets backend coverage gate, passes full pre-commit, and introduces no new secret/token exposure risk.