fix(ci): reorganize E2E tests for improved isolation and execution stability

2026-02-05 01:47:22 +00:00
parent 6aea2380b0
commit 39b5b8a928
2 changed files with 785 additions and 18 deletions
--- a/.github/workflows/e2e-tests-split.yml
+++ b/.github/workflows/e2e-tests-split.yml
@@ -110,7 +110,143 @@ jobs:
          path: charon-e2e-image.tar
          retention-days: 1

-  # Chromium browser tests (independent)
+  # Security Enforcement Tests (Chromium) - ISOLATED SERIAL EXECUTION
+  # These tests enable/disable Cerberus and must run alone to avoid contaminating other shards
+  e2e-chromium-security:
+    name: E2E Chromium (Security Enforcement)
+    runs-on: ubuntu-latest
+    needs: build
+    if: |
+      (github.event_name != 'workflow_dispatch') ||
+      (github.event.inputs.browser == 'chromium' || github.event.inputs.browser == 'all')
+    timeout-minutes: 30
+    env:
+      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+      CHARON_EMERGENCY_SERVER_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_E2E_IMAGE_TAG: charon:e2e-test
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: [1]  # Single shard - security tests run serially
+        total-shards: [1]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+
+      - name: Set up Node.js
+        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+
+      - name: Download Docker image
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
+        with:
+          name: docker-image
+
+      - name: Validate Emergency Token Configuration
+        run: |
+          echo "🔐 Validating emergency token configuration..."
+          if [ -z "$CHARON_EMERGENCY_TOKEN" ]; then
+            echo "::error title=Missing Secret::CHARON_EMERGENCY_TOKEN secret not configured"
+            exit 1
+          fi
+          TOKEN_LENGTH=${#CHARON_EMERGENCY_TOKEN}
+          if [ $TOKEN_LENGTH -lt 64 ]; then
+            echo "::error title=Invalid Token Length::CHARON_EMERGENCY_TOKEN must be at least 64 characters"
+            exit 1
+          fi
+          MASKED_TOKEN="${CHARON_EMERGENCY_TOKEN:0:8}...${CHARON_EMERGENCY_TOKEN: -4}"
+          echo "::notice::Emergency token validated (length: $TOKEN_LENGTH, preview: $MASKED_TOKEN)"
+        env:
+          CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+
+      - name: Load Docker image
+        run: |
+          docker load -i charon-e2e-image.tar
+          docker images | grep charon
+
+      - name: Generate ephemeral encryption key
+        run: echo "CHARON_ENCRYPTION_KEY=$(openssl rand -base64 32)" >> $GITHUB_ENV
+
+      - name: Start test environment
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml --profile security-tests up -d
+          echo "✅ Container started for Chromium security enforcement tests"
+
+      - name: Wait for service health
+        run: |
+          echo "⏳ Waiting for Charon to be healthy..."
+          MAX_ATTEMPTS=30
+          ATTEMPT=0
+          while [[ ${ATTEMPT} -lt ${MAX_ATTEMPTS} ]]; do
+            ATTEMPT=$((ATTEMPT + 1))
+            echo "Attempt ${ATTEMPT}/${MAX_ATTEMPTS}..."
+            if curl -sf http://127.0.0.1:8080/api/v1/health > /dev/null 2>&1; then
+              echo "✅ Charon is healthy!"
+              curl -s http://127.0.0.1:8080/api/v1/health | jq .
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "❌ Health check failed"
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs
+          exit 1
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Install Playwright Chromium
+        run: |
+          echo "📦 Installing Chromium..."
+          npx playwright install --with-deps chromium
+          EXIT_CODE=$?
+          echo "✅ Install command completed (exit code: $EXIT_CODE)"
+          exit $EXIT_CODE
+
+      - name: Run Security Enforcement Tests (Chromium)
+        timeout-minutes: 25
+        run: npx playwright test --project=chromium tests/security-enforcement/
+        env:
+          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
+          CI: true
+
+      - name: Upload HTML report (Chromium Security)
+        if: success() || failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: playwright-report-chromium-security
+          path: playwright-report/
+          retention-days: 14
+
+      - name: Upload test traces on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: traces-chromium-security
+          path: test-results/**/*.zip
+          retention-days: 7
+
+      - name: Collect Docker logs on failure
+        if: failure()
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs > docker-logs-chromium-security.txt 2>&1
+
+      - name: Upload Docker logs on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: docker-logs-chromium-security
+          path: docker-logs-chromium-security.txt
+          retention-days: 7
+
+      - name: Cleanup
+        if: success() || failure() || cancelled()
+        run: docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true
+
+  # Chromium browser tests (non-security) - PARALLEL SHARDED EXECUTION
  e2e-chromium:
    name: E2E Chromium (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
    runs-on: ubuntu-latest
@@ -122,13 +258,13 @@ jobs:
    env:
      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
      CHARON_EMERGENCY_SERVER_ENABLED: "true"
-      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "false"  # Cerberus OFF for non-security tests
      CHARON_E2E_IMAGE_TAG: charon:e2e-test
    strategy:
      fail-fast: false
      matrix:
-        shard: [1, 2, 3,4, 5, 6, 7, 8]  # 2 shards: parallel test execution to stay under 20-minute timeout
-        total-shards: [8]
+        shard: [1, 2, 3, 4]  # 4 shards for non-security tests
+        total-shards: [4]

    steps:
      - name: Checkout repository
@@ -209,9 +345,22 @@ jobs:
          find ~/.cache/ms-playwright -name "*chromium*" -o -name "*chrome*" 2>/dev/null | head -10 || echo "No chromium files found"
          exit $EXIT_CODE

-      - name: Run Chromium tests
+      - name: Run Chromium tests (Non-Security)
        timeout-minutes: 20
-        run: npx playwright test --project=chromium --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
+        run: |
+          # Run all tests except security-enforcement directory (Cerberus is OFF by default)
+          npx playwright test --project=chromium \
+            tests/core \
+            tests/dns-provider-crud.spec.ts \
+            tests/dns-provider-types.spec.ts \
+            tests/emergency-server \
+            tests/integration \
+            tests/manual-dns-provider.spec.ts \
+            tests/monitoring \
+            tests/security \
+            tests/settings \
+            tests/tasks \
+            --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
        env:
          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
          CI: true
@@ -258,7 +407,142 @@ jobs:
        if: success() || failure() || cancelled()
        run: docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true

-  # Firefox browser tests (independent)
+  # Security Enforcement Tests (Firefox) - ISOLATED SERIAL EXECUTION
+  e2e-firefox-security:
+    name: E2E Firefox (Security Enforcement)
+    runs-on: ubuntu-latest
+    needs: build
+    if: |
+      (github.event_name != 'workflow_dispatch') ||
+      (github.event.inputs.browser == 'firefox' || github.event.inputs.browser == 'all')
+    timeout-minutes: 30
+    env:
+      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+      CHARON_EMERGENCY_SERVER_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_E2E_IMAGE_TAG: charon:e2e-test
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: [1]  # Single shard - security tests run serially
+        total-shards: [1]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+
+      - name: Set up Node.js
+        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+
+      - name: Download Docker image
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
+        with:
+          name: docker-image
+
+      - name: Validate Emergency Token Configuration
+        run: |
+          echo "🔐 Validating emergency token configuration..."
+          if [ -z "$CHARON_EMERGENCY_TOKEN" ]; then
+            echo "::error title=Missing Secret::CHARON_EMERGENCY_TOKEN secret not configured"
+            exit 1
+          fi
+          TOKEN_LENGTH=${#CHARON_EMERGENCY_TOKEN}
+          if [ $TOKEN_LENGTH -lt 64 ]; then
+            echo "::error title=Invalid Token Length::CHARON_EMERGENCY_TOKEN must be at least 64 characters"
+            exit 1
+          fi
+          MASKED_TOKEN="${CHARON_EMERGENCY_TOKEN:0:8}...${CHARON_EMERGENCY_TOKEN: -4}"
+          echo "::notice::Emergency token validated (length: $TOKEN_LENGTH, preview: $MASKED_TOKEN)"
+        env:
+          CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+
+      - name: Load Docker image
+        run: |
+          docker load -i charon-e2e-image.tar
+          docker images | grep charon
+
+      - name: Generate ephemeral encryption key
+        run: echo "CHARON_ENCRYPTION_KEY=$(openssl rand -base64 32)" >> $GITHUB_ENV
+
+      - name: Start test environment
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml --profile security-tests up -d
+          echo "✅ Container started for Firefox security enforcement tests"
+
+      - name: Wait for service health
+        run: |
+          echo "⏳ Waiting for Charon to be healthy..."
+          MAX_ATTEMPTS=30
+          ATTEMPT=0
+          while [[ ${ATTEMPT} -lt ${MAX_ATTEMPTS} ]]; do
+            ATTEMPT=$((ATTEMPT + 1))
+            echo "Attempt ${ATTEMPT}/${MAX_ATTEMPTS}..."
+            if curl -sf http://127.0.0.1:8080/api/v1/health > /dev/null 2>&1; then
+              echo "✅ Charon is healthy!"
+              curl -s http://127.0.0.1:8080/api/v1/health | jq .
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "❌ Health check failed"
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs
+          exit 1
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Install Playwright Firefox
+        run: |
+          echo "📦 Installing Firefox..."
+          npx playwright install --with-deps firefox
+          EXIT_CODE=$?
+          echo "✅ Install command completed (exit code: $EXIT_CODE)"
+          exit $EXIT_CODE
+
+      - name: Run Security Enforcement Tests (Firefox)
+        timeout-minutes: 25
+        run: npx playwright test --project=firefox tests/security-enforcement/
+        env:
+          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
+          CI: true
+
+      - name: Upload HTML report (Firefox Security)
+        if: success() || failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: playwright-report-firefox-security
+          path: playwright-report/
+          retention-days: 14
+
+      - name: Upload test traces on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: traces-firefox-security
+          path: test-results/**/*.zip
+          retention-days: 7
+
+      - name: Collect Docker logs on failure
+        if: failure()
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs > docker-logs-firefox-security.txt 2>&1
+
+      - name: Upload Docker logs on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: docker-logs-firefox-security
+          path: docker-logs-firefox-security.txt
+          retention-days: 7
+
+      - name: Cleanup
+        if: success() || failure() || cancelled()
+        run: docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true
+
+  # Firefox browser tests (non-security) - PARALLEL SHARDED EXECUTION
  e2e-firefox:
    name: E2E Firefox (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
    runs-on: ubuntu-latest
@@ -270,13 +554,13 @@ jobs:
    env:
      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
      CHARON_EMERGENCY_SERVER_ENABLED: "true"
-      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "false"  # Cerberus OFF for non-security tests
      CHARON_E2E_IMAGE_TAG: charon:e2e-test
    strategy:
      fail-fast: false
      matrix:
-        shard: [1, 2, 3,4, 5, 6, 7, 8]  # 2 shards: parallel test execution to stay under 20-minute timeout
-        total-shards: [8]
+        shard: [1, 2, 3, 4]  # 4 shards for non-security tests
+        total-shards: [4]

    steps:
      - name: Checkout repository
@@ -365,9 +649,22 @@ jobs:
          find ~/.cache/ms-playwright -name "*firefox*" 2>/dev/null | head -10 || echo "No firefox files found"
          exit $EXIT_CODE

-      - name: Run Firefox tests
+      - name: Run Firefox tests (Non-Security)
        timeout-minutes: 20
-        run: npx playwright test --project=firefox --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
+        run: |
+          # Run all tests except security-enforcement directory (Cerberus is OFF by default)
+          npx playwright test --project=firefox \
+            tests/core \
+            tests/dns-provider-crud.spec.ts \
+            tests/dns-provider-types.spec.ts \
+            tests/emergency-server \
+            tests/integration \
+            tests/manual-dns-provider.spec.ts \
+            tests/monitoring \
+            tests/security \
+            tests/settings \
+            tests/tasks \
+            --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
        env:
          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
          CI: true
@@ -414,7 +711,142 @@ jobs:
        if: success() || failure() || cancelled()
        run: docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true

-  # WebKit browser tests (independent)
+  # Security Enforcement Tests (WebKit) - ISOLATED SERIAL EXECUTION
+  e2e-webkit-security:
+    name: E2E WebKit (Security Enforcement)
+    runs-on: ubuntu-latest
+    needs: build
+    if: |
+      (github.event_name != 'workflow_dispatch') ||
+      (github.event.inputs.browser == 'webkit' || github.event.inputs.browser == 'all')
+    timeout-minutes: 30
+    env:
+      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+      CHARON_EMERGENCY_SERVER_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_E2E_IMAGE_TAG: charon:e2e-test
+    strategy:
+      fail-fast: false
+      matrix:
+        shard: [1]  # Single shard - security tests run serially
+        total-shards: [1]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+
+      - name: Set up Node.js
+        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+
+      - name: Download Docker image
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
+        with:
+          name: docker-image
+
+      - name: Validate Emergency Token Configuration
+        run: |
+          echo "🔐 Validating emergency token configuration..."
+          if [ -z "$CHARON_EMERGENCY_TOKEN" ]; then
+            echo "::error title=Missing Secret::CHARON_EMERGENCY_TOKEN secret not configured"
+            exit 1
+          fi
+          TOKEN_LENGTH=${#CHARON_EMERGENCY_TOKEN}
+          if [ $TOKEN_LENGTH -lt 64 ]; then
+            echo "::error title=Invalid Token Length::CHARON_EMERGENCY_TOKEN must be at least 64 characters"
+            exit 1
+          fi
+          MASKED_TOKEN="${CHARON_EMERGENCY_TOKEN:0:8}...${CHARON_EMERGENCY_TOKEN: -4}"
+          echo "::notice::Emergency token validated (length: $TOKEN_LENGTH, preview: $MASKED_TOKEN)"
+        env:
+          CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
+
+      - name: Load Docker image
+        run: |
+          docker load -i charon-e2e-image.tar
+          docker images | grep charon
+
+      - name: Generate ephemeral encryption key
+        run: echo "CHARON_ENCRYPTION_KEY=$(openssl rand -base64 32)" >> $GITHUB_ENV
+
+      - name: Start test environment
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml --profile security-tests up -d
+          echo "✅ Container started for WebKit security enforcement tests"
+
+      - name: Wait for service health
+        run: |
+          echo "⏳ Waiting for Charon to be healthy..."
+          MAX_ATTEMPTS=30
+          ATTEMPT=0
+          while [[ ${ATTEMPT} -lt ${MAX_ATTEMPTS} ]]; do
+            ATTEMPT=$((ATTEMPT + 1))
+            echo "Attempt ${ATTEMPT}/${MAX_ATTEMPTS}..."
+            if curl -sf http://127.0.0.1:8080/api/v1/health > /dev/null 2>&1; then
+              echo "✅ Charon is healthy!"
+              curl -s http://127.0.0.1:8080/api/v1/health | jq .
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "❌ Health check failed"
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs
+          exit 1
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Install Playwright WebKit
+        run: |
+          echo "📦 Installing WebKit..."
+          npx playwright install --with-deps webkit
+          EXIT_CODE=$?
+          echo "✅ Install command completed (exit code: $EXIT_CODE)"
+          exit $EXIT_CODE
+
+      - name: Run Security Enforcement Tests (WebKit)
+        timeout-minutes: 25
+        run: npx playwright test --project=webkit tests/security-enforcement/
+        env:
+          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
+          CI: true
+
+      - name: Upload HTML report (WebKit Security)
+        if: success() || failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: playwright-report-webkit-security
+          path: playwright-report/
+          retention-days: 14
+
+      - name: Upload test traces on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: traces-webkit-security
+          path: test-results/**/*.zip
+          retention-days: 7
+
+      - name: Collect Docker logs on failure
+        if: failure()
+        run: |
+          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs > docker-logs-webkit-security.txt 2>&1
+
+      - name: Upload Docker logs on failure
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+        with:
+          name: docker-logs-webkit-security
+          path: docker-logs-webkit-security.txt
+          retention-days: 7
+
+      - name: Cleanup
+        if: success() || failure() || cancelled()
+        run: docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true
+
+  # WebKit browser tests (non-security) - PARALLEL SHARDED EXECUTION
  e2e-webkit:
    name: E2E WebKit (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
    runs-on: ubuntu-latest
@@ -426,13 +858,13 @@ jobs:
    env:
      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
      CHARON_EMERGENCY_SERVER_ENABLED: "true"
-      CHARON_SECURITY_TESTS_ENABLED: "true"
+      CHARON_SECURITY_TESTS_ENABLED: "false"  # Cerberus OFF for non-security tests
      CHARON_E2E_IMAGE_TAG: charon:e2e-test
    strategy:
      fail-fast: false
      matrix:
-        shard: [1, 2, 3,4, 5, 6, 7, 8]  # 2 shards: parallel test execution to stay under 20-minute timeout
-        total-shards: [8]
+        shard: [1, 2, 3, 4]  # 4 shards for non-security tests
+        total-shards: [4]

    steps:
      - name: Checkout repository
@@ -521,9 +953,22 @@ jobs:
          find ~/.cache/ms-playwright -name "*webkit*" -o -name "*MiniBrowser*" 2>/dev/null | head -10 || echo "No webkit files found"
          exit $EXIT_CODE

-      - name: Run WebKit tests
+      - name: Run WebKit tests (Non-Security)
        timeout-minutes: 20
-        run: npx playwright test --project=webkit --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
+        run: |
+          # Run all tests except security-enforcement directory (Cerberus is OFF by default)
+          npx playwright test --project=webkit \
+            tests/core \
+            tests/dns-provider-crud.spec.ts \
+            tests/dns-provider-types.spec.ts \
+            tests/emergency-server \
+            tests/integration \
+            tests/manual-dns-provider.spec.ts \
+            tests/monitoring \
+            tests/security \
+            tests/settings \
+            tests/tasks \
+            --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
        env:
          PLAYWRIGHT_BASE_URL: http://127.0.0.1:8080
          CI: true
--- a/docs/implementation/E2E_TEST_REORGANIZATION_IMPLEMENTATION.md
+++ b/docs/implementation/E2E_TEST_REORGANIZATION_IMPLEMENTATION.md
@@ -0,0 +1,322 @@
+# E2E Test Reorganization Implementation
+
+## Problem Statement
+
+CI E2E tests were timing out at 20 minutes even with 8 shards per browser (24 total shards) because:
+
+1. **Cross-Shard Contamination**: Security enforcement tests that enable/disable Cerberus were randomly distributed across shards, causing ACL and rate limit failures in non-security tests
+2. **Global State Interference**: Tests modifying global security state (Cerberus middleware) were running in parallel, causing unpredictable test failures
+3. **Uneven Distribution**: Random shard distribution didn't account for test dependencies and sequential requirements
+
+## Solution Architecture
+
+### Test Isolation Strategy
+
+Reorganized tests into two categories with dedicated job execution:
+
+#### **Category 1: Security Enforcement Tests (Isolated Serial Execution)**
+- **Location**: `tests/security-enforcement/`
+- **Job Names**:
+  - `e2e-chromium-security`
+  - `e2e-firefox-security`
+  - `e2e-webkit-security`
+- **Sharding**: 1 shard per browser (no sharding within security tests)
+- **Environment**: `CHARON_SECURITY_TESTS_ENABLED: "true"`
+- **Timeout**: 30 minutes (allows for sequential execution)
+- **Test Files**:
+  - `rate-limit-enforcement.spec.ts`
+  - `crowdsec-enforcement.spec.ts`
+  - `emergency-token.spec.ts` (break glass protocol)
+  - `combined-enforcement.spec.ts`
+  - `security-headers-enforcement.spec.ts`
+  - `waf-enforcement.spec.ts`
+  - `acl-enforcement.spec.ts`
+  - `zzz-admin-whitelist-blocking.spec.ts` (test.describe.serial)
+  - `zzzz-break-glass-recovery.spec.ts` (test.describe.serial)
+  - `emergency-reset.spec.ts`
+
+**Execution Flow** (as specified by user):
+1. Enable Cerberus security module
+2. Run tests requiring security ON (ACL, WAF, rate limiting, etc.)
+3. Execute break glass protocol test (`emergency-token.spec.ts`)
+4. Run tests requiring security OFF (verify bypass)
+
+#### **Category 2: Non-Security Tests (Parallel Sharded Execution)**
+- **Job Names**:
+  - `e2e-chromium` (Shard 1-4)
+  - `e2e-firefox` (Shard 1-4)
+  - `e2e-webkit` (Shard 1-4)
+- **Sharding**: 4 shards per browser (12 total shards)
+- **Environment**: `CHARON_SECURITY_TESTS_ENABLED: "false"`  ← **Cerberus OFF by default**
+- **Timeout**: 20 minutes per shard
+- **Test Directories**:
+  - `tests/core`
+  - `tests/dns-provider-crud.spec.ts`
+  - `tests/dns-provider-types.spec.ts`
+  - `tests/emergency-server`
+  - `tests/integration`
+  - `tests/manual-dns-provider.spec.ts`
+  - `tests/monitoring`
+  - `tests/security` (UI/dashboard tests, not enforcement)
+  - `tests/settings`
+  - `tests/tasks`
+
+### Job Distribution
+
+**Before**:
+```
+Total: 24 shards (8 per browser)
+├── Chromium: 8 shards (all tests randomly distributed)
+├── Firefox:  8 shards (all tests randomly distributed)
+└── WebKit:   8 shards (all tests randomly distributed)
+
+Issues:
+- Security tests randomly distributed across all shards
+- Cerberus state changes affecting parallel test execution
+- ACL/rate limit failures in non-security tests
+```
+
+**After**:
+```
+Total: 15 jobs
+├── Security Enforcement (3 jobs)
+│   ├── Chromium Security: 1 shard (serial execution, 30min timeout)
+│   ├── Firefox Security:  1 shard (serial execution, 30min timeout)
+│   └── WebKit Security:   1 shard (serial execution, 30min timeout)
+│
+└── Non-Security (12 shards)
+    ├── Chromium: 4 shards (parallel, Cerberus OFF, 20min timeout)
+    ├── Firefox:  4 shards (parallel, Cerberus OFF, 20min timeout)
+    └── WebKit:   4 shards (parallel, Cerberus OFF, 20min timeout)
+
+Benefits:
+- Security tests isolated, run serially without cross-shard interference
+- Non-security tests always run with Cerberus OFF (default state)
+- Reduced total job count from 24 to 15
+- Clear separation of concerns
+```
+
+## Implementation Details
+
+### Workflow Changes
+
+#### Security Enforcement Jobs (New)
+
+Created dedicated jobs for security enforcement tests:
+
+```yaml
+e2e-{browser}-security:
+  name: E2E {Browser} (Security Enforcement)
+  timeout-minutes: 30
+  env:
+    CHARON_SECURITY_TESTS_ENABLED: "true"
+  strategy:
+    matrix:
+      shard: [1]  # Single shard
+      total-shards: [1]
+  steps:
+    - name: Run Security Enforcement Tests
+      run: npx playwright test --project={browser} tests/security-enforcement/
+```
+
+**Key Changes**:
+- Single shard per browser (no parallel execution within security tests)
+- Explicitly targets `tests/security-enforcement/` directory
+- 30-minute timeout to accommodate serial execution
+- `CHARON_SECURITY_TESTS_ENABLED: "true"` enables Cerberus middleware
+
+#### Non-Security Jobs (Updated)
+
+Updated existing browser jobs to exclude security enforcement tests:
+
+```yaml
+e2e-{browser}:
+  name: E2E {Browser} (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
+  timeout-minutes: 20
+  env:
+    CHARON_SECURITY_TESTS_ENABLED: "false"  # Cerberus OFF
+  strategy:
+    matrix:
+      shard: [1, 2, 3, 4]  # 4 shards
+      total-shards: [4]
+  steps:
+    - name: Run {Browser} tests (Non-Security)
+      run: |
+        npx playwright test --project={browser} \
+          tests/core \
+          tests/dns-provider-crud.spec.ts \
+          tests/dns-provider-types.spec.ts \
+          tests/emergency-server \
+          tests/integration \
+          tests/manual-dns-provider.spec.ts \
+          tests/monitoring \
+          tests/security \
+          tests/settings \
+          tests/tasks \
+          --shard=${{ matrix.shard }}/${{ matrix.total-shards }}
+```
+
+**Key Changes**:
+- Reduced from 8 shards to 4 shards per browser
+- Explicitly lists test directories (excludes `tests/security-enforcement/`)
+- `CHARON_SECURITY_TESTS_ENABLED: "false"` keeps Cerberus OFF by default
+- 20-minute timeout per shard (sufficient for non-security tests)
+
+### Environment Variable Strategy
+
+| Job Type | Variable | Value | Purpose |
+|----------|----------|-------|---------|
+| Security Enforcement | `CHARON_SECURITY_TESTS_ENABLED` | `"true"` | Enable Cerberus middleware for enforcement tests |
+| Non-Security | `CHARON_SECURITY_TESTS_ENABLED` | `"false"` | Keep Cerberus OFF to prevent ACL/rate limit interference |
+
+## Benefits
+
+### 1. **Test Isolation**
+- Security enforcement tests run independently without affecting other shards
+- No cross-shard contamination from global state changes
+- Clear separation between enforcement tests and regular functionality tests
+
+### 2. **Predictable Execution**
+- Security tests execute serially in a controlled environment
+- Proper test execution order: enable → tests ON → break glass → tests OFF
+- Non-security tests always start with Cerberus OFF (default state)
+
+### 3. **Performance Optimization**
+- Reduced total job count from 24 to 15 (37.5% reduction)
+- Eliminated failed tests due to ACL/rate limit interference
+- Balanced shard durations to stay under timeout limits
+
+### 4. **Maintainability**
+- Explicit test path listing makes it clear which tests run where
+- Security enforcement tests are clearly identified and isolated
+- Easy to add new test categories without affecting security tests
+
+### 5. **Debugging**
+- Failures in security enforcement jobs are clearly isolated
+- Non-security test failures can't be caused by security middleware interference
+- Clearer artifact naming: `playwright-report-{browser}-security` vs `playwright-report-{browser}-{shard}`
+
+## Testing Strategy
+
+### Test Execution Order (User-Specified)
+
+For security enforcement tests, the execution follows this sequence:
+
+1. **Enable Security Module**
+   - Tests that enable Cerberus middleware
+
+2. **Tests Requiring Security ON**
+   - ACL enforcement verification
+   - WAF rule enforcement
+   - Rate limiting enforcement
+   - CrowdSec integration enforcement
+   - Security headers enforcement
+   - Combined enforcement scenarios
+
+3. **Break Glass Protocol**
+   - `emergency-token.spec.ts` - Emergency bypass testing
+
+4. **Tests Requiring Security OFF**
+   - Verify bypass functionality
+   - Test default (Cerberus disabled) behavior
+
+### Test File Naming Convention
+
+Security enforcement tests use prefixes for ordering:
+- Regular tests: `*-enforcement.spec.ts`
+- Serialized tests: `zzz-*-blocking.spec.ts` (test.describe.serial)
+- Final tests: `zzzz-*-recovery.spec.ts` (test.describe.serial)
+
+This naming convention ensures Playwright executes tests in the correct order even within the single security shard.
+
+## Migration Impact
+
+### CI Pipeline Changes
+
+**Before**:
+- 24 parallel jobs (8 shards × 3 browsers)
+- Random test distribution
+- Frequent failures due to security middleware interference
+
+**After**:
+- 15 jobs (3 security + 12 non-security)
+- Deterministic test distribution
+- Security tests isolated to prevent interference
+
+### Execution Time
+
+**Estimated Timings**:
+- Security enforcement jobs: ~25 minutes each (serial execution)
+- Non-security shards: ~15 minutes each (parallel execution)
+- Total pipeline time: ~30 minutes (parallel job execution)
+
+**Previous Timings**:
+- All shards: Exceeding 20 minutes with frequent timeouts
+- Total pipeline time: Failing due to timeouts
+
+## Validation Checklist
+
+- [ ] Security enforcement tests run serially without cross-shard interference
+- [ ] Non-security tests complete within 20-minute timeout
+- [ ] All browsers (Chromium, Firefox, WebKit) have dedicated security enforcement jobs
+- [ ] `CHARON_SECURITY_TESTS_ENABLED` correctly set for each job type
+- [ ] Test artifacts clearly named by category (security vs shard number)
+- [ ] CI pipeline completes successfully without timeout errors
+- [ ] No ACL/rate limit failures in non-security test shards
+
+## Future Improvements
+
+### Potential Optimizations
+
+1. **Further Shard Balancing**
+   - Profile individual test execution times
+   - Redistribute tests across shards to balance duration
+   - Consider 5-6 shards if any shard approaches 20-minute timeout
+
+2. **Test Grouping**
+   - Group similar test types together for better cache utilization
+   - Consider browser-specific test isolation (e.g., Firefox-specific tests)
+
+3. **Dynamic Sharding**
+   - Use Playwright's built-in test duration data for intelligent distribution
+   - Automatically adjust shard count based on test additions
+
+4. **Parallel Security Tests**
+   - If security tests grow significantly, consider splitting into sub-categories
+   - Example: WAF tests, ACL tests, rate limit tests in separate shards
+   - Requires careful state management to avoid interference
+
+## Related Documentation
+
+- User request: "We need to make sure all the security tests are ran in the same shard...Cerberus should be off by default so all the other tests in other shards arent hitting the acl or rate limit and failing"
+- Test execution flow specified by user: "enable security → tests requiring security ON → break glass protocol → tests requiring security OFF"
+- Original issue: Tests timing out at 20 minutes even with 6 shards due to cross-shard security middleware interference
+
+## Rollout Plan
+
+### Phase 1: Implementation ✅
+- [x] Create dedicated security enforcement jobs for all browsers
+- [x] Update non-security jobs to exclude security-enforcement directory
+- [x] Set `CHARON_SECURITY_TESTS_ENABLED` appropriately for each job type
+- [x] Document changes and strategy
+
+### Phase 2: Validation (In Progress)
+- [ ] Run full CI pipeline to verify no timeout errors
+- [ ] Validate security enforcement tests execute in correct order
+- [ ] Confirm non-security tests don't hit ACL/rate limit failures
+- [ ] Monitor execution times to ensure shards stay under timeout limits
+
+### Phase 3: Optimization (TBD)
+- [ ] Profile test execution times per shard
+- [ ] Adjust shard distribution if any shard approaches timeout
+- [ ] Consider further optimizations based on real-world execution data
+
+## Conclusion
+
+This reorganization addresses the root cause of CI timeout and test interference issues by:
+- **Isolating** security enforcement tests in dedicated serial jobs
+- **Separating** concerns between security testing and functional testing
+- **Ensuring** non-security tests always run with Cerberus OFF (default state)
+- **Preventing** cross-shard contamination from global security state changes
+
+The implementation follows the user's explicit requirements and maintains clarity through clear job naming, environment variable configuration, and explicit test path specifications.