Charon/.github/workflows/e2e-tests.yml

# E2E Tests Workflow
# Runs Playwright E2E tests with sharding for faster execution
# and collects frontend code coverage via @bgotink/playwright-coverage
#
# Phase 4: Build Once, Test Many - Use registry image instead of building
# This workflow now waits for docker-build.yml to complete and pulls the built image
#
# Test Execution Architecture:
#   - Parallel Sharding: Tests split across 4 shards for speed
#   - Per-Shard HTML Reports: Each shard generates its own HTML report
#   - No Merging Needed: Smaller reports are easier to debug
#   - Trace Collection: Failure traces captured for debugging
#
# Coverage Architecture:
#   - Backend: Docker container at localhost:8080 (API)
#   - Frontend: Vite dev server at localhost:3000 (serves source files)
#   - Tests hit Vite, which proxies API calls to Docker
#   - V8 coverage maps directly to source files for accurate reporting
#   - Coverage disabled by default (requires PLAYWRIGHT_COVERAGE=1)
#   - NOTE: Coverage mode uses Vite dev server, not registry image
#
# Triggers:
#   - workflow_run after docker-build.yml completes (standard mode)
#   - Manual dispatch with browser/image selection
#
# Jobs:
#   1. e2e-tests: Run tests in parallel shards, upload per-shard HTML reports
#   2. test-summary: Generate summary with links to shard reports
#   3. comment-results: Post test results as PR comment
#   4. upload-coverage: Merge and upload E2E coverage to Codecov (if enabled)
#   5. e2e-results: Status check to block merge on failure

name: E2E Tests

on:
  workflow_run:
    workflows: ["Docker Build, Publish & Test"]
    types: [completed]
    branches: [main, development, 'feature/**']  # Explicit branch filter prevents unexpected triggers

  workflow_dispatch:
    inputs:
      image_tag:
        description: 'Docker image tag to test (e.g., pr-123-abc1234)'
        required: false
        type: string
      browser:
        description: 'Browser to test'
        required: false
        default: 'chromium'
        type: choice
        options:
          - chromium
          - firefox
          - webkit
          - all

env:
  NODE_VERSION: '20'
  GO_VERSION: '1.25.6'
  GOTOOLCHAIN: auto
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository_owner }}/charon
  PLAYWRIGHT_COVERAGE: ${{ vars.PLAYWRIGHT_COVERAGE || '0' }}
  # Enhanced debugging environment variables
  DEBUG: 'charon:*,charon-test:*'
  PLAYWRIGHT_DEBUG: '1'
  CI_LOG_LEVEL: 'verbose'

# Prevent race conditions when PR is updated mid-test
# Cancels old test runs when new build completes with different SHA
concurrency:
  group: e2e-${{ github.workflow }}-${{ github.event.workflow_run.head_branch || github.ref }}-${{ github.event.workflow_run.head_sha || github.sha }}
  cancel-in-progress: true

jobs:
  # Run tests in parallel shards against registry image
  e2e-tests:
    name: E2E ${{ matrix.browser }} (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Only run if docker-build.yml succeeded, or if manually triggered
    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
    env:
      # Required for security teardown (emergency reset fallback when ACL blocks API)
      CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}
      # Enable security-focused endpoints and test gating
      CHARON_EMERGENCY_SERVER_ENABLED: "true"
      CHARON_SECURITY_TESTS_ENABLED: "true"
    strategy:
      fail-fast: false
      matrix:
        shard: [1, 2, 3, 4]
        total-shards: [4]
        browser: [chromium, firefox, webkit]

    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Set up Node.js
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'

      # Determine the correct image tag based on trigger context
      # For PRs: pr-{number}-{sha}, For branches: {sanitized-branch}-{sha}
      - name: Determine image tag
        id: image
        env:
          EVENT: ${{ github.event.workflow_run.event }}
          REF: ${{ github.event.workflow_run.head_branch }}
          SHA: ${{ github.event.workflow_run.head_sha }}
          MANUAL_TAG: ${{ inputs.image_tag }}
        run: |
          # Manual trigger uses provided tag
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            if [[ -n "$MANUAL_TAG" ]]; then
              echo "tag=${MANUAL_TAG}" >> $GITHUB_OUTPUT
            else
              # Default to latest if no tag provided
              echo "tag=latest" >> $GITHUB_OUTPUT
            fi
            echo "source_type=manual" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Extract 7-character short SHA
          SHORT_SHA=$(echo "$SHA" | cut -c1-7)

          if [[ "$EVENT" == "pull_request" ]]; then
            # Use native pull_requests array (no API calls needed)
            PR_NUM=$(echo '${{ toJson(github.event.workflow_run.pull_requests) }}' | jq -r '.[0].number')

            if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
              echo "❌ ERROR: Could not determine PR number"
              echo "Event: $EVENT"
              echo "Ref: $REF"
              echo "SHA: $SHA"
              echo "Pull Requests JSON: ${{ toJson(github.event.workflow_run.pull_requests) }}"
              exit 1
            fi

            # Immutable tag with SHA suffix prevents race conditions
            echo "tag=pr-${PR_NUM}-${SHORT_SHA}" >> $GITHUB_OUTPUT
            echo "source_type=pr" >> $GITHUB_OUTPUT
          else
            # Branch push: sanitize branch name and append SHA
            # Sanitization: lowercase, replace / with -, remove special chars
            SANITIZED=$(echo "$REF" | \
              tr '[:upper:]' '[:lower:]' | \
              tr '/' '-' | \
              sed 's/[^a-z0-9-._]/-/g' | \
              sed 's/^-//; s/-$//' | \
              sed 's/--*/-/g' | \
              cut -c1-121)  # Leave room for -SHORT_SHA (7 chars)

            echo "tag=${SANITIZED}-${SHORT_SHA}" >> $GITHUB_OUTPUT
            echo "source_type=branch" >> $GITHUB_OUTPUT
          fi

          echo "sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
          echo "Determined image tag: $(cat $GITHUB_OUTPUT | grep tag=)"

      # Pull image from registry with retry logic (dual-source strategy)
      # Try registry first (fast), fallback to artifact if registry fails
      - name: Pull Docker image from registry
        id: pull_image
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 5
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
            IMAGE_NAME="ghcr.io/${{ github.repository_owner }}/charon:${{ steps.image.outputs.tag }}"
            echo "Pulling image: $IMAGE_NAME"
            docker pull "$IMAGE_NAME"
            docker tag "$IMAGE_NAME" charon:e2e-test
            echo "✅ Successfully pulled from registry"
        continue-on-error: true

      # Fallback: Download artifact if registry pull failed
      - name: Fallback to artifact download
        if: steps.pull_image.outcome == 'failure'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SHA: ${{ steps.image.outputs.sha }}
        run: |
          echo "⚠️ Registry pull failed, falling back to artifact..."

          # Determine artifact name based on source type
          if [[ "${{ steps.image.outputs.source_type }}" == "pr" ]]; then
            PR_NUM=$(echo '${{ toJson(github.event.workflow_run.pull_requests) }}' | jq -r '.[0].number')
            ARTIFACT_NAME="pr-image-${PR_NUM}"
          else
            ARTIFACT_NAME="push-image"
          fi

          echo "Downloading artifact: $ARTIFACT_NAME"
          gh run download ${{ github.event.workflow_run.id }} \
            --name "$ARTIFACT_NAME" \
            --dir /tmp/docker-image || {
            echo "❌ ERROR: Artifact download failed!"
            echo "Available artifacts:"
            gh run view ${{ github.event.workflow_run.id }} --json artifacts --jq '.artifacts[].name'
            exit 1
          }

          docker load < /tmp/docker-image/charon-image.tar
          docker tag $(docker images --format "{{.Repository}}:{{.Tag}}" | head -1) charon:e2e-test
          echo "✅ Successfully loaded from artifact"

      # Validate image freshness by checking SHA label
      - name: Validate image SHA
        env:
          SHA: ${{ steps.image.outputs.sha }}
        run: |
          LABEL_SHA=$(docker inspect charon:e2e-test --format '{{index .Config.Labels "org.opencontainers.image.revision"}}' | cut -c1-7 || echo "unknown")
          echo "Expected SHA: $SHA"
          echo "Image SHA:    $LABEL_SHA"

          if [[ "$LABEL_SHA" != "$SHA" && "$LABEL_SHA" != "unknown" ]]; then
            echo "⚠️ WARNING: Image SHA mismatch!"
            echo "Image may be stale. Proceeding with caution..."
          elif [[ "$LABEL_SHA" == "unknown" ]]; then
            echo "ℹ️ INFO: Could not determine image SHA from labels (artifact source)"
          else
            echo "✅ Image SHA matches expected commit"
          fi

      - name: Validate Emergency Token Configuration
        run: |
          echo "🔐 Validating emergency token configuration..."

          if [ -z "$CHARON_EMERGENCY_TOKEN" ]; then
            echo "::error title=Missing Secret::CHARON_EMERGENCY_TOKEN secret not configured in repository settings"
            echo "::error::Navigate to: Repository Settings → Secrets and Variables → Actions"
            echo "::error::Create secret: CHARON_EMERGENCY_TOKEN"
            echo "::error::Generate value with: openssl rand -hex 32"
            echo "::error::See docs/github-setup.md for detailed instructions"
            exit 1
          fi

          TOKEN_LENGTH=${#CHARON_EMERGENCY_TOKEN}
          if [ $TOKEN_LENGTH -lt 64 ]; then
            echo "::error title=Invalid Token Length::CHARON_EMERGENCY_TOKEN must be at least 64 characters (current: $TOKEN_LENGTH)"
            echo "::error::Generate new token with: openssl rand -hex 32"
            exit 1
          fi

          # Mask token in output (show first 8 chars only)
          MASKED_TOKEN="${CHARON_EMERGENCY_TOKEN:0:8}...${CHARON_EMERGENCY_TOKEN: -4}"
          echo "::notice::Emergency token validated (length: $TOKEN_LENGTH, preview: $MASKED_TOKEN)"
        env:
          CHARON_EMERGENCY_TOKEN: ${{ secrets.CHARON_EMERGENCY_TOKEN }}

      - name: Generate ephemeral encryption key
        run: |
          # Generate a unique, ephemeral encryption key for this CI run
          # Key is 32 bytes, base64-encoded as required by CHARON_ENCRYPTION_KEY
          echo "CHARON_ENCRYPTION_KEY=$(openssl rand -base64 32)" >> $GITHUB_ENV
          echo "✅ Generated ephemeral encryption key for E2E tests"

      - name: Start test environment
        run: |
          # Use docker-compose.playwright-ci.yml for CI (no .env file, uses GitHub Secrets)
          # Note: Using pre-pulled/pre-built image (charon:e2e-test) - no rebuild needed
          docker compose -f .docker/compose/docker-compose.playwright-ci.yml --profile security-tests up -d
          echo "✅ Container started via docker-compose.playwright-ci.yml"

      - name: Wait for service health
        run: |
          echo "⏳ Waiting for Charon to be healthy..."
          MAX_ATTEMPTS=30
          ATTEMPT=0

          while [[ ${ATTEMPT} -lt ${MAX_ATTEMPTS} ]]; do
            ATTEMPT=$((ATTEMPT + 1))
            echo "Attempt ${ATTEMPT}/${MAX_ATTEMPTS}..."

            if curl -sf http://localhost:8080/api/v1/health > /dev/null 2>&1; then
              echo "✅ Charon is healthy!"
              curl -s http://localhost:8080/api/v1/health | jq .
              exit 0
            fi

            sleep 2
          done

          echo "❌ Health check failed"
          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs
          exit 1

      - name: Install dependencies
        run: npm ci

      - name: Clean Playwright browser cache
        run: rm -rf ~/.cache/ms-playwright


      - name: Cache Playwright browsers
        id: playwright-cache
        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5
        with:
          path: ~/.cache/ms-playwright
          # Use exact match only - no restore-keys fallback
          # This ensures we don't restore stale browsers when Playwright version changes
          key: playwright-${{ matrix.browser }}-${{ hashFiles('package-lock.json') }}

      - name: Install & verify Playwright browsers
        run: |
          npx playwright install --with-deps --force

          set -euo pipefail

          echo "🎯 Playwright CLI version"
          npx playwright --version || true

          echo "🔍 Showing Playwright cache root (if present)"
          ls -la ~/.cache/ms-playwright || true

          echo "📥 Install or verify browser: ${{ matrix.browser }}"

          # Install when cache miss, otherwise verify the expected executables exist
          if [[ "${{ steps.playwright-cache.outputs.cache-hit }}" != "true" ]]; then
            echo "📥 Cache miss - downloading ${{ matrix.browser }} browser..."
            npx playwright install --with-deps ${{ matrix.browser }}
          else
            echo "✅ Cache hit - verifying ${{ matrix.browser }} browser files..."
          fi

          # Look for the browser-specific headless shell executable(s)
          case "${{ matrix.browser }}" in
            chromium)
              EXPECTED_PATTERN="chrome-headless-shell*"
              ;;
            firefox)
              EXPECTED_PATTERN="firefox*"
              ;;
            webkit)
              EXPECTED_PATTERN="webkit*"
              ;;
            *)
              EXPECTED_PATTERN="*"
              ;;
          esac

          echo "Searching for expected files (pattern=$EXPECTED_PATTERN)..."
          find ~/.cache/ms-playwright -maxdepth 4 -type f -name "$EXPECTED_PATTERN" -print || true

          # Attempt to derive the exact executable path Playwright will use
          echo "Attempting to resolve Playwright's executable path via Node API (best-effort)"
          node -e "try{ const pw = require('playwright'); const b = pw['${{ matrix.browser }}']; console.log('exePath:', b.executablePath ? b.executablePath() : 'n/a'); }catch(e){ console.error('node-check-failed', e.message); process.exit(0); }" || true

          # If the expected binary is missing, force reinstall
          MISSING_COUNT=$(find ~/.cache/ms-playwright -maxdepth 4 -type f -name "$EXPECTED_PATTERN" | wc -l || true)
          if [[ "$MISSING_COUNT" -lt 1 ]]; then
            echo "⚠️ Expected Playwright browser executable not found (count=$MISSING_COUNT). Forcing reinstall..."
            npx playwright install --with-deps ${{ matrix.browser }} --force
          fi

          echo "Post-install: show cache contents (top 5 lines)"
          find ~/.cache/ms-playwright -maxdepth 3 -printf '%p\n' | head -40 || true

          # Final sanity check: try a headless launch via a tiny Node script (browser-specific args, retry without args)
          echo "🔁 Verifying browser can be launched (headless)"
          node -e "(async()=>{ try{ const pw=require('playwright'); const name='${{ matrix.browser }}'; const browser = pw[name]; const argsMap = { chromium: ['--no-sandbox'], firefox: ['--no-sandbox'], webkit: [] }; const args = argsMap[name] || [];
            // First attempt: launch with recommended args for this browser
            try {
              console.log('attempt-launch', name, 'args', JSON.stringify(args));
              const b = await browser.launch({ headless: true, args });
              await b.close();
              console.log('launch-ok', 'argsUsed', JSON.stringify(args));
              process.exit(0);
            } catch (err) {
              console.warn('launch-with-args-failed', err && err.message);
              if (args.length) {
                // Retry without args (some browsers reject unknown flags)
                console.log('retrying-without-args');
                const b2 = await browser.launch({ headless: true });
                await b2.close();
                console.log('launch-ok-no-args');
                process.exit(0);
              }
              throw err;
            }
          } catch (e) { console.error('launch-failed', e && e.message); process.exit(2); } })()" || (echo '❌ Browser launch verification failed' && exit 1)

          echo "✅ Playwright ${{ matrix.browser }} ready and verified"

      - name: Run E2E tests (Shard ${{ matrix.shard }}/${{ matrix.total-shards }})
        run: |
          echo "════════════════════════════════════════════════════════════"
          echo "E2E Test Shard ${{ matrix.shard }}/${{ matrix.total-shards }}"
          echo "Browser: ${{ matrix.browser }}"
          echo "Start Time: $(date -u +'%Y-%m-%dT%H:%M:%SZ')"
          echo ""
          echo "Reporter: HTML (per-shard reports)"
          echo "Output: playwright-report/ directory"
          echo "════════════════════════════════════════════════════════════"

          # Capture start time for performance budget tracking
          SHARD_START=$(date +%s)
          echo "SHARD_START=$SHARD_START" >> $GITHUB_ENV

          npx playwright test \
            --project=${{ matrix.browser }} \
            --shard=${{ matrix.shard }}/${{ matrix.total-shards }}

          # Capture end time for performance budget tracking
          SHARD_END=$(date +%s)
          echo "SHARD_END=$SHARD_END" >> $GITHUB_ENV

          SHARD_DURATION=$((SHARD_END - SHARD_START))

          echo ""
          echo "════════════════════════════════════════════════════════════"
          echo "Shard ${{ matrix.shard }} Complete | Duration: ${SHARD_DURATION}s"
          echo "════════════════════════════════════════════════════════════"
        env:
          # Test directly against Docker container (no coverage)
          PLAYWRIGHT_BASE_URL: http://localhost:8080
          CI: true
          TEST_WORKER_INDEX: ${{ matrix.shard }}

      - name: Verify shard performance budget
        if: always()
        run: |
          # Calculate shard execution time
          SHARD_DURATION=$((SHARD_END - SHARD_START))
          MAX_DURATION=900  # 15 minutes

          echo "📊 Performance Budget Check"
          echo "   Shard Duration: ${SHARD_DURATION}s"
          echo "   Budget Limit:   ${MAX_DURATION}s"
          echo "   Utilization:    $((SHARD_DURATION * 100 / MAX_DURATION))%"

          # Fail if shard exceeded performance budget
          if [[ $SHARD_DURATION -gt $MAX_DURATION ]]; then
            echo "::error::Shard exceeded performance budget: ${SHARD_DURATION}s > ${MAX_DURATION}s"
            echo "::error::This likely indicates feature flag polling regression or API bottleneck"
            echo "::error::Review test logs and consider optimizing wait helpers or API calls"
            exit 1
          fi

          echo "✅ Shard completed within budget: ${SHARD_DURATION}s"

      - name: Upload HTML report (per-shard)
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: playwright-report-${{ matrix.browser }}-shard-${{ matrix.shard }}
          path: playwright-report/
          retention-days: 14

      - name: Upload test traces on failure
        if: failure()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: traces-${{ matrix.browser }}-shard-${{ matrix.shard }}
          path: test-results/**/*.zip
          retention-days: 7

      - name: Collect Docker logs on failure
        if: failure()
        run: |
          echo "📋 Container logs:"
          docker compose -f .docker/compose/docker-compose.playwright-ci.yml logs > docker-logs-${{ matrix.browser }}-shard-${{ matrix.shard }}.txt 2>&1

      - name: Upload Docker logs on failure
        if: failure()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: docker-logs-${{ matrix.browser }}-shard-${{ matrix.shard }}
          path: docker-logs-${{ matrix.browser }}-shard-${{ matrix.shard }}.txt
          retention-days: 7

      - name: Cleanup
        if: always()
        run: |
          docker compose -f .docker/compose/docker-compose.playwright-ci.yml down -v 2>/dev/null || true

  # Summarize test results from all shards (no merging needed)
  test-summary:
    name: E2E Test Summary
    runs-on: ubuntu-latest
    needs: e2e-tests
    if: always()

    steps:
      - name: Generate job summary with per-shard links
        run: |
          echo "## 📊 E2E Test Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Per-Shard HTML Reports" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Each shard generates its own HTML report for easier debugging:" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Browser | Shards | HTML Reports | Traces (on failure) |" >> $GITHUB_STEP_SUMMARY
          echo "|---------|--------|--------------|---------------------|" >> $GITHUB_STEP_SUMMARY
          echo "| Chromium | 1-4 | \`playwright-report-chromium-shard-{1..4}\` | \`traces-chromium-shard-{1..4}\` |" >> $GITHUB_STEP_SUMMARY
          echo "| Firefox | 1-4 | \`playwright-report-firefox-shard-{1..4}\` | \`traces-firefox-shard-{1..4}\` |" >> $GITHUB_STEP_SUMMARY
          echo "| WebKit | 1-4 | \`playwright-report-webkit-shard-{1..4}\` | \`traces-webkit-shard-{1..4}\` |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### How to View Reports" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "1. Download the shard HTML report artifact (zip file)" >> $GITHUB_STEP_SUMMARY
          echo "2. Extract and open \`index.html\` in your browser" >> $GITHUB_STEP_SUMMARY
          echo "3. Or run: \`npx playwright show-report path/to/extracted-folder\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Debugging Tips" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "- **Failed tests?** Download the shard report that failed. Each shard has a focused subset of tests." >> $GITHUB_STEP_SUMMARY
          echo "- **Traces**: Available in trace artifacts (only on failure)" >> $GITHUB_STEP_SUMMARY
          echo "- **Docker Logs**: Backend errors available in docker-logs-shard-N artifacts" >> $GITHUB_STEP_SUMMARY
          echo "- **Local repro**: \`npx playwright test --grep=\"test name\"\`" >> $GITHUB_STEP_SUMMARY

  # Comment on PR with results (only for workflow_run triggered by PR)
  comment-results:
    name: Comment Test Results
    runs-on: ubuntu-latest
    needs: [e2e-tests, test-summary]
    # Only comment if triggered by workflow_run from a pull_request event
    if: ${{ always() && github.event_name == 'workflow_run' && github.event.workflow_run.event == 'pull_request' }}
    permissions:
      pull-requests: write

    steps:
      - name: Determine test status
        id: status
        run: |
          if [[ "${{ needs.e2e-tests.result }}" == "success" ]]; then
            echo "emoji=✅" >> $GITHUB_OUTPUT
            echo "status=PASSED" >> $GITHUB_OUTPUT
            echo "message=All E2E tests passed!" >> $GITHUB_OUTPUT
          elif [[ "${{ needs.e2e-tests.result }}" == "failure" ]]; then
            echo "emoji=❌" >> $GITHUB_OUTPUT
            echo "status=FAILED" >> $GITHUB_OUTPUT
            echo "message=Some E2E tests failed. Check artifacts for per-shard reports." >> $GITHUB_OUTPUT
          else
            echo "emoji=⚠️" >> $GITHUB_OUTPUT
            echo "status=UNKNOWN" >> $GITHUB_OUTPUT
            echo "message=E2E tests did not complete successfully." >> $GITHUB_OUTPUT
          fi

      - name: Get PR number
        id: pr
        run: |
          PR_NUM=$(echo '${{ toJson(github.event.workflow_run.pull_requests) }}' | jq -r '.[0].number')
          if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
            echo "⚠️ Could not determine PR number, skipping comment"
            echo "skip=true" >> $GITHUB_OUTPUT
          else
            echo "number=$PR_NUM" >> $GITHUB_OUTPUT
            echo "skip=false" >> $GITHUB_OUTPUT
          fi

      - name: Comment on PR
        if: steps.pr.outputs.skip != 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
        with:
          script: |
            const emoji = '${{ steps.status.outputs.emoji }}';
            const status = '${{ steps.status.outputs.status }}';
            const message = '${{ steps.status.outputs.message }}';
            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const prNumber = parseInt('${{ steps.pr.outputs.number }}');

            const body = `## ${emoji} E2E Test Results: ${status}

            ${message}

            | Metric | Result |
            |--------|--------|
            | Browsers | Chromium, Firefox, WebKit |
            | Shards per Browser | 4 |
            | Total Jobs | 12 |
            | Status | ${status} |

            **Per-Shard HTML Reports** (easier to debug):
            - \`playwright-report-{browser}-shard-{1..4}\` (12 total artifacts)
            - Trace artifacts: \`traces-{browser}-shard-{N}\`

            [📊 View workflow run & download reports](${runUrl})

            ---
            <sub>🤖 This comment was automatically generated by the E2E Tests workflow.</sub>`;

            // Find existing comment
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('E2E Test Results')
            );

            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: body
              });
            }

  # Upload merged E2E coverage to Codecov
  upload-coverage:
    name: Upload E2E Coverage
    runs-on: ubuntu-latest
    needs: e2e-tests
    # Coverage is only produced when PLAYWRIGHT_COVERAGE=1 (requires Vite dev server)
    if: vars.PLAYWRIGHT_COVERAGE == '1'


    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Set up Node.js
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'

      - name: Download all coverage artifacts
        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7
        with:
          pattern: e2e-coverage-*
          path: all-coverage
          merge-multiple: false

      - name: Merge LCOV coverage files
        run: |
          # Install lcov for merging
          sudo apt-get update && sudo apt-get install -y lcov

          # Create merged coverage directory
          mkdir -p coverage/e2e-merged

          # Find all lcov.info files and merge them
          LCOV_FILES=$(find all-coverage -name "lcov.info" -type f)

          if [[ -n "$LCOV_FILES" ]]; then
            # Build merge command
            MERGE_ARGS=""
            for file in $LCOV_FILES; do
              MERGE_ARGS="$MERGE_ARGS -a $file"
            done

            lcov $MERGE_ARGS -o coverage/e2e-merged/lcov.info
            echo "✅ Merged $(echo "$LCOV_FILES" | wc -w) coverage files"
          else
            echo "⚠️ No coverage files found to merge"
            exit 0
          fi

      - name: Upload E2E coverage to Codecov
        uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage/e2e-merged/lcov.info
          flags: e2e
          name: e2e-coverage
          fail_ci_if_error: false

      - name: Upload merged coverage artifact
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
        with:
          name: e2e-coverage-merged
          path: coverage/e2e-merged/
          retention-days: 30

  # Final status check - blocks merge if tests fail
  e2e-results:
    name: E2E Test Results
    runs-on: ubuntu-latest
    needs: e2e-tests
    if: always()

    steps:
      - name: Check test results
        run: |
          if [[ "${{ needs.e2e-tests.result }}" == "success" ]]; then
            echo "✅ All E2E tests passed"
            exit 0
          elif [[ "${{ needs.e2e-tests.result }}" == "skipped" ]]; then
            echo "⏭️ E2E tests were skipped"
            exit 0
          else
            echo "❌ E2E tests failed or were cancelled"
            echo "Result: ${{ needs.e2e-tests.result }}"
            exit 1
          fi