name: "AetherArena harness gate (ADR-149)"

# Runs the AetherArena scoring harness as a PR build gate. Every PR that touches
# the scorer, the metrics, or the benchmark scaffold must keep the deterministic
# score hash stable (ADR-149 §2.5 determinism_gate). If the scoring maths changes,
# the hash moves and this gate fails until `expected_score.sha256` is regenerated
# and reviewed — so scorer drift can never land silently.
#
# This workflow fulfils the requirement for "a PR that runs the harness as
# part of the build process".

# Triggers: pull requests that change any of the listed paths, pushes to the
# ADR-149 feature branch, and manual runs from the Actions UI.
on:
  pull_request:
    paths:
      - "v2/crates/wifi-densepose-train/src/ruview_metrics.rs"
      - "v2/crates/wifi-densepose-train/src/ablation.rs"
      - "v2/crates/wifi-densepose-train/src/bin/aa_score_runner.rs"
      - "aether-arena/**"
      - ".github/workflows/aether-arena-harness.yml"
  push:
    branches:
      - "feat/adr-149-aether-arena"
  workflow_dispatch:

# Least privilege: the job only checks out the repository and appends to the
# run's job summary, neither of which needs write access to pull requests.
permissions:
  contents: read

jobs:
  harness-gate:
    name: Run AA scorer harness (determinism gate)
    runs-on: ubuntu-latest
    # Every `run:` step executes from v2/ unless the step sets its own
    # working-directory.
    defaults:
      run:
        working-directory: v2
    steps:
      - uses: actions/checkout@v6

      # Print the active toolchain and the compiler version into the job log.
      - name: Install Rust toolchain
        run: |
          rustup show
          rustc --version

      # Cache the cargo registry, cargo git checkouts and the v2 build directory.
      # The exact key follows v2/Cargo.lock; restore-keys lets a lockfile change
      # start from the most recent cache for this OS instead of a cold build.
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: aa-harness-${{ runner.os }}-${{ hashFiles('v2/Cargo.lock') }}
          restore-keys: |
            aa-harness-${{ runner.os }}-

      # 1. Compile the scorer binary up front with default features disabled
      #    (pure Rust: no torch / no GPU), which keeps the PR gate fast.
      - name: Build AA score runner
        run: >-
          cargo build
          -p wifi-densepose-train
          --bin aa_score_runner
          --no-default-features

      # 2. Determinism gate: run the scorer with no arguments; the committed
      #    expected hash must still match. A non-zero exit fails the PR.
      - name: Run determinism gate
        run: >-
          cargo run -q
          -p wifi-densepose-train
          --bin aa_score_runner
          --no-default-features

      # 3. Repeatability analysis (witness chain): across 16 runs the harness
      #    must yield one identical proof hash; any nondeterminism fails here.
      - name: Repeatability analysis (16 runs)
        run: >-
          cargo run -q
          -p wifi-densepose-train
          --bin aa_score_runner
          --no-default-features
          -- --repeat 16

      # 4. Real-scoring smoke: score the sample prediction file against the
      #    public smoke split so the actual model-scoring path is exercised
      #    (not just the fixture). Paths are relative to v2/, hence the `../`.
      - name: Real-scoring smoke test
        run: >-
          cargo run -q
          -p wifi-densepose-train
          --bin aa_score_runner
          --no-default-features
          --
          --split ../aether-arena/fixtures/smoke_split.json
          --pred ../aether-arena/fixtures/smoke_pred.json
          --json

      # 5. Witness ledger chain integrity: verify the append-only results
      #    ledger (every prev_hash link + row_hash intact = no silent edits).
      #    This step overrides the job's default working directory so the
      #    script runs from the ledger folder at the repository root.
      - name: Verify witness ledger chain
        run: python3 ledger_tools.py verify
        working-directory: aether-arena/ledger

      # 6. Emit the witness row + repeatability into the PR run summary.
      #    This step runs even after an earlier step failed (`if: always()`),
      #    so a failing runner invocation must not abort the script: under the
      #    default `bash -e` shell an unguarded `VAR=$(cmd)` exits on a non-zero
      #    status, and the regeneration hint would never be written in exactly
      #    the case it is meant for. Each capture therefore downgrades a failure
      #    to a workflow warning and keeps whatever output was produced.
      - name: Witness row → job summary
        if: always()
        run: |
          ROW=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --json) \
            || echo "::warning::aa_score_runner --json exited non-zero; witness row may be incomplete"
          REP=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16) \
            || echo "::warning::aa_score_runner --repeat 16 exited non-zero; repeatability output may be incomplete"
          {
            echo "## AetherArena harness gate (witness chain)"
            echo ""
            echo "Deterministic witness (ADR-149 §2.2 / proof + repeatability):"
            echo '```json'
            echo "$ROW"
            echo "$REP"
            echo '```'
            echo ""
            # The hint is written for the v2/ directory (where this workflow
            # runs cargo), so the fixture path is relative to v2/.
            echo 'If the determinism gate failed, the scoring maths changed: from the `v2/` directory, regenerate with'
            echo '`cargo run -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --generate-hash > ../aether-arena/fixtures/expected_score.sha256` and review the diff.'
          } >> "$GITHUB_STEP_SUMMARY"