From 9359bf5d043a2573495d93856c5f3824f6aa82b1 Mon Sep 17 00:00:00 2001
From: ruv
Date: Sat, 30 May 2026 17:01:10 -0400
Subject: [PATCH] =?UTF-8?q?feat(aether-arena):=20HF=20Space=20(Gradio)=20v?=
 =?UTF-8?q?0=20=E2=80=94=20deployed=20to=20ruvnet/aether-arena=20(M6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Public face of the benchmark: empty-board leaderboard from the witness ledger,
chain-integrity display, submit/verify/about tabs. Presentation layer per
ADR-149 §2.2 (heavy scoring stays in the pinned RuView harness / CI).

Live: https://huggingface.co/spaces/ruvnet/aether-arena

Co-Authored-By: claude-flow
---
 aether-arena/space/README.md        |  36 ++++++++
 aether-arena/space/app.py           | 129 ++++++++++++++++++++++++++++
 aether-arena/space/ledger.jsonl     |   1 +
 aether-arena/space/requirements.txt |   1 +
 4 files changed, 167 insertions(+)
 create mode 100644 aether-arena/space/README.md
 create mode 100644 aether-arena/space/app.py
 create mode 100644 aether-arena/space/ledger.jsonl
 create mode 100644 aether-arena/space/requirements.txt

diff --git a/aether-arena/space/README.md b/aether-arena/space/README.md
new file mode 100644
index 00000000..2367a9ef
--- /dev/null
+++ b/aether-arena/space/README.md
@@ -0,0 +1,36 @@
+---
+title: AetherArena — Spatial-Intelligence Benchmark
+emoji: 📡
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.1
+app_file: app.py
+pinned: true
+license: cc-by-nc-4.0
+tags:
+  - benchmark
+  - leaderboard
+  - wifi-sensing
+  - spatial-intelligence
+  - pose-estimation
+---
+
+# AetherArena ("AA") — The Official Spatial-Intelligence Benchmark
+
+> Public leaderboard. Private evaluation split. Open scorer. Signed results.
+
+The field's standard yardstick for camera-free **spatial intelligence** (pose, presence,
+occupancy, tracking, vitals) from RF/WiFi and, over time, mmWave / UWB / multimodal.
+
+- **Project-agnostic** — any team, framework, or modality enters; RuView donated the seed
+  scorer and is scored like everyone else.
+- **Benchmark-first** — the board starts empty; every row is a real scoring-pipeline
+  **witness** (`inputs_sha256` + `proof_sha256` + `harness_version`) in an append-only,
+  hash-chained, tamper-evident ledger.
+- **Reproducible** — the scorer is open; reproduce any proof hash + repeatability locally.
+
+Spec: [ADR-149](https://github.com/ruvnet/RuView/blob/main/docs/adr/ADR-149-public-community-leaderboard-huggingface.md).
+Source + open scorer: https://github.com/ruvnet/RuView/tree/main/aether-arena
+
+Non-commercial (CC BY-NC 4.0): the v0 eval split derives from MM-Fi (CC BY-NC); AA is operated non-commercially.
diff --git a/aether-arena/space/app.py b/aether-arena/space/app.py
new file mode 100644
index 00000000..bab8c9f8
--- /dev/null
+++ b/aether-arena/space/app.py
@@ -0,0 +1,129 @@
+"""AetherArena ("AA") — The Official Spatial-Intelligence Benchmark.
+
+Hugging Face Space (Gradio) — the public face of the benchmark (ADR-149).
+This Space is the presentation + submission layer; the heavy scoring runs in the
+pinned RuView harness (CI / scorer container), and results land in the append-only,
+hash-chained **witness ledger** shown here.
+
+Benchmark-first: the board starts EMPTY. No seeded or hand-entered numbers — every
+row is a real scoring-pipeline witness (inputs_sha256 + proof_sha256 + harness_version).
+"""
+import hashlib
+import json
+from pathlib import Path
+
+import gradio as gr
+
+LEDGER = Path(__file__).parent / "ledger.jsonl"
+GENESIS_PREV = "0" * 64
+
+
+def _rows():
+    if not LEDGER.exists():
+        return []
+    return [json.loads(l) for l in LEDGER.read_text().splitlines() if l.strip()]
+
+
+def _canon(row: dict) -> bytes:
+    body = {k: row[k] for k in sorted(row) if k != "row_hash"}
+    return json.dumps(body, separators=(",", ":"), sort_keys=True).encode()
+
+
+def verify_chain():
+    rows, prev = _rows(), GENESIS_PREV
+    for i, r in enumerate(rows):
+        if r.get("prev_hash") != prev or r.get("row_hash") != hashlib.sha256(_canon(r)).hexdigest():
+            return f"❌ Ledger chain BROKEN at row {i} — tampering detected."
+        prev = r["row_hash"]
+    return f"✅ Witness ledger chain intact — {len(rows)} row(s), append-only."
+
+
+def leaderboard(category: str):
+    results = [r for r in _rows() if r.get("kind") == "result" and (category == "all" or r.get("category") == category)]
+    if not results:
+        return [["— no entries yet —", "be the first", "", "", ""]]
+    results.sort(key=lambda r: r.get("pck20_all") or r.get("pck_all") or 0, reverse=True)
+    return [[
+        r.get("submitter", "?"),
+        r.get("model_ref", "?"),
+        r.get("tier", "?"),
+        f"{(r.get('pck20_all') or r.get('pck_all') or 0):.4f}",
+        (r.get("proof_sha256") or "")[:16],
+    ] for r in results]
+
+
+FOUR_PART = "### Public leaderboard. Private evaluation split. Open scorer. Signed results."
+
+ABOUT = """
+**AetherArena** is the official, project-agnostic **Spatial-Intelligence Benchmark** —
+camera-free pose, presence, occupancy, tracking, and vitals from RF/WiFi (and, over
+time, mmWave / UWB / radar / multimodal). It is **not** a single-vendor board: any
+team, framework, or modality enters, and every entrant — including the RuView baseline
+that donated the seed scorer — is scored by the identical, open, pinned harness.
+
+The scorer reuses RuView's released `wifi-densepose-train` acceptance harness
+(`ruview_metrics` + ablation). You submit a **model, not predictions**; it is scored
+against a **private** MM-Fi held-out split; one **witness** row (inputs hash + proof
+hash + harness version) is appended to a **hash-chained, tamper-evident ledger**.
+
+Spec: ADR-149. v0 ranks **pose, presence, edge-latency, determinism**. Tracking &
+vitals activate when their ground truth lands; **privacy-leakage** is gated until the
+membership-inference attacker ships. Source + the open scorer:
+https://github.com/ruvnet/RuView/tree/main/aether-arena
+"""
+
+SUBMIT = """
+### Submit a model
+
+1. Write a manifest — [`schema/aa-submission.toml`](https://github.com/ruvnet/RuView/blob/main/aether-arena/schema/aa-submission.toml):
+   declare your model ref, category, the ADR-145 feature set (F0 CSI … F3 BFLD), and the tensor I/O contract.
+2. Provide your model artifact (`.safetensors` / `.rvf` / LoRA adapter).
+3. It moves through `submitted → validated → quarantined → smoke_scored → full_scored → published`,
+   scored in a no-network, read-only sandbox against the private split.
+4. Your signed witness row appears on the leaderboard.
+
+**You submit a model, never predictions** — predictions on data you hold prove nothing.
+"""
+
+VERIFY = """
+### Verify it's fair (you don't have to trust us)
+
+The scorer is open and reproducible. Reproduce the determinism proof + repeatability locally:
+
+```bash
+git clone https://github.com/ruvnet/RuView && cd RuView/v2
+# determinism gate (same as CI):
+cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
+# repeatability — N runs, one identical proof hash:
+cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
+# verify the append-only witness ledger chain:
+cd ../aether-arena/ledger && python3 ledger_tools.py verify
+```
+
+A stranger must be able to: submit → get a deterministic score → see the signed row →
+rerun the scorer locally → understand why the rank is fair. That is the launch gate (ADR-149 §7).
+"""
+
+with gr.Blocks(title="AetherArena — Spatial-Intelligence Benchmark") as demo:
+    gr.Markdown("# 📡 AetherArena (AA)\n## The Official Spatial-Intelligence Benchmark")
+    gr.Markdown(FOUR_PART)
+    chain = gr.Markdown(verify_chain())
+
+    with gr.Tab("🏆 Leaderboard"):
+        cat = gr.Dropdown(["all", "pose", "presence"], value="all", label="Category")
+        tbl = gr.Dataframe(
+            headers=["Submitter", "Model", "Tier", "Score", "Proof (sha256…)"],
+            value=leaderboard("all"), interactive=False, wrap=True,
+        )
+        cat.change(leaderboard, cat, tbl)
+        gr.Markdown("*Benchmark-first: the board starts empty. Every row is a real harness witness — no seeded numbers.*")
+
+    with gr.Tab("📤 Submit"):
+        gr.Markdown(SUBMIT)
+    with gr.Tab("🔬 Verify"):
+        gr.Markdown(VERIFY)
+    with gr.Tab("ℹ️ About"):
+        gr.Markdown(ABOUT)
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/aether-arena/space/ledger.jsonl b/aether-arena/space/ledger.jsonl
new file mode 100644
index 00000000..ca253a68
--- /dev/null
+++ b/aether-arena/space/ledger.jsonl
@@ -0,0 +1 @@
+{"benchmark": "AetherArena", "created": "2026-05-30", "kind": "genesis", "note": "Official Spatial-Intelligence Benchmark \u2014 append-only signed ledger. Entries are real harness scores only; no seeded numbers.", "prev_hash": "0000000000000000000000000000000000000000000000000000000000000000", "row_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "seq": 0, "spec": "ADR-149"}
diff --git a/aether-arena/space/requirements.txt b/aether-arena/space/requirements.txt
new file mode 100644
index 00000000..93846839
--- /dev/null
+++ b/aether-arena/space/requirements.txt
@@ -0,0 +1 @@
+gradio==4.44.1