fix(witness): redact secrets from bundled verify.py output (SECURITY)

The Python proof verifier (archive/v1/data/proof/verify.py) imports the project settings, which read the user's .env file. When pydantic validation fails (e.g., extra fields not in the Settings schema), the error dump includes the offending input_value — which means real Docker tokens, GitHub PATs, API keys, etc. were being echoed to stdout and captured into the bundled verification-output.log. Confirmed on this branch's first bundle generation: dckr_pat_, tok_... cluster token, and other long opaque strings leaked into witness-bundle-ADR028-<commit>/proof/verification-output.log inside the .tar.gz. Bundle + tarball nuked from disk before any push. Added: - scripts/redact-secrets.py — stdin->stdout filter with patterns for common token prefixes (dckr_pat_, tok_, sk-, ghp_, gho_, github_pat_, AKIA, hf_, xoxb-, xoxp-, Bearer), `field=secret` assignments, long opaque alphanumeric strings (40+ chars), and long hex runs (20+ chars which catch token suffixes after `...` truncation). - generate-witness-bundle.sh now pipes verify.py stderr through that filter before tee-ing into the bundled log. - Also fixed pre-existing stale `v1/` paths in the witness script (correct path is `archive/v1/`). The user must rotate the leaked credentials regardless (the bundle was never pushed, but they appeared in this local Claude session log). Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-22 21:04:57 -04:00 · 2026-05-22 21:04:57 -04:00 · f8a2e36958
parent 4c39e28bd0
commit f8a2e36958
2 changed files with 68 additions and 6 deletions
--- a/scripts/generate-witness-bundle.sh
+++ b/scripts/generate-witness-bundle.sh
@ -39,18 +39,18 @@ cp "$REPO_ROOT/docs/adr/ADR-028-esp32-capability-audit.md" "$BUNDLE_DIR/"
 # ---------------------------------------------------------------
 echo "[2/7] Copying proof system..."
 mkdir -p "$BUNDLE_DIR/proof"
-cp "$REPO_ROOT/v1/data/proof/verify.py" "$BUNDLE_DIR/proof/"
-cp "$REPO_ROOT/v1/data/proof/expected_features.sha256" "$BUNDLE_DIR/proof/"
-cp "$REPO_ROOT/v1/data/proof/generate_reference_signal.py" "$BUNDLE_DIR/proof/"
+cp "$REPO_ROOT/archive/v1/data/proof/verify.py" "$BUNDLE_DIR/proof/"
+cp "$REPO_ROOT/archive/v1/data/proof/expected_features.sha256" "$BUNDLE_DIR/proof/"
+cp "$REPO_ROOT/archive/v1/data/proof/generate_reference_signal.py" "$BUNDLE_DIR/proof/"
 # Reference signal is large (~10 MB) — include metadata only
 python3 -c "
 import json, os
-with open('$REPO_ROOT/v1/data/proof/sample_csi_data.json') as f:
+with open('$REPO_ROOT/archive/v1/data/proof/sample_csi_data.json') as f:
    d = json.load(f)
 meta = {k: v for k, v in d.items() if k != 'frames'}
 meta['frame_count'] = len(d['frames'])
 meta['first_frame_keys'] = list(d['frames'][0].keys())
-meta['file_size_bytes'] = os.path.getsize('$REPO_ROOT/v1/data/proof/sample_csi_data.json')
+meta['file_size_bytes'] = os.path.getsize('$REPO_ROOT/archive/v1/data/proof/sample_csi_data.json')
 with open('$BUNDLE_DIR/proof/reference_signal_metadata.json', 'w') as f:
    json.dump(meta, f, indent=2)
 " 2>/dev/null && echo "  Reference signal metadata extracted." || echo "  (Python not available — metadata skipped)"
@ -73,7 +73,13 @@ cd "$REPO_ROOT"
 # 4. Run Python proof verification
 # ---------------------------------------------------------------
 echo "[4/7] Running Python proof verification..."
-python3 "$REPO_ROOT/v1/data/proof/verify.py" 2>&1 | tee "$BUNDLE_DIR/proof/verification-output.log" | tail -5 || true
+# SECURITY: the verify.py emits a Pydantic schema dump on validation failure
+# that includes the user's .env contents (Docker tokens, API keys, etc.).
+# Redact any line matching common secret-shaped patterns before writing the
+# bundled log. See ADR-110 wave 5 incident note.
+python3 "$REPO_ROOT/archive/v1/data/proof/verify.py" 2>&1 | \
+  python3 "$REPO_ROOT/scripts/redact-secrets.py" \
+  | tee "$BUNDLE_DIR/proof/verification-output.log" | tail -5 || true

 # ---------------------------------------------------------------
 # 5. Firmware manifest
--- a/scripts/redact-secrets.py
+++ b/scripts/redact-secrets.py
@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""Pipe stdin through a secret-redaction filter to stdout.
+
+Used by generate-witness-bundle.sh to strip credentials from log files
+before they enter the witness bundle. Pure stdlib so it runs anywhere.
+
+Usage:
+    some-command 2>&1 | python3 scripts/redact-secrets.py > clean.log
+"""
+import re
+import sys
+
+
+# Token prefix patterns — common SaaS / VCS API token shapes.
+PREFIX_PATTERNS = [
+    (re.compile(r'(dckr_pat_|tok_|sk-|ghp_|gho_|github_pat_|AKIA|hf_|xoxb-|xoxp-|Bearer\s+)[A-Za-z0-9_\-\.]+',
+                re.IGNORECASE), r'\1[REDACTED]'),
+]
+
+# Long opaque strings (40+ alphanumeric / underscore / dash chars).
+LONG_OPAQUE = re.compile(r'[A-Za-z0-9_\-]{40,}')
+
+# Long hex runs (20+ hex chars — covers token suffixes after `...`).
+LONG_HEX = re.compile(r'[a-fA-F0-9]{20,}')
+
+# `field=VALUE` style assignment where field name suggests a secret.
+SECRET_ASSIGNMENT = re.compile(
+    r'(token|password|secret|api_key|access_key|private_key|psk|bearer)'
+    r'(["\'\s:=]+)["\']?([A-Za-z0-9._\-/+]{12,})["\']?',
+    re.IGNORECASE
+)
+
+
+def redact_line(line: str) -> str:
+    for pat, repl in PREFIX_PATTERNS:
+        line = pat.sub(repl, line)
+    line = SECRET_ASSIGNMENT.sub(lambda m: f'{m.group(1)}={"[REDACTED]"}', line)
+    line = LONG_OPAQUE.sub('[REDACTED-OPAQUE]', line)
+    line = LONG_HEX.sub('[REDACTED-HEX]', line)
+    return line
+
+
+def main() -> int:
+    for raw in sys.stdin.buffer:
+        try:
+            text = raw.decode('utf-8', errors='replace')
+        except Exception:
+            sys.stdout.buffer.write(b'[REDACTED-UNDECODABLE]\n')
+            continue
+        sys.stdout.write(redact_line(text))
+        sys.stdout.flush()
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())