diff --git a/scripts/generate-witness-bundle.sh b/scripts/generate-witness-bundle.sh index 9697f437..6ebc7d7f 100644 --- a/scripts/generate-witness-bundle.sh +++ b/scripts/generate-witness-bundle.sh @@ -39,18 +39,18 @@ cp "$REPO_ROOT/docs/adr/ADR-028-esp32-capability-audit.md" "$BUNDLE_DIR/" # --------------------------------------------------------------- echo "[2/7] Copying proof system..." mkdir -p "$BUNDLE_DIR/proof" -cp "$REPO_ROOT/v1/data/proof/verify.py" "$BUNDLE_DIR/proof/" -cp "$REPO_ROOT/v1/data/proof/expected_features.sha256" "$BUNDLE_DIR/proof/" -cp "$REPO_ROOT/v1/data/proof/generate_reference_signal.py" "$BUNDLE_DIR/proof/" +cp "$REPO_ROOT/archive/v1/data/proof/verify.py" "$BUNDLE_DIR/proof/" +cp "$REPO_ROOT/archive/v1/data/proof/expected_features.sha256" "$BUNDLE_DIR/proof/" +cp "$REPO_ROOT/archive/v1/data/proof/generate_reference_signal.py" "$BUNDLE_DIR/proof/" # Reference signal is large (~10 MB) — include metadata only python3 -c " import json, os -with open('$REPO_ROOT/v1/data/proof/sample_csi_data.json') as f: +with open('$REPO_ROOT/archive/v1/data/proof/sample_csi_data.json') as f: d = json.load(f) meta = {k: v for k, v in d.items() if k != 'frames'} meta['frame_count'] = len(d['frames']) meta['first_frame_keys'] = list(d['frames'][0].keys()) -meta['file_size_bytes'] = os.path.getsize('$REPO_ROOT/v1/data/proof/sample_csi_data.json') +meta['file_size_bytes'] = os.path.getsize('$REPO_ROOT/archive/v1/data/proof/sample_csi_data.json') with open('$BUNDLE_DIR/proof/reference_signal_metadata.json', 'w') as f: json.dump(meta, f, indent=2) " 2>/dev/null && echo " Reference signal metadata extracted." || echo " (Python not available — metadata skipped)" @@ -73,7 +73,13 @@ cd "$REPO_ROOT" # 4. Run Python proof verification # --------------------------------------------------------------- echo "[4/7] Running Python proof verification..." 
-python3 "$REPO_ROOT/v1/data/proof/verify.py" 2>&1 | tee "$BUNDLE_DIR/proof/verification-output.log" | tail -5 || true
+# SECURITY: verify.py emits a Pydantic schema dump on validation failure
+# that includes the user's .env contents (Docker tokens, API keys, etc.).
+# Redact every secret-shaped substring before the output is written to the
+# bundled log. See ADR-110 wave 5 incident note.
+python3 "$REPO_ROOT/archive/v1/data/proof/verify.py" 2>&1 | \
+    python3 "$REPO_ROOT/scripts/redact-secrets.py" \
+    | tee "$BUNDLE_DIR/proof/verification-output.log" | tail -5 || true
 
 # ---------------------------------------------------------------
 # 5. Firmware manifest
diff --git a/scripts/redact-secrets.py b/scripts/redact-secrets.py
new file mode 100644
index 00000000..b2fb6705
--- /dev/null
+++ b/scripts/redact-secrets.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+"""Pipe stdin through a secret-redaction filter to stdout.
+
+Used by generate-witness-bundle.sh to strip credentials from log files
+before they enter the witness bundle. Pure stdlib so it runs anywhere.
+
+Only the secret-shaped span of a line is replaced; the rest passes through
+unchanged. The filter deliberately errs on the side of over-redaction.
+
+Usage:
+    some-command 2>&1 | python3 scripts/redact-secrets.py > clean.log
+"""
+import re
+import sys
+
+
+# Token prefix patterns — common SaaS / VCS API token shapes.
+PREFIX_PATTERNS = [
+    (re.compile(r'(dckr_pat_|tok_|sk-|ghp_|gho_|github_pat_|AKIA|hf_|xoxb-|xoxp-|Bearer\s+)[A-Za-z0-9_\-\.]+',
+                re.IGNORECASE), r'\1[REDACTED]'),
+]
+
+# Long opaque strings (40+ alphanumeric / underscore / dash chars).
+# NOTE(review): a 64-char SHA-256 digest also matches this (and LONG_HEX);
+# confirm the bundled verification log does not need its hashes intact.
+LONG_OPAQUE = re.compile(r'[A-Za-z0-9_\-]{40,}')
+
+# Long hex runs (20+ hex chars — covers token suffixes after `...`).
+LONG_HEX = re.compile(r'[a-fA-F0-9]{20,}')
+
+# `field=VALUE` style assignment where field name suggests a secret.
+# Loose form: any mix of quotes / whitespace / `:` / `=` may separate name
+# and value, so the value itself must look token-like (12+ safe chars).
+SECRET_ASSIGNMENT = re.compile(
+    r'(token|password|secret|api_key|access_key|private_key|psk|bearer)'
+    r'(["\'\s:=]+)["\']?([A-Za-z0-9._\-/+]{12,})["\']?',
+    re.IGNORECASE
+)
+
+# Strict form: the name (optionally extended by a `_SUFFIX`, e.g. SECRET_KEY)
+# is followed by an explicit `=` or `:`. The whole value is redacted whatever
+# its length or alphabet, so `password=hunter2` or `p@ss!word` cannot leak.
+EXPLICIT_ASSIGNMENT = re.compile(
+    r'((?:token|password|secret|api_key|access_key|private_key|psk|bearer)'
+    r'(?:_[A-Za-z0-9_]*)?)'
+    r'["\']?\s*[:=]\s*'
+    r'(?:"[^"\r\n]*"|\'[^\'\r\n]*\'|["\']?[^\s"\']+)',
+    re.IGNORECASE
+)
+
+
+def redact_line(line: str) -> str:
+    """Return *line* with every secret-shaped span replaced by a marker.
+
+    Passes run from most to least specific: known token prefixes, loose and
+    explicit `name=value` assignments, then long opaque and long hex runs.
+    """
+    for pat, repl in PREFIX_PATTERNS:
+        line = pat.sub(repl, line)
+    # Loose pass first so everything it always caught is still caught; the
+    # explicit pass then covers short or punctuation-heavy values.
+    line = SECRET_ASSIGNMENT.sub(r'\1=[REDACTED]', line)
+    line = EXPLICIT_ASSIGNMENT.sub(r'\1=[REDACTED]', line)
+    line = LONG_OPAQUE.sub('[REDACTED-OPAQUE]', line)
+    line = LONG_HEX.sub('[REDACTED-HEX]', line)
+    return line
+
+
+def main() -> int:
+    """Filter stdin to stdout one line at a time and return exit status 0.
+
+    Both streams are used in binary mode: input is decoded as UTF-8 with
+    replacement characters and output is always written back as UTF-8, so
+    a non-UTF-8 locale cannot raise UnicodeEncodeError and truncate the log.
+    """
+    out = sys.stdout.buffer
+    for raw in sys.stdin.buffer:
+        # errors='replace' never raises; bad bytes become U+FFFD.
+        text = raw.decode('utf-8', errors='replace')
+        out.write(redact_line(text).encode('utf-8'))
+        # Flush per line so nothing is left buffered if the pipe dies.
+        out.flush()
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())