From 7524260ead64ae1f50f69db0b2a746849e624568 Mon Sep 17 00:00:00 2001
From: Erik Bray <erikbray93@gmail.com>
Date: Tue, 3 Mar 2026 14:22:03 +0100
Subject: [PATCH] [fix] Security hardening (upstream PRs #5, #7):
 stack-protector-strong, format-security flags, dlopen/class-lookup guards
 in ane_init, ANE_ACCUM_STEPS env override, tokenize.py input validation

---
 training/Makefile         | 27 +++++++++++++++++++++++++--
 training/stories_config.h | 30 ++++++++++++++++++++++++++++--
 training/tokenize.py      | 25 +++++++++++++++++++++++--
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/training/Makefile b/training/Makefile
index 7f16c1a..b726d22 100644
--- a/training/Makefile
+++ b/training/Makefile
@@ -1,5 +1,10 @@
 CC = xcrun clang
-CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc
+
+ANE_COMPAT = -Wno-deprecated-declarations
+SEC_FLAGS = -fstack-protector-strong -Wformat-security
+
+CFLAGS = -O2 -Wall $(ANE_COMPAT) -fobjc-arc $(SEC_FLAGS)
+CFLAGS_DEBUG = -O0 -g -Wall $(ANE_COMPAT) -fobjc-arc -fsanitize=address,undefined
 FRAMEWORKS = -framework Foundation -framework CoreML -framework IOSurface
 LDFLAGS = $(FRAMEWORKS) -ldl
 
@@ -36,13 +41,31 @@ test_qos_sweep: test_qos_sweep.m
 test_ane_advanced: test_ane_advanced.m
 	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
 
+test_chaining: test_chaining.m
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
 probes: $(PROBES)
 
+data: tokenize
+	@bash download_data.sh
+
 tokenize:
 	python3 tokenize.py
 
+setup: data
+	@echo "=== Setup complete ==="
+	@echo "Data:  tinystories_data00.bin"
+	@echo "To train: make train_large && ./train_large"
+	@echo "Override paths: ANE_MODEL_PATH=... ANE_DATA_PATH=... ./train_large"
+
+verify-flags:  # Print the CFLAGS and the compiler that the build rules actually use
+	@echo "=== Active CFLAGS ==="
+	@echo "$(CFLAGS)"
+	@echo "=== Compiler version ==="
+	@$(CC) --version
+
 clean:
 	rm -f train train_large train_large_ane $(PROBES) test_rmsnorm_bwd test_classifier
 
-.PHONY: clean tokenize probes
+.PHONY: clean tokenize probes verify-flags data setup
 
diff --git a/training/stories_config.h b/training/stories_config.h
index f967974..f4c0996 100644
--- a/training/stories_config.h
+++ b/training/stories_config.h
@@ -22,8 +22,19 @@
 #define SEQ 256
 #define NLAYERS 12
 #define VOCAB 32000
-#define ACCUM_STEPS 10
+#define DEFAULT_ACCUM_STEPS 10
 #define MAX_COMPILES 100
+static int g_accum_steps = DEFAULT_ACCUM_STEPS;
+
+static void init_accum_steps(void) {  // Override g_accum_steps from $ANE_ACCUM_STEPS when it is a whole number in 1..10000
+    const char *env = getenv("ANE_ACCUM_STEPS");
+    if (env && env[0]) {
+        char *end = NULL; long v = strtol(env, &end, 10);  // strtol, not atoi: overflow is well-defined and trailing junk is detectable
+        if (*end == '\0' && v > 0 && v <= 10000) g_accum_steps = (int)v;  // anything else leaves the current value untouched
+    }
+}
+
+#define ACCUM_STEPS g_accum_steps
 
 // Per compile: 5 weight-bearing kernels per layer + 1 classifier = 5*12+1 = 61
 // Plus 1 static (sdpaBwd2 per layer, no weights) = 12 more but those are weight-free
@@ -111,15 +122,30 @@ typedef struct {
 
 // Globals
 static Class g_D, g_I, g_AR, g_AIO;
+static bool g_ane_init_done = false;   // Re-entry guard (ref: CRIT-01)
+static bool g_ane_ok_large = false;    // true only when all private classes loaded successfully
 static mach_timebase_info_data_t g_tb;
 static int g_compile_count = 0;
 
 static void ane_init(void) {
-    dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
+    if (g_ane_init_done) return;  // Already attempted once; a failed attempt is not retried
+    g_ane_init_done = true;  // Set before doing any work so a later call returns immediately (ref: CRIT-01)
+    void *handle = dlopen(
+        "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
+        RTLD_NOW);
+    if (!handle) {
+        fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
+        return;  // g_ane_ok_large stays false
+    }
     g_D  = NSClassFromString(@"_ANEInMemoryModelDescriptor");
     g_I  = NSClassFromString(@"_ANEInMemoryModel");
     g_AR = NSClassFromString(@"_ANERequest");
     g_AIO= NSClassFromString(@"_ANEIOSurfaceObject");
+    if (!g_D || !g_I || !g_AR || !g_AIO) {  // Any one missing class counts as failure
+        fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
+        return;  // g_ane_ok_large stays false; some of g_D/g_I/g_AR/g_AIO may still be non-nil
+    }
+    g_ane_ok_large = true;  // Framework loaded and all four private classes resolved
 }
 static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
 
diff --git a/training/tokenize.py b/training/tokenize.py
index 219cb21..815d740 100644
--- a/training/tokenize.py
+++ b/training/tokenize.py
@@ -3,11 +3,13 @@
 Data format: flat uint16 token IDs (llama2.c BPE, 32K vocab).
 Source: ~/tiny_stories_data_pretokenized.zip"""
 
-import os, struct, zipfile
+import os, sys, struct, zipfile
 from pathlib import Path
 
 ZIP_PATH = os.path.expanduser('~/tiny_stories_data_pretokenized.zip')
 OUTPUT_PATH = str(Path(__file__).resolve().parent / 'tinystories_data00.bin')
+VOCAB_SIZE = 32000
+MAX_ZIP_SIZE = int(os.environ.get('MAX_ZIP_BYTES', str(10 * 1024 * 1024 * 1024)))
 
 def main():
     if os.path.exists(OUTPUT_PATH):
@@ -15,8 +17,24 @@ def main():
         print(f"{OUTPUT_PATH} already exists ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)")
         return
 
+    if not os.path.exists(ZIP_PATH):
+        print(f"ERROR: ZIP file not found: {ZIP_PATH}", file=sys.stderr)
+        print(f"  Expected: ~/tiny_stories_data_pretokenized.zip", file=sys.stderr)
+        sys.exit(1)
+
+    zip_size = os.path.getsize(ZIP_PATH)
+    if zip_size > MAX_ZIP_SIZE:
+        print(f"ERROR: ZIP file too large ({zip_size/1e9:.1f} GB > {MAX_ZIP_SIZE/1e9:.0f} GB limit).",
+              file=sys.stderr)
+        sys.exit(1)
+
     print(f"Extracting data00.bin from {ZIP_PATH}...")
     with zipfile.ZipFile(ZIP_PATH, 'r') as z:
+        names = z.namelist()
+        if 'data00.bin' not in names:
+            print(f"ERROR: data00.bin not found in ZIP. Contents: {names[:10]}", file=sys.stderr)
+            sys.exit(1)
+
         with z.open('data00.bin') as src, open(OUTPUT_PATH, 'wb') as dst:
             while True:
                 chunk = src.read(1 << 20)
@@ -27,10 +45,13 @@ def main():
     n = os.path.getsize(OUTPUT_PATH) // 2
     print(f"Written {OUTPUT_PATH} ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)")
 
-    # Sanity check
     with open(OUTPUT_PATH, 'rb') as f:
         tokens = struct.unpack('<10H', f.read(20))
         print(f"First 10 tokens: {tokens}")
+        oob = [t for t in tokens if t >= VOCAB_SIZE]
+        if oob:
+            print(f"WARNING: out-of-vocab tokens found: {oob} (vocab_size={VOCAB_SIZE})",
+                  file=sys.stderr)
 
 if __name__ == '__main__':
     main()