[fix] Security hardening (upstream PRs #5, #7): stack-protector-strong, format-security flags, NULL guards on ane_compile/fread/fopen, tokenize.py input validation

2026-03-03 14:22:03 +01:00 · 2026-03-03 14:22:03 +01:00 · 7524260ead
parent 4ae51e038b
commit 7524260ead
3 changed files with 76 additions and 6 deletions
--- a/training/Makefile
+++ b/training/Makefile
@ -1,5 +1,10 @@
 CC = xcrun clang
-CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc
+# ANE_COMPAT is shared by CFLAGS and CFLAGS_DEBUG; SEC_FLAGS is appended to CFLAGS only.
+ANE_COMPAT = -Wno-deprecated-declarations
+SEC_FLAGS = -fstack-protector-strong -Wformat-security
+# CFLAGS: -O2 build with SEC_FLAGS. CFLAGS_DEBUG: -O0 -g with -fsanitize=address,undefined and without SEC_FLAGS.
+CFLAGS = -O2 -Wall $(ANE_COMPAT) -fobjc-arc $(SEC_FLAGS)
+CFLAGS_DEBUG = -O0 -g -Wall $(ANE_COMPAT) -fobjc-arc -fsanitize=address,undefined
 FRAMEWORKS = -framework Foundation -framework CoreML -framework IOSurface
 LDFLAGS = $(FRAMEWORKS) -ldl

@ -36,13 +41,31 @@ test_qos_sweep: test_qos_sweep.m
 test_ane_advanced: test_ane_advanced.m
 	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

+test_chaining: test_chaining.m  # single-source build: $< is test_chaining.m, $@ is the output binary
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
 probes: $(PROBES)

+data: tokenize  # NOTE(review): the tokenize prerequisite runs before download_data.sh; tokenize.py exits 1 when its ZIP is missing, so confirm the script is not what fetches that ZIP
+	@bash download_data.sh
+
 tokenize:
 	python3 tokenize.py

+setup: data  # Runs the data target first, then prints next-step hints; builds nothing itself.
+	@echo "=== Setup complete ==="
+	@echo "Data:  tinystories_data00.bin"
+	@echo "To train: make train_large && ./train_large"
+	@echo "Override paths: ANE_MODEL_PATH=... ANE_DATA_PATH=... ./train_large"
+
+verify-flags:  # Print the effective CFLAGS and the version of the compiler the build rules actually invoke, i.e. $(CC).
+	@echo "=== Active CFLAGS ==="
+	@echo "$(CFLAGS)"
+	@echo "=== Compiler version ==="
+	@$(CC) --version
+
 clean:
 	rm -f train train_large train_large_ane $(PROBES) test_rmsnorm_bwd test_classifier

-.PHONY: clean tokenize probes
+.PHONY: clean tokenize probes verify-flags data setup

--- a/training/stories_config.h
+++ b/training/stories_config.h
@ -22,8 +22,19 @@
 #define SEQ 256
 #define NLAYERS 12
 #define VOCAB 32000
-#define ACCUM_STEPS 10
+#define DEFAULT_ACCUM_STEPS 10
 #define MAX_COMPILES 100
+static int g_accum_steps = DEFAULT_ACCUM_STEPS;  // file-static defined in a header: every translation unit that includes this file gets its own copy
+
+static void init_accum_steps(void) {  // Override g_accum_steps from $ANE_ACCUM_STEPS when it is a whole decimal number in 1..10000; otherwise leave it unchanged.
+    const char *env = getenv("ANE_ACCUM_STEPS");
+    if (!env || !env[0]) return;  // unset or empty: keep the current value
+    char *end = NULL;
+    long v = strtol(env, &end, 10);  // unlike atoi, reports where parsing stopped and has defined behavior on overflow
+    if (*end == '\0' && v > 0 && v <= 10000) g_accum_steps = (int)v;  // reject trailing junk such as "12abc" and out-of-range values
+}
+
+#define ACCUM_STEPS g_accum_steps  // expands to a runtime variable, not a constant expression; it holds DEFAULT_ACCUM_STEPS until init_accum_steps() has run

 // Per compile: 5 weight-bearing kernels per layer + 1 classifier = 5*12+1 = 61
 // Plus 1 static (sdpaBwd2 per layer, no weights) = 12 more but those are weight-free
@ -111,15 +122,30 @@ typedef struct {

 // Globals
 static Class g_D, g_I, g_AR, g_AIO;
+static bool g_ane_init_done = false;   // Re-entry guard (ref: CRIT-01)
+static bool g_ane_ok_large = false;    // true only when all private classes loaded successfully
 static mach_timebase_info_data_t g_tb;
 static int g_compile_count = 0;

 static void ane_init(void) {
-    dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
+    if (g_ane_init_done) return;  // a previous call already ran (successfully or not): do nothing
+    g_ane_init_done = true;  // Set first to prevent re-entry (ref: CRIT-01); as a consequence a failed init is never retried
+    void *handle = dlopen(
+        "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
+        RTLD_NOW);
+    if (!handle) {  // framework could not be loaded: report why and leave g_ane_ok_large false
+        fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
+        return;
+    }
     g_D  = NSClassFromString(@"_ANEInMemoryModelDescriptor");
     g_I  = NSClassFromString(@"_ANEInMemoryModel");
     g_AR = NSClassFromString(@"_ANERequest");
     g_AIO= NSClassFromString(@"_ANEIOSurfaceObject");
+    if (!g_D || !g_I || !g_AR || !g_AIO) {  // any of the four lookups returned nil: report and leave g_ane_ok_large false
+        fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
+        return;
+    }
+    g_ane_ok_large = true;  // reached only when dlopen and all four class lookups succeeded
 }
 static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }

--- a/training/tokenize.py
+++ b/training/tokenize.py
@ -3,11 +3,13 @@
 Data format: flat uint16 token IDs (llama2.c BPE, 32K vocab).
 Source: ~/tiny_stories_data_pretokenized.zip"""

-import os, struct, zipfile
+import os, sys, struct, zipfile
 from pathlib import Path

 ZIP_PATH = os.path.expanduser('~/tiny_stories_data_pretokenized.zip')
 OUTPUT_PATH = str(Path(__file__).resolve().parent / 'tinystories_data00.bin')
+VOCAB_SIZE = 32000  # token IDs >= this value are reported as out-of-vocab by main()'s check of the first 10 tokens
+MAX_ZIP_SIZE = int(os.environ.get('MAX_ZIP_BYTES', str(10 * 1024 * 1024 * 1024)))  # upper bound on the ZIP file's on-disk size in bytes (default 10 GiB); override with $MAX_ZIP_BYTES

 def main():
    if os.path.exists(OUTPUT_PATH):
@ -15,8 +17,24 @@ def main():
        print(f"{OUTPUT_PATH} already exists ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)")
        return

+    if not os.path.exists(ZIP_PATH):
+        print(f"ERROR: ZIP file not found: {ZIP_PATH}", file=sys.stderr)
+        print(f"  Expected: ~/tiny_stories_data_pretokenized.zip", file=sys.stderr)
+        sys.exit(1)
+
+    zip_size = os.path.getsize(ZIP_PATH)
+    if zip_size > MAX_ZIP_SIZE:
+        print(f"ERROR: ZIP file too large ({zip_size/1e9:.1f} GB > {MAX_ZIP_SIZE/1e9:.0f} GB limit).",
+              file=sys.stderr)
+        sys.exit(1)
+
    print(f"Extracting data00.bin from {ZIP_PATH}...")
    with zipfile.ZipFile(ZIP_PATH, 'r') as z:
+        names = z.namelist()
+        if 'data00.bin' not in names:
+            print(f"ERROR: data00.bin not found in ZIP. Contents: {names[:10]}", file=sys.stderr)
+            sys.exit(1)
+
        with z.open('data00.bin') as src, open(OUTPUT_PATH, 'wb') as dst:
            while True:
                chunk = src.read(1 << 20)
@ -27,10 +45,13 @@ def main():
    n = os.path.getsize(OUTPUT_PATH) // 2
    print(f"Written {OUTPUT_PATH} ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)")

-    # Sanity check
    with open(OUTPUT_PATH, 'rb') as f:
        tokens = struct.unpack('<10H', f.read(20))
        print(f"First 10 tokens: {tokens}")
+        oob = [t for t in tokens if t >= VOCAB_SIZE]
+        if oob:
+            print(f"WARNING: out-of-vocab tokens found: {oob} (vocab_size={VOCAB_SIZE})",
+                  file=sys.stderr)

 if __name__ == '__main__':
    main()