From 7524260ead64ae1f50f69db0b2a746849e624568 Mon Sep 17 00:00:00 2001 From: Erik Bray Date: Tue, 3 Mar 2026 14:22:03 +0100 Subject: [PATCH] [fix] Security hardening (upstream PRs #5, #7): stack-protector-strong, format-security flags, NULL guards on ane_compile/fread/fopen, tokenize.py input validation --- training/Makefile | 27 +++++++++++++++++++++++++-- training/stories_config.h | 30 ++++++++++++++++++++++++++++-- training/tokenize.py | 25 +++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/training/Makefile b/training/Makefile index 7f16c1a..b726d22 100644 --- a/training/Makefile +++ b/training/Makefile @@ -1,5 +1,10 @@ CC = xcrun clang -CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc + +ANE_COMPAT = -Wno-deprecated-declarations +SEC_FLAGS = -fstack-protector-strong -Wformat-security + +CFLAGS = -O2 -Wall $(ANE_COMPAT) -fobjc-arc $(SEC_FLAGS) +CFLAGS_DEBUG = -O0 -g -Wall $(ANE_COMPAT) -fobjc-arc -fsanitize=address,undefined FRAMEWORKS = -framework Foundation -framework CoreML -framework IOSurface LDFLAGS = $(FRAMEWORKS) -ldl @@ -36,13 +41,31 @@ test_qos_sweep: test_qos_sweep.m test_ane_advanced: test_ane_advanced.m $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) +test_chaining: test_chaining.m + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + probes: $(PROBES) +data: tokenize + @bash download_data.sh + tokenize: python3 tokenize.py +setup: data + @echo "=== Setup complete ===" + @echo "Data: tinystories_data00.bin" + @echo "To train: make train_large && ./train_large" + @echo "Override paths: ANE_MODEL_PATH=... ANE_DATA_PATH=... ./train_large" + +verify-flags: + @echo "=== Active CFLAGS ===" + @echo "$(CFLAGS)" + @echo "=== Compiler version ===" + @xcrun clang --version + clean: rm -f train train_large train_large_ane $(PROBES) test_rmsnorm_bwd test_classifier -.PHONY: clean tokenize probes +.PHONY: clean tokenize probes verify-flags data setup diff --git a/training/stories_config.h b/training/stories_config.h index f967974..f4c0996 100644 --- a/training/stories_config.h +++ b/training/stories_config.h @@ -22,8 +22,19 @@ #define SEQ 256 #define NLAYERS 12 #define VOCAB 32000 -#define ACCUM_STEPS 10 +#define DEFAULT_ACCUM_STEPS 10 #define MAX_COMPILES 100 +static int g_accum_steps = DEFAULT_ACCUM_STEPS; + +static void init_accum_steps(void) { + const char *env = getenv("ANE_ACCUM_STEPS"); + if (env && env[0]) { + int v = atoi(env); + if (v > 0 && v <= 10000) g_accum_steps = v; + } +} + +#define ACCUM_STEPS g_accum_steps // Per compile: 5 weight-bearing kernels per layer + 1 classifier = 5*12+1 = 61 // Plus 1 static (sdpaBwd2 per layer, no weights) = 12 more but those are weight-free @@ -111,15 +122,30 @@ typedef struct { // Globals static Class g_D, g_I, g_AR, g_AIO; +static bool g_ane_init_done = false; // Re-entry guard (ref: CRIT-01) +static bool g_ane_ok_large = false; // true only when all private classes loaded successfully static mach_timebase_info_data_t g_tb; static int g_compile_count = 0; static void ane_init(void) { - dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW); + if (g_ane_init_done) return; + g_ane_init_done = true; // Set first to prevent re-entry (ref: CRIT-01) + void *handle = dlopen( + "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", + RTLD_NOW); + if (!handle) { + fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror()); + return; + } g_D = NSClassFromString(@"_ANEInMemoryModelDescriptor"); g_I = NSClassFromString(@"_ANEInMemoryModel"); g_AR = NSClassFromString(@"_ANERequest"); g_AIO= NSClassFromString(@"_ANEIOSurfaceObject"); + if (!g_D || !g_I || !g_AR || !g_AIO) { + fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n"); + return; + } + g_ane_ok_large = true; } static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; } diff --git a/training/tokenize.py b/training/tokenize.py index 219cb21..815d740 100644 --- a/training/tokenize.py +++ b/training/tokenize.py @@ -3,11 +3,13 @@ Data format: flat uint16 token IDs (llama2.c BPE, 32K vocab). Source: ~/tiny_stories_data_pretokenized.zip""" -import os, struct, zipfile +import os, sys, struct, zipfile from pathlib import Path ZIP_PATH = os.path.expanduser('~/tiny_stories_data_pretokenized.zip') OUTPUT_PATH = str(Path(__file__).resolve().parent / 'tinystories_data00.bin') +VOCAB_SIZE = 32000 +MAX_ZIP_SIZE = int(os.environ.get('MAX_ZIP_BYTES', str(10 * 1024 * 1024 * 1024))) def main(): if os.path.exists(OUTPUT_PATH): @@ -15,8 +17,24 @@ def main(): print(f"{OUTPUT_PATH} already exists ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)") return + if not os.path.exists(ZIP_PATH): + print(f"ERROR: ZIP file not found: {ZIP_PATH}", file=sys.stderr) + print(f" Expected: ~/tiny_stories_data_pretokenized.zip", file=sys.stderr) + sys.exit(1) + + zip_size = os.path.getsize(ZIP_PATH) + if zip_size > MAX_ZIP_SIZE: + print(f"ERROR: ZIP file too large ({zip_size/1e9:.1f} GB > {MAX_ZIP_SIZE/1e9:.0f} GB limit).", + file=sys.stderr) + sys.exit(1) + print(f"Extracting data00.bin from {ZIP_PATH}...") with zipfile.ZipFile(ZIP_PATH, 'r') as z: + names = z.namelist() + if 'data00.bin' not in names: + print(f"ERROR: data00.bin not found in ZIP. Contents: {names[:10]}", file=sys.stderr) + sys.exit(1) + with z.open('data00.bin') as src, open(OUTPUT_PATH, 'wb') as dst: while True: chunk = src.read(1 << 20) @@ -27,10 +45,13 @@ def main(): n = os.path.getsize(OUTPUT_PATH) // 2 print(f"Written {OUTPUT_PATH} ({n} tokens, {os.path.getsize(OUTPUT_PATH)/1e6:.1f} MB)") - # Sanity check with open(OUTPUT_PATH, 'rb') as f: tokens = struct.unpack('<10H', f.read(20)) print(f"First 10 tokens: {tokens}") + oob = [t for t in tokens if t >= VOCAB_SIZE] + if oob: + print(f"WARNING: out-of-vocab tokens found: {oob} (vocab_size={VOCAB_SIZE})", + file=sys.stderr) if __name__ == '__main__': main()