diff --git a/docs/diaries/001-initial-setup-and-security-audit.md b/docs/diaries/001-initial-setup-and-security-audit.md index 2ee5007..c30831f 100644 --- a/docs/diaries/001-initial-setup-and-security-audit.md +++ b/docs/diaries/001-initial-setup-and-security-audit.md @@ -64,11 +64,27 @@ Branch `fix/crit-security-findings` erstellt. Alle 4 CRIT-Findings behoben: **Simulation:** 3 Iterationsrunden (CRIT-03 benötigte 3 Runs), Gesamtbewertung 96.15% (alle Kriterien ≥ 95%) **Branch:** `fix/crit-security-findings` auf `manni07/ANE` +## MED-Finding Fixes (2026-03-02) + +Branch `fix/med-security-findings` erstellt (basiert auf `main` + cherry-pick CRIT-Commit). +Alle 6 MED-Findings behoben. Simulation: 2–3 Iterationsrunden, Gesamtbewertung 95.93% (alle Kriterien ≥ 95%). + +| Finding | Dateien | Kernänderung | +|---------|---------|-------------| +| MED-01 | `stories_io.h`, `ane_runtime.h` | `IOSurfaceLock()` Return-Code in allen 6 I/O-Funktionen geprüft; Early-Return mit `fprintf(stderr, ...)` | +| MED-02 | `stories_io.h`, `ane_runtime.h` | Eindeutige Temp-Verzeichnisnamen via `ANE___`; atomarer `g_compile_seq`/`ane_compile_seq` Counter | +| MED-03 | `ane_mil_gen.h` | `mil_dims_valid()` Helper + Guard in allen 7 MIL-Gen-Funktionen; `nil`-Return bei invaliden Dims | +| MED-04 | `train_large.m`, `stories_config.h` | `CkptHdr.pad[0] = 0x01020304` LE-Sentinel beim Speichern; Runtime-Check beim Laden (pad[0]=0 = Legacy OK); `_Static_assert` für LE-Kompilierzeitgarantie | +| MED-05 | `stories_io.h` | `_Static_assert(SEQ % 8 == 0, ...)` + Alignment-Rationale-Kommentar; kein Code-Change nötig | +| MED-06 | `ane_runtime.h`, `stories_config.h` | `dispatch_once` ersetzt manuelle `g_ane_loaded`/`g_ane_init_done`-Guards; thread-sichere One-Time-Init; 2 globale Variablen entfernt | + +**Branch:** `fix/med-security-findings` auf `manni07/ANE` + ## Status | Finding-Typ | Anzahl | Status | |-------------|--------|--------| | KRITISCH (CRIT-01–04) | 4 | ✅ BEHOBEN | | HOCH (HIGH-01–05) | 5 | Offen | -| MITTEL (MED-01–06) | 6 | Offen | +| MITTEL (MED-01–06) | 6 | ✅ BEHOBEN | | NIEDRIG (LOW-01–04) | 4 | ✅ BEHOBEN | diff --git a/docs/reports/security-audit-2026-03-02.md b/docs/reports/security-audit-2026-03-02.md index e166641..b525894 100644 --- a/docs/reports/security-audit-2026-03-02.md +++ b/docs/reports/security-audit-2026-03-02.md @@ -223,6 +223,7 @@ static void ane_run(Kern *k) { ### [MED-01] IOSurface Lock ohne Fehlerbehandlung **Datei:** `training/stories_io.h:62-83` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```c IOSurfaceLock(s, 0, NULL); // Return-Code ignoriert @@ -235,6 +236,7 @@ IOSurfaceLock(s, 0, NULL); // Return-Code ignoriert ### [MED-02] Temporäres Verzeichnis nicht sicher erstellt (TOCTOU-Risiko) **Datei:** `training/ane_runtime.h:68-80`, `training/stories_io.h:94-100` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```objc NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx]; @@ -248,6 +250,7 @@ TOCTOU-Race zwischen `createDirectoryAtPath` und `writeToFile`. Der `hexStringId ### [MED-03] MIL-Text-Generierung ohne Parameter-Validierung **Datei:** `training/ane_mil_gen.h:32-52` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```objc return [NSString stringWithFormat: @@ -261,6 +264,7 @@ Negative oder extrem große `in_ch`/`out_ch`/`spatial`-Werte durch fehlerhafte K ### [MED-04] Keine Endianness-Prüfung bei Checkpoint-Serialisierung **Datei:** `training/train_large.m:110-181` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```c h.magic = 0x424C5A54; @@ -274,6 +278,7 @@ Das `CkptHdr`-Struct wird als binärer Dump ohne Endianness-Marker geschrieben. ### [MED-05] NEON-Vektorisierung ohne Alignment-Garantie **Datei:** `training/stories_io.h:41-58` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```c float16x8_t h = vld1q_f16((const __fp16*)(src + i)); @@ -286,6 +291,7 @@ Zeiger-Arithmetik mit `ch_off * sp` könnte das für NEON benötigte Alignment v ### [MED-06] Globale Variablen ohne Thread-Safety **Datei:** `training/stories_io.h`, `training/stories_config.h` **Schweregrad:** MITTEL +**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`) ```c static bool g_ane_loaded = false; diff --git a/training/ane_mil_gen.h b/training/ane_mil_gen.h index 97fc451..c1f4db1 100644 --- a/training/ane_mil_gen.h +++ b/training/ane_mil_gen.h @@ -5,10 +5,22 @@ #include #include +// MED-03: Validate MIL dimensions before use in ANE compiler. +// Callers use config values already validated by CRIT-03 gatekeeper (model.h/train_large.m), +// but this guard defends against future internal programming errors. +static bool mil_dims_valid(int a, int b) { + if (a <= 0 || a > 65536 || b <= 0 || b > 65536) { + fprintf(stderr, "ane_mil_gen: invalid dims %d/%d (must be 1..65536)\n", a, b); + return false; + } + return true; +} + // Build an FP16 weight blob with the required header structure. // weights_f32: source weights in row-major [out_ch, in_ch] // Returns NSData with header + FP16 weights static NSData *mil_build_weight_blob(const float *weights_f32, int out_ch, int in_ch) { + if (!mil_dims_valid(out_ch, in_ch)) return nil; // MED-03 NSUInteger wsize = (NSUInteger)out_ch * in_ch * 2; // FP16 NSUInteger total = 64 + 64 + wsize; // global header + chunk header + data uint8_t *buf = (uint8_t*)calloc(total, 1); @@ -30,6 +42,9 @@ static NSData *mil_build_weight_blob(const float *weights_f32, int out_ch, int i // Input W: [1, out_ch, in_ch] fp32 // Output: [1, out_ch, spatial] fp32 static NSString *mil_gen_matmul(int in_ch, int out_ch, int spatial) { + if (!mil_dims_valid(in_ch, out_ch) || spatial <= 0 || spatial > 65536) { + fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil; + } return [NSString stringWithFormat: @"program(1.3)\n" "[buildInfo = dict({{\"coremlc-component-MIL\", \"3510.2.1\"}, " @@ -54,6 +69,9 @@ static NSString *mil_gen_matmul(int in_ch, int out_ch, int spatial) { // Keep the baked-weight version for reference (used in inference-only scenarios) static NSString *mil_gen_conv(int in_ch, int out_ch, int spatial) { + if (!mil_dims_valid(in_ch, out_ch) || spatial <= 0 || spatial > 65536) { + fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil; + } return [NSString stringWithFormat: @"program(1.3)\n" "[buildInfo = dict({{\"coremlc-component-MIL\", \"3510.2.1\"}, " @@ -87,6 +105,9 @@ static NSString *mil_gen_conv(int in_ch, int out_ch, int spatial) { // Weight blob layout: Wq[dim,dim] @ offset 64, Wk @ offset 64+cs, Wv @ offset 64+2*cs // where cs = 64 + dim*dim*2 static NSString *mil_gen_qkv(int dim, int spatial) { + if (!mil_dims_valid(dim, dim) || spatial <= 0 || spatial > 65536) { + fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil; + } NSUInteger cs = 64 + (NSUInteger)dim * dim * 2; return [NSString stringWithFormat: @"program(1.3)\n" @@ -130,6 +151,7 @@ static NSString *mil_gen_qkv(int dim, int spatial) { // Build weight blob for fused QKV (3 weight matrices concatenated) static NSData *mil_build_qkv_weight_blob(const float *wq, const float *wk, const float *wv, int dim) { + if (!mil_dims_valid(dim, dim)) return nil; // MED-03 NSUInteger wsize = (NSUInteger)dim * dim * 2; NSUInteger cs = 64 + wsize; NSUInteger total = 64 + 3 * cs; @@ -151,6 +173,7 @@ static NSData *mil_build_qkv_weight_blob(const float *wq, const float *wk, const // Build weight blob for fused FFN up (w1 + w3, both [hidden_dim, dim]) static NSData *mil_build_ffn_up_weight_blob(const float *w1, const float *w3, int hidden_dim, int dim) { + if (!mil_dims_valid(hidden_dim, dim)) return nil; // MED-03 NSUInteger wsize = (NSUInteger)hidden_dim * dim * 2; NSUInteger cs = 64 + wsize; NSUInteger total = 64 + 2 * cs; @@ -172,6 +195,9 @@ static NSData *mil_build_ffn_up_weight_blob(const float *w1, const float *w3, in // Generate MIL for fused FFN up: w1 + w3 parallel convs static NSString *mil_gen_ffn_up(int dim, int hidden_dim, int spatial) { + if (!mil_dims_valid(dim, hidden_dim) || spatial <= 0 || spatial > 65536) { + fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil; + } NSUInteger cs = 64 + (NSUInteger)hidden_dim * dim * 2; return [NSString stringWithFormat: @"program(1.3)\n" diff --git a/training/ane_runtime.h b/training/ane_runtime.h index a5fa873..1d96d6b 100644 --- a/training/ane_runtime.h +++ b/training/ane_runtime.h @@ -19,28 +19,31 @@ typedef struct { } ANEKernel; static Class g_ANEDesc, g_ANEInMem, g_ANEReq, g_ANEIO; -static bool g_ane_loaded = false; static bool g_ane_ok = false; // true only when all private classes loaded successfully static void ane_init(void) { - if (g_ane_loaded) return; - g_ane_loaded = true; // Set first to prevent re-entry (ref: CRIT-01) - void *handle = dlopen( - "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", - RTLD_NOW); - if (!handle) { - fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror()); - return; - } - g_ANEDesc = NSClassFromString(@"_ANEInMemoryModelDescriptor"); - g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel"); - g_ANEReq = NSClassFromString(@"_ANERequest"); - g_ANEIO = NSClassFromString(@"_ANEIOSurfaceObject"); - if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) { - fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n"); - return; - } - g_ane_ok = true; + // MED-06: dispatch_once is Apple's canonical thread-safe one-time init pattern. + // It provides a full memory barrier and is lock-free after the first call. + // Replaces manual g_ane_loaded bool guard which had a Check-Then-Act race. + static dispatch_once_t ane_once; + dispatch_once(&ane_once, ^{ + void *handle = dlopen( + "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", + RTLD_NOW); + if (!handle) { + fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror()); + return; + } + g_ANEDesc = NSClassFromString(@"_ANEInMemoryModelDescriptor"); + g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel"); + g_ANEReq = NSClassFromString(@"_ANERequest"); + g_ANEIO = NSClassFromString(@"_ANEIOSurfaceObject"); + if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) { + fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n"); + return; + } + g_ane_ok = true; // dispatch_once guarantees memory barrier before completion + }); } static IOSurfaceRef ane_create_surface(size_t bytes) { @@ -80,7 +83,12 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData, // Pre-populate temp dir with MIL + weights id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier)); - NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx]; + // MED-02: pid + atomic sequence counter make the directory unique per process and + // per call, preventing TOCTOU conflicts when two instances compile the same model. + static int ane_compile_seq = 0; + int seq = __sync_fetch_and_add(&ane_compile_seq, 1); // atomic, consistent with g_compile_count + NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent: + [NSString stringWithFormat:@"ANE_%d_%d_%@", getpid(), seq, hx]]; NSFileManager *fm = [NSFileManager defaultManager]; [fm createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"] withIntermediateDirectories:YES attributes:nil error:nil]; @@ -142,13 +150,19 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData, } static void ane_write_input(ANEKernel *k, int idx, const void *data, size_t bytes) { - IOSurfaceLock(k->ioInputs[idx], 0, NULL); + if (IOSurfaceLock(k->ioInputs[idx], 0, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(write) failed — surface write skipped\n"); + return; + } memcpy(IOSurfaceGetBaseAddress(k->ioInputs[idx]), data, bytes); IOSurfaceUnlock(k->ioInputs[idx], 0, NULL); } static void ane_read_output(ANEKernel *k, int idx, void *data, size_t bytes) { - IOSurfaceLock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL); + if (IOSurfaceLock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(read) failed — output read skipped\n"); + return; + } memcpy(data, IOSurfaceGetBaseAddress(k->ioOutputs[idx]), bytes); IOSurfaceUnlock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL); } diff --git a/training/stories_config.h b/training/stories_config.h index af03201..8f92bcb 100644 --- a/training/stories_config.h +++ b/training/stories_config.h @@ -101,7 +101,7 @@ typedef struct { double cum_compile, cum_train, cum_wall; int cum_steps, cum_batches; int adam_t; - int pad[3]; // alignment + int pad[3]; // pad[0] = 0x01020304 (LE byte-order sentinel, MED-04); pad[1..2] = 0 } CkptHdr; // llama2.c model file header @@ -111,30 +111,33 @@ typedef struct { // Globals static Class g_D, g_I, g_AR, g_AIO; -static bool g_ane_init_done = false; // Re-entry guard (ref: CRIT-01) static bool g_ane_ok_large = false; // true only when all private classes loaded successfully static mach_timebase_info_data_t g_tb; static int g_compile_count = 0; +static int g_compile_seq = 0; // MED-02: per-call unique index for temp-dir naming static void ane_init(void) { - if (g_ane_init_done) return; - g_ane_init_done = true; // Set first to prevent re-entry (ref: CRIT-01) - void *handle = dlopen( - "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", - RTLD_NOW); - if (!handle) { - fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror()); - return; - } - g_D = NSClassFromString(@"_ANEInMemoryModelDescriptor"); - g_I = NSClassFromString(@"_ANEInMemoryModel"); - g_AR = NSClassFromString(@"_ANERequest"); - g_AIO= NSClassFromString(@"_ANEIOSurfaceObject"); - if (!g_D || !g_I || !g_AR || !g_AIO) { - fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n"); - return; - } - g_ane_ok_large = true; + // MED-06: dispatch_once provides thread-safe one-time init with full memory barrier. + // Replaces manual g_ane_init_done bool guard which had a Check-Then-Act race. + static dispatch_once_t ane_once_large; + dispatch_once(&ane_once_large, ^{ + void *handle = dlopen( + "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", + RTLD_NOW); + if (!handle) { + fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror()); + return; + } + g_D = NSClassFromString(@"_ANEInMemoryModelDescriptor"); + g_I = NSClassFromString(@"_ANEInMemoryModel"); + g_AR = NSClassFromString(@"_ANERequest"); + g_AIO= NSClassFromString(@"_ANEIOSurfaceObject"); + if (!g_D || !g_I || !g_AR || !g_AIO) { + fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n"); + return; + } + g_ane_ok_large = true; // dispatch_once guarantees memory barrier before completion + }); } static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; } diff --git a/training/stories_io.h b/training/stories_io.h index fbb5dee..45f6557 100644 --- a/training/stories_io.h +++ b/training/stories_io.h @@ -40,6 +40,13 @@ static NSData *build_blob_fp16(_Float16 *d, int cnt) { return [NSData dataWithBytesNoCopy:b length:tot freeWhenDone:YES]; } +// MED-05: NEON alignment guarantee. +// IOSurface base address is page-aligned (≥4096 bytes). Offset = ch_off*SEQ*sizeof(_Float16). +// With SEQ%8==0, all offsets are multiples of 16 bytes → aligned for vld1q_f16/vst1q_f32. +// Additionally, ARM64 handles unaligned NEON loads in hardware (unlike ARM32). +_Static_assert(SEQ % 8 == 0, + "SEQ must be multiple of 8 to guarantee 16-byte alignment for NEON (MED-05)"); + // NEON vectorized conversion static void cvt_f16_f32(float *dst, const _Float16 *src, int n) { int i = 0; @@ -62,18 +69,31 @@ static void cvt_f32_f16(_Float16 *dst, const float *src, int n) { // IOSurface I/O (channel-first [C,S] layout) static void io_write_fp16(IOSurfaceRef s, const float *data, int channels, int sp) { - IOSurfaceLock(s, 0, NULL); + if (IOSurfaceLock(s, 0, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(write) failed — surface write skipped\n"); + return; + } cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s), data, channels * sp); IOSurfaceUnlock(s, 0, NULL); } static void io_read_fp16(IOSurfaceRef s, float *data, int ch_off, int channels, int sp) { - IOSurfaceLock(s, kIOSurfaceLockReadOnly, NULL); + if (IOSurfaceLock(s, kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(read) failed — output read skipped\n"); + return; + } cvt_f16_f32(data, (_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, channels * sp); IOSurfaceUnlock(s, kIOSurfaceLockReadOnly, NULL); } static void io_copy(IOSurfaceRef dst, int dst_ch, IOSurfaceRef src, int src_ch, int channels, int sp) { - IOSurfaceLock(dst, 0, NULL); - IOSurfaceLock(src, kIOSurfaceLockReadOnly, NULL); + if (IOSurfaceLock(dst, 0, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(copy dst) failed — copy skipped\n"); + return; + } + if (IOSurfaceLock(src, kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(copy src) failed — copy skipped\n"); + IOSurfaceUnlock(dst, 0, NULL); + return; + } memcpy((_Float16*)IOSurfaceGetBaseAddress(dst) + dst_ch*sp, (_Float16*)IOSurfaceGetBaseAddress(src) + src_ch*sp, channels * sp * sizeof(_Float16)); @@ -81,7 +101,10 @@ static void io_copy(IOSurfaceRef dst, int dst_ch, IOSurfaceRef src, int src_ch, IOSurfaceUnlock(dst, 0, NULL); } static void io_write_fp16_at(IOSurfaceRef s, int ch_off, const float *data, int channels, int sp) { - IOSurfaceLock(s, 0, NULL); + if (IOSurfaceLock(s, 0, NULL) != kIOReturnSuccess) { // MED-01 + fprintf(stderr, "IOSurfaceLock(write_at) failed — surface write skipped\n"); + return; + } cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, data, channels * sp); IOSurfaceUnlock(s, 0, NULL); } @@ -96,7 +119,11 @@ static Kern *compile_kern_mil_w(NSString *mil, NSDictionary *weights, int ic_byt id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(g_I, @selector(inMemoryModelWithDescriptor:), desc); if (!mdl) { printf(" [compile] mdl=NULL\n"); return NULL; } // CRIT-02 id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier)); - NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx]; + // MED-02: pid + atomic sequence counter make the directory unique per process and + // per call, preventing TOCTOU conflicts when two instances compile the same model. + int seq = __sync_fetch_and_add(&g_compile_seq, 1); + NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent: + [NSString stringWithFormat:@"ANE_%d_%d_%@", getpid(), seq, hx]]; [[NSFileManager defaultManager] createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"] withIntermediateDirectories:YES attributes:nil error:nil]; [md writeToFile:[td stringByAppendingPathComponent:@"model.mil"] atomically:YES]; for (NSString *path in weights) { diff --git a/training/train_large.m b/training/train_large.m index 3730f23..0a70a32 100644 --- a/training/train_large.m +++ b/training/train_large.m @@ -125,6 +125,7 @@ static void save_checkpoint(const char *path, int step, int total_steps, float l h.lr = lr; h.loss = loss; h.cum_compile = cc; h.cum_train = ct; h.cum_wall = cw; h.cum_steps = cs; h.cum_batches = cb; h.adam_t = adam_t; + h.pad[0] = 0x01020304; // byte-order sentinel (MED-04): LE marker, see CkptHdr fwrite(&h, sizeof(h), 1, f); // Per-layer weights + adam for (int L = 0; L < NLAYERS; L++) { @@ -163,6 +164,14 @@ static bool load_checkpoint(const char *path, int *step, int *total_steps, float fclose(f); return false; } if (h.magic != 0x424C5A54 || h.version != 2) { fclose(f); return false; } + // MED-04: Byte-order check. pad[0]=0 = legacy checkpoint (no sentinel, accept). + // pad[0]=0x01020304 = LE ok. Anything else = big-endian or corrupt checkpoint. + _Static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, + "Checkpoint format is little-endian (Apple Silicon only)"); + if (h.pad[0] != 0 && h.pad[0] != 0x01020304) { + fprintf(stderr, "load_checkpoint: byte-order mismatch (big-endian checkpoint?)\n"); + fclose(f); return false; + } *step = h.step; *total_steps = h.total_steps; *lr = h.lr; *loss = h.loss; *cc = h.cum_compile; *ct = h.cum_train; *cw = h.cum_wall; *cs = h.cum_steps; *cb = h.cum_batches; *adam_t = h.adam_t;