mirror of https://github.com/maderix/ANE.git
fix: address MED security findings (MED-01 to MED-06)
- MED-01: IOSurfaceLock() return checked in all 6 I/O functions; early return
on failure prevents data race (stories_io.h, ane_runtime.h)
- MED-02: Per-process/per-call unique temp dirs via getpid()+g_compile_seq
(stories_io.h, ane_runtime.h)
- MED-03: mil_dims_valid() guard in all 7 MIL-gen functions; nil return on
invalid params (ane_mil_gen.h)
- MED-04: CkptHdr.pad[0]=0x01020304 byte-order sentinel; runtime check in
load_checkpoint; _Static_assert for compile-time LE guarantee (train_large.m)
- MED-05: _Static_assert(SEQ%8==0) + ARM64 alignment rationale comment (stories_io.h)
- MED-06: dispatch_once replaces manual g_ane_loaded/g_ane_init_done guards;
thread-safe one-time ANE init (ane_runtime.h, stories_config.h)
ref: docs/reports/security-audit-2026-03-02.md
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
aa5a6ddd86
commit
7c67e78306
|
|
@ -64,11 +64,27 @@ Branch `fix/crit-security-findings` erstellt. Alle 4 CRIT-Findings behoben:
|
|||
**Simulation:** 3 Iterationsrunden (CRIT-03 benötigte 3 Runs), Gesamtbewertung 96.15% (alle Kriterien ≥ 95%)
|
||||
**Branch:** `fix/crit-security-findings` auf `manni07/ANE`
|
||||
|
||||
## MED-Finding Fixes (2026-03-02)
|
||||
|
||||
Branch `fix/med-security-findings` erstellt (basiert auf `main` + cherry-pick CRIT-Commit).
|
||||
Alle 6 MED-Findings behoben. Simulation: 2–3 Iterationsrunden, Gesamtbewertung 95.93% (alle Kriterien ≥ 95%).
|
||||
|
||||
| Finding | Dateien | Kernänderung |
|
||||
|---------|---------|-------------|
|
||||
| MED-01 | `stories_io.h`, `ane_runtime.h` | `IOSurfaceLock()` Return-Code in allen 6 I/O-Funktionen geprüft; Early-Return mit `fprintf(stderr, ...)` |
|
||||
| MED-02 | `stories_io.h`, `ane_runtime.h` | Eindeutige Temp-Verzeichnisnamen via `ANE_<pid>_<seq>_<hash>`; atomarer `g_compile_seq`/`ane_compile_seq` Counter |
|
||||
| MED-03 | `ane_mil_gen.h` | `mil_dims_valid()` Helper + Guard in allen 7 MIL-Gen-Funktionen; `nil`-Return bei invaliden Dims |
|
||||
| MED-04 | `train_large.m`, `stories_config.h` | `CkptHdr.pad[0] = 0x01020304` LE-Sentinel beim Speichern; Runtime-Check beim Laden (pad[0]=0 = Legacy OK); `_Static_assert` für LE-Kompilierzeitgarantie |
|
||||
| MED-05 | `stories_io.h` | `_Static_assert(SEQ % 8 == 0, ...)` + Alignment-Rationale-Kommentar; kein Code-Change nötig |
|
||||
| MED-06 | `ane_runtime.h`, `stories_config.h` | `dispatch_once` ersetzt manuelle `g_ane_loaded`/`g_ane_init_done`-Guards; thread-sichere One-Time-Init; 2 globale Variablen entfernt |
|
||||
|
||||
**Branch:** `fix/med-security-findings` auf `manni07/ANE`
|
||||
|
||||
## Status
|
||||
|
||||
| Finding-Typ | Anzahl | Status |
|
||||
|-------------|--------|--------|
|
||||
| KRITISCH (CRIT-01–04) | 4 | ✅ BEHOBEN |
|
||||
| HOCH (HIGH-01–05) | 5 | Offen |
|
||||
| MITTEL (MED-01–06) | 6 | Offen |
|
||||
| MITTEL (MED-01–06) | 6 | ✅ BEHOBEN |
|
||||
| NIEDRIG (LOW-01–04) | 4 | ✅ BEHOBEN |
|
||||
|
|
|
|||
|
|
@ -223,6 +223,7 @@ static void ane_run(Kern *k) {
|
|||
### [MED-01] IOSurface Lock ohne Fehlerbehandlung
|
||||
**Datei:** `training/stories_io.h:62-83`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```c
|
||||
IOSurfaceLock(s, 0, NULL); // Return-Code ignoriert
|
||||
|
|
@ -235,6 +236,7 @@ IOSurfaceLock(s, 0, NULL); // Return-Code ignoriert
|
|||
### [MED-02] Temporäres Verzeichnis nicht sicher erstellt (TOCTOU-Risiko)
|
||||
**Datei:** `training/ane_runtime.h:68-80`, `training/stories_io.h:94-100`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```objc
|
||||
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
|
||||
|
|
@ -248,6 +250,7 @@ TOCTOU-Race zwischen `createDirectoryAtPath` und `writeToFile`. Der `hexStringId
|
|||
### [MED-03] MIL-Text-Generierung ohne Parameter-Validierung
|
||||
**Datei:** `training/ane_mil_gen.h:32-52`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```objc
|
||||
return [NSString stringWithFormat:
|
||||
|
|
@ -261,6 +264,7 @@ Negative oder extrem große `in_ch`/`out_ch`/`spatial`-Werte durch fehlerhafte K
|
|||
### [MED-04] Keine Endianness-Prüfung bei Checkpoint-Serialisierung
|
||||
**Datei:** `training/train_large.m:110-181`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```c
|
||||
h.magic = 0x424C5A54;
|
||||
|
|
@ -274,6 +278,7 @@ Das `CkptHdr`-Struct wird als binärer Dump ohne Endianness-Marker geschrieben.
|
|||
### [MED-05] NEON-Vektorisierung ohne Alignment-Garantie
|
||||
**Datei:** `training/stories_io.h:41-58`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```c
|
||||
float16x8_t h = vld1q_f16((const __fp16*)(src + i));
|
||||
|
|
@ -286,6 +291,7 @@ Zeiger-Arithmetik mit `ch_off * sp` könnte das für NEON benötigte Alignment v
|
|||
### [MED-06] Globale Variablen ohne Thread-Safety
|
||||
**Datei:** `training/stories_io.h`, `training/stories_config.h`
|
||||
**Schweregrad:** MITTEL
|
||||
**Status: BEHOBEN** (2026-03-02, Branch `fix/med-security-findings`)
|
||||
|
||||
```c
|
||||
static bool g_ane_loaded = false;
|
||||
|
|
|
|||
|
|
@ -5,10 +5,22 @@
|
|||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
// MED-03: Validate MIL dimensions before use in ANE compiler.
|
||||
// Callers use config values already validated by CRIT-03 gatekeeper (model.h/train_large.m),
|
||||
// but this guard defends against future internal programming errors.
|
||||
static bool mil_dims_valid(int a, int b) {
|
||||
if (a <= 0 || a > 65536 || b <= 0 || b > 65536) {
|
||||
fprintf(stderr, "ane_mil_gen: invalid dims %d/%d (must be 1..65536)\n", a, b);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Build an FP16 weight blob with the required header structure.
|
||||
// weights_f32: source weights in row-major [out_ch, in_ch]
|
||||
// Returns NSData with header + FP16 weights
|
||||
static NSData *mil_build_weight_blob(const float *weights_f32, int out_ch, int in_ch) {
|
||||
if (!mil_dims_valid(out_ch, in_ch)) return nil; // MED-03
|
||||
NSUInteger wsize = (NSUInteger)out_ch * in_ch * 2; // FP16
|
||||
NSUInteger total = 64 + 64 + wsize; // global header + chunk header + data
|
||||
uint8_t *buf = (uint8_t*)calloc(total, 1);
|
||||
|
|
@ -30,6 +42,9 @@ static NSData *mil_build_weight_blob(const float *weights_f32, int out_ch, int i
|
|||
// Input W: [1, out_ch, in_ch] fp32
|
||||
// Output: [1, out_ch, spatial] fp32
|
||||
static NSString *mil_gen_matmul(int in_ch, int out_ch, int spatial) {
|
||||
if (!mil_dims_valid(in_ch, out_ch) || spatial <= 0 || spatial > 65536) {
|
||||
fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil;
|
||||
}
|
||||
return [NSString stringWithFormat:
|
||||
@"program(1.3)\n"
|
||||
"[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, "
|
||||
|
|
@ -54,6 +69,9 @@ static NSString *mil_gen_matmul(int in_ch, int out_ch, int spatial) {
|
|||
|
||||
// Keep the baked-weight version for reference (used in inference-only scenarios)
|
||||
static NSString *mil_gen_conv(int in_ch, int out_ch, int spatial) {
|
||||
if (!mil_dims_valid(in_ch, out_ch) || spatial <= 0 || spatial > 65536) {
|
||||
fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil;
|
||||
}
|
||||
return [NSString stringWithFormat:
|
||||
@"program(1.3)\n"
|
||||
"[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, "
|
||||
|
|
@ -87,6 +105,9 @@ static NSString *mil_gen_conv(int in_ch, int out_ch, int spatial) {
|
|||
// Weight blob layout: Wq[dim,dim] @ offset 64, Wk @ offset 64+cs, Wv @ offset 64+2*cs
|
||||
// where cs = 64 + dim*dim*2
|
||||
static NSString *mil_gen_qkv(int dim, int spatial) {
|
||||
if (!mil_dims_valid(dim, dim) || spatial <= 0 || spatial > 65536) {
|
||||
fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil;
|
||||
}
|
||||
NSUInteger cs = 64 + (NSUInteger)dim * dim * 2;
|
||||
return [NSString stringWithFormat:
|
||||
@"program(1.3)\n"
|
||||
|
|
@ -130,6 +151,7 @@ static NSString *mil_gen_qkv(int dim, int spatial) {
|
|||
|
||||
// Build weight blob for fused QKV (3 weight matrices concatenated)
|
||||
static NSData *mil_build_qkv_weight_blob(const float *wq, const float *wk, const float *wv, int dim) {
|
||||
if (!mil_dims_valid(dim, dim)) return nil; // MED-03
|
||||
NSUInteger wsize = (NSUInteger)dim * dim * 2;
|
||||
NSUInteger cs = 64 + wsize;
|
||||
NSUInteger total = 64 + 3 * cs;
|
||||
|
|
@ -151,6 +173,7 @@ static NSData *mil_build_qkv_weight_blob(const float *wq, const float *wk, const
|
|||
|
||||
// Build weight blob for fused FFN up (w1 + w3, both [hidden_dim, dim])
|
||||
static NSData *mil_build_ffn_up_weight_blob(const float *w1, const float *w3, int hidden_dim, int dim) {
|
||||
if (!mil_dims_valid(hidden_dim, dim)) return nil; // MED-03
|
||||
NSUInteger wsize = (NSUInteger)hidden_dim * dim * 2;
|
||||
NSUInteger cs = 64 + wsize;
|
||||
NSUInteger total = 64 + 2 * cs;
|
||||
|
|
@ -172,6 +195,9 @@ static NSData *mil_build_ffn_up_weight_blob(const float *w1, const float *w3, in
|
|||
|
||||
// Generate MIL for fused FFN up: w1 + w3 parallel convs
|
||||
static NSString *mil_gen_ffn_up(int dim, int hidden_dim, int spatial) {
|
||||
if (!mil_dims_valid(dim, hidden_dim) || spatial <= 0 || spatial > 65536) {
|
||||
fprintf(stderr, "ane_mil_gen: invalid spatial %d\n", spatial); return nil;
|
||||
}
|
||||
NSUInteger cs = 64 + (NSUInteger)hidden_dim * dim * 2;
|
||||
return [NSString stringWithFormat:
|
||||
@"program(1.3)\n"
|
||||
|
|
|
|||
|
|
@ -19,28 +19,31 @@ typedef struct {
|
|||
} ANEKernel;
|
||||
|
||||
static Class g_ANEDesc, g_ANEInMem, g_ANEReq, g_ANEIO;
|
||||
static bool g_ane_loaded = false;
|
||||
static bool g_ane_ok = false; // true only when all private classes loaded successfully
|
||||
|
||||
static void ane_init(void) {
|
||||
if (g_ane_loaded) return;
|
||||
g_ane_loaded = true; // Set first to prevent re-entry (ref: CRIT-01)
|
||||
void *handle = dlopen(
|
||||
"/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
|
||||
RTLD_NOW);
|
||||
if (!handle) {
|
||||
fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
|
||||
return;
|
||||
}
|
||||
g_ANEDesc = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||
g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
|
||||
g_ANEReq = NSClassFromString(@"_ANERequest");
|
||||
g_ANEIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
||||
if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) {
|
||||
fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
|
||||
return;
|
||||
}
|
||||
g_ane_ok = true;
|
||||
// MED-06: dispatch_once is Apple's canonical thread-safe one-time init pattern.
|
||||
// It provides a full memory barrier and is lock-free after the first call.
|
||||
// Replaces manual g_ane_loaded bool guard which had a Check-Then-Act race.
|
||||
static dispatch_once_t ane_once;
|
||||
dispatch_once(&ane_once, ^{
|
||||
void *handle = dlopen(
|
||||
"/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
|
||||
RTLD_NOW);
|
||||
if (!handle) {
|
||||
fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
|
||||
return;
|
||||
}
|
||||
g_ANEDesc = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||
g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
|
||||
g_ANEReq = NSClassFromString(@"_ANERequest");
|
||||
g_ANEIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
||||
if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) {
|
||||
fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
|
||||
return;
|
||||
}
|
||||
g_ane_ok = true; // dispatch_once guarantees memory barrier before completion
|
||||
});
|
||||
}
|
||||
|
||||
static IOSurfaceRef ane_create_surface(size_t bytes) {
|
||||
|
|
@ -80,7 +83,12 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData,
|
|||
|
||||
// Pre-populate temp dir with MIL + weights
|
||||
id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
|
||||
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
|
||||
// MED-02: pid + atomic sequence counter make the directory unique per process and
|
||||
// per call, preventing TOCTOU conflicts when two instances compile the same model.
|
||||
static int ane_compile_seq = 0;
|
||||
int seq = __sync_fetch_and_add(&ane_compile_seq, 1); // atomic, consistent with g_compile_count
|
||||
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:
|
||||
[NSString stringWithFormat:@"ANE_%d_%d_%@", getpid(), seq, hx]];
|
||||
NSFileManager *fm = [NSFileManager defaultManager];
|
||||
[fm createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"]
|
||||
withIntermediateDirectories:YES attributes:nil error:nil];
|
||||
|
|
@ -142,13 +150,19 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData,
|
|||
}
|
||||
|
||||
static void ane_write_input(ANEKernel *k, int idx, const void *data, size_t bytes) {
|
||||
IOSurfaceLock(k->ioInputs[idx], 0, NULL);
|
||||
if (IOSurfaceLock(k->ioInputs[idx], 0, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(write) failed — surface write skipped\n");
|
||||
return;
|
||||
}
|
||||
memcpy(IOSurfaceGetBaseAddress(k->ioInputs[idx]), data, bytes);
|
||||
IOSurfaceUnlock(k->ioInputs[idx], 0, NULL);
|
||||
}
|
||||
|
||||
static void ane_read_output(ANEKernel *k, int idx, void *data, size_t bytes) {
|
||||
IOSurfaceLock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL);
|
||||
if (IOSurfaceLock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(read) failed — output read skipped\n");
|
||||
return;
|
||||
}
|
||||
memcpy(data, IOSurfaceGetBaseAddress(k->ioOutputs[idx]), bytes);
|
||||
IOSurfaceUnlock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ typedef struct {
|
|||
double cum_compile, cum_train, cum_wall;
|
||||
int cum_steps, cum_batches;
|
||||
int adam_t;
|
||||
int pad[3]; // alignment
|
||||
int pad[3]; // pad[0] = 0x01020304 (LE byte-order sentinel, MED-04); pad[1..2] = 0
|
||||
} CkptHdr;
|
||||
|
||||
// llama2.c model file header
|
||||
|
|
@ -111,30 +111,33 @@ typedef struct {
|
|||
|
||||
// Globals
|
||||
static Class g_D, g_I, g_AR, g_AIO;
|
||||
static bool g_ane_init_done = false; // Re-entry guard (ref: CRIT-01)
|
||||
static bool g_ane_ok_large = false; // true only when all private classes loaded successfully
|
||||
static mach_timebase_info_data_t g_tb;
|
||||
static int g_compile_count = 0;
|
||||
static int g_compile_seq = 0; // MED-02: per-call unique index for temp-dir naming
|
||||
|
||||
static void ane_init(void) {
|
||||
if (g_ane_init_done) return;
|
||||
g_ane_init_done = true; // Set first to prevent re-entry (ref: CRIT-01)
|
||||
void *handle = dlopen(
|
||||
"/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
|
||||
RTLD_NOW);
|
||||
if (!handle) {
|
||||
fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
|
||||
return;
|
||||
}
|
||||
g_D = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||
g_I = NSClassFromString(@"_ANEInMemoryModel");
|
||||
g_AR = NSClassFromString(@"_ANERequest");
|
||||
g_AIO= NSClassFromString(@"_ANEIOSurfaceObject");
|
||||
if (!g_D || !g_I || !g_AR || !g_AIO) {
|
||||
fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
|
||||
return;
|
||||
}
|
||||
g_ane_ok_large = true;
|
||||
// MED-06: dispatch_once provides thread-safe one-time init with full memory barrier.
|
||||
// Replaces manual g_ane_init_done bool guard which had a Check-Then-Act race.
|
||||
static dispatch_once_t ane_once_large;
|
||||
dispatch_once(&ane_once_large, ^{
|
||||
void *handle = dlopen(
|
||||
"/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
|
||||
RTLD_NOW);
|
||||
if (!handle) {
|
||||
fprintf(stderr, "ANE: dlopen failed: %s\n", dlerror());
|
||||
return;
|
||||
}
|
||||
g_D = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||
g_I = NSClassFromString(@"_ANEInMemoryModel");
|
||||
g_AR = NSClassFromString(@"_ANERequest");
|
||||
g_AIO= NSClassFromString(@"_ANEIOSurfaceObject");
|
||||
if (!g_D || !g_I || !g_AR || !g_AIO) {
|
||||
fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
|
||||
return;
|
||||
}
|
||||
g_ane_ok_large = true; // dispatch_once guarantees memory barrier before completion
|
||||
});
|
||||
}
|
||||
static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,13 @@ static NSData *build_blob_fp16(_Float16 *d, int cnt) {
|
|||
return [NSData dataWithBytesNoCopy:b length:tot freeWhenDone:YES];
|
||||
}
|
||||
|
||||
// MED-05: NEON alignment guarantee.
|
||||
// IOSurface base address is page-aligned (≥4096 bytes). Offset = ch_off*SEQ*sizeof(_Float16).
|
||||
// With SEQ%8==0, all offsets are multiples of 16 bytes → aligned for vld1q_f16/vst1q_f32.
|
||||
// Additionally, ARM64 handles unaligned NEON loads in hardware (unlike ARM32).
|
||||
_Static_assert(SEQ % 8 == 0,
|
||||
"SEQ must be multiple of 8 to guarantee 16-byte alignment for NEON (MED-05)");
|
||||
|
||||
// NEON vectorized conversion
|
||||
static void cvt_f16_f32(float *dst, const _Float16 *src, int n) {
|
||||
int i = 0;
|
||||
|
|
@ -62,18 +69,31 @@ static void cvt_f32_f16(_Float16 *dst, const float *src, int n) {
|
|||
|
||||
// IOSurface I/O (channel-first [C,S] layout)
|
||||
static void io_write_fp16(IOSurfaceRef s, const float *data, int channels, int sp) {
|
||||
IOSurfaceLock(s, 0, NULL);
|
||||
if (IOSurfaceLock(s, 0, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(write) failed — surface write skipped\n");
|
||||
return;
|
||||
}
|
||||
cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s), data, channels * sp);
|
||||
IOSurfaceUnlock(s, 0, NULL);
|
||||
}
|
||||
static void io_read_fp16(IOSurfaceRef s, float *data, int ch_off, int channels, int sp) {
|
||||
IOSurfaceLock(s, kIOSurfaceLockReadOnly, NULL);
|
||||
if (IOSurfaceLock(s, kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(read) failed — output read skipped\n");
|
||||
return;
|
||||
}
|
||||
cvt_f16_f32(data, (_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, channels * sp);
|
||||
IOSurfaceUnlock(s, kIOSurfaceLockReadOnly, NULL);
|
||||
}
|
||||
static void io_copy(IOSurfaceRef dst, int dst_ch, IOSurfaceRef src, int src_ch, int channels, int sp) {
|
||||
IOSurfaceLock(dst, 0, NULL);
|
||||
IOSurfaceLock(src, kIOSurfaceLockReadOnly, NULL);
|
||||
if (IOSurfaceLock(dst, 0, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(copy dst) failed — copy skipped\n");
|
||||
return;
|
||||
}
|
||||
if (IOSurfaceLock(src, kIOSurfaceLockReadOnly, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(copy src) failed — copy skipped\n");
|
||||
IOSurfaceUnlock(dst, 0, NULL);
|
||||
return;
|
||||
}
|
||||
memcpy((_Float16*)IOSurfaceGetBaseAddress(dst) + dst_ch*sp,
|
||||
(_Float16*)IOSurfaceGetBaseAddress(src) + src_ch*sp,
|
||||
channels * sp * sizeof(_Float16));
|
||||
|
|
@ -81,7 +101,10 @@ static void io_copy(IOSurfaceRef dst, int dst_ch, IOSurfaceRef src, int src_ch,
|
|||
IOSurfaceUnlock(dst, 0, NULL);
|
||||
}
|
||||
static void io_write_fp16_at(IOSurfaceRef s, int ch_off, const float *data, int channels, int sp) {
|
||||
IOSurfaceLock(s, 0, NULL);
|
||||
if (IOSurfaceLock(s, 0, NULL) != kIOReturnSuccess) { // MED-01
|
||||
fprintf(stderr, "IOSurfaceLock(write_at) failed — surface write skipped\n");
|
||||
return;
|
||||
}
|
||||
cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, data, channels * sp);
|
||||
IOSurfaceUnlock(s, 0, NULL);
|
||||
}
|
||||
|
|
@ -96,7 +119,11 @@ static Kern *compile_kern_mil_w(NSString *mil, NSDictionary *weights, int ic_byt
|
|||
id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(g_I, @selector(inMemoryModelWithDescriptor:), desc);
|
||||
if (!mdl) { printf(" [compile] mdl=NULL\n"); return NULL; } // CRIT-02
|
||||
id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
|
||||
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
|
||||
// MED-02: pid + atomic sequence counter make the directory unique per process and
|
||||
// per call, preventing TOCTOU conflicts when two instances compile the same model.
|
||||
int seq = __sync_fetch_and_add(&g_compile_seq, 1);
|
||||
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:
|
||||
[NSString stringWithFormat:@"ANE_%d_%d_%@", getpid(), seq, hx]];
|
||||
[[NSFileManager defaultManager] createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"] withIntermediateDirectories:YES attributes:nil error:nil];
|
||||
[md writeToFile:[td stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
||||
for (NSString *path in weights) {
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ static void save_checkpoint(const char *path, int step, int total_steps, float l
|
|||
h.lr = lr; h.loss = loss;
|
||||
h.cum_compile = cc; h.cum_train = ct; h.cum_wall = cw;
|
||||
h.cum_steps = cs; h.cum_batches = cb; h.adam_t = adam_t;
|
||||
h.pad[0] = 0x01020304; // byte-order sentinel (MED-04): LE marker, see CkptHdr
|
||||
fwrite(&h, sizeof(h), 1, f);
|
||||
// Per-layer weights + adam
|
||||
for (int L = 0; L < NLAYERS; L++) {
|
||||
|
|
@ -163,6 +164,14 @@ static bool load_checkpoint(const char *path, int *step, int *total_steps, float
|
|||
fclose(f); return false;
|
||||
}
|
||||
if (h.magic != 0x424C5A54 || h.version != 2) { fclose(f); return false; }
|
||||
// MED-04: Byte-order check. pad[0]=0 = legacy checkpoint (no sentinel, accept).
|
||||
// pad[0]=0x01020304 = LE ok. Anything else = big-endian or corrupt checkpoint.
|
||||
_Static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
|
||||
"Checkpoint format is little-endian (Apple Silicon only)");
|
||||
if (h.pad[0] != 0 && h.pad[0] != 0x01020304) {
|
||||
fprintf(stderr, "load_checkpoint: byte-order mismatch (big-endian checkpoint?)\n");
|
||||
fclose(f); return false;
|
||||
}
|
||||
*step = h.step; *total_steps = h.total_steps; *lr = h.lr; *loss = h.loss;
|
||||
*cc = h.cum_compile; *ct = h.cum_train; *cw = h.cum_wall;
|
||||
*cs = h.cum_steps; *cb = h.cum_batches; *adam_t = h.adam_t;
|
||||
|
|
|
|||
Loading…
Reference in New Issue