ANE/training/ane_runtime.h

367 lines
14 KiB
Objective-C

// ane_runtime.h — Reusable ANE in-memory compile/load/eval wrapper
// Uses _ANEInMemoryModel via private AppleNeuralEngine.framework
#pragma once
#import <Foundation/Foundation.h>
#import <objc/runtime.h>
#import <objc/message.h>
#import <dlfcn.h>
#import <IOSurface/IOSurface.h>
#import <sys/mman.h>
#import <sys/stat.h>
#import <fcntl.h>
#import <sys/sysctl.h>
// Chip Detection and MIL Version Selection
typedef NS_ENUM(NSInteger, ANEChipType) {
ANE_CHIP_UNKNOWN = 0,
ANE_CHIP_M1, ANE_CHIP_M1_PRO, ANE_CHIP_M1_MAX, ANE_CHIP_M1_ULTRA,
ANE_CHIP_M2, ANE_CHIP_M2_PRO, ANE_CHIP_M2_MAX, ANE_CHIP_M2_ULTRA,
ANE_CHIP_M3, ANE_CHIP_M3_PRO, ANE_CHIP_M3_MAX, ANE_CHIP_M3_ULTRA,
ANE_CHIP_M4, ANE_CHIP_M4_PRO, ANE_CHIP_M4_MAX,
ANE_CHIP_M5
};
static const size_t SYSCTL_BUFFER_SIZE = 256;
static const int ASCII_DIGIT_OFFSET = '0';
static const int BASE_CHIP_GENERATION_MULTIPLIER = 10;
static const int PRO_VARIANT_OFFSET = 1;
static const int MAX_VARIANT_OFFSET = 2;
static const int ULTRA_VARIANT_OFFSET = 3;
static const char* SYSCTL_BRAND_STRING_KEY = "machdep.cpu.brand_string";
static const char* APPLE_M_PREFIX = "Apple M";
static const size_t APPLE_M_PREFIX_LENGTH = 7;
static const char* VARIANT_PRO = "Pro";
static const char* VARIANT_MAX = "Max";
static const char* VARIANT_ULTRA = "Ultra";
static const size_t VARIANT_PRO_MAX_LENGTH = 3;
static const size_t VARIANT_ULTRA_LENGTH = 5;
static ANEChipType parse_base_chip_generation(const char *generation_string) {
int generation = 0;
if (generation_string[0] >= '1' && generation_string[0] <= '9') {
generation = generation_string[0] - ASCII_DIGIT_OFFSET;
if (generation_string[1] >= '0' && generation_string[1] <= '9') {
generation = generation * BASE_CHIP_GENERATION_MULTIPLIER + (generation_string[1] - ASCII_DIGIT_OFFSET);
}
}
switch (generation) {
case 1: return ANE_CHIP_M1;
case 2: return ANE_CHIP_M2;
case 3: return ANE_CHIP_M3;
case 4: return ANE_CHIP_M4;
case 5: return ANE_CHIP_M5;
default: return ANE_CHIP_UNKNOWN;
}
}
static ANEChipType parse_chip_variant(ANEChipType base_chip, const char *variant_string) {
if (strncmp(variant_string, VARIANT_PRO, VARIANT_PRO_MAX_LENGTH) == 0) {
return (ANEChipType)(base_chip + PRO_VARIANT_OFFSET);
}
if (strncmp(variant_string, VARIANT_MAX, VARIANT_PRO_MAX_LENGTH) == 0) {
return (ANEChipType)(base_chip + MAX_VARIANT_OFFSET);
}
if (strncmp(variant_string, VARIANT_ULTRA, VARIANT_ULTRA_LENGTH) == 0) {
return (ANEChipType)(base_chip + ULTRA_VARIANT_OFFSET);
}
return base_chip;
}
static ANEChipType ane_get_chip_type(void) {
static ANEChipType cached_chip = ANE_CHIP_UNKNOWN;
static bool initialized = false;
if (initialized) return cached_chip;
initialized = true;
char brand[SYSCTL_BUFFER_SIZE] = {0};
size_t brand_size = sizeof(brand);
if (sysctlbyname(SYSCTL_BRAND_STRING_KEY, brand, &brand_size, NULL, 0) == 0) {
if (strncmp(brand, APPLE_M_PREFIX, APPLE_M_PREFIX_LENGTH) == 0) {
const char *generation_pointer = brand + APPLE_M_PREFIX_LENGTH;
ANEChipType base_chip = parse_base_chip_generation(generation_pointer);
if (base_chip != ANE_CHIP_UNKNOWN) {
const char *variant_pointer = generation_pointer + 1;
if (generation_pointer[1] >= '0' && generation_pointer[1] <= '9') {
variant_pointer++;
}
while (*variant_pointer == ' ') {
variant_pointer++;
}
cached_chip = parse_chip_variant(base_chip, variant_pointer);
}
}
}
return cached_chip;
}
static bool ane_supports_mil_1_5(void) {
return (ane_get_chip_type() >= ANE_CHIP_M5);
}
static const char *ane_get_mil_version(void) {
return ane_supports_mil_1_5() ? "1.5" : "1.3";
}
static const char *ane_get_mil_ios_target(void) {
return ane_supports_mil_1_5() ? "ios18" : "ios17";
}
static const char *ane_get_chip_name(void) {
switch (ane_get_chip_type()) {
case ANE_CHIP_M1: return "M1";
case ANE_CHIP_M1_PRO: return "M1 Pro";
case ANE_CHIP_M1_MAX: return "M1 Max";
case ANE_CHIP_M1_ULTRA: return "M1 Ultra";
case ANE_CHIP_M2: return "M2";
case ANE_CHIP_M2_PRO: return "M2 Pro";
case ANE_CHIP_M2_MAX: return "M2 Max";
case ANE_CHIP_M2_ULTRA: return "M2 Ultra";
case ANE_CHIP_M3: return "M3";
case ANE_CHIP_M3_PRO: return "M3 Pro";
case ANE_CHIP_M3_MAX: return "M3 Max";
case ANE_CHIP_M3_ULTRA: return "M3 Ultra";
case ANE_CHIP_M4: return "M4";
case ANE_CHIP_M4_PRO: return "M4 Pro";
case ANE_CHIP_M4_MAX: return "M4 Max";
case ANE_CHIP_M5: return "M5";
default: return "Unknown";
}
}
typedef struct {
id model; // _ANEInMemoryModel
IOSurfaceRef *ioInputs;
IOSurfaceRef *ioOutputs;
IOSurfaceRef weightsSurface; // Optional: dynamic weights IOSurface
id weightsBuffer; // Optional: _ANEIOSurfaceObject for weights
id request; // _ANERequest
NSString *tmpDir;
int nInputs, nOutputs;
size_t *inputBytes;
size_t *outputBytes;
size_t weightsBytes; // Size of weights surface
} ANEKernel;
static Class g_ANEDesc, g_ANEInMem, g_ANEReq, g_ANEIO;
static bool g_ane_loaded = false;
static void ane_init(void) {
if (g_ane_loaded) return;
dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
g_ANEDesc = NSClassFromString(@"_ANEInMemoryModelDescriptor");
g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
g_ANEReq = NSClassFromString(@"_ANERequest");
g_ANEIO = NSClassFromString(@"_ANEIOSurfaceObject");
g_ane_loaded = true;
}
static IOSurfaceRef ane_create_surface(size_t bytes) {
size_t aligned = ((bytes + 127) / 128) * 128;
return IOSurfaceCreate((__bridge CFDictionaryRef)@{
(id)kIOSurfaceWidth: @(aligned),
(id)kIOSurfaceHeight: @1,
(id)kIOSurfaceBytesPerElement: @1,
(id)kIOSurfaceBytesPerRow: @(aligned),
(id)kIOSurfaceAllocSize: @(aligned),
(id)kIOSurfacePixelFormat: @0
});
}
// Create an IOSurface specifically for dynamic weights.
// Uses the same 128-byte alignment as regular surfaces.
static IOSurfaceRef ane_create_weights_surface(size_t bytes) {
size_t aligned = ((bytes + 127) / 128) * 128;
if (aligned < 128) aligned = 128;
NSMutableDictionary *props = [NSMutableDictionary dictionaryWithObjectsAndKeys:
@(aligned), (id)kIOSurfaceWidth,
@1, (id)kIOSurfaceHeight,
@1, (id)kIOSurfaceBytesPerElement,
@(aligned), (id)kIOSurfaceBytesPerRow,
@(aligned), (id)kIOSurfaceAllocSize,
@0, (id)kIOSurfacePixelFormat,
nil];
// Enable global access for ANE hardware
[props setObject:@YES forKey:(id)kIOSurfaceIsGlobal];
return IOSurfaceCreate((__bridge CFDictionaryRef)props);
}
static ANEKernel *ane_compile_with_weights(NSData *milText, NSData *weightData,
int nInputs, size_t *inputSizes,
int nOutputs, size_t *outputSizes,
IOSurfaceRef weightsSurface) {
ane_init();
NSError *e = nil;
NSDictionary *wdict = nil;
if (weightData) {
wdict = @{@"@model_path/weights/weight.bin": @{@"offset": @0, @"data": weightData}};
}
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
g_ANEDesc, @selector(modelWithMILText:weights:optionsPlist:),
milText, wdict, nil);
if (!desc) return NULL;
id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(
g_ANEInMem, @selector(inMemoryModelWithDescriptor:), desc);
// Pre-populate temp dir with MIL + weights
id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
NSFileManager *fm = [NSFileManager defaultManager];
[fm createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"]
withIntermediateDirectories:YES attributes:nil error:nil];
[milText writeToFile:[td stringByAppendingPathComponent:@"model.mil"] atomically:YES];
if (weightData)
[weightData writeToFile:[td stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
mdl, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
fprintf(stderr, "ANE compile failed: %s\n", [[e description] UTF8String]);
[fm removeItemAtPath:td error:nil];
return NULL;
}
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e)) {
fprintf(stderr, "ANE load failed: %s\n", [[e description] UTF8String]);
[fm removeItemAtPath:td error:nil];
return NULL;
}
ANEKernel *k = calloc(1, sizeof(ANEKernel));
k->model = mdl;
k->tmpDir = td;
k->nInputs = nInputs;
k->nOutputs = nOutputs;
k->inputBytes = malloc(nInputs * sizeof(size_t));
k->outputBytes = malloc(nOutputs * sizeof(size_t));
memcpy(k->inputBytes, inputSizes, nInputs * sizeof(size_t));
memcpy(k->outputBytes, outputSizes, nOutputs * sizeof(size_t));
// Create IOSurfaces for inputs/outputs
k->ioInputs = malloc(nInputs * sizeof(IOSurfaceRef));
k->ioOutputs = malloc(nOutputs * sizeof(IOSurfaceRef));
for (int i = 0; i < nInputs; i++)
k->ioInputs[i] = ane_create_surface(inputSizes[i]);
for (int i = 0; i < nOutputs; i++)
k->ioOutputs[i] = ane_create_surface(outputSizes[i]);
// Handle optional weights surface for dynamic weight injection
id weightsBufferObj = nil;
if (weightsSurface) {
k->weightsSurface = weightsSurface;
CFRetain(weightsSurface);
k->weightsBytes = IOSurfaceGetAllocSize(weightsSurface);
weightsBufferObj = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
g_ANEIO, @selector(objectWithIOSurface:), weightsSurface);
k->weightsBuffer = weightsBufferObj;
}
// Build request with optional weights buffer
NSMutableArray *wIns = [NSMutableArray arrayWithCapacity:nInputs];
NSMutableArray *iIdx = [NSMutableArray arrayWithCapacity:nInputs];
for (int i = 0; i < nInputs; i++) {
[wIns addObject:((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
g_ANEIO, @selector(objectWithIOSurface:), k->ioInputs[i])];
[iIdx addObject:@(i)];
}
NSMutableArray *wOuts = [NSMutableArray arrayWithCapacity:nOutputs];
NSMutableArray *oIdx = [NSMutableArray arrayWithCapacity:nOutputs];
for (int i = 0; i < nOutputs; i++) {
[wOuts addObject:((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(
g_ANEIO, @selector(objectWithIOSurface:), k->ioOutputs[i])];
[oIdx addObject:@(i)];
}
k->request = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(
g_ANEReq, @selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
wIns, iIdx, wOuts, oIdx, weightsBufferObj, nil, @0);
return k;
}
static ANEKernel *ane_compile(NSData *milText, NSData *weightData,
int nInputs, size_t *inputSizes,
int nOutputs, size_t *outputSizes) {
return ane_compile_with_weights(milText, weightData, nInputs, inputSizes, nOutputs, outputSizes, NULL);
}
static int ane_load_weights(ANEKernel *k, const void *data, size_t bytes) {
if (!k || !k->weightsSurface) {
fprintf(stderr, "ane_load_weights: kernel has no weights surface\n");
return -1;
}
size_t surfaceSize = IOSurfaceGetAllocSize(k->weightsSurface);
if (bytes > surfaceSize) {
fprintf(stderr, "ane_load_weights: data size %zu exceeds surface size %zu\n",
bytes, surfaceSize);
return -1;
}
IOSurfaceLock(k->weightsSurface, 0, NULL);
memcpy(IOSurfaceGetBaseAddress(k->weightsSurface), data, bytes);
IOSurfaceUnlock(k->weightsSurface, 0, NULL);
return 0;
}
static void *ane_weights_lock(ANEKernel *k) {
if (!k || !k->weightsSurface) return NULL;
IOSurfaceLock(k->weightsSurface, 0, NULL);
return IOSurfaceGetBaseAddress(k->weightsSurface);
}
static void ane_weights_unlock(ANEKernel *k) {
if (!k || !k->weightsSurface) return;
IOSurfaceUnlock(k->weightsSurface, 0, NULL);
}
static void ane_write_input(ANEKernel *k, int idx, const void *data, size_t bytes) {
IOSurfaceLock(k->ioInputs[idx], 0, NULL);
memcpy(IOSurfaceGetBaseAddress(k->ioInputs[idx]), data, bytes);
IOSurfaceUnlock(k->ioInputs[idx], 0, NULL);
}
static void ane_read_output(ANEKernel *k, int idx, void *data, size_t bytes) {
IOSurfaceLock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL);
memcpy(data, IOSurfaceGetBaseAddress(k->ioOutputs[idx]), bytes);
IOSurfaceUnlock(k->ioOutputs[idx], kIOSurfaceLockReadOnly, NULL);
}
static bool ane_eval(ANEKernel *k) {
NSError *e = nil;
BOOL result = ((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
k->model, @selector(evaluateWithQoS:options:request:error:),
21, @{}, k->request, &e);
if (!result && e) {
fprintf(stderr, "ANE evaluation failed: %s\n", [[e localizedDescription] UTF8String]);
}
return result;
}
static void ane_free(ANEKernel *k) {
if (!k) return;
NSError *e = nil;
((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
k->model, @selector(unloadWithQoS:error:), 21, &e);
for (int i = 0; i < k->nInputs; i++) CFRelease(k->ioInputs[i]);
for (int i = 0; i < k->nOutputs; i++) CFRelease(k->ioOutputs[i]);
if (k->weightsSurface) CFRelease(k->weightsSurface);
[[NSFileManager defaultManager] removeItemAtPath:k->tmpDir error:nil];
free(k->ioInputs); free(k->ioOutputs);
free(k->inputBytes); free(k->outputBytes);
free(k);
}