mirror of https://github.com/maderix/ANE.git
Fix benchmarks for macOS 26: replace compileModelAtURL with in-memory MIL pipeline
[MLModel compileModelAtURL:] fails on macOS 26, breaking inmem_bench, sram_bench, and sram_probe. This switches all three to generate MIL text and weight blobs programmatically in memory (matching the working inmem_peak.m approach), bypassing CoreML disk compilation entirely.

- inmem_bench.m: replace CoreML compile + file read with genMIL/buildWeightBlob
- sram_bench.m: switch from _ANEClient/_ANEModel to _ANEInMemoryModel API
- sram_probe.m: same _ANEClient → _ANEInMemoryModel conversion

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
443194bca4
commit
d3d00307c0
|
|
@ -1,5 +1,4 @@
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
#import <CoreML/CoreML.h>
|
|
||||||
#import <objc/runtime.h>
|
#import <objc/runtime.h>
|
||||||
#import <objc/message.h>
|
#import <objc/message.h>
|
||||||
#import <dlfcn.h>
|
#import <dlfcn.h>
|
||||||
|
|
@ -9,18 +8,45 @@
|
||||||
static mach_timebase_info_data_t g_tb;
|
static mach_timebase_info_data_t g_tb;
|
||||||
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
||||||
|
|
||||||
|
// Builds, entirely in memory, a weight blob holding one ch x ch tensor of
// 16-bit weights: a 64-byte file header, a 64-byte chunk header, then
// ch * ch 16-bit values.
//
// NOTE(review): the header bytes appear to follow coremltools' MILBlob storage
// format (storage_header {count = 1, version = 2}; blob_metadata {sentinel
// 0xDEADBEEF, dtype, sizeInBytes, offset}) -- confirm against that layout.
//
// @param ch  Channel count; the tensor has ch * ch elements.
// @return    The blob (the NSData owns and frees the buffer), or nil if the
//            buffer could not be allocated.
static NSData *buildWeightBlob(int ch) {
    const NSUInteger kFileHeaderSize = 64;
    const NSUInteger kChunkHeaderSize = 64;

    NSUInteger count = (NSUInteger)ch * ch;
    NSUInteger wsize = count * 2;  // two bytes per 16-bit weight
    NSUInteger total = kFileHeaderSize + kChunkHeaderSize + wsize;

    // calloc zero-fills, so every header byte not set below stays 0.
    uint8_t *buf = calloc(total, 1);
    if (!buf) return nil;  // previously a NULL dereference on allocation failure

    buf[0] = 0x01; buf[4] = 0x02;

    uint8_t *chunk = buf + kFileHeaderSize;
    chunk[0]=0xEF; chunk[1]=0xBE; chunk[2]=0xAD; chunk[3]=0xDE;  // 0xDEADBEEF, little-endian
    chunk[4]=0x01;

    // The original wrote only chunk[10] = 0x08, i.e. the little-endian value
    // 0x80000 = 512 * 512 * 2, which equals the payload size only for
    // ch == 512. Store the real payload size instead (byte-identical to the
    // old output when ch == 512), plus the absolute offset of the payload.
    uint64_t dataSize = (uint64_t)wsize;
    uint64_t dataOffset = (uint64_t)(kFileHeaderSize + kChunkHeaderSize);
    for (int b = 0; b < 8; b++) {
        chunk[8 + b]  = (uint8_t)(dataSize >> (8 * b));
        chunk[16 + b] = (uint8_t)(dataOffset >> (8 * b));
    }

    // Random 10-bit low field OR'd with 0x2000: read as IEEE half floats these
    // are small positive normal values, so the weights are never all zero.
    uint16_t *fp16 = (uint16_t *)(chunk + kChunkHeaderSize);
    for (NSUInteger j = 0; j < count; j++)
        fp16[j] = (uint16_t)((arc4random() & 0x03FF) | 0x2000);

    return [NSData dataWithBytesNoCopy:buf length:total freeWhenDone:YES];
}
|
||||||
|
|
||||||
|
// Produces the MIL program text for a single-convolution model:
//   fp32 input [1, ch, 1, sp] -> cast to fp16 -> conv with a [ch, ch, 1, 1]
//   weight read from "@model_path/weights/weight.bin" at offset 64 -> cast
//   back to fp32 output [1, ch, 1, sp].
//
// @param ch  Channel count substituted into the tensor shapes.
// @param sp  Size of the last tensor dimension.
// @return    The complete program text.
static NSString *genMIL(int ch, int sp) {
    // Fixed prologue: program version plus the build-info dictionary.
    NSString *prologue =
        @"program(1.3)\n[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, {\"coremlc-version\", \"3505.4.1\"}, {\"coremltools-component-milinternal\", \"\"}, {\"coremltools-version\", \"9.0\"}})]\n{\n";

    // Shape-independent constants: conv hyper-parameters and the fp16 dtype tag.
    NSString *convConsts =
        @" string c_pad_type = const()[name = string(\"c_pad_type\"), val = string(\"valid\")];\n"
        @" tensor<int32, [2]> c_strides = const()[name = string(\"c_strides\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" tensor<int32, [4]> c_pad = const()[name = string(\"c_pad\"), val = tensor<int32, [4]>([0, 0, 0, 0])];\n"
        @" tensor<int32, [2]> c_dilations = const()[name = string(\"c_dilations\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" int32 c_groups = const()[name = string(\"c_groups\"), val = int32(1)];\n"
        @" string to_fp16 = const()[name = string(\"to_fp16\"), val = string(\"fp16\")];\n";

    NSMutableString *mil = [NSMutableString stringWithString:prologue];

    // Function signature, then the constants it relies on.
    [mil appendFormat:@" func main<ios18>(tensor<fp32, [1, %d, 1, %d]> x) {\n", ch, sp];
    [mil appendString:convConsts];

    // Input cast, weight constant, and the convolution itself.
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> x16 = cast(dtype = to_fp16, x = x)[name = string(\"cast_in\")];\n", ch, sp];
    [mil appendFormat:@" tensor<fp16, [%d, %d, 1, 1]> W = const()[name = string(\"W\"), val = tensor<fp16, [%d, %d, 1, 1]>(BLOBFILE(path = string(\"@model_path/weights/weight.bin\"), offset = uint64(64)))];\n", ch, ch, ch, ch];
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> y16 = conv(dilations = c_dilations, groups = c_groups, pad = c_pad, pad_type = c_pad_type, strides = c_strides, weight = W, x = x16)[name = string(\"conv\")];\n", ch, sp];

    // Output cast back to fp32 and the closing of function and program.
    [mil appendString:@" string to_fp32 = const()[name = string(\"to_fp32\"), val = string(\"fp32\")];\n"];
    [mil appendFormat:@" tensor<fp32, [1, %d, 1, %d]> y = cast(dtype = to_fp32, x = y16)[name = string(\"cast_out\")];\n", ch, sp];
    [mil appendString:@" } -> (y);\n}\n"];

    return mil;
}
|
||||||
|
|
||||||
double benchInMem(int ch, int sp) {
|
double benchInMem(int ch, int sp) {
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
NSError *e = nil;
|
NSError *e = nil;
|
||||||
NSString *path = [NSString stringWithFormat:@"/tmp/ane_sram_%dch_%dsp.mlpackage", ch, sp];
|
NSData *milData = [[genMIL(ch, sp) dataUsingEncoding:NSUTF8StringEncoding] copy];
|
||||||
NSURL *compiled = [MLModel compileModelAtURL:[NSURL fileURLWithPath:path] error:&e];
|
NSData *wb = buildWeightBlob(ch);
|
||||||
if (e) return -1;
|
|
||||||
|
|
||||||
NSData *milData = [[NSString stringWithContentsOfFile:
|
|
||||||
[[compiled path] stringByAppendingPathComponent:@"model.mil"]
|
|
||||||
encoding:NSUTF8StringEncoding error:nil] dataUsingEncoding:NSUTF8StringEncoding];
|
|
||||||
NSData *weightBlob = [NSData dataWithContentsOfFile:
|
|
||||||
[[compiled path] stringByAppendingPathComponent:@"weights/weight.bin"]];
|
|
||||||
|
|
||||||
Class Desc = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
Class Desc = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||||
Class IMM = NSClassFromString(@"_ANEInMemoryModel");
|
Class IMM = NSClassFromString(@"_ANEInMemoryModel");
|
||||||
|
|
@ -28,7 +54,7 @@ double benchInMem(int ch, int sp) {
|
||||||
Class AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
Class AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
||||||
|
|
||||||
NSDictionary *wdict = @{
|
NSDictionary *wdict = @{
|
||||||
@"@model_path/weights/weight.bin": @{@"offset": @64, @"data": weightBlob}
|
@"@model_path/weights/weight.bin": @{@"offset": @0, @"data": wb}
|
||||||
};
|
};
|
||||||
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
|
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
|
||||||
Desc, @selector(modelWithMILText:weights:optionsPlist:),
|
Desc, @selector(modelWithMILText:weights:optionsPlist:),
|
||||||
|
|
@ -43,7 +69,7 @@ double benchInMem(int ch, int sp) {
|
||||||
[fm createDirectoryAtPath:[tmpDir stringByAppendingPathComponent:@"weights"]
|
[fm createDirectoryAtPath:[tmpDir stringByAppendingPathComponent:@"weights"]
|
||||||
withIntermediateDirectories:YES attributes:nil error:nil];
|
withIntermediateDirectories:YES attributes:nil error:nil];
|
||||||
[milData writeToFile:[tmpDir stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
[milData writeToFile:[tmpDir stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
||||||
[weightBlob writeToFile:[tmpDir stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];
|
[wb writeToFile:[tmpDir stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];
|
||||||
|
|
||||||
BOOL ok = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
BOOL ok = ((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
||||||
model, @selector(compileWithQoS:options:error:), 21, @{}, &e);
|
model, @selector(compileWithQoS:options:error:), 21, @{}, &e);
|
||||||
|
|
|
||||||
107
sram_bench.m
107
sram_bench.m
|
|
@ -1,5 +1,4 @@
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
#import <CoreML/CoreML.h>
|
|
||||||
#import <objc/runtime.h>
|
#import <objc/runtime.h>
|
||||||
#import <objc/message.h>
|
#import <objc/message.h>
|
||||||
#import <dlfcn.h>
|
#import <dlfcn.h>
|
||||||
|
|
@ -8,25 +7,79 @@
|
||||||
|
|
||||||
static mach_timebase_info_data_t g_tb;
|
static mach_timebase_info_data_t g_tb;
|
||||||
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
||||||
static id g_client;
|
|
||||||
static Class AM, AR, AIO;
|
|
||||||
|
|
||||||
double bench(const char *path, int ch, int sp) {
|
// Builds, entirely in memory, a weight blob holding one ch x ch tensor of
// 16-bit weights: a 64-byte file header, a 64-byte chunk header, then
// ch * ch 16-bit values.
//
// NOTE(review): the header bytes appear to follow coremltools' MILBlob storage
// format (storage_header {count = 1, version = 2}; blob_metadata {sentinel
// 0xDEADBEEF, dtype, sizeInBytes, offset}) -- confirm against that layout.
//
// @param ch  Channel count; the tensor has ch * ch elements.
// @return    The blob (the NSData owns and frees the buffer), or nil if the
//            buffer could not be allocated.
static NSData *buildWeightBlob(int ch) {
    const NSUInteger kFileHeaderSize = 64;
    const NSUInteger kChunkHeaderSize = 64;

    NSUInteger count = (NSUInteger)ch * ch;
    NSUInteger wsize = count * 2;  // two bytes per 16-bit weight
    NSUInteger total = kFileHeaderSize + kChunkHeaderSize + wsize;

    // calloc zero-fills, so every header byte not set below stays 0.
    uint8_t *buf = calloc(total, 1);
    if (!buf) return nil;  // previously a NULL dereference on allocation failure

    buf[0] = 0x01; buf[4] = 0x02;

    uint8_t *chunk = buf + kFileHeaderSize;
    chunk[0]=0xEF; chunk[1]=0xBE; chunk[2]=0xAD; chunk[3]=0xDE;  // 0xDEADBEEF, little-endian
    chunk[4]=0x01;

    // The original wrote only chunk[10] = 0x08, i.e. the little-endian value
    // 0x80000 = 512 * 512 * 2, which equals the payload size only for
    // ch == 512. Store the real payload size instead (byte-identical to the
    // old output when ch == 512), plus the absolute offset of the payload.
    uint64_t dataSize = (uint64_t)wsize;
    uint64_t dataOffset = (uint64_t)(kFileHeaderSize + kChunkHeaderSize);
    for (int b = 0; b < 8; b++) {
        chunk[8 + b]  = (uint8_t)(dataSize >> (8 * b));
        chunk[16 + b] = (uint8_t)(dataOffset >> (8 * b));
    }

    // Random 10-bit low field OR'd with 0x2000: read as IEEE half floats these
    // are small positive normal values, so the weights are never all zero.
    uint16_t *fp16 = (uint16_t *)(chunk + kChunkHeaderSize);
    for (NSUInteger j = 0; j < count; j++)
        fp16[j] = (uint16_t)((arc4random() & 0x03FF) | 0x2000);

    return [NSData dataWithBytesNoCopy:buf length:total freeWhenDone:YES];
}
|
||||||
|
|
||||||
|
// Produces the MIL program text for a single-convolution model:
//   fp32 input [1, ch, 1, sp] -> cast to fp16 -> conv with a [ch, ch, 1, 1]
//   weight read from "@model_path/weights/weight.bin" at offset 64 -> cast
//   back to fp32 output [1, ch, 1, sp].
//
// @param ch  Channel count substituted into the tensor shapes.
// @param sp  Size of the last tensor dimension.
// @return    The complete program text.
static NSString *genMIL(int ch, int sp) {
    // Fixed prologue: program version plus the build-info dictionary.
    NSString *prologue =
        @"program(1.3)\n[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, {\"coremlc-version\", \"3505.4.1\"}, {\"coremltools-component-milinternal\", \"\"}, {\"coremltools-version\", \"9.0\"}})]\n{\n";

    // Shape-independent constants: conv hyper-parameters and the fp16 dtype tag.
    NSString *convConsts =
        @" string c_pad_type = const()[name = string(\"c_pad_type\"), val = string(\"valid\")];\n"
        @" tensor<int32, [2]> c_strides = const()[name = string(\"c_strides\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" tensor<int32, [4]> c_pad = const()[name = string(\"c_pad\"), val = tensor<int32, [4]>([0, 0, 0, 0])];\n"
        @" tensor<int32, [2]> c_dilations = const()[name = string(\"c_dilations\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" int32 c_groups = const()[name = string(\"c_groups\"), val = int32(1)];\n"
        @" string to_fp16 = const()[name = string(\"to_fp16\"), val = string(\"fp16\")];\n";

    NSMutableString *mil = [NSMutableString stringWithString:prologue];

    // Function signature, then the constants it relies on.
    [mil appendFormat:@" func main<ios18>(tensor<fp32, [1, %d, 1, %d]> x) {\n", ch, sp];
    [mil appendString:convConsts];

    // Input cast, weight constant, and the convolution itself.
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> x16 = cast(dtype = to_fp16, x = x)[name = string(\"cast_in\")];\n", ch, sp];
    [mil appendFormat:@" tensor<fp16, [%d, %d, 1, 1]> W = const()[name = string(\"W\"), val = tensor<fp16, [%d, %d, 1, 1]>(BLOBFILE(path = string(\"@model_path/weights/weight.bin\"), offset = uint64(64)))];\n", ch, ch, ch, ch];
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> y16 = conv(dilations = c_dilations, groups = c_groups, pad = c_pad, pad_type = c_pad_type, strides = c_strides, weight = W, x = x16)[name = string(\"conv\")];\n", ch, sp];

    // Output cast back to fp32 and the closing of function and program.
    [mil appendString:@" string to_fp32 = const()[name = string(\"to_fp32\"), val = string(\"fp32\")];\n"];
    [mil appendFormat:@" tensor<fp32, [1, %d, 1, %d]> y = cast(dtype = to_fp32, x = y16)[name = string(\"cast_out\")];\n", ch, sp];
    [mil appendString:@" } -> (y);\n}\n"];

    return mil;
}
|
||||||
|
|
||||||
|
double bench(int ch, int sp) {
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
NSError *e = nil;
|
NSError *e = nil;
|
||||||
NSURL *compiled = [MLModel compileModelAtURL:
|
NSData *milData = [[genMIL(ch, sp) dataUsingEncoding:NSUTF8StringEncoding] copy];
|
||||||
[NSURL fileURLWithPath:[NSString stringWithUTF8String:path]] error:&e];
|
NSData *wb = buildWeightBlob(ch);
|
||||||
if (e) return -1;
|
|
||||||
id model = ((id(*)(Class,SEL,id,id))objc_msgSend)(AM, @selector(modelAtURL:key:), compiled, @"s");
|
|
||||||
BOOL ok = ((BOOL(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
|
||||||
g_client, @selector(compileModel:options:qos:error:), model,
|
|
||||||
@{@"kANEFModelType":@"kANEFModelMIL",@"kANEFNetPlistFilenameKey":@"model.mil"}, 21, &e);
|
|
||||||
if (!ok) return -2;
|
|
||||||
ok = ((BOOL(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
|
||||||
g_client, @selector(loadModel:options:qos:error:), model, @{}, 21, &e);
|
|
||||||
if (!ok) return -3;
|
|
||||||
|
|
||||||
NSUInteger bytes = ch * sp * 4; // FP32 input
|
Class D = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||||
|
Class I = NSClassFromString(@"_ANEInMemoryModel");
|
||||||
|
Class AR = NSClassFromString(@"_ANERequest");
|
||||||
|
Class AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
||||||
|
|
||||||
|
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
|
||||||
|
D, @selector(modelWithMILText:weights:optionsPlist:),
|
||||||
|
milData, @{@"@model_path/weights/weight.bin": @{@"offset": @0, @"data": wb}}, nil);
|
||||||
|
if (!desc) return -2;
|
||||||
|
|
||||||
|
id model = ((id(*)(Class,SEL,id))objc_msgSend)(
|
||||||
|
I, @selector(inMemoryModelWithDescriptor:), desc);
|
||||||
|
if (!model) return -3;
|
||||||
|
|
||||||
|
id hexId = ((id(*)(id,SEL))objc_msgSend)(model, @selector(hexStringIdentifier));
|
||||||
|
NSString *tmpDir = [NSTemporaryDirectory() stringByAppendingPathComponent:hexId];
|
||||||
|
NSFileManager *fm = [NSFileManager defaultManager];
|
||||||
|
[fm createDirectoryAtPath:[tmpDir stringByAppendingPathComponent:@"weights"]
|
||||||
|
withIntermediateDirectories:YES attributes:nil error:nil];
|
||||||
|
[milData writeToFile:[tmpDir stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
||||||
|
[wb writeToFile:[tmpDir stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];
|
||||||
|
|
||||||
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
||||||
|
model, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil]; return -4;
|
||||||
|
}
|
||||||
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
||||||
|
model, @selector(loadWithQoS:options:error:), 21, @{}, &e)) {
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil]; return -5;
|
||||||
|
}
|
||||||
|
|
||||||
|
NSUInteger bytes = ch * sp * 4;
|
||||||
IOSurfaceRef ioIn = IOSurfaceCreate((__bridge CFDictionaryRef)@{
|
IOSurfaceRef ioIn = IOSurfaceCreate((__bridge CFDictionaryRef)@{
|
||||||
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
||||||
(id)kIOSurfaceBytesPerElement:@1,(id)kIOSurfaceBytesPerRow:@(bytes),
|
(id)kIOSurfaceBytesPerElement:@1,(id)kIOSurfaceBytesPerRow:@(bytes),
|
||||||
|
|
@ -35,7 +88,6 @@ double bench(const char *path, int ch, int sp) {
|
||||||
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
||||||
(id)kIOSurfaceBytesPerElement:@1,(id)kIOSurfaceBytesPerRow:@(bytes),
|
(id)kIOSurfaceBytesPerElement:@1,(id)kIOSurfaceBytesPerRow:@(bytes),
|
||||||
(id)kIOSurfaceAllocSize:@(bytes),(id)kIOSurfacePixelFormat:@0});
|
(id)kIOSurfaceAllocSize:@(bytes),(id)kIOSurfacePixelFormat:@0});
|
||||||
|
|
||||||
id wIn = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(AIO, @selector(objectWithIOSurface:), ioIn);
|
id wIn = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(AIO, @selector(objectWithIOSurface:), ioIn);
|
||||||
id wOut = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(AIO, @selector(objectWithIOSurface:), ioOut);
|
id wOut = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(AIO, @selector(objectWithIOSurface:), ioOut);
|
||||||
id req = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(AR,
|
id req = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(AR,
|
||||||
|
|
@ -43,19 +95,20 @@ double bench(const char *path, int ch, int sp) {
|
||||||
@[wIn], @[@0], @[wOut], @[@0], nil, nil, @0);
|
@[wIn], @[@0], @[wOut], @[@0], nil, nil, @0);
|
||||||
|
|
||||||
for (int i = 0; i < 5; i++)
|
for (int i = 0; i < 5; i++)
|
||||||
((BOOL(*)(id,SEL,id,id,id,NSUInteger,NSError**))objc_msgSend)(
|
((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
|
||||||
g_client, @selector(evaluateWithModel:options:request:qos:error:), model, @{}, req, 21, &e);
|
model, @selector(evaluateWithQoS:options:request:error:), 21, @{}, req, &e);
|
||||||
|
|
||||||
int iters = 30;
|
int iters = 30;
|
||||||
uint64_t t0 = mach_absolute_time();
|
uint64_t t0 = mach_absolute_time();
|
||||||
for (int i = 0; i < iters; i++)
|
for (int i = 0; i < iters; i++)
|
||||||
((BOOL(*)(id,SEL,id,id,id,NSUInteger,NSError**))objc_msgSend)(
|
((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
|
||||||
g_client, @selector(evaluateWithModel:options:request:qos:error:), model, @{}, req, 21, &e);
|
model, @selector(evaluateWithQoS:options:request:error:), 21, @{}, req, &e);
|
||||||
double ms = ticksToMs(mach_absolute_time() - t0) / iters;
|
double ms = ticksToMs(mach_absolute_time() - t0) / iters;
|
||||||
|
|
||||||
((void(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
|
||||||
g_client, @selector(unloadModel:options:qos:error:), model, @{}, 21, &e);
|
model, @selector(unloadWithQoS:error:), 21, &e);
|
||||||
CFRelease(ioIn); CFRelease(ioOut);
|
CFRelease(ioIn); CFRelease(ioOut);
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil];
|
||||||
return ms;
|
return ms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -63,10 +116,6 @@ double bench(const char *path, int ch, int sp) {
|
||||||
int main() {
|
int main() {
|
||||||
mach_timebase_info(&g_tb);
|
mach_timebase_info(&g_tb);
|
||||||
dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
|
dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
|
||||||
g_client = [NSClassFromString(@"_ANEClient") performSelector:@selector(sharedConnection)];
|
|
||||||
AM = NSClassFromString(@"_ANEModel");
|
|
||||||
AR = NSClassFromString(@"_ANERequest");
|
|
||||||
AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
|
||||||
|
|
||||||
printf("=== ANE SRAM Probe: 1x1 Conv with Increasing Weight Size ===\n\n");
|
printf("=== ANE SRAM Probe: 1x1 Conv with Increasing Weight Size ===\n\n");
|
||||||
printf("%-25s %8s %8s %8s %10s %8s\n", "Config", "W (MB)", "Act(MB)", "Tot(MB)", "ms/eval", "TFLOPS");
|
printf("%-25s %8s %8s %8s %10s %8s\n", "Config", "W (MB)", "Act(MB)", "Tot(MB)", "ms/eval", "TFLOPS");
|
||||||
|
|
@ -82,9 +131,7 @@ int main() {
|
||||||
double tot = w_mb + 2 * a_mb;
|
double tot = w_mb + 2 * a_mb;
|
||||||
double gflop = 2.0 * ch * ch * sp / 1e9;
|
double gflop = 2.0 * ch * ch * sp / 1e9;
|
||||||
|
|
||||||
char path[256];
|
double ms = bench(ch, sp);
|
||||||
snprintf(path, sizeof(path), "/tmp/ane_sram_%dch_%dsp.mlpackage", ch, sp);
|
|
||||||
double ms = bench(path, ch, sp);
|
|
||||||
|
|
||||||
double tflops = (ms > 0) ? gflop / ms : -1;
|
double tflops = (ms > 0) ? gflop / ms : -1;
|
||||||
char label[64];
|
char label[64];
|
||||||
|
|
|
||||||
104
sram_probe.m
104
sram_probe.m
|
|
@ -1,5 +1,4 @@
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
#import <CoreML/CoreML.h>
|
|
||||||
#import <objc/runtime.h>
|
#import <objc/runtime.h>
|
||||||
#import <objc/message.h>
|
#import <objc/message.h>
|
||||||
#import <dlfcn.h>
|
#import <dlfcn.h>
|
||||||
|
|
@ -8,20 +7,78 @@
|
||||||
|
|
||||||
static mach_timebase_info_data_t g_tb;
|
static mach_timebase_info_data_t g_tb;
|
||||||
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
static double ticksToMs(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }
|
||||||
static id g_client; static Class AM, AR, AIO;
|
|
||||||
|
|
||||||
double bench(const char *path, int ch, int sp) {
|
// Builds, entirely in memory, a weight blob holding one ch x ch tensor of
// 16-bit weights: a 64-byte file header, a 64-byte chunk header, then
// ch * ch 16-bit values.
//
// NOTE(review): the header bytes appear to follow coremltools' MILBlob storage
// format (storage_header {count = 1, version = 2}; blob_metadata {sentinel
// 0xDEADBEEF, dtype, sizeInBytes, offset}) -- confirm against that layout.
//
// @param ch  Channel count; the tensor has ch * ch elements.
// @return    The blob (the NSData owns and frees the buffer), or nil if the
//            buffer could not be allocated.
static NSData *buildWeightBlob(int ch) {
    const NSUInteger kFileHeaderSize = 64;
    const NSUInteger kChunkHeaderSize = 64;

    NSUInteger count = (NSUInteger)ch * ch;
    NSUInteger wsize = count * 2;  // two bytes per 16-bit weight
    NSUInteger total = kFileHeaderSize + kChunkHeaderSize + wsize;

    // calloc zero-fills, so every header byte not set below stays 0.
    uint8_t *buf = calloc(total, 1);
    if (!buf) return nil;  // previously a NULL dereference on allocation failure

    buf[0] = 0x01; buf[4] = 0x02;

    uint8_t *chunk = buf + kFileHeaderSize;
    chunk[0]=0xEF; chunk[1]=0xBE; chunk[2]=0xAD; chunk[3]=0xDE;  // 0xDEADBEEF, little-endian
    chunk[4]=0x01;

    // The original wrote only chunk[10] = 0x08, i.e. the little-endian value
    // 0x80000 = 512 * 512 * 2, which equals the payload size only for
    // ch == 512. Store the real payload size instead (byte-identical to the
    // old output when ch == 512), plus the absolute offset of the payload.
    uint64_t dataSize = (uint64_t)wsize;
    uint64_t dataOffset = (uint64_t)(kFileHeaderSize + kChunkHeaderSize);
    for (int b = 0; b < 8; b++) {
        chunk[8 + b]  = (uint8_t)(dataSize >> (8 * b));
        chunk[16 + b] = (uint8_t)(dataOffset >> (8 * b));
    }

    // Random 10-bit low field OR'd with 0x2000: read as IEEE half floats these
    // are small positive normal values, so the weights are never all zero.
    uint16_t *fp16 = (uint16_t *)(chunk + kChunkHeaderSize);
    for (NSUInteger j = 0; j < count; j++)
        fp16[j] = (uint16_t)((arc4random() & 0x03FF) | 0x2000);

    return [NSData dataWithBytesNoCopy:buf length:total freeWhenDone:YES];
}
|
||||||
|
|
||||||
|
// Produces the MIL program text for a single-convolution model:
//   fp32 input [1, ch, 1, sp] -> cast to fp16 -> conv with a [ch, ch, 1, 1]
//   weight read from "@model_path/weights/weight.bin" at offset 64 -> cast
//   back to fp32 output [1, ch, 1, sp].
//
// @param ch  Channel count substituted into the tensor shapes.
// @param sp  Size of the last tensor dimension.
// @return    The complete program text.
static NSString *genMIL(int ch, int sp) {
    // Fixed prologue: program version plus the build-info dictionary.
    NSString *prologue =
        @"program(1.3)\n[buildInfo = dict<string, string>({{\"coremlc-component-MIL\", \"3510.2.1\"}, {\"coremlc-version\", \"3505.4.1\"}, {\"coremltools-component-milinternal\", \"\"}, {\"coremltools-version\", \"9.0\"}})]\n{\n";

    // Shape-independent constants: conv hyper-parameters and the fp16 dtype tag.
    NSString *convConsts =
        @" string c_pad_type = const()[name = string(\"c_pad_type\"), val = string(\"valid\")];\n"
        @" tensor<int32, [2]> c_strides = const()[name = string(\"c_strides\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" tensor<int32, [4]> c_pad = const()[name = string(\"c_pad\"), val = tensor<int32, [4]>([0, 0, 0, 0])];\n"
        @" tensor<int32, [2]> c_dilations = const()[name = string(\"c_dilations\"), val = tensor<int32, [2]>([1, 1])];\n"
        @" int32 c_groups = const()[name = string(\"c_groups\"), val = int32(1)];\n"
        @" string to_fp16 = const()[name = string(\"to_fp16\"), val = string(\"fp16\")];\n";

    NSMutableString *mil = [NSMutableString stringWithString:prologue];

    // Function signature, then the constants it relies on.
    [mil appendFormat:@" func main<ios18>(tensor<fp32, [1, %d, 1, %d]> x) {\n", ch, sp];
    [mil appendString:convConsts];

    // Input cast, weight constant, and the convolution itself.
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> x16 = cast(dtype = to_fp16, x = x)[name = string(\"cast_in\")];\n", ch, sp];
    [mil appendFormat:@" tensor<fp16, [%d, %d, 1, 1]> W = const()[name = string(\"W\"), val = tensor<fp16, [%d, %d, 1, 1]>(BLOBFILE(path = string(\"@model_path/weights/weight.bin\"), offset = uint64(64)))];\n", ch, ch, ch, ch];
    [mil appendFormat:@" tensor<fp16, [1, %d, 1, %d]> y16 = conv(dilations = c_dilations, groups = c_groups, pad = c_pad, pad_type = c_pad_type, strides = c_strides, weight = W, x = x16)[name = string(\"conv\")];\n", ch, sp];

    // Output cast back to fp32 and the closing of function and program.
    [mil appendString:@" string to_fp32 = const()[name = string(\"to_fp32\"), val = string(\"fp32\")];\n"];
    [mil appendFormat:@" tensor<fp32, [1, %d, 1, %d]> y = cast(dtype = to_fp32, x = y16)[name = string(\"cast_out\")];\n", ch, sp];
    [mil appendString:@" } -> (y);\n}\n"];

    return mil;
}
|
||||||
|
|
||||||
|
double bench(int ch, int sp) {
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
NSError *e = nil;
|
NSError *e = nil;
|
||||||
NSURL *compiled = [MLModel compileModelAtURL:
|
NSData *milData = [[genMIL(ch, sp) dataUsingEncoding:NSUTF8StringEncoding] copy];
|
||||||
[NSURL fileURLWithPath:[NSString stringWithUTF8String:path]] error:&e];
|
NSData *wb = buildWeightBlob(ch);
|
||||||
if (e) return -1;
|
|
||||||
id model = ((id(*)(Class,SEL,id,id))objc_msgSend)(AM, @selector(modelAtURL:key:), compiled, @"s");
|
Class D = NSClassFromString(@"_ANEInMemoryModelDescriptor");
|
||||||
((BOOL(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
Class I = NSClassFromString(@"_ANEInMemoryModel");
|
||||||
g_client, @selector(compileModel:options:qos:error:), model,
|
Class AR = NSClassFromString(@"_ANERequest");
|
||||||
@{@"kANEFModelType":@"kANEFModelMIL",@"kANEFNetPlistFilenameKey":@"model.mil"}, 21, &e);
|
Class AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
||||||
((BOOL(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
|
||||||
g_client, @selector(loadModel:options:qos:error:), model, @{}, 21, &e);
|
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(
|
||||||
|
D, @selector(modelWithMILText:weights:optionsPlist:),
|
||||||
|
milData, @{@"@model_path/weights/weight.bin": @{@"offset": @0, @"data": wb}}, nil);
|
||||||
|
if (!desc) return -2;
|
||||||
|
|
||||||
|
id model = ((id(*)(Class,SEL,id))objc_msgSend)(
|
||||||
|
I, @selector(inMemoryModelWithDescriptor:), desc);
|
||||||
|
if (!model) return -3;
|
||||||
|
|
||||||
|
id hexId = ((id(*)(id,SEL))objc_msgSend)(model, @selector(hexStringIdentifier));
|
||||||
|
NSString *tmpDir = [NSTemporaryDirectory() stringByAppendingPathComponent:hexId];
|
||||||
|
NSFileManager *fm = [NSFileManager defaultManager];
|
||||||
|
[fm createDirectoryAtPath:[tmpDir stringByAppendingPathComponent:@"weights"]
|
||||||
|
withIntermediateDirectories:YES attributes:nil error:nil];
|
||||||
|
[milData writeToFile:[tmpDir stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
||||||
|
[wb writeToFile:[tmpDir stringByAppendingPathComponent:@"weights/weight.bin"] atomically:YES];
|
||||||
|
|
||||||
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
||||||
|
model, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil]; return -4;
|
||||||
|
}
|
||||||
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(
|
||||||
|
model, @selector(loadWithQoS:options:error:), 21, @{}, &e)) {
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil]; return -5;
|
||||||
|
}
|
||||||
|
|
||||||
NSUInteger bytes = ch * sp * 4;
|
NSUInteger bytes = ch * sp * 4;
|
||||||
IOSurfaceRef ioIn = IOSurfaceCreate((__bridge CFDictionaryRef)@{
|
IOSurfaceRef ioIn = IOSurfaceCreate((__bridge CFDictionaryRef)@{
|
||||||
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
(id)kIOSurfaceWidth:@(bytes),(id)kIOSurfaceHeight:@1,
|
||||||
|
|
@ -36,18 +93,22 @@ double bench(const char *path, int ch, int sp) {
|
||||||
id req = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(AR,
|
id req = ((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(AR,
|
||||||
@selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
|
@selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
|
||||||
@[wIn], @[@0], @[wOut], @[@0], nil, nil, @0);
|
@[wIn], @[@0], @[wOut], @[@0], nil, nil, @0);
|
||||||
|
|
||||||
for (int i = 0; i < 5; i++)
|
for (int i = 0; i < 5; i++)
|
||||||
((BOOL(*)(id,SEL,id,id,id,NSUInteger,NSError**))objc_msgSend)(
|
((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
|
||||||
g_client, @selector(evaluateWithModel:options:request:qos:error:), model, @{}, req, 21, &e);
|
model, @selector(evaluateWithQoS:options:request:error:), 21, @{}, req, &e);
|
||||||
|
|
||||||
int iters = 50;
|
int iters = 50;
|
||||||
uint64_t t0 = mach_absolute_time();
|
uint64_t t0 = mach_absolute_time();
|
||||||
for (int i = 0; i < iters; i++)
|
for (int i = 0; i < iters; i++)
|
||||||
((BOOL(*)(id,SEL,id,id,id,NSUInteger,NSError**))objc_msgSend)(
|
((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(
|
||||||
g_client, @selector(evaluateWithModel:options:request:qos:error:), model, @{}, req, 21, &e);
|
model, @selector(evaluateWithQoS:options:request:error:), 21, @{}, req, &e);
|
||||||
double ms = ticksToMs(mach_absolute_time() - t0) / iters;
|
double ms = ticksToMs(mach_absolute_time() - t0) / iters;
|
||||||
((void(*)(id,SEL,id,id,NSUInteger,NSError**))objc_msgSend)(
|
|
||||||
g_client, @selector(unloadModel:options:qos:error:), model, @{}, 21, &e);
|
((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(
|
||||||
|
model, @selector(unloadWithQoS:error:), 21, &e);
|
||||||
CFRelease(ioIn); CFRelease(ioOut);
|
CFRelease(ioIn); CFRelease(ioOut);
|
||||||
|
[fm removeItemAtPath:tmpDir error:nil];
|
||||||
return ms;
|
return ms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -55,9 +116,6 @@ double bench(const char *path, int ch, int sp) {
|
||||||
int main() {
|
int main() {
|
||||||
mach_timebase_info(&g_tb);
|
mach_timebase_info(&g_tb);
|
||||||
dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
|
dlopen("/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine", RTLD_NOW);
|
||||||
g_client = [NSClassFromString(@"_ANEClient") performSelector:@selector(sharedConnection)];
|
|
||||||
AM = NSClassFromString(@"_ANEModel"); AR = NSClassFromString(@"_ANERequest");
|
|
||||||
AIO = NSClassFromString(@"_ANEIOSurfaceObject");
|
|
||||||
|
|
||||||
printf("=== ANE SRAM Fine Probe (weights only vary, spatial=64) ===\n\n");
|
printf("=== ANE SRAM Fine Probe (weights only vary, spatial=64) ===\n\n");
|
||||||
printf("%-12s %8s %10s %8s %12s\n", "Channels", "W (MB)", "ms/eval", "TFLOPS", "GFLOPS/MB");
|
printf("%-12s %8s %10s %8s %12s\n", "Channels", "W (MB)", "ms/eval", "TFLOPS", "GFLOPS/MB");
|
||||||
|
|
@ -70,9 +128,7 @@ int main() {
|
||||||
int ch = chs[i], sp = sps[i];
|
int ch = chs[i], sp = sps[i];
|
||||||
double w_mb = (double)ch * ch * 2 / 1024 / 1024;
|
double w_mb = (double)ch * ch * 2 / 1024 / 1024;
|
||||||
double gf = 2.0 * ch * ch * sp / 1e9;
|
double gf = 2.0 * ch * ch * sp / 1e9;
|
||||||
char path[256];
|
double ms = bench(ch, sp);
|
||||||
snprintf(path, sizeof(path), "/tmp/ane_sram_%dch_%dsp.mlpackage", ch, sp);
|
|
||||||
double ms = bench(path, ch, sp);
|
|
||||||
double tf = (ms > 0) ? gf / ms : 0;
|
double tf = (ms > 0) ? gf / ms : 0;
|
||||||
double eff = (ms > 0) ? tf * 1000 / w_mb : 0;
|
double eff = (ms > 0) ? tf * 1000 / w_mb : 0;
|
||||||
printf("%6d ch %7.1f %8.3f ms %7.2f %10.1f %s\n",
|
printf("%6d ch %7.1f %8.3f ms %7.2f %10.1f %s\n",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue