mirror of https://github.com/maderix/ANE.git
149 lines
7.2 KiB
Objective-C
149 lines
7.2 KiB
Objective-C
// test_bench_paths.m — Benchmark ANE evaluation paths at production dimensions
|
|
// Compares: standard, RT, processRequest, and ane_eval_rt wrapper
|
|
#import <Foundation/Foundation.h>
|
|
#import <objc/runtime.h>
|
|
#import <objc/message.h>
|
|
#import <dlfcn.h>
|
|
#import <IOSurface/IOSurface.h>
|
|
#import <mach/mach_time.h>
|
|
|
|
// Mach timebase ratio (numer/denom); main() populates it with mach_timebase_info()
// before any measurement is converted.
static mach_timebase_info_data_t g_tb;

/// Converts a tick count (a difference of two mach_absolute_time() readings) to
/// milliseconds by scaling with g_tb.numer / g_tb.denom and dividing by 1e6.
///
/// @param t Elapsed ticks.
/// @return Elapsed time in milliseconds as a double.
static double tb_ms(uint64_t t) {
    // Same evaluation order as a single expression: (ticks * numer) / denom, then / 1e6.
    double scaled = (double)t * g_tb.numer / g_tb.denom;
    return scaled / 1e6;
}
|
|
static int g_fp16_io = 0;
|
|
|
|
#include "ane_runtime.h"
|
|
|
|
/// Generates the program text for a single-op benchmark model.
///
/// The emitted text declares `main<ios16>` taking an fp16 tensor `x` of shape
/// [1, ch, 1, sp], a set of constants (pad type "valid", strides [1,1], pad [0,0,0,0],
/// dilations [1,1], groups 1), an fp16 weight `W` of shape [ch, ch, 1, 1] loaded through
/// BLOBFILE from "@model_path/weights/weight.bin" at offset 64, and one `conv` producing
/// fp16 `y` of shape [1, ch, 1, sp].
///
/// @param ch Channel count, used for both the input channels and the square weight.
/// @param sp Size of the last input/output dimension.
/// @return The program text as a string.
static NSString *gen_bench_conv(int ch, int sp) {
    // Only three statements depend on the dimensions; format those individually.
    NSString *signature = [NSString stringWithFormat:
        @" func main<ios16>(tensor<fp16, [1, %d, 1, %d]> x) {\n", ch, sp];

    NSString *weightDecl = [NSString stringWithFormat:
        @" tensor<fp16, [%d,%d,1,1]> W = const()[name=tensor<string, []>(\"W\"), "
        "val=tensor<fp16, [%d,%d,1,1]>(BLOBFILE(path=tensor<string, []>(\"@model_path/weights/weight.bin\"), offset=tensor<uint64, []>(64)))];\n",
        ch, ch, ch, ch];

    NSString *convStmt = [NSString stringWithFormat:
        @" tensor<fp16, [1,%d,1,%d]> y = conv(dilations=dl,groups=gr,pad=pd,pad_type=pt,strides=st,weight=W,x=x)"
        "[name=tensor<string, []>(\"conv\")];\n",
        ch, sp];

    // Fixed text is passed through verbatim (it contains no format specifiers).
    NSArray<NSString *> *pieces = @[
        @"program(1.0)\n[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{\"coremlc-version\", \"3505.4.1\"}})]\n{\n",
        signature,
        @" tensor<string, []> pt = const()[name=tensor<string, []>(\"pt\"), val=tensor<string, []>(\"valid\")];\n",
        @" tensor<int32, [2]> st = const()[name=tensor<string, []>(\"st\"), val=tensor<int32, [2]>([1,1])];\n",
        @" tensor<int32, [4]> pd = const()[name=tensor<string, []>(\"pd\"), val=tensor<int32, [4]>([0,0,0,0])];\n",
        @" tensor<int32, [2]> dl = const()[name=tensor<string, []>(\"dl\"), val=tensor<int32, [2]>([1,1])];\n",
        @" tensor<int32, []> gr = const()[name=tensor<string, []>(\"gr\"), val=tensor<int32, []>(1)];\n",
        weightDecl,
        convStmt,
        @" } -> (y);\n}\n",
    ];
    return [pieces componentsJoinedByString:@""];
}
|
|
|
|
/// Builds the weight blob passed to ane_compile() for a ch x ch 1x1 convolution.
///
/// Layout written here: a 128-byte zeroed header with byte 0 = 1, byte 4 = 2, bytes
/// 64..67 = EF BE AD DE, byte 68 = 1, a 32-bit value at offset 72 holding the weight
/// byte count and a 32-bit value at offset 80 holding 128, followed by ch*ch fp16
/// weights that are 0.5 on the diagonal and 0 elsewhere.
/// NOTE(review): the header fields presumably are version/magic/size/offset of the
/// blob format expected by the compiler — confirm against ane_runtime.h.
///
/// @param ch Channel count (weight matrix is ch x ch).
/// @return NSData owning the buffer, or nil if the allocation failed.
static NSData *bench_make_weight_blob(int ch) {
    const size_t headerBytes = 128;
    const size_t weightBytes = (size_t)ch * (size_t)ch * sizeof(_Float16);
    const size_t totalBytes = headerBytes + weightBytes;

    uint8_t *blob = (uint8_t *)calloc(totalBytes, 1);
    if (!blob) return nil;

    blob[0] = 1;
    blob[4] = 2;
    blob[64] = 0xEF; blob[65] = 0xBE; blob[66] = 0xAD; blob[67] = 0xDE;
    blob[68] = 1;

    // memcpy instead of writing through a cast pointer; same native-endian bytes.
    const uint32_t sizeField = (uint32_t)weightBytes;
    const uint32_t offsetField = (uint32_t)headerBytes;
    memcpy(blob + 72, &sizeField, sizeof(sizeField));
    memcpy(blob + 80, &offsetField, sizeof(offsetField));

    // Fill the diagonal in place; calloc already zeroed every other weight.
    _Float16 *weights = (_Float16 *)(blob + headerBytes);
    for (int i = 0; i < ch; i++) weights[(size_t)i * (size_t)ch + (size_t)i] = (_Float16)0.5f;

    return [NSData dataWithBytesNoCopy:blob length:totalBytes freeWhenDone:YES];
}

/// Formats one result column as "<ms> (<speedup>x)" where speedup = baseline_ms / ms,
/// or "N/A" when the path was not measured (ms is negative or NaN).
static void bench_format_cell(char *buf, size_t cap, double ms, double baseline_ms) {
    if (!(ms >= 0)) {
        snprintf(buf, cap, "N/A");
        return;
    }
    snprintf(buf, cap, "%.3f (%.1fx)", ms, baseline_ms / ms);
}

/// Benchmarks four ways of evaluating a compiled conv kernel for several
/// (channels, spatial) configurations and prints one table row per configuration:
///   Standard    — ane_eval(k)
///   RT          — evaluateRealTimeWithModel:options:request:error: sent to g_ane_client
///   ProcReq     — processRequest:model:qos:qIndex:modelStringID:options:returnValue:error:
///                 sent to the model's "program" object
///   ane_eval_rt — ane_eval_rt(k)
/// Each path runs WARMUP untimed iterations followed by ITERS timed ones; the reported
/// value is the mean time per iteration in milliseconds. A path that throws, or whose
/// message send returns NO, is reported as "N/A" rather than as a timing.
///
/// @return 1 if the ANE runtime is unavailable after ane_init(), otherwise 0.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    @autoreleasepool {
        setbuf(stdout, NULL);
        mach_timebase_info(&g_tb);

        printf("=== ANE Eval Path Benchmark (production dimensions) ===\n\n");

        ane_init();
        if (!g_ane_ok) { printf("FATAL: ANE not available\n"); return 1; }

        typedef struct { int ch; int sp; const char *label; } TestConfig;
        const TestConfig configs[] = {
            {64,  32,  "64x32 (test)"},
            {128, 64,  "128x64 (small)"},
            {256, 64,  "256x64 (med)"},
            {768, 256, "768x256 (prod)"},
            {512, 64,  "512x64 (large)"},
        };
        const int nconfigs = (int)(sizeof(configs) / sizeof(configs[0]));
        const int WARMUP = 20, ITERS = 200;

        // Typed views of objc_msgSend for the two direct message sends below.
        typedef BOOL (*EvalRTFn)(id, SEL, id, id, id, NSError **);
        typedef BOOL (*ProcReqFn)(id, SEL, id, id, unsigned int, int, id, id, BOOL *, NSError **);
        const EvalRTFn evalRT = (EvalRTFn)objc_msgSend;
        const ProcReqFn procReq = (ProcReqFn)objc_msgSend;
        const SEL rtSel = @selector(evaluateRealTimeWithModel:options:request:error:);
        const SEL procSel = @selector(processRequest:model:qos:qIndex:modelStringID:options:returnValue:error:);

        // Hoisted so the timed loops do not also measure building an empty dictionary.
        NSDictionary *noOptions = @{};

        id client = g_ane_client;
        printf(" Client: %s | Warmup: %d | Iters: %d\n\n", client ? "OK" : "NO", WARMUP, ITERS);
        printf("%-18s %10s %14s %14s %14s\n", "Config", "Standard", "RT", "ProcReq", "ane_eval_rt");
        printf("%-18s %10s %14s %14s %14s\n", "------", "--------", "--", "-------", "-----------");

        for (int ci = 0; ci < nconfigs; ci++) {
            // One pool per configuration so the weight blob, program text and any
            // temporaries from the message sends are released before the next config.
            @autoreleasepool {
                const int CH = configs[ci].ch, SP = configs[ci].sp;
                const char *label = configs[ci].label;

                NSData *wdata = bench_make_weight_blob(CH);
                if (!wdata) { printf("%-18s (alloc failed)\n", label); continue; }

                // Set before compiling, as in the original flow.
                // NOTE(review): presumably read by ane_runtime.h to select fp16 I/O — confirm.
                g_fp16_io = 1;
                NSData *milData = [gen_bench_conv(CH, SP) dataUsingEncoding:NSUTF8StringEncoding];
                size_t ioBytes = (size_t)CH * (size_t)SP * sizeof(_Float16);
                ANEKernel *k = ane_compile(milData, wdata, 1, &ioBytes, 1, &ioBytes);
                if (!k) { printf("%-18s (compile failed)\n", label); continue; }

                // Fill the input surface with 1.0 in fp16.
                IOSurfaceLock(k->ioInputs[0], 0, NULL);
                _Float16 *inp = (_Float16 *)IOSurfaceGetBaseAddress(k->ioInputs[0]);
                for (int i = 0; i < CH * SP; i++) inp[i] = (_Float16)1.0f;
                IOSurfaceUnlock(k->ioInputs[0], 0, NULL);

                NSError *e = nil;
                uint64_t t0 = 0, elapsed = 0;

                // --- Standard path -------------------------------------------------
                for (int i = 0; i < WARMUP; i++) ane_eval(k);
                t0 = mach_absolute_time();
                for (int i = 0; i < ITERS; i++) ane_eval(k);
                const double std_ms = tb_ms(mach_absolute_time() - t0) / ITERS;

                // --- Real-time evaluation through the client -----------------------
                double rt_ms = -1;
                if (client) {
                    @try {
                        BOOL ok = YES;
                        // Stop warming up as soon as the call reports failure.
                        for (int i = 0; ok && i < WARMUP; i++)
                            ok = evalRT(client, rtSel, k->model, noOptions, k->request, &e);
                        if (ok) {
                            t0 = mach_absolute_time();
                            for (int i = 0; i < ITERS; i++)
                                if (!evalRT(client, rtSel, k->model, noOptions, k->request, &e)) ok = NO;
                            elapsed = mach_absolute_time() - t0;
                            // Only report a timing when every timed call succeeded.
                            if (ok) rt_ms = tb_ms(elapsed) / ITERS;
                        }
                    } @catch (NSException *ex) { (void)ex; rt_ms = -1; }
                }

                // --- processRequest on the model's program object ------------------
                double proc_ms = -1;
                @try {
                    id prog = [k->model valueForKey:@"program"];
                    id hexId = [k->model valueForKey:@"hexStringIdentifier"];
                    if (prog && [prog respondsToSelector:procSel]) {
                        BOOL ok = YES;
                        for (int i = 0; ok && i < WARMUP; i++) {
                            BOOL rv = NO;
                            ok = procReq(prog, procSel, k->request, k->model, 21, 0, hexId, noOptions, &rv, &e);
                        }
                        if (ok) {
                            t0 = mach_absolute_time();
                            for (int i = 0; i < ITERS; i++) {
                                BOOL rv = NO;
                                if (!procReq(prog, procSel, k->request, k->model, 21, 0, hexId, noOptions, &rv, &e)) ok = NO;
                            }
                            elapsed = mach_absolute_time() - t0;
                            if (ok) proc_ms = tb_ms(elapsed) / ITERS;
                        }
                    }
                } @catch (NSException *ex) { (void)ex; proc_ms = -1; }

                // --- ane_eval_rt wrapper -------------------------------------------
                double wrap_ms = -1;
                @try {
                    for (int i = 0; i < WARMUP; i++) ane_eval_rt(k);
                    t0 = mach_absolute_time();
                    for (int i = 0; i < ITERS; i++) ane_eval_rt(k);
                    wrap_ms = tb_ms(mach_absolute_time() - t0) / ITERS;
                } @catch (NSException *ex) { (void)ex; wrap_ms = -1; }

                // --- Report --------------------------------------------------------
                char stdCell[32], rtCell[32], procCell[32], wrapCell[32];
                snprintf(stdCell, sizeof(stdCell), "%.3f ms", std_ms);
                bench_format_cell(rtCell, sizeof(rtCell), rt_ms, std_ms);
                bench_format_cell(procCell, sizeof(procCell), proc_ms, std_ms);
                bench_format_cell(wrapCell, sizeof(wrapCell), wrap_ms, std_ms);
                printf("%-18s %10s %14s %14s %14s\n", label, stdCell, rtCell, procCell, wrapCell);

                ane_free(k);
            }
        }

        printf("\n=== Benchmark complete ===\n");
    }
    return 0;
}
|