// test_bench_paths.m — Benchmark ANE evaluation paths at production dimensions
// Compares: standard, RT, processRequest, and ane_eval_rt wrapper
//
// NOTE(review): in the reviewed copy of this file every `<...>` span was missing
// (the six #import targets and all MIL type parameters). They are restored below
// from the APIs and format arguments this file actually uses; objc/runtime.h and
// dlfcn.h are assumed to be needed by ane_runtime.h — confirm against a known-good copy.
#import <Foundation/Foundation.h>
#import <objc/runtime.h>
#import <objc/message.h>
#import <dlfcn.h>
#import <IOSurface/IOSurface.h>
#import <mach/mach_time.h>

// Timebase used by tb_ms(); filled in by mach_timebase_info() at the top of main().
static mach_timebase_info_data_t g_tb;

/// Converts a mach_absolute_time() tick delta to milliseconds using g_tb.
static double tb_ms(uint64_t t) { return (double)t * g_tb.numer / g_tb.denom / 1e6; }

// Defined before the runtime header is included; presumably read by ane_runtime.h
// to select fp16 I/O surfaces — TODO confirm.
static int g_fp16_io = 0;
#include "ane_runtime.h"

/// One benchmark configuration: channel count, spatial size, and the table label.
typedef struct {
    int ch;
    int sp;
    const char *label;
} TestConfig;

/// Builds the MIL program text for a single conv over a [1, ch, 1, sp] input with
/// a [ch, ch, 1, 1] weight read from weights/weight.bin.
///
/// NOTE(review): the type parameters are reconstructed. The shapes follow from the
/// eight format arguments; fp16 follows from the _Float16 buffers used in this file.
/// The `ios16` function target and the int32/uint64 element types are assumptions —
/// verify before relying on this text.
static NSString *gen_bench_conv(int ch, int sp) {
    return [NSString stringWithFormat:
        @"program(1.0)\n[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{\"coremlc-version\", \"3505.4.1\"}})]\n{\n"
        "    func main<ios16>(tensor<fp16, [1, %d, 1, %d]> x) {\n"
        "        tensor<string, []> pt = const()[name=tensor<string, []>(\"pt\"), val=tensor<string, []>(\"valid\")];\n"
        "        tensor<int32, [2]> st = const()[name=tensor<string, []>(\"st\"), val=tensor<int32, [2]>([1,1])];\n"
        "        tensor<int32, [4]> pd = const()[name=tensor<string, []>(\"pd\"), val=tensor<int32, [4]>([0,0,0,0])];\n"
        "        tensor<int32, [2]> dl = const()[name=tensor<string, []>(\"dl\"), val=tensor<int32, [2]>([1,1])];\n"
        "        tensor<int32, []> gr = const()[name=tensor<string, []>(\"gr\"), val=tensor<int32, []>(1)];\n"
        "        tensor<fp16, [%d, %d, 1, 1]> W = const()[name=tensor<string, []>(\"W\"), "
        "val=tensor<fp16, [%d, %d, 1, 1]>(BLOBFILE(path=tensor<string, []>(\"@model_path/weights/weight.bin\"), offset=tensor<uint64, []>(64)))];\n"
        "        tensor<fp16, [1, %d, 1, %d]> y = conv(dilations=dl,groups=gr,pad=pd,pad_type=pt,strides=st,weight=W,x=x)"
        "[name=tensor<string, []>(\"conv\")];\n"
        "    } -> (y);\n}\n",
        ch, sp, ch, ch, ch, ch, ch, sp];
}

/// Returns the UTF-8 bytes of `s`, or `fallback` when `s` is nil (safe for "%s").
static const char *cstr_or(NSString *s, const char *fallback) {
    const char *c = s.UTF8String;
    return c ? c : fallback;
}

/// Builds the weight file contents: a 128-byte header followed by a ch x ch fp16
/// matrix with 0.5 on the diagonal and zeros elsewhere.
/// Returns nil if the buffer cannot be allocated or the payload size does not fit
/// the 32-bit size field written at offset 72.
static NSData *make_identity_weight_blob(int ch) {
    const size_t headerBytes = 128;
    size_t weightBytes = (size_t)ch * (size_t)ch * sizeof(_Float16);
    if (ch <= 0 || weightBytes > UINT32_MAX) return nil;

    size_t totalBytes = headerBytes + weightBytes;
    uint8_t *blob = (uint8_t *)calloc(totalBytes, 1);
    if (!blob) return nil;

    // Header bytes exactly as the original benchmark wrote them.
    blob[0] = 1;
    blob[4] = 2;
    blob[64] = 0xEF; blob[65] = 0xBE; blob[66] = 0xAD; blob[67] = 0xDE;  // 0xDEADBEEF, little-endian
    blob[68] = 1;
    uint32_t payloadSize = (uint32_t)weightBytes;
    uint32_t payloadOffset = (uint32_t)headerBytes;
    memcpy(blob + 72, &payloadSize, sizeof(payloadSize));      // memcpy avoids type-punned stores
    memcpy(blob + 80, &payloadOffset, sizeof(payloadOffset));

    // Write the diagonal straight into the payload; calloc already zeroed the rest.
    _Float16 *w = (_Float16 *)(blob + headerBytes);
    for (int i = 0; i < ch; i++) w[(size_t)i * ch + i] = (_Float16)0.5f;

    // NSData takes ownership of `blob` and frees it with free().
    return [NSData dataWithBytesNoCopy:blob length:totalBytes freeWhenDone:YES];
}

/// Fills the first `count` fp16 elements of the kernel's first input surface with 1.0.
/// Returns NO when the surface has no base address.
static BOOL fill_input_with_ones(ANEKernel *k, int count) {
    IOSurfaceLock(k->ioInputs[0], 0, NULL);
    _Float16 *inp = (_Float16 *)IOSurfaceGetBaseAddress(k->ioInputs[0]);
    if (inp) {
        for (int i = 0; i < count; i++) inp[i] = (_Float16)1.0f;
    }
    IOSurfaceUnlock(k->ioInputs[0], 0, NULL);
    return inp != NULL;
}

/// Prints a stderr warning when a timed path returned NO at least once.
/// The stdout table is left untouched.
static void warn_failures(const char *path, const char *label, int failures, int iters, NSError *err) {
    if (failures == 0) return;
    fprintf(stderr, "warning: %s path returned NO for %d/%d timed calls on %s (%s)\n",
            path, failures, iters, label,
            cstr_or(err.localizedDescription, "no NSError reported"));
}

/// Times ane_eval(); returns the mean milliseconds per call over `iters` calls.
static double bench_standard(ANEKernel *k, int warmup, int iters) {
    for (int i = 0; i < warmup; i++) ane_eval(k);
    uint64_t t0 = mach_absolute_time();
    for (int i = 0; i < iters; i++) ane_eval(k);
    return tb_ms(mach_absolute_time() - t0) / iters;
}

/// Times -evaluateRealTimeWithModel:options:request:error: sent to `client`.
/// Returns mean ms per call, or -1 when `client` is nil or an exception is raised.
static double bench_realtime(id client, ANEKernel *k, int warmup, int iters, const char *label) {
    if (!client) return -1;

    typedef BOOL (*EvalRTFn)(id, SEL, id, id, id, NSError **);
    EvalRTFn evalRT = (EvalRTFn)objc_msgSend;
    SEL sel = @selector(evaluateRealTimeWithModel:options:request:error:);

    NSError *err = nil;
    int failures = 0;
    double ms = -1;
    @try {
        for (int i = 0; i < warmup; i++) evalRT(client, sel, k->model, @{}, k->request, &err);
        uint64_t t0 = mach_absolute_time();
        for (int i = 0; i < iters; i++) {
            if (!evalRT(client, sel, k->model, @{}, k->request, &err)) failures++;
        }
        ms = tb_ms(mach_absolute_time() - t0) / iters;
    } @catch (NSException *ex) {
        fprintf(stderr, "warning: RT path raised on %s: %s\n", label, cstr_or(ex.reason, "(no reason)"));
        ms = -1;
    }
    warn_failures("RT", label, failures, iters, err);
    return ms;
}

/// Times -processRequest:model:qos:qIndex:modelStringID:options:returnValue:error:
/// sent to the model's "program" object (looked up via KVC).
/// Returns mean ms per call, or -1 when the program object is missing, does not
/// respond to the selector, or an exception is raised.
static double bench_process_request(ANEKernel *k, int warmup, int iters, const char *label) {
    typedef BOOL (*ProcFn)(id, SEL, id, id, unsigned int, int, id, id, BOOL *, NSError **);
    ProcFn proc = (ProcFn)objc_msgSend;
    SEL sel = @selector(processRequest:model:qos:qIndex:modelStringID:options:returnValue:error:);

    NSError *err = nil;
    int failures = 0;
    double ms = -1;
    @try {
        id prog = [k->model valueForKey:@"program"];
        id hexId = [k->model valueForKey:@"hexStringIdentifier"];
        if (prog && [prog respondsToSelector:sel]) {
            // qos = 21 and qIndex = 0 are the values the original benchmark passed.
            for (int i = 0; i < warmup; i++) {
                BOOL rv = NO;
                proc(prog, sel, k->request, k->model, 21, 0, hexId, @{}, &rv, &err);
            }
            uint64_t t0 = mach_absolute_time();
            for (int i = 0; i < iters; i++) {
                BOOL rv = NO;
                if (!proc(prog, sel, k->request, k->model, 21, 0, hexId, @{}, &rv, &err)) failures++;
            }
            ms = tb_ms(mach_absolute_time() - t0) / iters;
        }
    } @catch (NSException *ex) {
        fprintf(stderr, "warning: ProcReq path raised on %s: %s\n", label, cstr_or(ex.reason, "(no reason)"));
        ms = -1;
    }
    warn_failures("ProcReq", label, failures, iters, err);
    return ms;
}

/// Times the ane_eval_rt() wrapper; returns mean ms per call, or -1 on exception.
static double bench_wrapper(ANEKernel *k, int warmup, int iters, const char *label) {
    double ms = -1;
    @try {
        for (int i = 0; i < warmup; i++) ane_eval_rt(k);
        uint64_t t0 = mach_absolute_time();
        for (int i = 0; i < iters; i++) ane_eval_rt(k);
        ms = tb_ms(mach_absolute_time() - t0) / iters;
    } @catch (NSException *ex) {
        fprintf(stderr, "warning: ane_eval_rt path raised on %s: %s\n", label, cstr_or(ex.reason, "(no reason)"));
        ms = -1;
    }
    return ms;
}

/// Formats one table cell: "N/A" for a negative (unavailable) timing, otherwise the
/// timing plus its speed-up relative to `baseline_ms`. The ratio is skipped for a
/// zero timing so the cell never shows a division by zero.
static void format_cell(char *buf, size_t cap, double ms, double baseline_ms) {
    if (ms < 0) {
        snprintf(buf, cap, "N/A");
    } else if (ms > 0) {
        snprintf(buf, cap, "%.3f (%.1fx)", ms, baseline_ms / ms);
    } else {
        snprintf(buf, cap, "%.3f (n/a)", ms);
    }
}

/// Runs every configuration through the four evaluation paths and prints one table
/// row per configuration. Returns 1 when ane_init() leaves g_ane_ok false, else 0.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;
    @autoreleasepool {
        setbuf(stdout, NULL);
        mach_timebase_info(&g_tb);
        printf("=== ANE Eval Path Benchmark (production dimensions) ===\n\n");

        ane_init();
        if (!g_ane_ok) {
            printf("FATAL: ANE not available\n");
            return 1;
        }

        const TestConfig configs[] = {
            {64, 32, "64x32 (test)"},
            {128, 64, "128x64 (small)"},
            {256, 64, "256x64 (med)"},
            {768, 256, "768x256 (prod)"},
            {512, 64, "512x64 (large)"},
        };
        const int nconfigs = (int)(sizeof(configs) / sizeof(configs[0]));
        const int WARMUP = 20, ITERS = 200;

        id client = g_ane_client;
        printf(" Client: %s | Warmup: %d | Iters: %d\n\n", client ? "OK" : "NO", WARMUP, ITERS);
        printf("%-18s %10s %14s %14s %14s\n", "Config", "Standard", "RT", "ProcReq", "ane_eval_rt");
        printf("%-18s %10s %14s %14s %14s\n", "------", "--------", "--", "-------", "-----------");

        for (int ci = 0; ci < nconfigs; ci++) {
            // Drain per-config temporaries (MIL text, weight blob) before the next config.
            @autoreleasepool {
                const TestConfig *cfg = &configs[ci];

                NSData *wdata = make_identity_weight_blob(cfg->ch);
                if (!wdata) {
                    printf("%-18s (weight allocation failed)\n", cfg->label);
                    continue;
                }

                g_fp16_io = 1;
                NSData *milData = [gen_bench_conv(cfg->ch, cfg->sp) dataUsingEncoding:NSUTF8StringEncoding];
                size_t ioBytes = (size_t)cfg->ch * (size_t)cfg->sp * sizeof(_Float16);
                ANEKernel *k = ane_compile(milData, wdata, 1, &ioBytes, 1, &ioBytes);
                if (!k) {
                    printf("%-18s (compile failed)\n", cfg->label);
                    continue;
                }

                if (!fill_input_with_ones(k, cfg->ch * cfg->sp)) {
                    printf("%-18s (input surface unavailable)\n", cfg->label);
                    ane_free(k);
                    continue;
                }

                double std_ms = bench_standard(k, WARMUP, ITERS);
                double rt_ms = bench_realtime(client, k, WARMUP, ITERS, cfg->label);
                double proc_ms = bench_process_request(k, WARMUP, ITERS, cfg->label);
                double wrap_ms = bench_wrapper(k, WARMUP, ITERS, cfg->label);

                char s[32], r[32], p[32], w2[32];
                snprintf(s, sizeof(s), "%.3f ms", std_ms);
                format_cell(r, sizeof(r), rt_ms, std_ms);
                format_cell(p, sizeof(p), proc_ms, std_ms);
                format_cell(w2, sizeof(w2), wrap_ms, std_ms);
                printf("%-18s %10s %14s %14s %14s\n", cfg->label, s, r, p, w2);

                ane_free(k);
            }
        }

        printf("\n=== Benchmark complete ===\n");
    }
    return 0;
}