mirror of https://github.com/maderix/ANE.git
135 lines
6.7 KiB
Objective-C
135 lines
6.7 KiB
Objective-C
// stories_io.h — IOSurface helpers, blob builders, NEON conversion
|
|
#pragma once
|
|
#include "stories_config.h"
|
|
#include <arm_neon.h>
|
|
|
|
static IOSurfaceRef make_surface(size_t bytes) {
|
|
return IOSurfaceCreate((__bridge CFDictionaryRef)@{
|
|
(id)kIOSurfaceWidth:@(bytes), (id)kIOSurfaceHeight:@1,
|
|
(id)kIOSurfaceBytesPerElement:@1, (id)kIOSurfaceBytesPerRow:@(bytes),
|
|
(id)kIOSurfaceAllocSize:@(bytes), (id)kIOSurfacePixelFormat:@0});
|
|
}
|
|
|
|
static NSData *build_blob(const float *w, int rows, int cols) {
|
|
int ws=rows*cols*2, tot=128+ws;
|
|
uint8_t *b=(uint8_t*)calloc(tot,1);
|
|
b[0]=1;b[4]=2;b[64]=0xEF;b[65]=0xBE;b[66]=0xAD;b[67]=0xDE;b[68]=1;
|
|
*(uint32_t*)(b+72)=ws;*(uint32_t*)(b+80)=128;
|
|
_Float16 *fp16=(_Float16*)(b+128);
|
|
for(int i=0;i<rows*cols;i++) fp16[i]=(_Float16)w[i];
|
|
return [NSData dataWithBytesNoCopy:b length:tot freeWhenDone:YES];
|
|
}
|
|
static NSData *build_blob_t(const float *w, int rows, int cols) {
|
|
int ws=cols*rows*2, tot=128+ws;
|
|
uint8_t *b=(uint8_t*)calloc(tot,1);
|
|
b[0]=1;b[4]=2;b[64]=0xEF;b[65]=0xBE;b[66]=0xAD;b[67]=0xDE;b[68]=1;
|
|
*(uint32_t*)(b+72)=ws;*(uint32_t*)(b+80)=128;
|
|
_Float16 *fp16=(_Float16*)(b+128);
|
|
for(int i=0;i<rows;i++) for(int j=0;j<cols;j++) fp16[j*rows+i]=(_Float16)w[i*cols+j];
|
|
return [NSData dataWithBytesNoCopy:b length:tot freeWhenDone:YES];
|
|
}
|
|
static NSData *build_blob_fp16(_Float16 *d, int cnt) {
|
|
int ws=cnt*2, tot=128+ws;
|
|
uint8_t *b=(uint8_t*)calloc(tot,1);
|
|
b[0]=1;b[4]=2;b[64]=0xEF;b[65]=0xBE;b[66]=0xAD;b[67]=0xDE;b[68]=1;
|
|
*(uint32_t*)(b+72)=ws;*(uint32_t*)(b+80)=128;
|
|
memcpy(b+128,d,ws);
|
|
return [NSData dataWithBytesNoCopy:b length:tot freeWhenDone:YES];
|
|
}
|
|
|
|
// NEON vectorized conversion
|
|
static void cvt_f16_f32(float *dst, const _Float16 *src, int n) {
|
|
int i = 0;
|
|
for (; i+7 < n; i += 8) {
|
|
float16x8_t h = vld1q_f16((const __fp16*)(src+i));
|
|
vst1q_f32(dst+i, vcvt_f32_f16(vget_low_f16(h)));
|
|
vst1q_f32(dst+i+4, vcvt_f32_f16(vget_high_f16(h)));
|
|
}
|
|
for (; i < n; i++) dst[i] = (float)src[i];
|
|
}
|
|
static void cvt_f32_f16(_Float16 *dst, const float *src, int n) {
|
|
int i = 0;
|
|
for (; i+7 < n; i += 8) {
|
|
float16x8_t h = vcombine_f16(vcvt_f16_f32(vld1q_f32(src+i)),
|
|
vcvt_f16_f32(vld1q_f32(src+i+4)));
|
|
vst1q_f16((__fp16*)(dst+i), h);
|
|
}
|
|
for (; i < n; i++) dst[i] = (_Float16)src[i];
|
|
}
|
|
|
|
// IOSurface I/O (channel-first [C,S] layout)
|
|
static void io_write_fp16(IOSurfaceRef s, const float *data, int channels, int sp) {
|
|
IOSurfaceLock(s, 0, NULL);
|
|
cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s), data, channels * sp);
|
|
IOSurfaceUnlock(s, 0, NULL);
|
|
}
|
|
static void io_read_fp16(IOSurfaceRef s, float *data, int ch_off, int channels, int sp) {
|
|
IOSurfaceLock(s, kIOSurfaceLockReadOnly, NULL);
|
|
cvt_f16_f32(data, (_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, channels * sp);
|
|
IOSurfaceUnlock(s, kIOSurfaceLockReadOnly, NULL);
|
|
}
|
|
static void io_copy(IOSurfaceRef dst, int dst_ch, IOSurfaceRef src, int src_ch, int channels, int sp) {
|
|
IOSurfaceLock(dst, 0, NULL);
|
|
IOSurfaceLock(src, kIOSurfaceLockReadOnly, NULL);
|
|
memcpy((_Float16*)IOSurfaceGetBaseAddress(dst) + dst_ch*sp,
|
|
(_Float16*)IOSurfaceGetBaseAddress(src) + src_ch*sp,
|
|
channels * sp * sizeof(_Float16));
|
|
IOSurfaceUnlock(src, kIOSurfaceLockReadOnly, NULL);
|
|
IOSurfaceUnlock(dst, 0, NULL);
|
|
}
|
|
static void io_write_fp16_at(IOSurfaceRef s, int ch_off, const float *data, int channels, int sp) {
|
|
IOSurfaceLock(s, 0, NULL);
|
|
cvt_f32_f16((_Float16*)IOSurfaceGetBaseAddress(s) + ch_off * sp, data, channels * sp);
|
|
IOSurfaceUnlock(s, 0, NULL);
|
|
}
|
|
|
|
// Kernel compile/eval
|
|
static Kern *compile_kern_mil_w(NSString *mil, NSDictionary *weights, int ic_bytes, int oc_bytes) {
|
|
@autoreleasepool {
|
|
NSData *md = [mil dataUsingEncoding:NSUTF8StringEncoding];
|
|
id desc = ((id(*)(Class,SEL,id,id,id))objc_msgSend)(g_D, @selector(modelWithMILText:weights:optionsPlist:), md, weights, nil);
|
|
if (!desc) { printf(" [compile] desc=NULL\n"); return NULL; }
|
|
id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(g_I, @selector(inMemoryModelWithDescriptor:), desc);
|
|
id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
|
|
NSString *td = [NSTemporaryDirectory() stringByAppendingPathComponent:hx];
|
|
[[NSFileManager defaultManager] createDirectoryAtPath:[td stringByAppendingPathComponent:@"weights"] withIntermediateDirectories:YES attributes:nil error:nil];
|
|
[md writeToFile:[td stringByAppendingPathComponent:@"model.mil"] atomically:YES];
|
|
for (NSString *path in weights) {
|
|
NSString *rel = [path stringByReplacingOccurrencesOfString:@"@model_path/" withString:@""];
|
|
[weights[path][@"data"] writeToFile:[td stringByAppendingPathComponent:rel] atomically:YES];
|
|
}
|
|
NSError *e = nil;
|
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(mdl, @selector(compileWithQoS:options:error:), 21, @{}, &e)) {
|
|
printf(" [compile] FAIL: %s\n", e ? [[e description] UTF8String] : "no error"); return NULL;
|
|
}
|
|
if (!((BOOL(*)(id,SEL,unsigned int,id,NSError**))objc_msgSend)(mdl, @selector(loadWithQoS:options:error:), 21, @{}, &e)) {
|
|
printf(" [compile] load FAIL\n"); return NULL;
|
|
}
|
|
__sync_fetch_and_add(&g_compile_count, 1);
|
|
Kern *k = (Kern*)calloc(1, sizeof(Kern));
|
|
k->model = (void*)CFBridgingRetain(mdl);
|
|
k->ioIn = make_surface(ic_bytes);
|
|
k->ioOut = make_surface(oc_bytes);
|
|
id wI = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(g_AIO, @selector(objectWithIOSurface:), k->ioIn);
|
|
id wO = ((id(*)(Class,SEL,IOSurfaceRef))objc_msgSend)(g_AIO, @selector(objectWithIOSurface:), k->ioOut);
|
|
k->request = (void*)CFBridgingRetain(((id(*)(Class,SEL,id,id,id,id,id,id,id))objc_msgSend)(g_AR,
|
|
@selector(requestWithInputs:inputIndices:outputs:outputIndices:weightsBuffer:perfStats:procedureIndex:),
|
|
@[wI], @[@0], @[wO], @[@0], nil, nil, @0));
|
|
k->tmpDir = (void*)CFBridgingRetain(td);
|
|
return k;
|
|
}
|
|
}
|
|
static void free_kern(Kern *k) {
|
|
if (!k) return;
|
|
id mdl = (__bridge id)k->model; NSError *e = nil;
|
|
((BOOL(*)(id,SEL,unsigned int,NSError**))objc_msgSend)(mdl, @selector(unloadWithQoS:error:), 21, &e);
|
|
CFRelease(k->ioIn); CFRelease(k->ioOut);
|
|
[[NSFileManager defaultManager] removeItemAtPath:(__bridge id)k->tmpDir error:nil];
|
|
CFRelease(k->model); CFRelease(k->request); CFRelease(k->tmpDir);
|
|
free(k);
|
|
}
|
|
static void ane_eval(Kern *k) {
|
|
id mdl = (__bridge id)k->model; id req = (__bridge id)k->request; NSError *e = nil;
|
|
((BOOL(*)(id,SEL,unsigned int,id,id,NSError**))objc_msgSend)(mdl, @selector(evaluateWithQoS:options:request:error:), 21, @{}, req, &e);
|
|
}
|