diff --git a/desktop/api.cpp b/desktop/api.cpp index bba0ba1..ac476c2 100644 --- a/desktop/api.cpp +++ b/desktop/api.cpp @@ -261,11 +261,10 @@ struct APIInstance { #include "renderer.cpp" #include "theme.cpp" - #define TEXT_RENDERER #include "text.cpp" #undef TEXT_RENDERER - +#include "profiling.cpp" #include "gui.cpp" #ifndef NO_API_TABLE @@ -1106,12 +1105,13 @@ EsMessage *EsMessageReceive() { ProcessMessageTiming timing = {}; double start = EsTimeStampMs(); UIProcessWindowManagerMessage((EsWindow *) message.object, &message.message, &timing); - EsPrint("Processed message from WM %x in %Fms (%Fms logic, %Fms layout, %Fms paint, %Fms update screen).\n", + EsPrint("Processed message from WM %x in %Fms (%Fms logic, %Fms layout, %Fms paint, %Fms update screen). Profiling buffer %F%% full.\n", type, EsTimeStampMs() - start, timing.endLogic - timing.startLogic, timing.endLayout - timing.startLayout, timing.endPaint - timing.startPaint, - timing.endUpdate - timing.startUpdate); + timing.endUpdate - timing.startUpdate, + profilingBufferSize ? 
profilingBufferPosition * 100.0 / profilingBufferSize : 0); #else UIProcessWindowManagerMessage((EsWindow *) message.object, &message.message, nullptr); #endif @@ -1470,6 +1470,7 @@ uintptr_t EsSystemGetOptimalWorkQueueThreadCount() { return EsSyscall(ES_SYSCALL_PROCESSOR_COUNT, 0, 0, 0, 0); } +__attribute__((no_instrument_function)) void ThreadInitialise(ThreadLocalStorage *local) { EsMemoryZero(local, sizeof(ThreadLocalStorage)); EsSyscall(ES_SYSCALL_THREAD_GET_ID, ES_CURRENT_THREAD, (uintptr_t) &local->id, 0, 0); diff --git a/desktop/gui.cpp b/desktop/gui.cpp index 13a0c09..e42b257 100644 --- a/desktop/gui.cpp +++ b/desktop/gui.cpp @@ -8035,6 +8035,16 @@ void InspectorVisualizeLayoutBounds(EsInstance *instance, EsElement *, EsCommand EsElementRepaint(window); } +#ifdef PROFILE_DESKTOP_FUNCTIONS +/* Inspector toolbar command: allocates the profiling entry buffer and passes it to ProfilingSetup. */ +void InspectorBeginProfiling(EsInstance *, EsElement *, EsCommand *) { + size_t entryCount = 3000000; + ProfilingEntry *buffer = (ProfilingEntry *) EsHeapAllocate(sizeof(ProfilingEntry) * entryCount, false); + if (!buffer) return; /* Allocation failed: leave profiling off rather than hand ProfilingSetup a null buffer. */ + ProfilingSetup(buffer, entryCount); +} +#endif + void InspectorAddElement2(EsMenu *menu, EsGeneric context) { InspectorWindow *inspector = (InspectorWindow *) menu->instance; if (inspector->selectedElement == -1) return; @@ -8094,6 +8104,10 @@ void InspectorSetup(EsWindow *window) { EsButtonOnCommand(inspector->visualizeLayoutBounds, InspectorVisualizeLayoutBounds); inspector->visualizePaintSteps = EsButtonCreate(toolbar, ES_BUTTON_TOOLBAR, 0, "Visualize paint steps"); EsButtonOnCommand(inspector->visualizePaintSteps, InspectorVisualizePaintSteps); + EsSpacerCreate(toolbar, ES_CELL_H_FILL); +#ifdef PROFILE_DESKTOP_FUNCTIONS + EsButtonOnCommand(EsButtonCreate(toolbar, ES_BUTTON_TOOLBAR, 0, "Begin profiling"), InspectorBeginProfiling); +#endif } inspector->elementList = EsListViewCreate(panel1, ES_CELL_FILL | ES_LIST_VIEW_COLUMNS | ES_LIST_VIEW_SINGLE_SELECT); diff --git a/desktop/profiling.cpp b/desktop/profiling.cpp new file mode 100644 index 0000000..02b8f18 --- /dev/null +++ 
b/desktop/profiling.cpp @@ -0,0 +1,55 @@ +// TODO Adjust time stamps for thread preemption. + +#include <stdint.h> +#include <stddef.h> + +struct ProfilingEntry { + void *thisFunction; + uint64_t timeStamp; +}; + +extern ptrdiff_t tlsStorageOffset; +extern "C" uintptr_t ProcessorTLSRead(uintptr_t offset); +extern "C" uint64_t ProcessorReadTimeStamp(); +void EnterDebugger(); + +extern size_t profilingBufferSize; +extern uintptr_t profilingBufferPosition; +void ProfilingSetup(ProfilingEntry *buffer, size_t size /* number of entries */); + +#ifdef PROFILING_IMPLEMENTATION + +ProfilingEntry *profilingBuffer; +size_t profilingBufferSize; +uintptr_t profilingBufferPosition; +uintptr_t profilingThread; + +#define PROFILING_FUNCTION(_exiting) \ + (void) callSite; \ + \ + if (profilingBufferPosition < profilingBufferSize && profilingThread == ProcessorTLSRead(tlsStorageOffset)) { \ + ProfilingEntry *entry = (ProfilingEntry *) &profilingBuffer[profilingBufferPosition++]; \ + entry->thisFunction = thisFunction; \ + entry->timeStamp = ProcessorReadTimeStamp() | ((uint64_t) _exiting << 63); \ + } else if (profilingBufferSize && profilingThread == ProcessorTLSRead(tlsStorageOffset)) { \ + profilingBufferSize = 0; \ + EnterDebugger(); \ + } + +extern "C" void __cyg_profile_func_enter(void *thisFunction, void *callSite) { + PROFILING_FUNCTION(0); +} + +extern "C" void __cyg_profile_func_exit(void *thisFunction, void *callSite) { + PROFILING_FUNCTION(1); +} + +void ProfilingSetup(ProfilingEntry *buffer, size_t size) { + profilingThread = ProcessorTLSRead(tlsStorageOffset); + __sync_synchronize(); + profilingBuffer = buffer; + profilingBufferSize = size; + profilingBufferPosition = 0; +} + +#endif diff --git a/desktop/syscall.cpp b/desktop/syscall.cpp index 3e8f031..420d072 100644 --- a/desktop/syscall.cpp +++ b/desktop/syscall.cpp @@ -120,6 +120,7 @@ int EsProcessGetExitStatus(EsHandle process) { void ThreadInitialise(ThreadLocalStorage *local); +__attribute__((no_instrument_function)) void 
ThreadEntry(EsGeneric argument, EsThreadEntryCallback entryFunction) { ThreadLocalStorage local; ThreadInitialise(&local); diff --git a/desktop/text.cpp b/desktop/text.cpp index c414113..afd0342 100644 --- a/desktop/text.cpp +++ b/desktop/text.cpp @@ -822,6 +822,7 @@ void FontDatabaseFree() { // --------------------------------- Blitting rendered glyphs. +__attribute__((no_instrument_function)) inline static void DrawStringPixel(int oX, int oY, void *bitmap, size_t stride, uint32_t textColor, uint32_t selectionColor, int32_t backgroundColor, uint32_t pixel, bool selected, bool fullAlpha) { uint32_t *destination = (uint32_t *) ((uint8_t *) bitmap + (oX) * 4 + (oY) * stride); diff --git a/shared/array.cpp b/shared/array.cpp index 63db363..25ba926 100644 --- a/shared/array.cpp +++ b/shared/array.cpp @@ -27,7 +27,7 @@ template struct Array { T *array; - inline size_t Length() { return array ? ArrayHeader(array)->length : 0; } + __attribute__((no_instrument_function)) inline size_t Length() { return array ? 
ArrayHeader(array)->length : 0; } inline T &First() { return array[0]; } inline T &Last() { return array[Length() - 1]; } inline void Delete(uintptr_t position) { _ArrayDelete(array, position, sizeof(T), 1); } diff --git a/shared/common.cpp b/shared/common.cpp index 8b6d75f..22cf359 100644 --- a/shared/common.cpp +++ b/shared/common.cpp @@ -95,6 +95,7 @@ EsRectangle EsRectangleBounding(EsRectangle a, EsRectangle b) { return a; } +__attribute__((no_instrument_function)) EsRectangle EsRectangleIntersection(EsRectangle a, EsRectangle b) { if (a.l < b.l) a.l = b.l; if (a.t < b.t) a.t = b.t; @@ -164,6 +165,7 @@ EsRectangle EsRectangleCut(EsRectangle a, int32_t amount, char side) { #if defined(SHARED_COMMON_WANT_RENDERING) || defined(SHARED_COMMON_WANT_ALL) ES_FUNCTION_OPTIMISE_O3 +__attribute__((no_instrument_function)) void BlendPixel(uint32_t *destinationPixel, uint32_t modified, bool fullAlpha) { if ((modified & 0xFF000000) == 0xFF000000) { *destinationPixel = modified; @@ -1252,6 +1254,7 @@ double EsDoubleParse(const char *nptr, ptrdiff_t maxBytes, char **endptr) { #ifdef SHARED_COMMON_WANT_ALL +__attribute__((no_instrument_function)) void EsMemoryCopy(void *_destination, const void *_source, size_t bytes) { // TODO Prevent this from being optimised out in the kernel. @@ -1282,6 +1285,7 @@ void EsMemoryCopy(void *_destination, const void *_source, size_t bytes) { } } +__attribute__((no_instrument_function)) void EsMemoryCopyReverse(void *_destination, void *_source, size_t bytes) { // TODO Prevent this from being optimised out in the kernel. @@ -1304,6 +1308,7 @@ void EsMemoryCopyReverse(void *_destination, void *_source, size_t bytes) { } } +__attribute__((no_instrument_function)) void EsMemoryZero(void *destination, size_t bytes) { // TODO Prevent this from being optimised out in the kernel. 
@@ -1316,6 +1321,7 @@ void EsMemoryZero(void *destination, size_t bytes) { } } +__attribute__((no_instrument_function)) void EsMemoryMove(void *_start, void *_end, intptr_t amount, bool zeroEmptySpace) { // TODO Prevent this from being optimised out in the kernel. @@ -1342,6 +1348,7 @@ void EsMemoryMove(void *_start, void *_end, intptr_t amount, bool zeroEmptySpace } } +__attribute__((no_instrument_function)) int EsMemoryCompare(const void *a, const void *b, size_t bytes) { if (!bytes) { return 0; @@ -1361,6 +1368,7 @@ int EsMemoryCompare(const void *a, const void *b, size_t bytes) { return 0; } +__attribute__((no_instrument_function)) uint8_t EsMemorySumBytes(uint8_t *source, size_t bytes) { if (!bytes) { return 0; @@ -1375,6 +1383,7 @@ uint8_t EsMemorySumBytes(uint8_t *source, size_t bytes) { return total; } +__attribute__((no_instrument_function)) void EsMemoryFill(void *from, void *to, uint8_t byte) { uint8_t *a = (uint8_t *) from; uint8_t *b = (uint8_t *) to; @@ -1679,12 +1688,14 @@ void LoadImage(const void *path, ptrdiff_t pathBytes, void *destination, int des #ifdef SHARED_COMMON_WANT_ALL +__attribute__((no_instrument_function)) void EsSpinlockAcquire(EsSpinlock *spinlock) { __sync_synchronize(); while (__sync_val_compare_and_swap(&spinlock->state, 0, 1)); __sync_synchronize(); } +__attribute__((no_instrument_function)) void EsSpinlockRelease(EsSpinlock *spinlock) { __sync_synchronize(); diff --git a/shared/math.cpp b/shared/math.cpp index dd49031..c85eb88 100644 --- a/shared/math.cpp +++ b/shared/math.cpp @@ -33,6 +33,7 @@ inline int DistanceSquared(int x1, int y1, int x2, int y2) { return dx * dx + dy * dy; } +__attribute__((no_instrument_function)) inline int ClampInteger(int low, int high, int integer) { if (integer < low) return low; if (integer > high) return high; @@ -51,6 +52,7 @@ inline intptr_t ClampIntptr(intptr_t low, intptr_t high, intptr_t integer) { return integer; } +__attribute__((no_instrument_function)) inline int MaximumInteger(int a, 
int b) { return a > b ? a : b; } @@ -58,14 +60,17 @@ inline int MaximumInteger(int a, int b) { #define MaximumInteger3 MaximumInteger #define MinimumInteger3 MinimumInteger +__attribute__((no_instrument_function)) inline int MaximumInteger(int a, int b, int c) { return MaximumInteger(MaximumInteger(a, b), c); } +__attribute__((no_instrument_function)) inline int MaximumInteger(int a, int b, int c, int d) { return MaximumInteger(MaximumInteger(a, b, c), d); } +__attribute__((no_instrument_function)) inline int MinimumInteger(int a, int b) { return a < b ? a : b; } @@ -99,6 +104,7 @@ float LinearMap(float inFrom, float inTo, float outFrom, float outTo, float valu return raw * (outTo - outFrom) + outFrom; } +__attribute__((no_instrument_function)) float LinearInterpolate(float from, float to, float progress) { return from + progress * (to - from); } diff --git a/util/build.c b/util/build.c index ea1d0c5..928b919 100644 --- a/util/build.c +++ b/util/build.c @@ -1139,6 +1139,8 @@ void DoCommand(const char *l) { BuildAndRun(OPTIMISE_OFF, true /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL); } else if (0 == strcmp(l, "t3") || 0 == strcmp(l, "qemu-without-compile")) { BuildAndRun(OPTIMISE_OFF, false /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL); + } else if (0 == strcmp(l, "t4")) { + BuildAndRun(OPTIMISE_FULL, true /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL); } else if (0 == strcmp(l, "e")) { Run(EMULATOR_QEMU, LOG_NORMAL, DEBUG_LATER); } else if (0 == strcmp(l, "k") || 0 == strcmp(l, "qemu-with-kvm")) { diff --git a/util/build_common.h b/util/build_common.h index 435965d..9f0c98a 100644 --- a/util/build_common.h +++ b/util/build_common.h @@ -286,6 +286,7 @@ Option options[] = { { "Flag.ENABLE_POSIX_SUBSYSTEM", OPTION_TYPE_BOOL, { .b = false } }, { "Flag.DEBUG_BUILD", OPTION_TYPE_BOOL, { .b = true } }, { "Flag.USE_SMP", OPTION_TYPE_BOOL, { .b = true } }, + { "Flag.PROFILE_DESKTOP_FUNCTIONS", OPTION_TYPE_BOOL, { .b = false } }, { 
"Flag.BGA_RESOLUTION_WIDTH", OPTION_TYPE_STRING, { .s = "1600" } }, { "Flag.BGA_RESOLUTION_HEIGHT", OPTION_TYPE_STRING, { .s = "900" } }, { "Flag.VGA_TEXT_MODE", OPTION_TYPE_BOOL, { .b = false } }, diff --git a/util/build_core.c b/util/build_core.c index cc513f1..1c15c38 100644 --- a/util/build_core.c +++ b/util/build_core.c @@ -152,6 +152,7 @@ char apiLinkFlags2[4096] = " -lgcc "; char apiLinkFlags3[4096] = " -Wl,--end-group -Lroot/Applications/POSIX/lib "; char kernelLinkFlags[4096] = " -ffreestanding -nostdlib -lgcc -g -z max-page-size=0x1000 "; char commonAssemblyFlags[4096] = " -Fdwarf "; +const char *desktopProfilingFlags = ""; // Specific configuration options: @@ -568,10 +569,11 @@ void BuildDesktop(Application *application) { snprintf(buffer, sizeof(buffer), "arch/%s/api.s", target); ExecuteForApp(application, toolchainNasm, buffer, "-MD", "bin/api1.d", "-o", "bin/api1.o", ArgString(commonAssemblyFlags)); - ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/api.cpp", "-o", "bin/api2.o", ArgString(commonCompileFlags)); + ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/api.cpp", "-o", "bin/api2.o", ArgString(commonCompileFlags), ArgString(desktopProfilingFlags)); ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/posix.cpp", "-o", "bin/api3.o", ArgString(commonCompileFlags)); + ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/profiling.cpp", "-o", "bin/api4.o", "-DPROFILING_IMPLEMENTATION", ArgString(commonCompileFlags)); ExecuteForApp(application, toolchainCC, "-o", "bin/Desktop", "bin/crti.o", "bin/crtbegin.o", - "bin/api1.o", "bin/api2.o", "bin/api3.o", "bin/crtend.o", "bin/crtn.o", + "bin/api1.o", "bin/api2.o", "bin/api3.o", "bin/api4.o", "bin/crtend.o", "bin/crtn.o", ArgString(apiLinkFlags1), ArgString(apiLinkFlags2), ArgString(apiLinkFlags3)); ExecuteForApp(application, toolchainStrip, "-o", "bin/Desktop.no_symbols", "--strip-all", "bin/Desktop"); @@ -1350,6 +1352,8 @@ int main(int argc, char **argv) 
{ } else if (0 == strcmp(s.key, "Flag.COM_OUTPUT") && atoi(s.value)) { strcat(commonAssemblyFlags, " -DCOM_OUTPUT "); strcat(commonCompileFlags, " -DCOM_OUTPUT "); + } else if (0 == strcmp(s.key, "Flag.PROFILE_DESKTOP_FUNCTIONS") && atoi(s.value)) { + desktopProfilingFlags = "-finstrument-functions -DPROFILE_DESKTOP_FUNCTIONS"; } else if (0 == strcmp(s.key, "BuildCore.NoImportPOSIX")) { noImportPOSIX = !!atoi(s.value); } else if (0 == memcmp(s.key, "General.", 8)) { @@ -1575,6 +1579,7 @@ int main(int argc, char **argv) { ADD_DEPENDENCY_FILE(application, "bin/api1.d", "API1"); ADD_DEPENDENCY_FILE(application, "bin/api2.d", "API2"); ADD_DEPENDENCY_FILE(application, "bin/api3.d", "API3"); + ADD_DEPENDENCY_FILE(application, "bin/api4.d", "API4"); arrput(applications, application); }