improve profiling

This commit is contained in:
nakst 2021-11-28 16:28:01 +00:00
parent f9f135fc1f
commit 81f3ee6109
11 changed files with 101 additions and 7 deletions

View File

@ -261,11 +261,10 @@ struct APIInstance {
#include "renderer.cpp"
#include "theme.cpp"
#define TEXT_RENDERER
#include "text.cpp"
#undef TEXT_RENDERER
#include "profiling.cpp"
#include "gui.cpp"
#ifndef NO_API_TABLE
@ -1106,12 +1105,13 @@ EsMessage *EsMessageReceive() {
ProcessMessageTiming timing = {};
double start = EsTimeStampMs();
UIProcessWindowManagerMessage((EsWindow *) message.object, &message.message, &timing);
EsPrint("Processed message from WM %x in %Fms (%Fms logic, %Fms layout, %Fms paint, %Fms update screen).\n",
EsPrint("Processed message from WM %x in %Fms (%Fms logic, %Fms layout, %Fms paint, %Fms update screen). Profiling buffer %F%% full.\n",
type, EsTimeStampMs() - start,
timing.endLogic - timing.startLogic,
timing.endLayout - timing.startLayout,
timing.endPaint - timing.startPaint,
timing.endUpdate - timing.startUpdate);
timing.endUpdate - timing.startUpdate,
profilingBufferSize ? profilingBufferPosition * 100.0 / profilingBufferSize : 0);
#else
UIProcessWindowManagerMessage((EsWindow *) message.object, &message.message, nullptr);
#endif
@ -1470,6 +1470,7 @@ uintptr_t EsSystemGetOptimalWorkQueueThreadCount() {
return EsSyscall(ES_SYSCALL_PROCESSOR_COUNT, 0, 0, 0, 0);
}
__attribute__((no_instrument_function))
void ThreadInitialise(ThreadLocalStorage *local) {
EsMemoryZero(local, sizeof(ThreadLocalStorage));
EsSyscall(ES_SYSCALL_THREAD_GET_ID, ES_CURRENT_THREAD, (uintptr_t) &local->id, 0, 0);

View File

@ -8035,6 +8035,13 @@ void InspectorVisualizeLayoutBounds(EsInstance *instance, EsElement *, EsCommand
EsElementRepaint(window);
}
#ifdef PROFILE_DESKTOP_FUNCTIONS
// Inspector toolbar command handler for "Begin profiling".
// Allocates a buffer of ProfilingEntry records and hands it to ProfilingSetup.
// The command's instance/element/command arguments are not used.
// Nothing in this function frees the buffer.
void InspectorBeginProfiling(EsInstance *, EsElement *, EsCommand *) {
	size_t entryCount = 3000000;
	ProfilingEntry *buffer = (ProfilingEntry *) EsHeapAllocate(sizeof(ProfilingEntry) * entryCount, false);

	if (!buffer) {
		// The allocation did not succeed. Enabling profiling with a null buffer
		// would make the first recorded function call write through a null pointer.
		return;
	}

	ProfilingSetup(buffer, entryCount);
}
#endif
void InspectorAddElement2(EsMenu *menu, EsGeneric context) {
InspectorWindow *inspector = (InspectorWindow *) menu->instance;
if (inspector->selectedElement == -1) return;
@ -8094,6 +8101,10 @@ void InspectorSetup(EsWindow *window) {
EsButtonOnCommand(inspector->visualizeLayoutBounds, InspectorVisualizeLayoutBounds);
inspector->visualizePaintSteps = EsButtonCreate(toolbar, ES_BUTTON_TOOLBAR, 0, "Visualize paint steps");
EsButtonOnCommand(inspector->visualizePaintSteps, InspectorVisualizePaintSteps);
EsSpacerCreate(toolbar, ES_CELL_H_FILL);
#ifdef PROFILE_DESKTOP_FUNCTIONS
EsButtonOnCommand(EsButtonCreate(toolbar, ES_BUTTON_TOOLBAR, 0, "Begin profiling"), InspectorBeginProfiling);
#endif
}
inspector->elementList = EsListViewCreate(panel1, ES_CELL_FILL | ES_LIST_VIEW_COLUMNS | ES_LIST_VIEW_SINGLE_SELECT);

55
desktop/profiling.cpp Normal file
View File

@ -0,0 +1,55 @@
// TODO Adjust time stamps for thread preemption.
#include <stdint.h>
#include <stddef.h>
// One recorded event: a single entry into, or exit from, an instrumented function.
struct ProfilingEntry {
void *thisFunction; // The function address passed to the __cyg_profile_func_enter/exit hook.
uint64_t timeStamp; // Result of ProcessorReadTimeStamp(); bit 63 is set for exit events and left as read for entry events.
};
// Defined outside this file. Passed to ProcessorTLSRead; the value read back is what the profiler compares against to decide whether the current thread is the profiled one.
extern ptrdiff_t tlsStorageOffset;
extern "C" uintptr_t ProcessorTLSRead(uintptr_t offset);
// Source of the time stamps stored in ProfilingEntry::timeStamp.
extern "C" uint64_t ProcessorReadTimeStamp();
// Called by the profiling hooks once the buffer has no room left.
void EnterDebugger();
// Capacity of the profiling buffer in entries; 0 while profiling is inactive.
extern size_t profilingBufferSize;
// Number of entries recorded so far (index of the next free entry).
extern uintptr_t profilingBufferPosition;
// Starts profiling the calling thread, recording into the given buffer.
void ProfilingSetup(ProfilingEntry *buffer, size_t size /* number of entries */);
#ifdef PROFILING_IMPLEMENTATION
// Profiler state. Written by ProfilingSetup and by the __cyg_profile_func_* hooks.
ProfilingEntry *profilingBuffer; // Storage for recorded events, supplied by the caller of ProfilingSetup.
size_t profilingBufferSize; // Capacity of profilingBuffer in entries; reset to 0 when the buffer fills, which stops recording.
uintptr_t profilingBufferPosition; // Index of the next entry to write.
uintptr_t profilingThread; // ProcessorTLSRead(tlsStorageOffset) of the thread that called ProfilingSetup; only that thread is recorded.
// Shared body of the two instrumentation hooks. Expects `thisFunction` and `callSite` to be in scope.
// _exiting is 0 for a function entry and 1 for a function exit; it is stored in bit 63 of the time stamp.
// Nothing is read from thread-local storage while profilingBufferSize is 0, i.e. while profiling is inactive.
// When the profiled thread finds the buffer full, recording is switched off and the debugger is entered.
#define PROFILING_FUNCTION(_exiting) \
	(void) callSite; \
	\
	if (profilingBufferSize && profilingThread == ProcessorTLSRead(tlsStorageOffset)) { \
		if (profilingBufferPosition < profilingBufferSize) { \
			ProfilingEntry *slot = (ProfilingEntry *) &profilingBuffer[profilingBufferPosition]; \
			profilingBufferPosition++; \
			slot->thisFunction = thisFunction; \
			slot->timeStamp = ProcessorReadTimeStamp() | ((uint64_t) _exiting << 63); \
		} else { \
			profilingBufferSize = 0; \
			EnterDebugger(); \
		} \
	}
// Function-entry hook. The compiler emits a call to this at the start of every function built with -finstrument-functions.
// thisFunction: address of the function being entered. callSite: address it was called from (unused).
// Records an entry event (exit flag 0) via PROFILING_FUNCTION.
extern "C" void __cyg_profile_func_enter(void *thisFunction, void *callSite) {
PROFILING_FUNCTION(0);
}
// Function-exit hook. The compiler emits a call to this before returning from every function built with -finstrument-functions.
// thisFunction: address of the function being left. callSite: address it was called from (unused).
// Records an exit event (exit flag 1) via PROFILING_FUNCTION.
extern "C" void __cyg_profile_func_exit(void *thisFunction, void *callSite) {
PROFILING_FUNCTION(1);
}
// Starts (or restarts) profiling of the calling thread.
// buffer: storage for the recorded events. Only the pointer is kept, so it must stay valid while profiling is active.
// size: capacity of the buffer, in entries (not bytes).
// Passing a null buffer leaves profiling disabled rather than enabling it with nowhere to write.
void ProfilingSetup(ProfilingEntry *buffer, size_t size) {
	// A size of 0 makes the hooks do nothing, so switch recording off while the rest of the state is replaced.
	profilingBufferSize = 0;
	__sync_synchronize();

	profilingThread = ProcessorTLSRead(tlsStorageOffset);
	profilingBuffer = buffer;
	profilingBufferPosition = 0;
	__sync_synchronize();

	// Writing a non-zero size is what enables the hooks, so it is published last,
	// after the thread, buffer and position are all in place.
	profilingBufferSize = buffer ? size : 0;
}
#endif

View File

@ -120,6 +120,7 @@ int EsProcessGetExitStatus(EsHandle process) {
void ThreadInitialise(ThreadLocalStorage *local);
__attribute__((no_instrument_function))
void ThreadEntry(EsGeneric argument, EsThreadEntryCallback entryFunction) {
ThreadLocalStorage local;
ThreadInitialise(&local);

View File

@ -822,6 +822,7 @@ void FontDatabaseFree() {
// --------------------------------- Blitting rendered glyphs.
__attribute__((no_instrument_function))
inline static void DrawStringPixel(int oX, int oY, void *bitmap, size_t stride, uint32_t textColor,
uint32_t selectionColor, int32_t backgroundColor, uint32_t pixel, bool selected, bool fullAlpha) {
uint32_t *destination = (uint32_t *) ((uint8_t *) bitmap + (oX) * 4 + (oY) * stride);

View File

@ -27,7 +27,7 @@ template <class T, EsHeap *heap = nullptr>
struct Array {
T *array;
inline size_t Length() { return array ? ArrayHeader(array)->length : 0; }
__attribute__((no_instrument_function)) inline size_t Length() { return array ? ArrayHeader(array)->length : 0; }
inline T &First() { return array[0]; }
inline T &Last() { return array[Length() - 1]; }
inline void Delete(uintptr_t position) { _ArrayDelete(array, position, sizeof(T), 1); }

View File

@ -95,6 +95,7 @@ EsRectangle EsRectangleBounding(EsRectangle a, EsRectangle b) {
return a;
}
__attribute__((no_instrument_function))
EsRectangle EsRectangleIntersection(EsRectangle a, EsRectangle b) {
if (a.l < b.l) a.l = b.l;
if (a.t < b.t) a.t = b.t;
@ -164,6 +165,7 @@ EsRectangle EsRectangleCut(EsRectangle a, int32_t amount, char side) {
#if defined(SHARED_COMMON_WANT_RENDERING) || defined(SHARED_COMMON_WANT_ALL)
ES_FUNCTION_OPTIMISE_O3
__attribute__((no_instrument_function))
void BlendPixel(uint32_t *destinationPixel, uint32_t modified, bool fullAlpha) {
if ((modified & 0xFF000000) == 0xFF000000) {
*destinationPixel = modified;
@ -1252,6 +1254,7 @@ double EsDoubleParse(const char *nptr, ptrdiff_t maxBytes, char **endptr) {
#ifdef SHARED_COMMON_WANT_ALL
__attribute__((no_instrument_function))
void EsMemoryCopy(void *_destination, const void *_source, size_t bytes) {
// TODO Prevent this from being optimised out in the kernel.
@ -1282,6 +1285,7 @@ void EsMemoryCopy(void *_destination, const void *_source, size_t bytes) {
}
}
__attribute__((no_instrument_function))
void EsMemoryCopyReverse(void *_destination, void *_source, size_t bytes) {
// TODO Prevent this from being optimised out in the kernel.
@ -1304,6 +1308,7 @@ void EsMemoryCopyReverse(void *_destination, void *_source, size_t bytes) {
}
}
__attribute__((no_instrument_function))
void EsMemoryZero(void *destination, size_t bytes) {
// TODO Prevent this from being optimised out in the kernel.
@ -1316,6 +1321,7 @@ void EsMemoryZero(void *destination, size_t bytes) {
}
}
__attribute__((no_instrument_function))
void EsMemoryMove(void *_start, void *_end, intptr_t amount, bool zeroEmptySpace) {
// TODO Prevent this from being optimised out in the kernel.
@ -1342,6 +1348,7 @@ void EsMemoryMove(void *_start, void *_end, intptr_t amount, bool zeroEmptySpace
}
}
__attribute__((no_instrument_function))
int EsMemoryCompare(const void *a, const void *b, size_t bytes) {
if (!bytes) {
return 0;
@ -1361,6 +1368,7 @@ int EsMemoryCompare(const void *a, const void *b, size_t bytes) {
return 0;
}
__attribute__((no_instrument_function))
uint8_t EsMemorySumBytes(uint8_t *source, size_t bytes) {
if (!bytes) {
return 0;
@ -1375,6 +1383,7 @@ uint8_t EsMemorySumBytes(uint8_t *source, size_t bytes) {
return total;
}
__attribute__((no_instrument_function))
void EsMemoryFill(void *from, void *to, uint8_t byte) {
uint8_t *a = (uint8_t *) from;
uint8_t *b = (uint8_t *) to;
@ -1679,12 +1688,14 @@ void LoadImage(const void *path, ptrdiff_t pathBytes, void *destination, int des
#ifdef SHARED_COMMON_WANT_ALL
// Acquires the spinlock, busy-waiting until it becomes free.
// Excluded from function instrumentation.
__attribute__((no_instrument_function))
void EsSpinlockAcquire(EsSpinlock *spinlock) {
	__sync_synchronize();

	for (;;) {
		// The swap returns the previous state; 0 means the lock was free and is now held by us.
		if (__sync_val_compare_and_swap(&spinlock->state, 0, 1) == 0) {
			break;
		}
	}

	__sync_synchronize();
}
__attribute__((no_instrument_function))
void EsSpinlockRelease(EsSpinlock *spinlock) {
__sync_synchronize();

View File

@ -33,6 +33,7 @@ inline int DistanceSquared(int x1, int y1, int x2, int y2) {
return dx * dx + dy * dy;
}
__attribute__((no_instrument_function))
inline int ClampInteger(int low, int high, int integer) {
if (integer < low) return low;
if (integer > high) return high;
@ -51,6 +52,7 @@ inline intptr_t ClampIntptr(intptr_t low, intptr_t high, intptr_t integer) {
return integer;
}
// Returns the larger of two integers.
__attribute__((no_instrument_function))
inline int MaximumInteger(int a, int b) {
	if (a > b) {
		return a;
	}

	return b;
}
@ -58,14 +60,17 @@ inline int MaximumInteger(int a, int b) {
#define MaximumInteger3 MaximumInteger
#define MinimumInteger3 MinimumInteger
// Returns the largest of three integers.
__attribute__((no_instrument_function))
inline int MaximumInteger(int a, int b, int c) {
	int largest = a > b ? a : b;
	return largest > c ? largest : c;
}
// Returns the largest of four integers.
__attribute__((no_instrument_function))
inline int MaximumInteger(int a, int b, int c, int d) {
	int largest = a;
	if (b > largest) largest = b;
	if (c > largest) largest = c;
	if (d > largest) largest = d;
	return largest;
}
// Returns the smaller of two integers.
__attribute__((no_instrument_function))
inline int MinimumInteger(int a, int b) {
	if (a < b) {
		return a;
	}

	return b;
}
@ -99,6 +104,7 @@ float LinearMap(float inFrom, float inTo, float outFrom, float outTo, float valu
return raw * (outTo - outFrom) + outFrom;
}
// Linearly interpolates between `from` (progress = 0) and `to` (progress = 1).
// `progress` is not clamped, so values outside that range extrapolate.
__attribute__((no_instrument_function))
float LinearInterpolate(float from, float to, float progress) {
	float span = to - from;
	return from + progress * span;
}

View File

@ -1139,6 +1139,8 @@ void DoCommand(const char *l) {
BuildAndRun(OPTIMISE_OFF, true /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL);
} else if (0 == strcmp(l, "t3") || 0 == strcmp(l, "qemu-without-compile")) {
BuildAndRun(OPTIMISE_OFF, false /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL);
} else if (0 == strcmp(l, "t4")) {
BuildAndRun(OPTIMISE_FULL, true /* compile */, false /* debug */, EMULATOR_QEMU, LOG_NORMAL);
} else if (0 == strcmp(l, "e")) {
Run(EMULATOR_QEMU, LOG_NORMAL, DEBUG_LATER);
} else if (0 == strcmp(l, "k") || 0 == strcmp(l, "qemu-with-kvm")) {

View File

@ -286,6 +286,7 @@ Option options[] = {
{ "Flag.ENABLE_POSIX_SUBSYSTEM", OPTION_TYPE_BOOL, { .b = false } },
{ "Flag.DEBUG_BUILD", OPTION_TYPE_BOOL, { .b = true } },
{ "Flag.USE_SMP", OPTION_TYPE_BOOL, { .b = true } },
{ "Flag.PROFILE_DESKTOP_FUNCTIONS", OPTION_TYPE_BOOL, { .b = false } },
{ "Flag.BGA_RESOLUTION_WIDTH", OPTION_TYPE_STRING, { .s = "1600" } },
{ "Flag.BGA_RESOLUTION_HEIGHT", OPTION_TYPE_STRING, { .s = "900" } },
{ "Flag.VGA_TEXT_MODE", OPTION_TYPE_BOOL, { .b = false } },

View File

@ -152,6 +152,7 @@ char apiLinkFlags2[4096] = " -lgcc ";
char apiLinkFlags3[4096] = " -Wl,--end-group -Lroot/Applications/POSIX/lib ";
char kernelLinkFlags[4096] = " -ffreestanding -nostdlib -lgcc -g -z max-page-size=0x1000 ";
char commonAssemblyFlags[4096] = " -Fdwarf ";
// Extra compiler flags appended only when compiling desktop/api.cpp. Empty by default; set to "-finstrument-functions" when Flag.PROFILE_DESKTOP_FUNCTIONS is enabled.
const char *desktopProfilingFlags = "";
// Specific configuration options:
@ -568,10 +569,11 @@ void BuildDesktop(Application *application) {
snprintf(buffer, sizeof(buffer), "arch/%s/api.s", target);
ExecuteForApp(application, toolchainNasm, buffer, "-MD", "bin/api1.d", "-o", "bin/api1.o", ArgString(commonAssemblyFlags));
ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/api.cpp", "-o", "bin/api2.o", ArgString(commonCompileFlags));
ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/api.cpp", "-o", "bin/api2.o", ArgString(commonCompileFlags), ArgString(desktopProfilingFlags));
ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/posix.cpp", "-o", "bin/api3.o", ArgString(commonCompileFlags));
ExecuteForApp(application, toolchainCXX, "-MD", "-c", "desktop/profiling.cpp", "-o", "bin/api4.o", "-DPROFILING_IMPLEMENTATION", ArgString(commonCompileFlags));
ExecuteForApp(application, toolchainCC, "-o", "bin/Desktop", "bin/crti.o", "bin/crtbegin.o",
"bin/api1.o", "bin/api2.o", "bin/api3.o", "bin/crtend.o", "bin/crtn.o",
"bin/api1.o", "bin/api2.o", "bin/api3.o", "bin/api4.o", "bin/crtend.o", "bin/crtn.o",
ArgString(apiLinkFlags1), ArgString(apiLinkFlags2), ArgString(apiLinkFlags3));
ExecuteForApp(application, toolchainStrip, "-o", "bin/Desktop.no_symbols", "--strip-all", "bin/Desktop");
@ -1350,6 +1352,8 @@ int main(int argc, char **argv) {
} else if (0 == strcmp(s.key, "Flag.COM_OUTPUT") && atoi(s.value)) {
strcat(commonAssemblyFlags, " -DCOM_OUTPUT ");
strcat(commonCompileFlags, " -DCOM_OUTPUT ");
} else if (0 == strcmp(s.key, "Flag.PROFILE_DESKTOP_FUNCTIONS") && atoi(s.value)) {
desktopProfilingFlags = "-finstrument-functions";
} else if (0 == strcmp(s.key, "BuildCore.NoImportPOSIX")) {
noImportPOSIX = !!atoi(s.value);
} else if (0 == memcmp(s.key, "General.", 8)) {
@ -1575,6 +1579,7 @@ int main(int argc, char **argv) {
ADD_DEPENDENCY_FILE(application, "bin/api1.d", "API1");
ADD_DEPENDENCY_FILE(application, "bin/api2.d", "API2");
ADD_DEPENDENCY_FILE(application, "bin/api3.d", "API3");
ADD_DEPENDENCY_FILE(application, "bin/api4.d", "API4");
arrput(applications, application);
}