mirror of https://gitlab.com/nakst/essence
scripting engine optimised string concat
This commit is contained in:
parent
fbf63d6b13
commit
971d450760
124
util/script.c
124
util/script.c
|
@ -1,6 +1,3 @@
|
||||||
// TODO StringJoin is extremely slow and uses a lot of memory.
|
|
||||||
// Add a StringBuilder? Or maybe a T_CONCAT heap object?
|
|
||||||
|
|
||||||
// TODO Basic missing features:
|
// TODO Basic missing features:
|
||||||
// - Other list operations: insert_many, delete, delete_many, delete_last.
|
// - Other list operations: insert_many, delete, delete_many, delete_last.
|
||||||
// - Maps: T[int], T[str].
|
// - Maps: T[int], T[str].
|
||||||
|
@ -164,10 +161,10 @@
|
||||||
uint64_t _index ## stackIndex = context->c->stack[context->c->stackPointer - stackIndex].i; \
|
uint64_t _index ## stackIndex = context->c->stack[context->c->stackPointer - stackIndex].i; \
|
||||||
if (context->heapEntriesAllocated <= _index ## stackIndex) return -1; \
|
if (context->heapEntriesAllocated <= _index ## stackIndex) return -1; \
|
||||||
HeapEntry *_entry ## stackIndex = &context->heap[_index ## stackIndex]; \
|
HeapEntry *_entry ## stackIndex = &context->heap[_index ## stackIndex]; \
|
||||||
if (_entry ## stackIndex->type != T_EOF && _entry ## stackIndex->type != T_STR) return -1; \
|
if (_entry ## stackIndex->type != T_EOF && _entry ## stackIndex->type != T_STR && _entry ## stackIndex->type != T_CONCAT) return -1; \
|
||||||
const char *textVariable; \
|
const char *textVariable; \
|
||||||
size_t bytesVariable; \
|
size_t bytesVariable; \
|
||||||
ScriptHeapEntryToString(_entry ## stackIndex, &textVariable, &bytesVariable);
|
ScriptHeapEntryToString(context, _entry ## stackIndex, &textVariable, &bytesVariable);
|
||||||
#define STACK_POP_STRING(textVariable, bytesVariable) \
|
#define STACK_POP_STRING(textVariable, bytesVariable) \
|
||||||
STACK_READ_STRING(textVariable, bytesVariable, 1); \
|
STACK_READ_STRING(textVariable, bytesVariable, 1); \
|
||||||
context->c->stackPointer--;
|
context->c->stackPointer--;
|
||||||
|
@ -279,6 +276,11 @@ typedef struct HeapEntry {
|
||||||
int64_t lambdaID;
|
int64_t lambdaID;
|
||||||
Value curryValue;
|
Value curryValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t concat1, concat2;
|
||||||
|
size_t concatBytes;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
} HeapEntry;
|
} HeapEntry;
|
||||||
|
|
||||||
|
@ -3015,6 +3017,7 @@ bool ASTGenerate(Tokenizer *tokenizer, Node *root, ExecutionContext *context) {
|
||||||
// --------------------------------- Main script execution.
|
// --------------------------------- Main script execution.
|
||||||
|
|
||||||
void HeapGarbageCollectMark(ExecutionContext *context, uintptr_t index) {
|
void HeapGarbageCollectMark(ExecutionContext *context, uintptr_t index) {
|
||||||
|
start:;
|
||||||
Assert(index < context->heapEntriesAllocated);
|
Assert(index < context->heapEntriesAllocated);
|
||||||
if (context->heap[index].gcMark) return;
|
if (context->heap[index].gcMark) return;
|
||||||
context->heap[index].gcMark = true;
|
context->heap[index].gcMark = true;
|
||||||
|
@ -3033,6 +3036,19 @@ void HeapGarbageCollectMark(ExecutionContext *context, uintptr_t index) {
|
||||||
HeapGarbageCollectMark(context, context->heap[index].list[i].i);
|
HeapGarbageCollectMark(context, context->heap[index].list[i].i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (context->heap[index].type == T_CONCAT) {
|
||||||
|
uintptr_t index1 = context->heap[index].concat1;
|
||||||
|
uintptr_t index2 = context->heap[index].concat2;
|
||||||
|
|
||||||
|
if (context->heap[index1].type == T_CONCAT) {
|
||||||
|
HeapGarbageCollectMark(context, index2);
|
||||||
|
index = index1;
|
||||||
|
} else {
|
||||||
|
HeapGarbageCollectMark(context, index1);
|
||||||
|
index = index2;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto start;
|
||||||
} else if (context->heap[index].type == T_OP_DISCARD || context->heap[index].type == T_OP_ASSERT) {
|
} else if (context->heap[index].type == T_OP_DISCARD || context->heap[index].type == T_OP_ASSERT) {
|
||||||
HeapGarbageCollectMark(context, context->heap[index].lambdaID);
|
HeapGarbageCollectMark(context, context->heap[index].lambdaID);
|
||||||
} else if (context->heap[index].type == T_OP_CURRY) {
|
} else if (context->heap[index].type == T_OP_CURRY) {
|
||||||
|
@ -3054,7 +3070,8 @@ void HeapFreeEntry(ExecutionContext *context, uintptr_t i) {
|
||||||
} else if (context->heap[i].type == T_LIST) {
|
} else if (context->heap[i].type == T_LIST) {
|
||||||
AllocateResize(context->heap[i].list, 0);
|
AllocateResize(context->heap[i].list, 0);
|
||||||
} else if (context->heap[i].type == T_OP_DISCARD || context->heap[i].type == T_OP_ASSERT
|
} else if (context->heap[i].type == T_OP_DISCARD || context->heap[i].type == T_OP_ASSERT
|
||||||
|| context->heap[i].type == T_FUNCPTR || context->heap[i].type == T_OP_CURRY) {
|
|| context->heap[i].type == T_FUNCPTR || context->heap[i].type == T_OP_CURRY
|
||||||
|
|| context->heap[i].type == T_CONCAT) {
|
||||||
} else {
|
} else {
|
||||||
Assert(false);
|
Assert(false);
|
||||||
}
|
}
|
||||||
|
@ -3149,13 +3166,74 @@ void ScriptPrintNode(Node *node, int indent) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScriptHeapEntryToString(HeapEntry *entry, const char **text, size_t *bytes) {
|
size_t ScriptHeapEntryGetStringBytes(HeapEntry *entry) {
|
||||||
|
if (entry->type == T_STR) {
|
||||||
|
return entry->bytes;
|
||||||
|
} else if (entry->type == T_EOF) {
|
||||||
|
return 0;
|
||||||
|
} else if (entry->type == T_CONCAT) {
|
||||||
|
return entry->concatBytes;
|
||||||
|
} else {
|
||||||
|
Assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ScriptHeapEntryConcatConvertToStringWrite(ExecutionContext *context, HeapEntry *entry, char *buffer) {
|
||||||
|
while (true) {
|
||||||
|
if (entry->type == T_STR) {
|
||||||
|
MemoryCopy(buffer, entry->text, entry->bytes);
|
||||||
|
} else if (entry->type == T_EOF) {
|
||||||
|
} else if (entry->type == T_CONCAT) {
|
||||||
|
HeapEntry *part1 = &context->heap[entry->concat1], *part2 = &context->heap[entry->concat2];
|
||||||
|
size_t part1Bytes = ScriptHeapEntryGetStringBytes(part1);
|
||||||
|
|
||||||
|
if (part1->type == T_CONCAT) {
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part2, buffer + part1Bytes);
|
||||||
|
Assert(part2->type != T_CONCAT);
|
||||||
|
entry = part1;
|
||||||
|
continue;
|
||||||
|
} else if (part2->type == T_CONCAT) {
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part1, buffer);
|
||||||
|
Assert(part1->type != T_CONCAT);
|
||||||
|
entry = part2;
|
||||||
|
buffer += part1Bytes;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part1, buffer);
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part2, buffer + part1Bytes);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ScriptHeapEntryConcatConvertToString(ExecutionContext *context, HeapEntry *entry) {
|
||||||
|
// TODO Efficient concatenation of many strings.
|
||||||
|
// TODO Preventing stack overflow.
|
||||||
|
Assert(entry->type == T_CONCAT);
|
||||||
|
HeapEntry *part1 = &context->heap[entry->concat1], *part2 = &context->heap[entry->concat2];
|
||||||
|
size_t part1Bytes = ScriptHeapEntryGetStringBytes(part1), part2Bytes = ScriptHeapEntryGetStringBytes(part2);
|
||||||
|
Assert(entry->concatBytes == part1Bytes + part2Bytes);
|
||||||
|
entry->type = T_STR;
|
||||||
|
entry->bytes = part1Bytes + part2Bytes;
|
||||||
|
entry->text = AllocateResize(NULL, entry->bytes);
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part1, entry->text);
|
||||||
|
ScriptHeapEntryConcatConvertToStringWrite(context, part2, entry->text + part1Bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ScriptHeapEntryToString(ExecutionContext *context, HeapEntry *entry, const char **text, size_t *bytes) {
|
||||||
if (entry->type == T_STR) {
|
if (entry->type == T_STR) {
|
||||||
*text = entry->text;
|
*text = entry->text;
|
||||||
*bytes = entry->bytes;
|
*bytes = entry->bytes;
|
||||||
} else if (entry->type == T_EOF) {
|
} else if (entry->type == T_EOF) {
|
||||||
*text = "";
|
*text = "";
|
||||||
*bytes = 0;
|
*bytes = 0;
|
||||||
|
} else if (entry->type == T_CONCAT) {
|
||||||
|
ScriptHeapEntryConcatConvertToString(context, entry);
|
||||||
|
ScriptHeapEntryToString(context, entry, text, bytes);
|
||||||
} else {
|
} else {
|
||||||
Assert(false);
|
Assert(false);
|
||||||
}
|
}
|
||||||
|
@ -3244,18 +3322,28 @@ int ScriptExecuteFunction(uintptr_t instructionPointer, ExecutionContext *contex
|
||||||
context->c->stackIsManaged[context->c->stackPointer] = true;
|
context->c->stackIsManaged[context->c->stackPointer] = true;
|
||||||
context->c->stack[context->c->stackPointer++] = v;
|
context->c->stack[context->c->stackPointer++] = v;
|
||||||
} else if (command == T_CONCAT) {
|
} else if (command == T_CONCAT) {
|
||||||
STACK_READ_STRING(text1, bytes1, 2);
|
if (context->c->stackPointer < 2) return -1;
|
||||||
STACK_READ_STRING(text2, bytes2, 1);
|
uint64_t index1 = context->c->stack[context->c->stackPointer - 2].i;
|
||||||
|
uint64_t index2 = context->c->stack[context->c->stackPointer - 1].i;
|
||||||
|
if (!context->c->stackIsManaged[context->c->stackPointer - 2]) return -1;
|
||||||
|
if (!context->c->stackIsManaged[context->c->stackPointer - 1]) return -1;
|
||||||
|
if (context->heapEntriesAllocated <= index1) return -1;
|
||||||
|
if (context->heapEntriesAllocated <= index2) return -1;
|
||||||
|
Assert(index1 <= 0xFFFFFFFF && index2 <= 0xFFFFFFFF);
|
||||||
|
size_t bytes1 = ScriptHeapEntryGetStringBytes(&context->heap[index1]);
|
||||||
|
size_t bytes2 = ScriptHeapEntryGetStringBytes(&context->heap[index2]);
|
||||||
|
uintptr_t index = HeapAllocate(context); // TODO Handle memory allocation failures here.
|
||||||
|
context->heap[index].type = T_CONCAT;
|
||||||
|
context->heap[index].concat1 = index1;
|
||||||
|
context->heap[index].concat2 = index2;
|
||||||
|
context->heap[index].concatBytes = bytes1 + bytes2;
|
||||||
|
|
||||||
|
// At most one argument can be a T_CONCAT (otherwise converting to a string could stack overflow).
|
||||||
|
if (context->heap[index1].type == T_CONCAT && context->heap[index2].type == T_CONCAT) {
|
||||||
|
ScriptHeapEntryConcatConvertToString(context, bytes1 < bytes2 ? &context->heap[index1] : &context->heap[index2]);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO Handle memory allocation failures here.
|
|
||||||
uintptr_t index = HeapAllocate(context);
|
|
||||||
context->heap[index].type = T_STR;
|
|
||||||
context->heap[index].bytes = bytes1 + bytes2;
|
|
||||||
context->heap[index].text = (char *) AllocateResize(NULL, context->heap[index].bytes);
|
|
||||||
if (bytes1) MemoryCopy(context->heap[index].text + 0, text1, bytes1);
|
|
||||||
if (bytes2) MemoryCopy(context->heap[index].text + bytes1, text2, bytes2);
|
|
||||||
context->c->stack[context->c->stackPointer - 2].i = index;
|
context->c->stack[context->c->stackPointer - 2].i = index;
|
||||||
|
|
||||||
context->c->stackPointer--;
|
context->c->stackPointer--;
|
||||||
} else if (command == T_INTERPOLATE_STR || command == T_INTERPOLATE_BOOL
|
} else if (command == T_INTERPOLATE_STR || command == T_INTERPOLATE_BOOL
|
||||||
|| command == T_INTERPOLATE_INT || command == T_INTERPOLATE_FLOAT
|
|| command == T_INTERPOLATE_INT || command == T_INTERPOLATE_FLOAT
|
||||||
|
@ -5122,7 +5210,7 @@ int ExternalPersistWrite(ExecutionContext *context, Value *returnValue) {
|
||||||
HeapEntry *entry = &context->heap[context->globalVariables[k].i];
|
HeapEntry *entry = &context->heap[context->globalVariables[k].i];
|
||||||
const char *text;
|
const char *text;
|
||||||
size_t bytes;
|
size_t bytes;
|
||||||
ScriptHeapEntryToString(entry, &text, &bytes);
|
ScriptHeapEntryToString(context, entry, &text, &bytes);
|
||||||
uint32_t variableDataLength = bytes;
|
uint32_t variableDataLength = bytes;
|
||||||
fwrite(&variableDataLength, 1, sizeof(uint32_t), f);
|
fwrite(&variableDataLength, 1, sizeof(uint32_t), f);
|
||||||
fwrite(scope->entries[j]->token.text, 1, variableNameLength, f);
|
fwrite(scope->entries[j]->token.text, 1, variableNameLength, f);
|
||||||
|
|
Loading…
Reference in New Issue