mirror of https://gitlab.com/nakst/essence
scripting engine optimised string concat
This commit is contained in:
parent
fbf63d6b13
commit
971d450760
124
util/script.c
124
util/script.c
|
@ -1,6 +1,3 @@
|
|||
// TODO StringJoin is extremely slow and uses a lot of memory.
|
||||
// Add a StringBuilder? Or maybe a T_CONCAT heap object?
|
||||
|
||||
// TODO Basic missing features:
|
||||
// - Other list operations: insert_many, delete, delete_many, delete_last.
|
||||
// - Maps: T[int], T[str].
|
||||
|
@ -164,10 +161,10 @@
|
|||
uint64_t _index ## stackIndex = context->c->stack[context->c->stackPointer - stackIndex].i; \
|
||||
if (context->heapEntriesAllocated <= _index ## stackIndex) return -1; \
|
||||
HeapEntry *_entry ## stackIndex = &context->heap[_index ## stackIndex]; \
|
||||
if (_entry ## stackIndex->type != T_EOF && _entry ## stackIndex->type != T_STR) return -1; \
|
||||
if (_entry ## stackIndex->type != T_EOF && _entry ## stackIndex->type != T_STR && _entry ## stackIndex->type != T_CONCAT) return -1; \
|
||||
const char *textVariable; \
|
||||
size_t bytesVariable; \
|
||||
ScriptHeapEntryToString(_entry ## stackIndex, &textVariable, &bytesVariable);
|
||||
ScriptHeapEntryToString(context, _entry ## stackIndex, &textVariable, &bytesVariable);
|
||||
#define STACK_POP_STRING(textVariable, bytesVariable) \
|
||||
STACK_READ_STRING(textVariable, bytesVariable, 1); \
|
||||
context->c->stackPointer--;
|
||||
|
@ -279,6 +276,11 @@ typedef struct HeapEntry {
|
|||
int64_t lambdaID;
|
||||
Value curryValue;
|
||||
};
|
||||
|
||||
struct {
|
||||
uint32_t concat1, concat2;
|
||||
size_t concatBytes;
|
||||
};
|
||||
};
|
||||
} HeapEntry;
|
||||
|
||||
|
@ -3015,6 +3017,7 @@ bool ASTGenerate(Tokenizer *tokenizer, Node *root, ExecutionContext *context) {
|
|||
// --------------------------------- Main script execution.
|
||||
|
||||
void HeapGarbageCollectMark(ExecutionContext *context, uintptr_t index) {
|
||||
start:;
|
||||
Assert(index < context->heapEntriesAllocated);
|
||||
if (context->heap[index].gcMark) return;
|
||||
context->heap[index].gcMark = true;
|
||||
|
@ -3033,6 +3036,19 @@ void HeapGarbageCollectMark(ExecutionContext *context, uintptr_t index) {
|
|||
HeapGarbageCollectMark(context, context->heap[index].list[i].i);
|
||||
}
|
||||
}
|
||||
} else if (context->heap[index].type == T_CONCAT) {
|
||||
uintptr_t index1 = context->heap[index].concat1;
|
||||
uintptr_t index2 = context->heap[index].concat2;
|
||||
|
||||
if (context->heap[index1].type == T_CONCAT) {
|
||||
HeapGarbageCollectMark(context, index2);
|
||||
index = index1;
|
||||
} else {
|
||||
HeapGarbageCollectMark(context, index1);
|
||||
index = index2;
|
||||
}
|
||||
|
||||
goto start;
|
||||
} else if (context->heap[index].type == T_OP_DISCARD || context->heap[index].type == T_OP_ASSERT) {
|
||||
HeapGarbageCollectMark(context, context->heap[index].lambdaID);
|
||||
} else if (context->heap[index].type == T_OP_CURRY) {
|
||||
|
@ -3054,7 +3070,8 @@ void HeapFreeEntry(ExecutionContext *context, uintptr_t i) {
|
|||
} else if (context->heap[i].type == T_LIST) {
|
||||
AllocateResize(context->heap[i].list, 0);
|
||||
} else if (context->heap[i].type == T_OP_DISCARD || context->heap[i].type == T_OP_ASSERT
|
||||
|| context->heap[i].type == T_FUNCPTR || context->heap[i].type == T_OP_CURRY) {
|
||||
|| context->heap[i].type == T_FUNCPTR || context->heap[i].type == T_OP_CURRY
|
||||
|| context->heap[i].type == T_CONCAT) {
|
||||
} else {
|
||||
Assert(false);
|
||||
}
|
||||
|
@ -3149,13 +3166,74 @@ void ScriptPrintNode(Node *node, int indent) {
|
|||
}
|
||||
}
|
||||
|
||||
void ScriptHeapEntryToString(HeapEntry *entry, const char **text, size_t *bytes) {
|
||||
size_t ScriptHeapEntryGetStringBytes(HeapEntry *entry) {
|
||||
if (entry->type == T_STR) {
|
||||
return entry->bytes;
|
||||
} else if (entry->type == T_EOF) {
|
||||
return 0;
|
||||
} else if (entry->type == T_CONCAT) {
|
||||
return entry->concatBytes;
|
||||
} else {
|
||||
Assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
void ScriptHeapEntryConcatConvertToStringWrite(ExecutionContext *context, HeapEntry *entry, char *buffer) {
|
||||
while (true) {
|
||||
if (entry->type == T_STR) {
|
||||
MemoryCopy(buffer, entry->text, entry->bytes);
|
||||
} else if (entry->type == T_EOF) {
|
||||
} else if (entry->type == T_CONCAT) {
|
||||
HeapEntry *part1 = &context->heap[entry->concat1], *part2 = &context->heap[entry->concat2];
|
||||
size_t part1Bytes = ScriptHeapEntryGetStringBytes(part1);
|
||||
|
||||
if (part1->type == T_CONCAT) {
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part2, buffer + part1Bytes);
|
||||
Assert(part2->type != T_CONCAT);
|
||||
entry = part1;
|
||||
continue;
|
||||
} else if (part2->type == T_CONCAT) {
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part1, buffer);
|
||||
Assert(part1->type != T_CONCAT);
|
||||
entry = part2;
|
||||
buffer += part1Bytes;
|
||||
continue;
|
||||
} else {
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part1, buffer);
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part2, buffer + part1Bytes);
|
||||
}
|
||||
} else {
|
||||
Assert(false);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ScriptHeapEntryConcatConvertToString(ExecutionContext *context, HeapEntry *entry) {
|
||||
// TODO Efficient concatenation of many strings.
|
||||
// TODO Preventing stack overflow.
|
||||
Assert(entry->type == T_CONCAT);
|
||||
HeapEntry *part1 = &context->heap[entry->concat1], *part2 = &context->heap[entry->concat2];
|
||||
size_t part1Bytes = ScriptHeapEntryGetStringBytes(part1), part2Bytes = ScriptHeapEntryGetStringBytes(part2);
|
||||
Assert(entry->concatBytes == part1Bytes + part2Bytes);
|
||||
entry->type = T_STR;
|
||||
entry->bytes = part1Bytes + part2Bytes;
|
||||
entry->text = AllocateResize(NULL, entry->bytes);
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part1, entry->text);
|
||||
ScriptHeapEntryConcatConvertToStringWrite(context, part2, entry->text + part1Bytes);
|
||||
}
|
||||
|
||||
void ScriptHeapEntryToString(ExecutionContext *context, HeapEntry *entry, const char **text, size_t *bytes) {
|
||||
if (entry->type == T_STR) {
|
||||
*text = entry->text;
|
||||
*bytes = entry->bytes;
|
||||
} else if (entry->type == T_EOF) {
|
||||
*text = "";
|
||||
*bytes = 0;
|
||||
} else if (entry->type == T_CONCAT) {
|
||||
ScriptHeapEntryConcatConvertToString(context, entry);
|
||||
ScriptHeapEntryToString(context, entry, text, bytes);
|
||||
} else {
|
||||
Assert(false);
|
||||
}
|
||||
|
@ -3244,18 +3322,28 @@ int ScriptExecuteFunction(uintptr_t instructionPointer, ExecutionContext *contex
|
|||
context->c->stackIsManaged[context->c->stackPointer] = true;
|
||||
context->c->stack[context->c->stackPointer++] = v;
|
||||
} else if (command == T_CONCAT) {
|
||||
STACK_READ_STRING(text1, bytes1, 2);
|
||||
STACK_READ_STRING(text2, bytes2, 1);
|
||||
if (context->c->stackPointer < 2) return -1;
|
||||
uint64_t index1 = context->c->stack[context->c->stackPointer - 2].i;
|
||||
uint64_t index2 = context->c->stack[context->c->stackPointer - 1].i;
|
||||
if (!context->c->stackIsManaged[context->c->stackPointer - 2]) return -1;
|
||||
if (!context->c->stackIsManaged[context->c->stackPointer - 1]) return -1;
|
||||
if (context->heapEntriesAllocated <= index1) return -1;
|
||||
if (context->heapEntriesAllocated <= index2) return -1;
|
||||
Assert(index1 <= 0xFFFFFFFF && index2 <= 0xFFFFFFFF);
|
||||
size_t bytes1 = ScriptHeapEntryGetStringBytes(&context->heap[index1]);
|
||||
size_t bytes2 = ScriptHeapEntryGetStringBytes(&context->heap[index2]);
|
||||
uintptr_t index = HeapAllocate(context); // TODO Handle memory allocation failures here.
|
||||
context->heap[index].type = T_CONCAT;
|
||||
context->heap[index].concat1 = index1;
|
||||
context->heap[index].concat2 = index2;
|
||||
context->heap[index].concatBytes = bytes1 + bytes2;
|
||||
|
||||
// At most one argument can be a T_CONCAT (ohterwise converting to a string could stack overflow).
|
||||
if (context->heap[index1].type == T_CONCAT && context->heap[index2].type == T_CONCAT) {
|
||||
ScriptHeapEntryConcatConvertToString(context, bytes1 < bytes2 ? &context->heap[index1] : &context->heap[index2]);
|
||||
}
|
||||
|
||||
// TODO Handle memory allocation failures here.
|
||||
uintptr_t index = HeapAllocate(context);
|
||||
context->heap[index].type = T_STR;
|
||||
context->heap[index].bytes = bytes1 + bytes2;
|
||||
context->heap[index].text = (char *) AllocateResize(NULL, context->heap[index].bytes);
|
||||
if (bytes1) MemoryCopy(context->heap[index].text + 0, text1, bytes1);
|
||||
if (bytes2) MemoryCopy(context->heap[index].text + bytes1, text2, bytes2);
|
||||
context->c->stack[context->c->stackPointer - 2].i = index;
|
||||
|
||||
context->c->stackPointer--;
|
||||
} else if (command == T_INTERPOLATE_STR || command == T_INTERPOLATE_BOOL
|
||||
|| command == T_INTERPOLATE_INT || command == T_INTERPOLATE_FLOAT
|
||||
|
@ -5122,7 +5210,7 @@ int ExternalPersistWrite(ExecutionContext *context, Value *returnValue) {
|
|||
HeapEntry *entry = &context->heap[context->globalVariables[k].i];
|
||||
const char *text;
|
||||
size_t bytes;
|
||||
ScriptHeapEntryToString(entry, &text, &bytes);
|
||||
ScriptHeapEntryToString(context, entry, &text, &bytes);
|
||||
uint32_t variableDataLength = bytes;
|
||||
fwrite(&variableDataLength, 1, sizeof(uint32_t), f);
|
||||
fwrite(scope->entries[j]->token.text, 1, variableNameLength, f);
|
||||
|
|
Loading…
Reference in New Issue