mirror of https://gitlab.com/nakst/essence
1252 lines
43 KiB
1252 lines
43 KiB
// This file is part of the Essence operating system.
// It is released under the terms of the MIT license -- see LICENSE.md.
// Written by: nakst.
// TODO Kernel driver:
// Extent allocation algorithm.
// TODO Design:
// Meta/flex block groups.
// Journal.
// Inline b-tree.
// Further data indirection.
// Hash collisions. (Probably just remove index and enumerate directory contents instead?)
#ifndef KERNEL
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#ifndef _BSD_SOURCE
#define _BSD_SOURCE
#ifndef OS_ESSENCE
// A 16-byte identifier. The structures below use it to identify nodes, volumes,
// installations and file content types.
typedef struct EsUniqueIdentifier {
uint8_t d[16];
} EsUniqueIdentifier;
// Constants describing the on-disk format.
#define ESFS_BOOT_SUPER_BLOCK_SIZE (8192) // The bootloader and superblock take up 16KB.
#define ESFS_DRIVE_MINIMUM_SIZE (1048576) // The minimum drive size that can be formatted.
#define ESFS_DRIVER_VERSION (10) // The current driver version.
#define ESFS_MAXIMUM_VOLUME_NAME_LENGTH (32) // The volume name limit.
#define ESFS_CORE_NODE_KERNEL (0) // The kernel core node.
#define ESFS_CORE_NODE_ROOT (1) // The root directory core node.
#define ESFS_CORE_NODE_COUNT (2) // The number of core nodes.
#define ESFS_SIGNATURE_STRING ("!EssenceFS2-----") // The signature in the superblock.
#define ESFS_DIRECTORY_ENTRY_SIGNATURE ("DirEntry") // The signature in directory entries.
#define ESFS_GROUP_DESCRIPTOR_SIGNATURE ("GDTE") // The signature in a group descriptor.
#define ESFS_INDEX_VERTEX_SIGNATURE ("INXE") // The signature in an index vertex.
#define ESFS_NODE_TYPE_FILE (1) // DirectoryEntry.nodeType: a file.
#define ESFS_NODE_TYPE_DIRECTORY (2) // DirectoryEntry.nodeType: a directory.
#define ESFS_ATTRIBUTE_DATA (1) // Contains the data of the file, or a list of DirectoryEntries.
#define ESFS_ATTRIBUTE_FILENAME (2) // The UTF-8 filename.
#define ESFS_ATTRIBUTE_DIRECTORY (3) // Additional information about the directory.
#define ESFS_INDIRECTION_DIRECT (1) // The data is stored in the attribute.
#define ESFS_INDIRECTION_L1 (2) // The attribute contains an extent list that points to the data.
#define ESFS_INDEX_MAX_DEPTH (16) // The maximum depth of the index tree. I'd be surprised if this gets past 8.
// Yields a pointer to the key-th IndexKey of an IndexVertex; the key array starts vertex->offset bytes
// from the beginning of the vertex. Both arguments are parenthesized so that arbitrary expressions
// (e.g. a conditional index) expand correctly. `vertex` is evaluated twice, so it must be free of side effects.
#define ESFS_VERTEX_KEY(vertex, key) ((IndexKey *) ((uint8_t *) (vertex) + (vertex)->offset) + (key))
// Common header of every attribute in a DirectoryEntry's attribute list.
// The specific attribute structures below all begin with these two fields,
// and the list is walked by advancing `size` bytes from one attribute to the next.
typedef struct Attribute {
/* 0 */ uint16_t type; // Attribute type.
/* 2 */ uint16_t size; // The size in bytes. Must be 8 byte aligned.
} Attribute;
// Attribute holding the node's filename. The name bytes follow the 8-byte header;
// `length` gives the number of name bytes (the code in this file never relies on a terminator).
typedef struct AttributeFilename {
/* 0 */ uint16_t type; // ESFS_ATTRIBUTE_FILENAME.
/* 2 */ uint16_t size; // The size in bytes. Must be 8 byte aligned.
/* 4 */ uint16_t length; // The length of the filename in bytes.
/* 6 */ uint16_t _unused; // Unused.
#define ESFS_FILENAME_HEADER_SIZE (8) // The size of the header of an AttributeFilename.
/* 8 */ uint8_t filename[1]; // The UTF-8 filename.
} AttributeFilename;
// Attribute carrying per-directory bookkeeping: the child count, the root of the
// directory's index tree, and the accumulated size of its children.
typedef struct AttributeDirectory {
/* 0 */ uint16_t type; // ESFS_ATTRIBUTE_DIRECTORY.
/* 2 */ uint16_t size; // The size in bytes. Must be 8 byte aligned.
/* 4 */ uint8_t _unused0[4];
/* 8 */ uint64_t childNodes; // The number of child nodes in the directory.
/* 16 */ uint64_t indexRootBlock; // The block containing the root IndexVertex for the directory. 0 if no index has been created yet.
/* 24 */ uint64_t totalSize; // The sum of sizes of all the directory's children in bytes.
} AttributeDirectory;
// Attribute describing where a node's contents live: either inline in the attribute
// (ESFS_INDIRECTION_DIRECT) or in blocks named by an encoded extent list (ESFS_INDIRECTION_L1).
// See EncodeExtent/DecodeExtent for the extent list encoding.
typedef struct AttributeData {
/* 0 */ uint16_t type; // ESFS_ATTRIBUTE_DATA.
/* 2 */ uint16_t size; // The size in bytes. Must be 8 byte aligned.
/* 4 */ uint8_t indirection; // The indirection used to access the data.
/* 5 */ uint8_t dataOffset; // The offset into the attribute where the data or extent list can be found.
/* 6 */ uint16_t count; // The number of data bytes in the attribute, or extents in the list.
/* 8 */ uint64_t _unused[3]; // Unused.
#define ESFS_DATA_OFFSET (32)
/* 32 */ uint8_t data[1]; // The data or extent list.
// Format of each extent in the extent list:
// uint8_t offsetSize : 3, countSize : 3, unused : 2; // The size of the offset and count fields in bytes - 1.
// uint8_t offset[offsetSize + 1]; // The first block in the extent, expressed as a signed offset from the start
// of the previous extent in the list, or from 0 for the first extent. Big endian.
// uint8_t count[countSize + 1]; // The number of blocks encompassed by the extent. Big endian.
} AttributeData;
// The on-disk record describing one file or directory: a fixed header followed by a
// list of attributes. The offsets below add up to a 1024-byte structure.
// NOTE(review): ESFS_ATTRIBUTE_OFFSET is not defined anywhere in the visible part of this file;
// the offset annotations imply it is 96 -- confirm.
typedef struct DirectoryEntry {
/* 0 */ char signature[8]; // Must be ESFS_DIRECTORY_ENTRY_SIGNATURE.
/* 8 */ EsUniqueIdentifier identifier; // Identifier of the node.
/* 24 */ uint32_t checksum; // CRC-32 checksum of DirectoryEntry.
/* 28 */ uint16_t attributeOffset; // Offset to the first attribute.
/* 30 */ uint8_t nodeType; // Node type.
/* 31 */ uint8_t attributeCount; // The number of attributes in the list.
/* 32 */ uint64_t creationTime, accessTime, modificationTime; // Timekeeping. In microseconds since 1st January 1970.
/* 56 */ uint64_t fileSize; // The amount of data referenced by the data attribute in bytes.
/* 64 */ EsUniqueIdentifier parent; // Identifier of the parent directory.
/* 80 */ EsUniqueIdentifier contentType; // Identifier of the file content type.
/* 96 */ uint8_t attributes[1024 - ESFS_ATTRIBUTE_OFFSET]; // Attribute list.
} DirectoryEntry;
// Per-group allocation state, stored as an array in the group descriptor table.
typedef struct GroupDescriptor {
/* 0 */ char signature[4]; // Must be ESFS_GROUP_DESCRIPTOR_SIGNATURE.
/* 4 */ uint32_t blocksUsed; // The number of used blocks in the group.
/* 8 */ uint64_t blockBitmap; // The first block of the bitmap indicating which blocks in the group are used. AllocateExtent treats 0 as "no bitmap created yet".
/* 16 */ uint32_t bitmapChecksum; // CRC-32 checksum of the bitmap.
/* 20 */ uint32_t checksum; // CRC-32 checksum of this descriptor.
/* 24 */ uint32_t largestExtent; // The largest number of contiguous blocks.
/* 28 */ uint32_t _unused[7]; // Unused.
} GroupDescriptor;
// Locates a DirectoryEntry on the volume by block number and byte offset within that block.
typedef struct DirectoryEntryReference {
/* 0 */ uint64_t block; // The block containing the directory entry.
/* 8 */ uint32_t offsetIntoBlock; // Offset into the block to find the directory entry.
/* 12 */ uint32_t _unused; // Unused.
} DirectoryEntryReference;
// One key of a directory index vertex. Each vertex stores `count` ordinary keys
// followed by one extra "+1" key of which only `child` is meaningful.
typedef struct IndexKey {
/* 0 */ uint64_t value; // The CRC-64 hashed node path. Ignored for the +1 key (assumed to be maximum possible).
/* 8 */ uint64_t child; // The block containing the child IndexVertex. Set to 0 for a leaf.
// All keys in the child should be less than this key.
// This is the only valid field in the +1 key.
/* 16 */ DirectoryEntryReference data; // The directory entry this key refers to.
} IndexKey;
// A vertex of a directory's index tree; AddNode reads and writes each vertex as one whole block.
// The key array is located via `offset`; use ESFS_VERTEX_KEY to address keys.
// NOTE(review): ESFS_INDEX_KEY_OFFSET, which AddNode stores in `offset`, is not defined in the
// visible part of this file; the offset annotations imply it is 32 -- confirm.
typedef struct IndexVertex {
/* 0 */ char signature[4]; // Must be ESFS_INDEX_VERTEX_SIGNATURE.
/* 4 */ uint32_t checksum; // CRC-32 checksum of IndexVertex.
/* 8 */ uint16_t offset; // Offset to the first IndexKey.
/* 10 */ uint16_t count; // The number of IndexKeys currently in use, not counting the trailing +1 key.
/* 12 */ uint16_t maxCount; // The maximum number of IndexKeys that can fit in the vertex: (superblock.blockSize - this.offset) / sizeof(IndexKey) - 1, leaving room for the +1 key.
/* 14 */ uint16_t _unused0; // Unused.
/* 16 */ uint64_t _unused1[2]; // Unused.
/* 32 */ IndexKey keys[1]; // There are this.count keys.
} IndexVertex;
// The volume header. It is padded to exactly 8192 bytes (Format asserts this),
// and MountVolume/UnmountVolume read and write it as block 1.
typedef struct Superblock {
/* 0 */ char signature[16]; // The filesystem signature; should be ESFS_SIGNATURE_STRING.
/* 16 */ char volumeName[ESFS_MAXIMUM_VOLUME_NAME_LENGTH]; // The name of the volume.
/* 48 */ uint16_t requiredReadVersion; // If this is greater than the driver's version, then the filesystem cannot be read.
/* 50 */ uint16_t requiredWriteVersion; // If this is greater than the driver's version, then the filesystem cannot be written.
/* 52 */ uint32_t checksum; // CRC-32 checksum of Superblock.
/* 56 */ uint8_t mounted; // Non-zero to indicate that the volume is mounted, or was not properly unmounted.
/* 57 */ uint8_t _unused2[7];
/* 64 */ uint64_t blockSize; // The size of a block on the volume.
/* 72 */ uint64_t blockCount; // The number of blocks on the volume.
/* 80 */ uint64_t blocksUsed; // The number of blocks that are in use.
/* 88 */ uint32_t blocksPerGroup; // The number of blocks in a group.
/* 92 */ uint8_t _unused3[4];
/* 96 */ uint64_t groupCount; // The number of groups on the volume.
/* 104 */ uint64_t blocksPerGroupBlockBitmap; // The number of blocks used to store a group's block bitmap.
/* 112 */ uint64_t gdtFirstBlock; // The first block in the group descriptor table.
/* 120 */ uint64_t directoryEntriesPerBlock; // The number of directory entries in a block.
/* 128 */ uint64_t _unused0; // Unused.
/* 136 */ EsUniqueIdentifier identifier; // The unique identifier for the volume.
/* 152 */ EsUniqueIdentifier osInstallation; // The unique identifier of the Essence installation this volume was made for. All zero for a non-installation volume.
/* 168 */ EsUniqueIdentifier nextIdentifier; // The identifier to give to the next created file.
/* 184 */ DirectoryEntryReference kernel; // The kernel. For convenient access by the bootloader.
/* 200 */ DirectoryEntryReference root; // The root directory.
/* 216 */ uint8_t _unused1[8192 - 216]; // Unused.
} Superblock;
// Returns how many bytes (1-8) an extent field needs to hold `magnitude`.
// An n-byte field is used for magnitudes below 2^(8n - 1), i.e. the top bit of the
// field is kept clear of the magnitude so that it can carry the sign of the start offset.
static uint8_t EsfsExtentFieldBytes(uint64_t magnitude) {
	uint8_t bytes = 1;

	while (bytes < 8 && magnitude >= ((uint64_t) 1 << (8 * bytes - 1))) {
		bytes++;
	}

	return bytes;
}

// Encodes one extent into `encode` using the extent list format described in AttributeData:
//   byte 0:  (startBytes - 1) in bits 0-2, (countBytes - 1) in bits 3-5;
//   then:    the start of the extent relative to previousExtentStart, two's complement, big endian;
//   then:    the number of blocks in the extent, big endian.
//
// extentStart:         first block of the extent.
// previousExtentStart: first block of the previous extent in the list (0 for the first extent).
// extentCount:         number of blocks in the extent.
// encode:              output buffer; at most 17 bytes (1 + 8 + 8) are written.
//
// Returns the number of bytes written.
uint64_t EncodeExtent(uint64_t extentStart, uint64_t previousExtentStart, uint64_t extentCount, uint8_t *encode) {
	// Work on the two's complement bit pattern of the difference in unsigned arithmetic.
	// This avoids negating INT64_MIN and right-shifting a negative signed value,
	// neither of which has portable behaviour.
	uint64_t relativeBits = extentStart - previousExtentStart;
	bool negative = (relativeBits >> 63) != 0;
	uint64_t magnitude = negative ? (uint64_t) 0 - relativeBits : relativeBits;

	uint8_t startBytes = EsfsExtentFieldBytes(magnitude);
	uint8_t countBytes = EsfsExtentFieldBytes(extentCount);

	uint64_t position = 0;
	encode[position++] = (uint8_t) ((startBytes - 1) + ((countBytes - 1) << 3));

	// Both fields are stored most significant byte first.
	for (int i = 0; i < startBytes; i++, position++) {
		encode[position] = (uint8_t) (relativeBits >> ((startBytes - 1 - i) * 8));
	}

	for (int i = 0; i < countBytes; i++, position++) {
		encode[position] = (uint8_t) (extentCount >> ((countBytes - 1 - i) * 8));
	}

	return position;
}
// Decodes the extent at extents[*position] (see the format described in AttributeData).
//
// previousExtentStart: in: first block of the previous extent (0 before the first extent);
//                      out: first block of the decoded extent. Only updated on success.
// extentCount:         out: number of blocks in the extent. Reset to 0 on entry, and may hold
//                      a partial value if decoding fails.
// extents:             the encoded extent list.
// position:            in/out: byte offset into `extents`; advanced past every byte consumed,
//                      including on failure.
// end:                 size of the extent list in bytes; no byte at or beyond this offset is read.
//
// Returns false if the list ends (or *position already lies beyond `end`) before a complete
// extent has been read.
bool DecodeExtent(uint64_t *previousExtentStart, uint64_t *extentCount, uint8_t *extents, uint64_t *position, uint64_t end) {
	uint64_t extentStart = 0;
	*extentCount = 0;

	// `>=` rather than `==`: a position that has already overshot `end` must not be read either.
	if (*position >= end) return false;
	uint8_t header = extents[*position];
	*position = *position + 1;

	uint8_t startBytes = (uint8_t) (((header >> 0) & 7) + 1);
	uint8_t countBytes = (uint8_t) (((header >> 3) & 7) + 1);

	// The start offset is a big endian two's complement number; its sign is the top bit of the first byte.
	bool negative = false;

	for (uint8_t i = 0; i < startBytes; i++) {
		if (*position >= end) return false;
		uint8_t byte = extents[*position];
		if (!i) negative = (byte & 0x80) != 0;
		extentStart = (extentStart << 8) | byte;
		*position = *position + 1;
	}

	// The block count is an unsigned big endian number.
	for (uint8_t i = 0; i < countBytes; i++) {
		if (*position >= end) return false;
		*extentCount = (*extentCount << 8) | extents[*position];
		*position = *position + 1;
	}

	if (negative) {
		// Sign extend to 64 bits, so that the unsigned addition below subtracts the magnitude.
		for (uint64_t i = startBytes; i < sizeof(uint64_t); i++) {
			extentStart |= (uint64_t) 0xFF << (i * 8);
		}
	}

	*previousExtentStart = *previousExtentStart + extentStart;
	return true;
}
// Everything from here on is compiled only when KERNEL is not defined.
#ifndef KERNEL
// State of the volume being worked on; shared by all functions below.
uint64_t blockSize; // Set by MountVolume to superblock.blockSize.
Superblock superblock; // In-memory copy of the superblock; read by MountVolume and written back by UnmountVolume.
GroupDescriptor *groupDescriptorTable; // Allocated and loaded by MountVolume; indexed by group number.
uint64_t copiedCount; // Running total of file bytes loaded by Import.
// Block device access; declared here and defined outside the visible part of this file.
// NOTE(review): presumably `block` and `count` are in units of blocks and `offset`/`count` of
// WriteBytes in bytes, with a true return meaning success -- confirm against the definitions.
bool ReadBlock(uint64_t block, uint64_t count, void *buffer);
bool WriteBlock(uint64_t block, uint64_t count, void *buffer);
bool WriteBytes(uint64_t offset, uint64_t count, void *buffer);
// Loads the DirectoryEntry that `reference` points to into *entry.
//
// Returns false, leaving *entry untouched, if the reference does not leave room for a whole
// DirectoryEntry inside its block, or if the block cannot be read.
bool ReadDirectoryEntryReference(DirectoryEntryReference reference, DirectoryEntry *entry) {
	// Reject references that would make the copy below run past the end of the block buffer.
	if (superblock.blockSize < sizeof(DirectoryEntry)
			|| reference.offsetIntoBlock > superblock.blockSize - sizeof(DirectoryEntry)) {
		return false;
	}

	uint8_t buffer[superblock.blockSize];

	if (!ReadBlock(reference.block, 1, buffer)) {
		return false;
	}

	memcpy(entry, buffer + reference.offsetIntoBlock, sizeof(DirectoryEntry));
	return true;
}
// Stores *entry at the location `reference` points to, preserving the rest of the block.
//
// The entry's checksum field is recomputed first (over the whole DirectoryEntry with the
// checksum field zeroed), so *entry is modified even if the write subsequently fails.
//
// Returns false if the reference does not leave room for a whole DirectoryEntry inside its
// block, or if reading or writing the block fails.
bool WriteDirectoryEntryReference(DirectoryEntryReference reference, DirectoryEntry *entry) {
	entry->checksum = 0;
	entry->checksum = CalculateCRC32(entry, sizeof(DirectoryEntry), 0);

	// Reject references that would make the copy below run past the end of the block buffer.
	if (superblock.blockSize < sizeof(DirectoryEntry)
			|| reference.offsetIntoBlock > superblock.blockSize - sizeof(DirectoryEntry)) {
		return false;
	}

	uint8_t buffer[superblock.blockSize];

	// Read-modify-write: the block holds other data besides this entry.
	if (!ReadBlock(reference.block, 1, buffer)) {
		return false;
	}

	memcpy(buffer + reference.offsetIntoBlock, entry, sizeof(DirectoryEntry));
	return WriteBlock(reference.block, 1, buffer);
}
// Searches the attribute list of `entry` for the first attribute of the given type.
//
// At most entry->attributeCount attributes are inspected, and the walk never leaves the
// DirectoryEntry; a malformed list (an attribute too small to advance past, or one that
// extends beyond the entry) ends the search.
//
// Returns the attribute, or NULL (after logging a message) if there is none.
// Callers must check the result before dereferencing it.
Attribute *FindAttribute(DirectoryEntry *entry, uint16_t type) {
	uint8_t *cursor = (uint8_t *) entry + entry->attributeOffset;
	uint8_t *end = (uint8_t *) (entry + 1);

	for (unsigned i = 0; i < entry->attributeCount; i++) {
		// There must be room for at least an attribute header.
		if (cursor > end || (size_t) (end - cursor) < sizeof(Attribute)) break;

		Attribute *attribute = (Attribute *) cursor;
		if (attribute->type == type) return attribute;

		// A size smaller than the header cannot be stepped over; one larger than the
		// remaining space would leave the entry.
		if (attribute->size < sizeof(Attribute) || attribute->size > (size_t) (end - cursor)) break;
		cursor += attribute->size;
	}

	Log("Could not find attribute %d.\n", type);
	return NULL;
}
// Produces a new identifier in *u.
//
// random: if true, every byte is taken from rand();
//         if false, the identifier is superblock.nextIdentifier, which is then advanced by one
//         so that the next call yields a different identifier.
void GenerateUniqueIdentifier(EsUniqueIdentifier *u, bool random) {
	if (random) {
		for (int i = 0; i < 16; i++) {
			u->d[i] = (uint8_t) rand();
		}

		return;
	}

	*u = superblock.nextIdentifier;

	// Increment the 128-bit counter, least significant byte first.
	// A byte that wraps around to zero carries into the next one.
	for (int i = 0; i < 16; i++) {
		superblock.nextIdentifier.d[i]++;
		if (superblock.nextIdentifier.d[i]) break;
	}
}
// Inserts a key with value `newKey` into `vertex`, keeping the keys sorted by value.
//
// The vertex must not be full (asserted). The keys from the insertion position onwards,
// including the trailing +1 key, are shifted up by one; the inserted slot keeps the `child`
// and `data` of the key it displaced, so the caller is expected to fill in `data`.
// The vertex's key count and checksum are updated; the vertex is not written to disk.
//
// Returns a pointer to the inserted key.
IndexKey *InsertKeyIntoVertex(uint64_t newKey, IndexVertex *vertex) {
	// Find where in this vertex we should insert the key: before the first key greater than it.
	int position;

	for (position = 0; position < vertex->count; position++) {
		if (newKey < ESFS_VERTEX_KEY(vertex, position)->value) {
			break;
		}
	}

	// Insert the key.
	assert(vertex->count != vertex->maxCount);
	IndexKey *insertionPosition = ESFS_VERTEX_KEY(vertex, position);
	// count + 1 - position: the +1 key beyond the last ordinary key moves as well.
	memmove(insertionPosition + 1, insertionPosition,
			(vertex->count + 1 - position) * sizeof(IndexKey));
	insertionPosition->value = newKey;
	vertex->count++;

	// Update the checksum.
	vertex->checksum = 0;
	vertex->checksum = CalculateCRC32(vertex, superblock.blockSize, 0);

	return insertionPosition;
}
// Allocates a run of contiguous free blocks from a single block group.
//
// increaseBlocks: the number of blocks wanted.
// extentStart:    out: the first block of the allocated run, as a volume-wide block number.
// extentCount:    out: the number of blocks allocated; never more than increaseBlocks, and
//                 possibly fewer, in which case the caller calls again for the remainder
//                 (see the loop in ResizeNode).
//
// Returns false if the group's block bitmap cannot be read or written.
// Updates the group descriptor in memory (the table itself is written by UnmountVolume)
// and superblock.blocksUsed.
//
// NOTE(review): in this copy of the file the closing braces and some single-line statements
// appear to be missing (e.g. nothing visible stops execution after "Out of space.", and the
// scan loops show no increment on the "bit set" branch) -- compare with the upstream source.
bool AllocateExtent(uint64_t increaseBlocks, uint64_t *extentStart, uint64_t *extentCount) {
// Log("used %ld/%ld, need %ld more\n", superblock.blocksUsed, superblock.blockCount, increaseBlocks);
// Find a group to allocate the next extent from.
// Three passes, each only run if the previous one found nothing:
// 1. a group whose largest free run can hold the whole request;
GroupDescriptor *target = NULL;
for (uint64_t i = 0; !target && i < superblock.groupCount; i++) {
GroupDescriptor *group = groupDescriptorTable + i;
// A group that has never been used has everything free except its own bitmap blocks.
if (!group->blocksUsed) group->largestExtent = superblock.blocksPerGroup - superblock.blocksPerGroupBlockBitmap;
if (group->largestExtent >= increaseBlocks) target = group;
// 2. a group with enough free blocks in total, even if not contiguous;
for (uint64_t i = 0; !target && i < superblock.groupCount; i++) {
GroupDescriptor *group = groupDescriptorTable + i;
if (superblock.blocksPerGroup - group->blocksUsed >= increaseBlocks) target = group;
// 3. any group that is not completely full.
for (uint64_t i = 0; !target && i < superblock.groupCount; i++) {
GroupDescriptor *group = groupDescriptorTable + i;
if (superblock.blocksPerGroup != group->blocksUsed) target = group;
if (!target) {
Log("Out of space.\n");
// Load the bitmap, find the largest extent, and mark it as in use.
// One bit per block in the group; bit (i % 8) of byte (i / 8) is set if block i is used.
uint8_t bitmap[superblock.blocksPerGroupBlockBitmap * superblock.blockSize];
if (target->blockBitmap) {
if (!ReadBlock(target->blockBitmap, superblock.blocksPerGroupBlockBitmap, bitmap)) {
return false;
} else {
// The group has no bitmap yet: start from an empty one, mark the blocks the bitmap
// itself occupies as used, and place the bitmap at the first block of the group.
memset(bitmap, 0, superblock.blocksPerGroupBlockBitmap * superblock.blockSize);
for (uint64_t i = 0; i < superblock.blocksPerGroupBlockBitmap; i++) bitmap[i / 8] |= 1 << (i % 8);
target->blockBitmap = superblock.blocksPerGroup * (target - groupDescriptorTable);
target->blocksUsed = superblock.blocksPerGroupBlockBitmap;
// Scan the bitmap for the longest run of clear bits.
uint64_t largestExtentStart = 0, largestExtentCount = 0, i = 0;
while (i < superblock.blocksPerGroup) {
if (bitmap[i / 8] & (1 << (i % 8))) {
} else {
uint64_t start = i, count = 0;
while (i < superblock.blocksPerGroup) {
if (bitmap[i / 8] & (1 << (i % 8))) break;
else count++, i++;
if (largestExtentCount < count) {
largestExtentStart = start;
largestExtentCount = count;
// The descriptor's cached value must agree with what the bitmap says.
assert(largestExtentCount == target->largestExtent);
*extentStart = largestExtentStart;
*extentCount = largestExtentCount;
// Take only as much of the run as was asked for.
if (*extentCount > increaseBlocks) {
*extentCount = increaseBlocks;
// Mark the allocated blocks as used. At this point *extentStart is still relative to the group.
for (uint64_t i = *extentStart; i < *extentStart + *extentCount; i++) {
bitmap[i / 8] |= 1 << (i % 8);
// Rescan to find the largest free run that remains after the allocation.
uint64_t largestExtentCount = 0, i = 0;
while (i < superblock.blocksPerGroup) {
if (bitmap[i / 8] & (1 << (i % 8))) {
} else {
uint64_t count = 0;
while (i < superblock.blocksPerGroup) {
if (bitmap[i / 8] & (1 << (i % 8))) break;
else count++, i++;
if (largestExtentCount < count) {
largestExtentCount = count;
// Update the descriptor and its checksums, then write the bitmap back.
target->blocksUsed += *extentCount;
target->largestExtent = largestExtentCount;
assert(superblock.blocksPerGroup == target->blocksUsed || target->largestExtent);
target->bitmapChecksum = CalculateCRC32(bitmap, sizeof(bitmap), 0);
target->checksum = 0;
target->checksum = CalculateCRC32(target, sizeof(GroupDescriptor), 0);
if (!WriteBlock(target->blockBitmap, superblock.blocksPerGroupBlockBitmap, bitmap)) {
return false;
// Convert the group-relative start into a volume-wide block number.
*extentStart = *extentStart + (target - groupDescriptorTable) * superblock.blocksPerGroup;
superblock.blocksUsed += *extentCount;
// Log("allocate extent: %ld -> %ld (for %ld)\n", extentStart, extentStart + extentCount, increaseBlocks);
return true;
// Reads from or writes to the contents of a node.
//
// node:           the node's directory entry (its data attribute is used to locate the contents).
// buffer:         destination when reading, source when writing.
// offsetIntoFile: byte offset into the node's contents.
// totalCount:     number of bytes to transfer; 0 succeeds immediately.
// reference:      optional out: the block and offset within it of the most recent transfer.
//                 AddNode uses this to learn where a directory entry it wrote ended up.
// read:           true to read, false to write.
//
// Returns false if a block read or write fails.
//
// With direct indirection the bytes are copied to or from the attribute itself, so a write
// only changes *node in memory. With L1 indirection the transfer is split at block
// boundaries and each block is located by walking the extent list.
// No range checking against the file size or the attribute size is visible here.
//
// NOTE(review): this copy of the file appears to be missing closing braces and the `next:`
// label targeted by `goto next` below (presumably just before the per-block setup) -- confirm
// against the upstream source.
bool AccessNode(DirectoryEntry *node, void *buffer, uint64_t offsetIntoFile, uint64_t totalCount, DirectoryEntryReference *reference, bool read) {
if (!totalCount) return true;
AttributeData *dataAttribute = (AttributeData *) FindAttribute(node, ESFS_ATTRIBUTE_DATA);
if (dataAttribute->indirection == ESFS_INDIRECTION_DIRECT && read) {
memcpy(buffer, dataAttribute->data + offsetIntoFile, totalCount);
return true;
} else if (dataAttribute->indirection == ESFS_INDIRECTION_DIRECT && !read) {
memcpy(dataAttribute->data + offsetIntoFile, buffer, totalCount);
return true;
assert(dataAttribute->indirection == ESFS_INDIRECTION_L1);
int decoded = -1;
// Log("\twrite %ld bytes at %ld\n", totalCount, offsetIntoFile);
// Work out which block of the file is touched, and how many bytes of it.
uint64_t block = offsetIntoFile / superblock.blockSize;
uint64_t offset = offsetIntoFile % superblock.blockSize;
uint64_t count = superblock.blockSize - offset;
if (totalCount < count) count = totalCount;
// Log("block: %ld, offset: %ld, count: %ld\n", block, offset, count);
// Find the extent.
// blockInFile counts the file blocks covered by the extents decoded so far; once the wanted
// block falls inside an extent, `block` is replaced by the corresponding block on the volume.
uint8_t *extents = ((uint8_t *) dataAttribute + dataAttribute->dataOffset);
uint64_t position = 0, blockInFile = 0, extentStart = 0;
assert(dataAttribute->count || !node->fileSize);
bool found = false;
for (uint64_t i = 0; i < dataAttribute->count; i++) {
uint64_t extentCount = 0;
// The return value of DecodeExtent is not checked.
DecodeExtent(&extentStart, &extentCount, extents, &position, dataAttribute->size - dataAttribute->dataOffset);
if (decoded < (int) i) {
decoded = i;
if (blockInFile + extentCount > block) {
uint64_t offsetIntoExtent = block - blockInFile;
block = extentStart + offsetIntoExtent;
found = true;
blockInFile += extentCount;
uint8_t blockBuffer[superblock.blockSize];
// A partial-block write must preserve the rest of the block, so the block is read first.
if (read || count != superblock.blockSize) {
if (!ReadBlock(block, 1, blockBuffer)) {
return false;
if (read) {
memcpy(buffer, blockBuffer + offset, count);
} else {
memcpy(blockBuffer + offset, buffer, count);
if (!WriteBlock(block, 1, blockBuffer)) {
return false;
if (reference) {
reference->block = block;
reference->offsetIntoBlock = offset;
// Advance to the next block if there is more to transfer.
totalCount -= count;
if (totalCount) {
offsetIntoFile += count;
buffer = (uint8_t *) buffer + count;
goto next;
return true;
// Grows the contents of a node to newSize bytes. Shrinking is not supported (asserted).
//
// entry:   the node's directory entry; modified in memory only, the caller writes it back.
// newSize: the new size in bytes; must be at least entry->fileSize.
//
// A file whose new size is smaller than the space available in its data attribute is stored
// directly in the attribute. Otherwise the node uses an extent list: enough blocks are
// allocated to cover the new size and appended to the list, extending the last extent in
// place when the new blocks happen to follow it directly.
//
// Returns false if block allocation fails.
//
// NOTE(review): this copy of the file appears to be missing closing braces and some statements
// (nothing visible stops execution after the "Unimplemented" messages, and no update of
// dataAttribute->count is visible after an extent is appended) -- confirm against the upstream source.
bool ResizeNode(DirectoryEntry *entry, uint64_t newSize) {
assert(newSize >= entry->fileSize);
AttributeData *dataAttribute = (AttributeData *) FindAttribute(entry, ESFS_ATTRIBUTE_DATA);
// Small files are stored inline. Directories always use an extent list.
if (newSize < (uint64_t) (dataAttribute->size - dataAttribute->dataOffset) && entry->nodeType == ESFS_NODE_TYPE_FILE) {
dataAttribute->indirection = ESFS_INDIRECTION_DIRECT;
dataAttribute->count = entry->fileSize = newSize;
return true;
// Log("\tresize to %lu\n", newSize);
dataAttribute->indirection = ESFS_INDIRECTION_L1;
uint64_t oldSize = entry->fileSize;
// Sizes rounded up to whole blocks.
uint64_t oldBlocks = (oldSize + superblock.blockSize - 1) / superblock.blockSize;
uint64_t newBlocks = (newSize + superblock.blockSize - 1) / superblock.blockSize;
entry->fileSize = newSize;
if (oldBlocks == newBlocks) {
// Do nothing.
} else if (oldBlocks < newBlocks) {
uint64_t increaseBlocks = newBlocks - oldBlocks;
// Walk the existing extent list to its end, remembering the last extent (start and count),
// the start of the extent before it, and where the last extent's encoding begins,
// so that the last extent can be re-encoded if it is extended.
uint64_t previousExtentStart = 0, previousExtentCount = 0, previousExtentStart2 = 0;
uint8_t *extents = ((uint8_t *) dataAttribute + dataAttribute->dataOffset);
uint64_t offsetIntoExtentList = 0;
uint64_t previousOffsetIntoExtentList = 0;
for (uint64_t i = 0; i < dataAttribute->count; i++) {
previousOffsetIntoExtentList = offsetIntoExtentList;
previousExtentStart2 = previousExtentStart;
DecodeExtent(&previousExtentStart, &previousExtentCount, extents, &offsetIntoExtentList, dataAttribute->size - dataAttribute->dataOffset);
// Log("%ld/%ld/%ld\n", previousExtentStart, extentCount, offsetIntoExtentList);
assert(previousExtentStart < superblock.blockCount);
// Allocate until the request is satisfied; each allocation may return fewer blocks than asked for.
while (increaseBlocks) {
uint64_t extentStart, extentCount, encodedLength;
uint8_t encode[32];
if (!AllocateExtent(increaseBlocks, &extentStart, &extentCount)) {
return false;
if (extentStart == previousExtentStart + previousExtentCount) {
// The new blocks directly follow the last extent: overwrite its encoding with a longer extent.
offsetIntoExtentList = previousOffsetIntoExtentList;
encodedLength = EncodeExtent(previousExtentStart, previousExtentStart2, extentCount + previousExtentCount, encode);
} else {
encodedLength = EncodeExtent(extentStart, previousExtentStart, extentCount, encode);
// Log("\t@%ld alloc %ld, %ld\n", offsetIntoExtentList, extentStart, extentCount);
// Log("%ld vs %ld\n", offsetIntoExtentList + encodedLength, (uint64_t) (dataAttribute->size - dataAttribute->dataOffset));
// The extent list must fit in the attribute; there is no further level of indirection.
if (offsetIntoExtentList + encodedLength > (uint64_t) (dataAttribute->size - dataAttribute->dataOffset)) {
Log("Unimplemented - indirection past L1.\n");
memcpy(extents + offsetIntoExtentList, encode, encodedLength);
offsetIntoExtentList += encodedLength;
increaseBlocks -= extentCount;
previousExtentStart = extentStart;
} else {
Log("Unimplemented - node truncation.\n");
return true;
// Disabled debugging helper: recursively logs the index tree rooted at `block` and reports
// ordering violations (a key not greater than its predecessor, or not below the bound
// inherited from the parent key).
// NOTE(review): the default arguments are not valid C, so this only compiles as C++; the
// matching #endif and the closing braces are not visible in this copy of the file.
#if 0
void PrintTree(uint64_t block, int indent = 2, uint64_t lowerThan = -1) {
if (!block) return;
uint8_t buffer[superblock.blockSize];
ReadBlock(block, 1, buffer);
IndexVertex *node = (IndexVertex *) buffer;
const char *spaces = "| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ";
Log("%.*snode with %d+1 keys [%p]\n", indent, spaces, node->count, node);
// <= : the +1 key is visited as well, for its child.
for (uint64_t i = 0; i <= node->count; i++) {
if (i && i != node->count && node->keys[i].value <= node->keys[i - 1].value) {
Log("%.*s %lu VIOLATION [1]\n", indent, spaces, i);
uint64_t next = lowerThan;
if (i == node->count) {
Log("%.*s %lu last key\n", indent, spaces, i);
} else {
if (node->keys[i].value >= lowerThan) {
Log("%.*s %lu VIOLATION [2]\n", indent, spaces, i);
Log("%.*s %lu key = %lu\n", indent, spaces, i, node->keys[i].value);
next = node->keys[i].value;
PrintTree(node->keys[i].child, indent + 4, next);
// Initialises *entry as a new node called `name` with the given type and parent.
//
// The entry receives the next sequential identifier and an attribute list laid out as
// directory attribute, data attribute, filename attribute; the data attribute is sized to
// take up all the space the other two leave, so the list fills the entry exactly (asserted).
// Finally the entry's checksum is computed.
//
// Fields that are not assigned here keep whatever *entry contained; the caller in AddNode
// passes a zero-initialised entry.
//
// NOTE(review): in this copy of the file no assignments to the attributes' `type` fields,
// to data->indirection or to entry->attributeCount are visible, and the directory attribute
// is set up regardless of nodeType -- lines may be missing; confirm against the upstream source.
void NewDirectoryEntry(DirectoryEntry *entry, uint8_t nodeType, EsUniqueIdentifier parentUID, const char *name) {
memcpy(entry->signature, ESFS_DIRECTORY_ENTRY_SIGNATURE, 8);
GenerateUniqueIdentifier(&entry->identifier, false);
entry->attributeOffset = ESFS_ATTRIBUTE_OFFSET;
entry->nodeType = nodeType;
entry->parent = parentUID;
// `position` walks through the attribute area as each attribute is laid out.
uint8_t *position = (uint8_t *) entry + entry->attributeOffset;
size_t newFilenameSize = ((strlen(name) + ESFS_FILENAME_HEADER_SIZE - 1) & ~7) + 8; // Size of name + size of header, rounded up to the nearest 8 bytes.
AttributeDirectory *directory = (AttributeDirectory *) position;
directory->size = sizeof(AttributeDirectory);
directory->indexRootBlock = 0;
directory->totalSize = 0;
position += directory->size;
AttributeData *data = (AttributeData *) position;
// Everything between the current position and the filename attribute at the end of the entry.
data->size = sizeof(DirectoryEntry) - newFilenameSize - (position - (uint8_t *) entry);
data->dataOffset = ESFS_DATA_OFFSET;
position += data->size;
AttributeFilename *filename = (AttributeFilename *) position;
filename->size = newFilenameSize;
filename->length = strlen(name);
memcpy(filename->filename, name, filename->length);
position += filename->size;
assert(position == (uint8_t *) (entry + 1));
entry->checksum = 0;
entry->checksum = CalculateCRC32(entry, sizeof(DirectoryEntry), 0);
// Creates a new node called `name` inside the directory at directoryReference.
//
// name:               name of the new node; its CRC-64 is the node's key in the directory index.
// nodeType:           ESFS_NODE_TYPE_FILE or ESFS_NODE_TYPE_DIRECTORY.
// outputEntry:        optional out: the new node's directory entry.
// outputReference:    optional out: where the new entry was written.
// directoryReference: the directory to add the node to.
//
// Returns false if any block read, write or allocation fails.
//
// Steps: (1) grow the directory by one block whenever its current blocks are full of entries;
// (2) write the new directory entry into the directory's contents; (3) insert the name's hash
// into the directory's index tree, splitting full vertices upwards and creating a new root
// when the old root splits; finally write the updated directory entry back.
//
// NOTE(review): this copy of the file appears to be missing closing braces, the `next:` label
// targeted by `goto next`, and some statements (no increment of directoryAttribute->childNodes
// or of `found` is visible, and nothing visible stops execution after "The file already
// exists.") -- confirm against the upstream source.
bool AddNode(const char *name, uint8_t nodeType, DirectoryEntry *outputEntry, DirectoryEntryReference *outputReference,
DirectoryEntryReference directoryReference) {
// Log("add %s to %s\n", name, path);
// Step 1: Resize the directory so that it can fit another directory entry.
DirectoryEntry directory;
if (!ReadDirectoryEntryReference(directoryReference, &directory)) return false;
AttributeData *dataAttribute = (AttributeData *) FindAttribute(&directory, ESFS_ATTRIBUTE_DATA);
AttributeDirectory *directoryAttribute = (AttributeDirectory *) FindAttribute(&directory, ESFS_ATTRIBUTE_DIRECTORY);
assert(dataAttribute->indirection == ESFS_INDIRECTION_L1);
// A new block is needed whenever the existing child count exactly fills the directory's blocks.
if (!(directoryAttribute->childNodes % superblock.directoryEntriesPerBlock)) {
// Log("increasing directory to fit %ld entries\n========={\n", (directory.fileSize + superblock.blockSize) / sizeof(DirectoryEntry));
if (!ResizeNode(&directory, directory.fileSize + superblock.blockSize)) return false;
// Log("========}\n");
if (!WriteDirectoryEntryReference(directoryReference, &directory)) {
return false;
// Step 2: Create the directory entry, and write it to the directory.
DirectoryEntryReference reference = {};
DirectoryEntry entry = {};
NewDirectoryEntry(&entry, nodeType, directory.identifier, name);
// Log("\tchild nodes: %ld\n", directoryAttribute->childNodes);
// AccessNode reports through `reference` where on the volume the entry was written.
if (!AccessNode(&directory, &entry, (directoryAttribute->childNodes - 1) * sizeof(DirectoryEntry), sizeof(DirectoryEntry), &reference, false)) {
return false;
// Step 3: Add the node into the index.
uint64_t newKey = CalculateCRC64(name, strlen(name), 0);
// Log("adding file '%s' to index...\n", name);
// Find the leaf to insert the key into.
// blocks[] records the path of vertex blocks from the root (blocks[0]) to the current vertex (blocks[depth]).
uint8_t buffer[superblock.blockSize];
memset(buffer, 0, superblock.blockSize);
IndexVertex *vertex = (IndexVertex *) buffer;
uint64_t depth = 0, blocks[ESFS_INDEX_MAX_DEPTH] = { directoryAttribute->indexRootBlock };
if (blocks[0] == 0) {
// Directory is empty - create the root vertex.
uint64_t _unused;
if (!AllocateExtent(1, &directoryAttribute->indexRootBlock, &_unused)) {
return false;
blocks[0] = directoryAttribute->indexRootBlock;
vertex->maxCount = (superblock.blockSize - ESFS_INDEX_KEY_OFFSET) / sizeof(IndexKey) - 1 /* +1 key */;
vertex->offset = ESFS_INDEX_KEY_OFFSET;
memcpy(vertex->signature, ESFS_INDEX_VERTEX_SIGNATURE, 4);
// Log("rootBlock = %ld for %s\n", directoryAttribute->indexRootBlock, path);
} else {
if (!ReadBlock(blocks[0], 1, vertex)) {
return false;
// Log("start = %ld for %s\n", blocks[0], path);
// Descend from the root until a vertex is reached in which the key has no child to follow.
while (true) {
for (int i = 0; i < vertex->count; i++) {
if (ESFS_VERTEX_KEY(vertex, i)->value == newKey) {
// The key is already in the tree.
Log("The file already exists.");
// Follow the child of the first key greater than the new key, or of the +1 key if there is none.
for (int i = 0; i <= vertex->count; i++) {
IndexKey *key = ESFS_VERTEX_KEY(vertex, i);
if ((i == vertex->count || newKey < key->value) && key->child) {
blocks[++depth] = key->child;
if (!ReadBlock(key->child, 1, vertex)) {
return false;
goto next;
// Insert the key into the vertex.
InsertKeyIntoVertex(newKey, vertex)->data = reference;
// While the vertex is full...
assert(vertex->count <= vertex->maxCount);
while (vertex->count == vertex->maxCount) {
// Log("\tsplit!\n");
// _buffer0 holds the new sibling vertex, _buffer1 the parent vertex.
char _buffer0[superblock.blockSize];
char _buffer1[superblock.blockSize];
memset(_buffer0, 0, superblock.blockSize);
memset(_buffer1, 0, superblock.blockSize);
// Create a new sibling.
uint64_t siblingBlock = 0, _unused;
if (!AllocateExtent(1, &siblingBlock, &_unused)) return false;
IndexVertex *sibling = (IndexVertex *) _buffer0;
sibling->maxCount = (superblock.blockSize - ESFS_INDEX_KEY_OFFSET) / sizeof(IndexKey) - 1 /* +1 key */;
sibling->offset = ESFS_INDEX_KEY_OFFSET;
memcpy(sibling->signature, ESFS_INDEX_VERTEX_SIGNATURE, 4);
// Load the parent vertex.
bool newRoot = !depth;
IndexVertex *parent = (IndexVertex *) _buffer1;
if (newRoot) {
// Create a new root block.
// Log("\t(new root)\n");
blocks[1] = blocks[0];
uint64_t _unused;
if (!AllocateExtent(1, &blocks[0], &_unused)) return false;
parent->maxCount = (superblock.blockSize - ESFS_INDEX_KEY_OFFSET) / sizeof(IndexKey) - 1 /* +1 key */;
parent->offset = ESFS_INDEX_KEY_OFFSET;
memcpy(parent->signature, ESFS_INDEX_VERTEX_SIGNATURE, 4);
// The directory attribute points to the new root, and the +1 key of the new root points to the old root.
// It has no other keys yet.
parent->keys[0].child = blocks[1];
directoryAttribute->indexRootBlock = blocks[0];
} else {
if (!ReadBlock(blocks[depth - 1], 1, parent)) {
return false;
IndexKey *parentKeys = (IndexKey *) ((uint8_t *) parent + parent->offset);
IndexKey *vertexKeys = (IndexKey *) ((uint8_t *) vertex + vertex->offset);
IndexKey *siblingKeys = (IndexKey *) ((uint8_t *) sibling + sibling->offset);
// Change the link to this vertex to point to the sibling.
int found = 0;
for (uint64_t i = 0; i <= parent->count; i++) {
if (parentKeys[i].child == blocks[depth]) {
parentKeys[i].child = siblingBlock;
assert(found == 1);
// Move the median key to the parent.
// If this makes the parent full we'll fix it next iteration.
uint64_t median = (vertex->maxCount - 1) / 2;
uint64_t newKey = vertexKeys[median].value;
for (uint64_t i = 0; i <= parent->count; i++) {
if (i == parent->count || newKey < parentKeys[i].value) {
memmove(parentKeys + i + 1, parentKeys + i, (++parent->count - i) * sizeof(IndexKey));
parentKeys[i].value = newKey;
parentKeys[i].data = vertexKeys[median].data;
parentKeys[i].child = blocks[depth];
// Move all keys above the median key to the new sibling.
sibling->count = vertex->count - median /*Kept in the node*/ - 1 /*Added to the parent*/;
vertex->count = median; // The data on the median key becomes the +1 key's data.
memcpy(siblingKeys, vertexKeys + median + 1, (sibling->count + 1) * sizeof(IndexKey));
// Write the blocks.
sibling->checksum = 0; sibling->checksum = CalculateCRC32(sibling, superblock.blockSize, 0);
vertex->checksum = 0; vertex->checksum = CalculateCRC32(vertex, superblock.blockSize, 0);
if (!WriteBlock(siblingBlock, 1, sibling)) return false;
if (!WriteBlock(blocks[depth], 1, vertex)) return false;
// Check if the parent vertex is full.
// The parent becomes the current vertex for the next iteration of the split loop.
memcpy(vertex, parent, superblock.blockSize);
// Write the block.
vertex->checksum = 0; vertex->checksum = CalculateCRC32(vertex, superblock.blockSize, 0);
if (!WriteBlock(blocks[depth], 1, vertex)) return false;
if (outputEntry) *outputEntry = entry;
if (outputReference) *outputReference = reference;
// PrintTree(directoryAttribute->indexRootBlock);
// Persist the changes made to the directory's attributes above.
if (!WriteDirectoryEntryReference(directoryReference, &directory)) {
return false;
return true;
// Prepares the volume for use: loads the superblock, marks the volume as mounted on disk,
// and loads the group descriptor table into groupDescriptorTable.
//
// Returns false if
//   - the superblock cannot be read or written back,
//   - the volume is still marked as mounted (it was not unmounted cleanly),
//   - the superblock reports a block size of zero,
//   - the group descriptor table cannot be allocated or read.
// On failure groupDescriptorTable is not left pointing at an allocation made here.
// Note that once the mounted flag has been written, a later failure leaves it set on disk.
bool MountVolume() {
	// Read the superblock.
	if (!ReadBlock(1, 1, &superblock)) {
		return false;
	}

	if (superblock.mounted) {
		Log("EsFS: Volume not unmounted, exiting...\n");
		return false;
	}

	// The block size is used as a divisor below; refuse a corrupt superblock before changing anything on disk.
	if (!superblock.blockSize) {
		return false;
	}

	superblock.mounted = 1;

	if (!WriteBlock(1, 1, &superblock)) {
		return false;
	}

	blockSize = superblock.blockSize;

	// Read the group descriptor table.
	// The table is read in whole blocks, so the buffer is padded to cover the final partial block.
	uint64_t tableBytes = superblock.groupCount * sizeof(GroupDescriptor);
	uint64_t tableBlocks = (tableBytes + superblock.blockSize - 1) / superblock.blockSize;

	groupDescriptorTable = (GroupDescriptor *) malloc(tableBytes + superblock.blockSize - 1);

	if (!groupDescriptorTable) {
		return false;
	}

	if (!ReadBlock(superblock.gdtFirstBlock, tableBlocks, groupDescriptorTable)) {
		free(groupDescriptorTable);
		groupDescriptorTable = NULL;
		return false;
	}

	return true;
}
// Flushes the in-memory group descriptor table to the volume, then clears the mounted flag
// and writes the superblock back with a fresh checksum.
// The results of the two writes are not checked.
void UnmountVolume() {
	// The table is written in whole blocks: round its size in bytes up to a block count.
	uint64_t tableBlocks = (superblock.groupCount * sizeof(GroupDescriptor) + superblock.blockSize - 1)
		/ superblock.blockSize;
	WriteBlock(superblock.gdtFirstBlock, tableBlocks, groupDescriptorTable);

	// The checksum is computed over the superblock with its checksum field zeroed.
	superblock.mounted = 0;
	superblock.checksum = 0;
	superblock.checksum = CalculateCRC32(&superblock, sizeof(Superblock), 0);

	WriteBlock(1, 1, &superblock);
}
// Looks up the child called cName in the directory at directoryReference by scanning the
// directory's entries in order and comparing filenames (the index tree is not used).
//
// cName:              the name to look for (zero-terminated).
// node:               out: the matching directory entry on success. On failure it may have
//                     been overwritten with other entries of the directory.
// directoryReference: the directory to search.
//
// Returns true if a child with that name was found; false if not (a message is logged),
// or if the directory could not be read.
bool FindNode(const char *cName, DirectoryEntry *node, DirectoryEntryReference directoryReference) {
	DirectoryEntry directory;

	if (!ReadDirectoryEntryReference(directoryReference, &directory)) {
		return false;
	}

	AttributeDirectory *directoryAttribute = (AttributeDirectory *) FindAttribute(&directory, ESFS_ATTRIBUTE_DIRECTORY);

	if (!directoryAttribute) {
		return false;
	}

	// The length of the wanted name does not change; compute it once rather than per child.
	size_t nameLength = strlen(cName);

	for (uint64_t i = 0; i < directoryAttribute->childNodes; i++) {
		if (!AccessNode(&directory, node, sizeof(DirectoryEntry) * i, sizeof(DirectoryEntry), NULL, true)) {
			return false;
		}

		AttributeFilename *filename = (AttributeFilename *) FindAttribute(node, ESFS_ATTRIBUTE_FILENAME);

		// Stored names are not zero-terminated, so compare lengths first and then the bytes.
		if (filename && filename->length == nameLength && 0 == memcmp(filename->filename, cName, nameLength)) {
			return true;
		}
	}

	Log("Could not find '%s'.\n", cName);
	return false;
}
#if 0
// Disabled helper: looks up `target` in parentDirectory and writes the file's contents to stdout.
// Does nothing if the node cannot be found, is empty, or cannot be read.
void Read(char *target, DirectoryEntryReference parentDirectory) {
	DirectoryEntryReference outputReference;
	DirectoryEntry entry;

	if (!FindNode(target, &entry, parentDirectory)) return;
	if (!entry.fileSize) return; // Nothing to print; also avoids malloc(0).

	char *data = (char *) malloc(entry.fileSize);

	if (!data) {
		Log("Could not allocate memory to read '%s'.\n", target);
		return;
	}

	// Only print the buffer if it was actually filled.
	if (AccessNode(&entry, data, 0, entry.fileSize, &outputReference, true)) {
		fwrite(data, 1, entry.fileSize, stdout);
	}

	free(data);
}
#endif
// One node in the tree of items that Import copies onto the volume.
typedef struct ImportNode {
	const char *name;            // Name passed to AddNode for the created node.
	const char *path;            // Path passed to LoadFile when the node is a file.
	struct ImportNode *children; // Child nodes; Import takes the count with arrlenu.
	bool isFile;                 // true: imported as a file; false: imported as a directory.
} ImportNode;
// Recursively imports the children of `node` into the directory at parentDirectory.
// File children are loaded with LoadFile and written into a newly added file node;
// a file that cannot be loaded is skipped with a warning. Directory children are
// added, imported recursively, and have their directory attribute's totalSize set
// to the size returned for their contents.
// Each loaded file's length is also added to copiedCount.
// Returns the total number of file bytes imported, or -1 on failure.
int64_t Import(ImportNode node, DirectoryEntryReference parentDirectory) {
	uint64_t totalSize = 0;
	size_t childCount = arrlenu(node.children);

	for (uintptr_t i = 0; i < childCount; i++) {
		ImportNode *child = &node.children[i];

		if (child->isFile) {
			size_t fileLength;
			void *data = LoadFile(child->path, &fileLength);

			if (!data) {
				Log("Warning: Could not read file '%s'!\n", child->path);
				continue;
			}

			copiedCount += fileLength;

			DirectoryEntryReference reference;
			DirectoryEntry entry;

			// Add the node, size it, write the contents, then store the updated entry.
			bool success = AddNode(child->name, ESFS_NODE_TYPE_FILE, &entry, &reference, parentDirectory)
				&& ResizeNode(&entry, fileLength)
				&& AccessNode(&entry, data, 0, fileLength, NULL, false)
				&& WriteDirectoryEntryReference(reference, &entry);

			// Release the file contents on every path.
			// NOTE(review): assumes LoadFile returns a buffer allocated with malloc -- confirm.
			free(data);

			if (!success) {
				return -1;
			}

			totalSize += fileLength;
		} else {
			DirectoryEntryReference reference;

			if (!AddNode(child->name, ESFS_NODE_TYPE_DIRECTORY, NULL, &reference, parentDirectory)) {
				return -1;
			}

			int64_t size = Import(*child, reference);

			if (size == -1) {
				return -1;
			}

			// Re-read the directory entry after the import before updating its total size.
			DirectoryEntry directory;

			if (!ReadDirectoryEntryReference(reference, &directory)) {
				return -1;
			}

			AttributeDirectory *directoryAttribute = (AttributeDirectory *) FindAttribute(&directory, ESFS_ATTRIBUTE_DIRECTORY);

			if (!directoryAttribute) {
				return -1;
			}

			directoryAttribute->totalSize = size;

			if (!WriteDirectoryEntryReference(reference, &directory)) {
				return -1;
			}

			totalSize += size;
		}
	}

	return (int64_t) totalSize;
}
// Formats the volume and stores the kernel in the kernel core node.
// driveSize: size of the drive in bytes; must be at least ESFS_DRIVE_MINIMUM_SIZE.
// volumeName: NUL-terminated name of at most ESFS_MAXIMUM_VOLUME_NAME_LENGTH bytes.
// osInstallation: identifier copied into the superblock.
// kernel, kernelBytes: contents written into the "Kernel" file node.
// Returns true on success and false on invalid arguments, allocation failure or any
// failed write. MountVolume is called to add the kernel and the volume is not
// unmounted again by this function.
bool Format(uint64_t driveSize, const char *volumeName, EsUniqueIdentifier osInstallation,
		void *kernel, size_t kernelBytes) {
	assert(sizeof(Superblock) == 8192);

	if (driveSize < ESFS_DRIVE_MINIMUM_SIZE) {
		Log("Error: Cannot create a drive of %d bytes (too small).\n", (int) driveSize);
		return false;
	}

	size_t volumeNameBytes = strlen(volumeName);

	if (volumeNameBytes > ESFS_MAXIMUM_VOLUME_NAME_LENGTH) {
		Log("Error: Volume name '%s' is too long; must be <= %d bytes.\n", volumeName, (int) ESFS_MAXIMUM_VOLUME_NAME_LENGTH);
		return false;
	}

	// Format the volume.

	memcpy(superblock.signature, ESFS_SIGNATURE_STRING, 16);
	memcpy(superblock.volumeName, volumeName, volumeNameBytes);

	superblock.requiredReadVersion = ESFS_DRIVER_VERSION;
	superblock.requiredWriteVersion = ESFS_DRIVER_VERSION;

	if (driveSize < 2048ll * 1024 * 1024) { // < 2GB
		superblock.blockSize = 2048; // Must be >= sizeof(DirectoryEntry).
	} else if (driveSize < 2ll * 1024 * 1024 * 1024 * 1024) { // < 2TB
		superblock.blockSize = 4096;
	} else if (driveSize < 256ll * 1024 * 1024 * 1024 * 1024) { // < 256TB
		superblock.blockSize = 8192;
	} else {
		superblock.blockSize = 16384;
	}

	superblock.blockCount = driveSize / superblock.blockSize;
	superblock.blocksPerGroup = 32768;

	// Small drives use groups of half the drive's blocks.
	if (superblock.blockCount < superblock.blocksPerGroup) {
		superblock.blocksPerGroup = superblock.blockCount / 2;
	}

	superblock.groupCount = (superblock.blockCount + superblock.blocksPerGroup - 1) / superblock.blocksPerGroup;
	superblock.blocksPerGroupBlockBitmap = ((superblock.blocksPerGroup + 7) / 8 + superblock.blockSize - 1) / superblock.blockSize;
	superblock.directoryEntriesPerBlock = superblock.blockSize / sizeof(DirectoryEntry);

	// Layout after the boot/superblock area: group descriptor table, group 0's block bitmap, core nodes.
	uint64_t gdtBytes = superblock.groupCount * sizeof(GroupDescriptor);
	uint64_t blockGDT = ESFS_BOOT_SUPER_BLOCK_SIZE * 2 / superblock.blockSize + 1;
	uint64_t blockGroup0Bitmap = blockGDT + (gdtBytes + superblock.blockSize - 1) / superblock.blockSize;
	uint64_t blockCoreNodes = blockGroup0Bitmap + superblock.blocksPerGroupBlockBitmap;
	uint64_t end = blockCoreNodes + ((ESFS_CORE_NODE_COUNT + superblock.directoryEntriesPerBlock - 1) / superblock.directoryEntriesPerBlock);

	superblock.blocksUsed = end;
	superblock.gdtFirstBlock = blockGDT;

	GenerateUniqueIdentifier(&superblock.identifier, true);
	superblock.osInstallation = osInstallation;

	// References to the core nodes, as (block, offset into block) pairs.
	DirectoryEntry coreNodes[ESFS_CORE_NODE_COUNT] = {};
	uint64_t rootOffset = blockCoreNodes * superblock.blockSize + sizeof(DirectoryEntry) * ESFS_CORE_NODE_ROOT;
	uint64_t kernelOffset = blockCoreNodes * superblock.blockSize + sizeof(DirectoryEntry) * ESFS_CORE_NODE_KERNEL;

	superblock.root.block = rootOffset / superblock.blockSize;
	superblock.root.offsetIntoBlock = rootOffset % superblock.blockSize;
	superblock.kernel.block = kernelOffset / superblock.blockSize;
	superblock.kernel.offsetIntoBlock = kernelOffset % superblock.blockSize;

	// Root directory.
	// NOTE(review): only the fields assigned below are set; everything else in the entry
	// and its attributes stays zero from the initialiser -- confirm nothing else is needed.

	DirectoryEntry *root = coreNodes + ESFS_CORE_NODE_ROOT;
	memcpy(root->signature, ESFS_DIRECTORY_ENTRY_SIGNATURE, 8);
	GenerateUniqueIdentifier(&root->identifier, false);
	root->attributeOffset = ESFS_ATTRIBUTE_OFFSET;
	root->attributeCount = 2;

	AttributeDirectory *directory = (AttributeDirectory *) root->attributes;
	directory->size = sizeof(AttributeDirectory);

	// The data attribute follows the directory attribute and takes the rest of the entry.
	AttributeData *data = (AttributeData *) ((uint8_t *) directory + directory->size);
	data->size = sizeof(DirectoryEntry) - ESFS_ATTRIBUTE_OFFSET - directory->size;
	data->indirection = ESFS_INDIRECTION_L1;
	data->dataOffset = ESFS_ATTRIBUTE_OFFSET;

	root->checksum = CalculateCRC32(root, sizeof(DirectoryEntry), 0);

	if (!WriteBytes(blockCoreNodes * superblock.blockSize, sizeof(coreNodes), coreNodes)) {
		return false;
	}

	// Superblock. The checksum is calculated with the checksum field zeroed, as in UnmountVolume.
	// NOTE(review): the write offset assumes the superblock directly follows an
	// ESFS_BOOT_SUPER_BLOCK_SIZE-byte boot area (MountVolume reads it as block 1) -- confirm.

	superblock.checksum = 0;
	superblock.checksum = CalculateCRC32(&superblock, sizeof(Superblock), 0);

	if (!WriteBytes(ESFS_BOOT_SUPER_BLOCK_SIZE, sizeof(Superblock), &superblock)) {
		return false;
	}

	// Group descriptor table.

	GroupDescriptor *buffer = (GroupDescriptor *) calloc(superblock.groupCount, sizeof(GroupDescriptor));

	if (!buffer) {
		Log("Error: Could not allocate the group descriptor table.\n");
		return false;
	}

	for (uintptr_t i = 0; i < superblock.groupCount; i++) {
		memcpy(buffer[i].signature, ESFS_GROUP_DESCRIPTOR_SIGNATURE, 4);
		buffer[i].largestExtent = superblock.blocksPerGroup - superblock.blocksPerGroupBlockBitmap;

		if (i == 0) {
			// Group 0 holds everything laid out above, so it gets a bitmap straight away
			// with one bit set for each block used so far.
			uint8_t firstGroupBitmap[superblock.blocksPerGroupBlockBitmap * superblock.blockSize];
			memset(firstGroupBitmap, 0, sizeof(firstGroupBitmap));

			for (uint64_t block = 0; block < superblock.blocksUsed; block++) {
				firstGroupBitmap[block / 8] |= 1 << (block % 8);
			}

			buffer[i].blocksUsed = superblock.blocksUsed;
			buffer[i].blockBitmap = blockGroup0Bitmap;
			buffer[i].bitmapChecksum = CalculateCRC32(firstGroupBitmap, sizeof(firstGroupBitmap), 0);
			buffer[i].largestExtent = superblock.blocksPerGroup - superblock.blocksUsed;

			if (!WriteBytes(blockGroup0Bitmap * superblock.blockSize, sizeof(firstGroupBitmap), firstGroupBitmap)) {
				free(buffer);
				return false;
			}
		}

		buffer[i].checksum = CalculateCRC32(buffer + i, sizeof(GroupDescriptor), 0);
	}

	bool wroteDescriptors = WriteBytes(superblock.gdtFirstBlock * superblock.blockSize, gdtBytes, buffer);
	free(buffer);

	if (!wroteDescriptors) {
		return false;
	}

	// Add the kernel.

	if (!MountVolume()) {
		return false;
	}

	DirectoryEntryReference reference = superblock.kernel;
	DirectoryEntry entry;
	EsUniqueIdentifier unused = {};
	NewDirectoryEntry(&entry, ESFS_NODE_TYPE_FILE, unused, "Kernel");

	if (!WriteDirectoryEntryReference(reference, &entry)) {
		return false;
	}

	if (!ResizeNode(&entry, kernelBytes)) {
		return false;
	}

	if (!AccessNode(&entry, kernel, 0, kernelBytes, NULL, false)) {
		return false;
	}

	// Store the entry again now that it has been resized and written to.
	if (!WriteDirectoryEntryReference(reference, &entry)) {
		return false;
	}

	return true;
}