// This file is part of the Essence operating system. // It is released under the terms of the MIT license -- see LICENSE.md. // Written by: nakst. // TODO Soft page faults. // TODO Paging file. // TODO Large pages. // TODO Locking memory. // TODO No execute permissions. // TODO NUMA? // TODO Cache coloring. #define GUARD_PAGES #ifndef IMPLEMENTATION // A contiguous range of addresses in an MMSpace. struct MMRegion { uintptr_t baseAddress; size_t pageCount; uint32_t flags; struct { union { struct { uintptr_t offset; } physical; struct { struct MMSharedRegion *region; uintptr_t offset; } shared; struct { struct FSFile *node; EsFileOffset offset; size_t zeroedBytes; uint64_t fileHandleFlags; } file; struct { RangeSet commit; // TODO Currently guarded by MMSpace::reserveMutex, maybe give it its own mutex? size_t commitPageCount; MMRegion *guardBefore, *guardAfter; } normal; }; KWriterLock pin; // Take SHARED when using the region in a system call. Take EXCLUSIVE to free the region. KMutex mapMutex; // Access the page tables for a specific region, e.g. to atomically check if a mapping exists and if not, create it. } data; union { struct { AVLItem itemBase; union { AVLItem itemSize; LinkedItem itemNonGuard; }; }; struct { bool used; } core; }; }; // A virtual address space. // One per process. struct MMSpace { MMArchVAS data; // Architecture specific data. AVLTree // Key = freeRegionsBase, // Base address freeRegionsSize, // Page count usedRegions; // Base address LinkedList usedRegionsNonGuard; KMutex reserveMutex; // Acquire to Access the region trees. volatile int32_t referenceCount; // One per CPU using the space, and +1 while the process is alive. // We don't bother tracking for kernelMMSpace. bool user; // Regions in the space may be accessed from userspace. uint64_t commit; // An *approximate* commit in pages. TODO Better memory usage tracking. uint64_t reserve; // The number of reserved pages. KAsyncTask removeAsyncTask; // The asynchronous task for deallocating the memory space once it's no longer in use. }; // A physical page of memory. struct MMPageFrame { volatile enum : uint8_t { // Frames that can't be used. UNUSABLE, // Not connected to RAM. BAD, // Connected to RAM with errors. TODO // Frames that aren't referenced. ZEROED, // Cleared with zeros. FREE, // Contains whatever data is had when it was freed. // Frames that are referenced by an invalid [shared] page mapping. // In shared regions, each invalid page mapping points to the shared page mapping. // This means only one variable must be updated to reuse the frame. STANDBY, // Data has been written to page file or backing file. // Frames that are referenced by one or more valid page mappings. ACTIVE, } state; volatile uint8_t flags; // The reference to this frame in a CCCachedSection. volatile uintptr_t *cacheReference; union { struct { // For STANDBY, MODIFIED, UPDATED, ZEROED and FREE. // The frame's position in a list. volatile uintptr_t next, *previous; } list; struct { // For ACTIVE. // For file pages, this tracks how many valid page table entries point to this frame. volatile uintptr_t references; } active; }; }; // A shared memory region. struct MMSharedRegion { size_t sizeBytes; volatile size_t handles; KMutex mutex; void *data; }; // An object pool, for fast allocation and deallocation of objects of constant size. // (There is no guarantee that the objects will be contiguous in memory.) struct Pool { void *Add(size_t elementSize); // Aligned to the size of a pointer. 
void Remove(void *element); size_t elementSize; void *cache[POOL_CACHE_COUNT]; size_t cacheEntries; KMutex mutex; }; // Physical memory manager state. struct PMM { MMPageFrame *pageFrames; bool pageFrameDatabaseInitialised; uintptr_t pageFrameDatabaseCount; uintptr_t firstFreePage; uintptr_t firstZeroedPage; uintptr_t firstStandbyPage, lastStandbyPage; Bitset freeOrZeroedPageBitset; // Used for allocating large pages. uintptr_t countZeroedPages, countFreePages, countStandbyPages, countActivePages; #define MM_REMAINING_COMMIT() (pmm.commitLimit - pmm.commitPageable - pmm.commitFixed) int64_t commitFixed, commitPageable, commitFixedLimit, commitLimit; // Acquire to: KMutex commitMutex, // (Un)commit pages. pageFrameMutex; // Allocate or free pages. KMutex pmManipulationLock; KSpinlock pmManipulationProcessorLock; void *pmManipulationRegion; Thread *zeroPageThread; KEvent zeroPageEvent; LinkedList objectCacheList; KMutex objectCacheListMutex; // Events for when the number of available pages is low. #define MM_AVAILABLE_PAGES() (pmm.countZeroedPages + pmm.countFreePages + pmm.countStandbyPages) KEvent availableCritical, availableLow; KEvent availableNotCritical; // Event for when the object cache should be trimmed. #define MM_OBJECT_CACHE_SHOULD_TRIM() (pmm.approximateTotalObjectCacheBytes / K_PAGE_SIZE > MM_OBJECT_CACHE_PAGES_MAXIMUM()) uintptr_t approximateTotalObjectCacheBytes; KEvent trimObjectCaches; // These variables will be cleared if the object they point to is removed. // See MMUnreserve and Scheduler::RemoveProcess. struct Process *nextProcessToBalance; MMRegion *nextRegionToBalance; uintptr_t balanceResumePosition; }; // See MMPhysicalAllocate. alignas(K_PAGE_SIZE) uint8_t earlyZeroBuffer[K_PAGE_SIZE]; // Constants. // MMArchMapPage. #define MM_MAP_PAGE_NOT_CACHEABLE (1 << 0) #define MM_MAP_PAGE_USER (1 << 1) #define MM_MAP_PAGE_OVERWRITE (1 << 2) #define MM_MAP_PAGE_COMMIT_TABLES_NOW (1 << 3) #define MM_MAP_PAGE_READ_ONLY (1 << 4) #define MM_MAP_PAGE_COPIED (1 << 5) #define MM_MAP_PAGE_NO_NEW_TABLES (1 << 6) #define MM_MAP_PAGE_FRAME_LOCK_ACQUIRED (1 << 7) #define MM_MAP_PAGE_WRITE_COMBINING (1 << 8) #define MM_MAP_PAGE_IGNORE_IF_MAPPED (1 << 9) // MMArchUnmapPages. #define MM_UNMAP_PAGES_FREE (1 << 0) #define MM_UNMAP_PAGES_FREE_COPIED (1 << 1) #define MM_UNMAP_PAGES_BALANCE_FILE (1 << 2) // MMPhysicalAllocate. // --> Moved to module.h. // MMHandlePageFault. #define MM_HANDLE_PAGE_FAULT_WRITE (1 << 0) #define MM_HANDLE_PAGE_FAULT_LOCK_ACQUIRED (1 << 1) #define MM_HANDLE_PAGE_FAULT_FOR_SUPERVISOR (1 << 2) // MMStandardAllocate - flags passed through into MMReserve. // --> Moved to module.h. // MMReserve - region types. #define MM_REGION_PHYSICAL (0x0100) // The region is mapped to device memory. #define MM_REGION_NORMAL (0x0200) // A normal region. #define MM_REGION_SHARED (0x0400) // The region's contents is shared via a MMSharedRegion. #define MM_REGION_GUARD (0x0800) // A guard region, to make sure we don't accidentally go into other regions. #define MM_REGION_CACHE (0x1000) // Used for the file cache. #define MM_REGION_FILE (0x2000) // A mapped file. #define MM_SHARED_ENTRY_PRESENT (1) // Forward declarations. bool MMHandlePageFault(MMSpace *space, uintptr_t address, unsigned flags); bool MMUnmapFilePage(uintptr_t frameNumber); // Returns true if the page became inactive. void MMSharedDestroyRegion(MMSharedRegion *region); // Public memory manager functions. 
void *MMMapPhysical(MMSpace *space, uintptr_t address, size_t bytes, uint64_t caching); void *MMStandardAllocate(MMSpace *space, size_t bytes, uint32_t flags, void *baseAddress, bool commitAll); bool MMFree(MMSpace *space, void *address, size_t expectedSize, bool userOnly); MMRegion *MMReserve(MMSpace *space, size_t bytes, unsigned flags, uintptr_t forcedAddress = 0, bool generateGuardPages = false); bool MMCommit(uint64_t bytes, bool fixed); void MMDecommit(uint64_t bytes, bool fixed); bool MMCommitRange(MMSpace *space, MMRegion *region, uintptr_t offset, size_t pages); bool MMDecommitRange(MMSpace *space, MMRegion *region, uintptr_t offset, size_t pages); uintptr_t MMPhysicalAllocate(unsigned flags, uintptr_t count, uintptr_t align, uintptr_t below); void MMInitialise(); MMSharedRegion *MMSharedCreateRegion(size_t sizeBytes, bool fixed, uintptr_t below); MMRegion *MMFindAndPinRegion(MMSpace *space, uintptr_t address, uintptr_t size); void MMUnpinRegion(MMSpace *space, MMRegion *region); void MMSpaceDestroy(MMSpace *space); bool MMSpaceInitialise(MMSpace *space); void MMPhysicalFree(uintptr_t page, bool mutexAlreadyAcquired, size_t count); void MMUnreserve(MMSpace *space, MMRegion *remove, bool unmapPages, bool guardRegion = false); MMRegion *MMFindRegion(MMSpace *space, uintptr_t address); void *MMMapFile(MMSpace *space, struct FSFile *node, EsFileOffset offset, size_t bytes, int protection, void *baseAddress, size_t zeroedBytes = 0, uint32_t additionalFlags = ES_FLAGS_DEFAULT); // Physical memory manipulation. void PMZero(uintptr_t *pages, size_t pageCount, bool contiguous /* True if `pages[0]` contains the base address and all other are contiguous, false if `pages` contains an array of physical addresses. */); void PMZeroPartial(uintptr_t page, uintptr_t start, uintptr_t end); void PMCheckZeroed(uintptr_t page); void PMCopy(uintptr_t page, void *source, size_t pageCount); #endif #ifdef IMPLEMENTATION // Globals. MMSpace _kernelMMSpace, _coreMMSpace; PMM pmm; MMRegion *mmCoreRegions = (MMRegion *) MM_CORE_REGIONS_START; size_t mmCoreRegionCount, mmCoreRegionArrayCommit; MMSharedRegion *mmGlobalDataRegion, *mmAPITableRegion; GlobalData *mmGlobalData; // Shared with all processes. // Code! 
void MMUpdateAvailablePageCount(bool increase) { if (MM_AVAILABLE_PAGES() >= MM_CRITICAL_AVAILABLE_PAGES_THRESHOLD) { KEventSet(&pmm.availableNotCritical, true); KEventReset(&pmm.availableCritical); } else { KEventReset(&pmm.availableNotCritical); KEventSet(&pmm.availableCritical, true); if (!increase) { KernelLog(LOG_ERROR, "Memory", "critical page limit hit", "Critical number of available pages remain: %d (%dKB).\n", MM_AVAILABLE_PAGES(), MM_AVAILABLE_PAGES() * K_PAGE_SIZE / 1024); } } if (MM_AVAILABLE_PAGES() >= MM_LOW_AVAILABLE_PAGES_THRESHOLD) { KEventReset(&pmm.availableLow); } else { KEventSet(&pmm.availableLow, true); } } void MMPhysicalInsertZeroedPage(uintptr_t page) { if (GetCurrentThread() != pmm.zeroPageThread) { KernelPanic("MMPhysicalInsertZeroedPage - Inserting a zeroed page not on the MMZeroPageThread.\n"); } MMPageFrame *frame = pmm.pageFrames + page; frame->state = MMPageFrame::ZEROED; { frame->list.next = pmm.firstZeroedPage; frame->list.previous = &pmm.firstZeroedPage; if (pmm.firstZeroedPage) pmm.pageFrames[pmm.firstZeroedPage].list.previous = &frame->list.next; pmm.firstZeroedPage = page; } pmm.countZeroedPages++; pmm.freeOrZeroedPageBitset.Put(page); MMUpdateAvailablePageCount(true); } void MMPhysicalInsertFreePagesStart() { } void MMPhysicalInsertFreePagesNext(uintptr_t page) { MMPageFrame *frame = pmm.pageFrames + page; frame->state = MMPageFrame::FREE; { frame->list.next = pmm.firstFreePage; frame->list.previous = &pmm.firstFreePage; if (pmm.firstFreePage) pmm.pageFrames[pmm.firstFreePage].list.previous = &frame->list.next; pmm.firstFreePage = page; } pmm.freeOrZeroedPageBitset.Put(page); pmm.countFreePages++; } void MMPhysicalInsertFreePagesEnd() { if (pmm.countFreePages > MM_ZERO_PAGE_THRESHOLD) { KEventSet(&pmm.zeroPageEvent, true); } MMUpdateAvailablePageCount(true); } void MMPhysicalActivatePages(uintptr_t pages, uintptr_t count, unsigned flags) { (void) flags; KMutexAssertLocked(&pmm.pageFrameMutex); for (uintptr_t i = 0; i < count; i++) { MMPageFrame *frame = pmm.pageFrames + pages + i; if (frame->state == MMPageFrame::FREE) { pmm.countFreePages--; } else if (frame->state == MMPageFrame::ZEROED) { pmm.countZeroedPages--; } else if (frame->state == MMPageFrame::STANDBY) { pmm.countStandbyPages--; if (pmm.lastStandbyPage == pages + i) { if (frame->list.previous == &pmm.firstStandbyPage) { // There are no more pages in the list. pmm.lastStandbyPage = 0; } else { pmm.lastStandbyPage = ((uintptr_t) frame->list.previous - (uintptr_t) pmm.pageFrames) / sizeof(MMPageFrame); } } } else { KernelPanic("MMPhysicalActivatePages - Corrupt page frame database (4).\n"); } // Unlink the frame from its list. 
*frame->list.previous = frame->list.next; if (frame->list.next) pmm.pageFrames[frame->list.next].list.previous = frame->list.previous; EsMemoryZero(frame, sizeof(MMPageFrame)); frame->state = MMPageFrame::ACTIVE; } pmm.countActivePages += count; MMUpdateAvailablePageCount(false); } uintptr_t MMPhysicalAllocate(unsigned flags, uintptr_t count, uintptr_t align, uintptr_t below) { bool mutexAlreadyAcquired = flags & MM_PHYSICAL_ALLOCATE_LOCK_ACQUIRED; if (!mutexAlreadyAcquired) KMutexAcquire(&pmm.pageFrameMutex); else KMutexAssertLocked(&pmm.pageFrameMutex); EsDefer(if (!mutexAlreadyAcquired) KMutexRelease(&pmm.pageFrameMutex);); intptr_t commitNow = count * K_PAGE_SIZE; if (flags & MM_PHYSICAL_ALLOCATE_COMMIT_NOW) { if (!MMCommit(commitNow, true)) return 0; } else commitNow = 0; bool simple = count == 1 && align == 1 && below == 0; if (!pmm.pageFrameDatabaseInitialised) { // Early page allocation before the page frame database is initialised. if (!simple) { KernelPanic("MMPhysicalAllocate - Non-simple allocation before initialisation of the page frame database.\n"); } uintptr_t page = MMArchEarlyAllocatePage(); if (flags & MM_PHYSICAL_ALLOCATE_ZEROED) { // TODO Hack! MMArchMapPage(coreMMSpace, page, (uintptr_t) earlyZeroBuffer, MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES | MM_MAP_PAGE_FRAME_LOCK_ACQUIRED); EsMemoryZero(earlyZeroBuffer, K_PAGE_SIZE); } return page; } else if (!simple) { // Slow path. // TODO Use standby pages. uintptr_t pages = pmm.freeOrZeroedPageBitset.Get(count, align, below); if (pages == (uintptr_t) -1) goto fail; MMPhysicalActivatePages(pages, count, flags); uintptr_t address = pages << K_PAGE_BITS; if (flags & MM_PHYSICAL_ALLOCATE_ZEROED) PMZero(&address, count, true); return address; } else { uintptr_t page = 0; bool notZeroed = false; if (!page) page = pmm.firstZeroedPage; if (!page) page = pmm.firstFreePage, notZeroed = true; if (!page) page = pmm.lastStandbyPage, notZeroed = true; if (!page) goto fail; MMPageFrame *frame = pmm.pageFrames + page; if (frame->state == MMPageFrame::ACTIVE) { KernelPanic("MMPhysicalAllocate - Corrupt page frame database (2).\n"); } if (frame->state == MMPageFrame::STANDBY) { // EsPrint("Clear RT %x\n", frame); if (*frame->cacheReference != ((page << K_PAGE_BITS) | MM_SHARED_ENTRY_PRESENT)) { KernelPanic("MMPhysicalAllocate - Corrupt shared reference back pointer in frame %x.\n", frame); } // Clear the entry in the CCCachedSection that referenced this standby frame. *frame->cacheReference = 0; // TODO If the CCCachedSection is empty, remove it from its CCSpace. } else { pmm.freeOrZeroedPageBitset.Take(page); } MMPhysicalActivatePages(page, 1, flags); // EsPrint("PAGE FRAME ALLOCATE: %x\n", page << K_PAGE_BITS); uintptr_t address = page << K_PAGE_BITS; if (notZeroed && (flags & MM_PHYSICAL_ALLOCATE_ZEROED)) PMZero(&address, 1, false); // if (!notZeroed) PMCheckZeroed(address); return address; } fail:; if (!(flags & MM_PHYSICAL_ALLOCATE_CAN_FAIL)) { EsPrint("Out of memory. 
Committed %d/%d fixed and %d pageable out of a maximum %d.\n", pmm.commitFixed, pmm.commitFixedLimit, pmm.commitPageable, pmm.commitLimit); KernelPanic("MMPhysicalAllocate - Out of memory.\n"); } MMDecommit(commitNow, true); return 0; } void MMPhysicalFree(uintptr_t page, bool mutexAlreadyAcquired, size_t count) { if (!page) KernelPanic("MMPhysicalFree - Invalid page.\n"); if (mutexAlreadyAcquired) KMutexAssertLocked(&pmm.pageFrameMutex); else KMutexAcquire(&pmm.pageFrameMutex); if (!pmm.pageFrameDatabaseInitialised) KernelPanic("MMPhysicalFree - PMM not yet initialised.\n"); page >>= K_PAGE_BITS; MMPhysicalInsertFreePagesStart(); for (uintptr_t i = 0; i < count; i++, page++) { MMPageFrame *frame = pmm.pageFrames + page; if (frame->state == MMPageFrame::FREE) { KernelPanic("MMPhysicalFree - Attempting to free a FREE page.\n"); } if (pmm.commitFixedLimit) { pmm.countActivePages--; } MMPhysicalInsertFreePagesNext(page); } MMPhysicalInsertFreePagesEnd(); if (!mutexAlreadyAcquired) KMutexRelease(&pmm.pageFrameMutex); } void MMCheckUnusable(uintptr_t physicalStart, size_t bytes) { for (uintptr_t i = physicalStart / K_PAGE_SIZE; i < (physicalStart + bytes + K_PAGE_SIZE - 1) / K_PAGE_SIZE && i < pmm.pageFrameDatabaseCount; i++) { if (pmm.pageFrames[i].state != MMPageFrame::UNUSABLE) { KernelPanic("MMCheckUnusable - Page frame at address %x should be unusable.\n", i * K_PAGE_SIZE); } } } MMRegion *MMFindRegion(MMSpace *space, uintptr_t address) { KMutexAssertLocked(&space->reserveMutex); if (space == coreMMSpace) { for (uintptr_t i = 0; i < mmCoreRegionCount; i++) { MMRegion *region = mmCoreRegions + i; if (region->core.used && region->baseAddress <= address && region->baseAddress + region->pageCount * K_PAGE_SIZE > address) { return region; } } } else { AVLItem *item = TreeFind(&space->usedRegions, MakeShortKey(address), TREE_SEARCH_LARGEST_BELOW_OR_EQUAL); if (!item) return nullptr; MMRegion *region = item->thisItem; if (region->baseAddress > address) KernelPanic("MMFindRegion - Broken usedRegions tree.\n"); if (region->baseAddress + region->pageCount * K_PAGE_SIZE <= address) return nullptr; return region; } return nullptr; } uintptr_t MMSharedLookupPage(MMSharedRegion *region, uintptr_t pageIndex) { KMutexAcquire(®ion->mutex); if (pageIndex >= region->sizeBytes >> K_PAGE_BITS) KernelPanic("MMSharedLookupPage - Page %d out of range in region %x.\n", pageIndex, region); uintptr_t entry = ((uintptr_t *) region->data)[pageIndex]; KMutexRelease(®ion->mutex); return entry; } bool MMHandlePageFault(MMSpace *space, uintptr_t address, unsigned faultFlags) { // EsPrint("HandlePageFault: %x/%x/%x\n", space, address, faultFlags); address &= ~(K_PAGE_SIZE - 1); bool lockAcquired = faultFlags & MM_HANDLE_PAGE_FAULT_LOCK_ACQUIRED; MMRegion *region; if (!lockAcquired && MM_AVAILABLE_PAGES() < MM_CRITICAL_AVAILABLE_PAGES_THRESHOLD && GetCurrentThread() && !GetCurrentThread()->isPageGenerator) { KernelLog(LOG_ERROR, "Memory", "waiting for non-critical state", "Page fault on non-generator thread, waiting for more available pages.\n"); KEventWait(&pmm.availableNotCritical); } { if (!lockAcquired) KMutexAcquire(&space->reserveMutex); else KMutexAssertLocked(&space->reserveMutex); EsDefer(if (!lockAcquired) KMutexRelease(&space->reserveMutex)); // Find the region, and pin it (so it can't be freed). 
region = MMFindRegion(space, address); if (!region) return false; if (!KWriterLockTake(®ion->data.pin, K_LOCK_SHARED, true /* poll */)) return false; } EsDefer(KWriterLockReturn(®ion->data.pin, K_LOCK_SHARED)); KMutexAcquire(®ion->data.mapMutex); EsDefer(KMutexRelease(®ion->data.mapMutex)); if (MMArchTranslateAddress(space, address, faultFlags & MM_HANDLE_PAGE_FAULT_WRITE)) { // Spurious page fault. return true; } bool copyOnWrite = false, markModified = false; if (faultFlags & MM_HANDLE_PAGE_FAULT_WRITE) { if (region->flags & MM_REGION_COPY_ON_WRITE) { // This is copy-on-write page that needs to be copied. copyOnWrite = true; } else if (region->flags & MM_REGION_READ_ONLY) { // The page was read-only. KernelLog(LOG_ERROR, "Memory", "read only page fault", "MMHandlePageFault - Page was read only.\n"); return false; } else { // We mapped the page as read-only so we could track whether it has been written to. // It has now been written to. markModified = true; } } uintptr_t offsetIntoRegion = address - region->baseAddress; uint64_t needZeroPages = 0; bool zeroPage = true; if (space->user) { needZeroPages = MM_PHYSICAL_ALLOCATE_ZEROED; zeroPage = false; } unsigned flags = ES_FLAGS_DEFAULT; if (space->user) flags |= MM_MAP_PAGE_USER; if (region->flags & MM_REGION_NOT_CACHEABLE) flags |= MM_MAP_PAGE_NOT_CACHEABLE; if (region->flags & MM_REGION_WRITE_COMBINING) flags |= MM_MAP_PAGE_WRITE_COMBINING; if (!markModified && !(region->flags & MM_REGION_FIXED) && (region->flags & MM_REGION_FILE)) flags |= MM_MAP_PAGE_READ_ONLY; if (region->flags & MM_REGION_PHYSICAL) { MMArchMapPage(space, region->data.physical.offset + address - region->baseAddress, address, flags); return true; } else if (region->flags & MM_REGION_SHARED) { MMSharedRegion *sharedRegion = region->data.shared.region; if (!sharedRegion->handles) { KernelPanic("MMHandlePageFault - Shared region has no handles.\n"); } KMutexAcquire(&sharedRegion->mutex); uintptr_t offset = address - region->baseAddress + region->data.shared.offset; if (offset >= sharedRegion->sizeBytes) { KMutexRelease(&sharedRegion->mutex); KernelLog(LOG_ERROR, "Memory", "outside shared size", "MMHandlePageFault - Attempting to access shared memory past end of region.\n"); return false; } uintptr_t *entry = (uintptr_t *) sharedRegion->data + (offset / K_PAGE_SIZE); if (*entry & MM_SHARED_ENTRY_PRESENT) zeroPage = false; else *entry = MMPhysicalAllocate(needZeroPages) | MM_SHARED_ENTRY_PRESENT; MMArchMapPage(space, *entry & ~(K_PAGE_SIZE - 1), address, flags); if (zeroPage) EsMemoryZero((void *) address, K_PAGE_SIZE); KMutexRelease(&sharedRegion->mutex); return true; } else if (region->flags & MM_REGION_FILE) { if (address >= region->baseAddress + (region->pageCount << K_PAGE_BITS) - region->data.file.zeroedBytes) { // EsPrint("%x:%d\n", address, needZeroPages); MMArchMapPage(space, MMPhysicalAllocate(needZeroPages), address, (flags & ~MM_MAP_PAGE_READ_ONLY) | MM_MAP_PAGE_COPIED); if (zeroPage) EsMemoryZero((void *) address, K_PAGE_SIZE); return true; } if (copyOnWrite) { doCopyOnWrite:; uintptr_t existingTranslation = MMArchTranslateAddress(space, address); if (existingTranslation) { // We need to use PMCopy, because as soon as the page is mapped for the user, // other threads can access it. 
uintptr_t page = MMPhysicalAllocate(ES_FLAGS_DEFAULT); PMCopy(page, (void *) address, 1); MMArchUnmapPages(space, address, 1, MM_UNMAP_PAGES_BALANCE_FILE); MMArchMapPage(space, page, address, (flags & ~MM_MAP_PAGE_READ_ONLY) | MM_MAP_PAGE_COPIED); return true; } else { // EsPrint("Write to unmapped, %x.\n", address); } } KMutexRelease(®ion->data.mapMutex); size_t pagesToRead = 16; if (region->pageCount - (offsetIntoRegion + region->data.file.zeroedBytes) / K_PAGE_SIZE < pagesToRead) { pagesToRead = region->pageCount - (offsetIntoRegion + region->data.file.zeroedBytes) / K_PAGE_SIZE; } // This shouldn't block, because mapped files cannot be resized. KWriterLockTake(®ion->data.file.node->resizeLock, K_LOCK_SHARED); EsError error = CCSpaceAccess(®ion->data.file.node->cache, (void *) address, offsetIntoRegion + region->data.file.offset, pagesToRead * K_PAGE_SIZE, CC_ACCESS_MAP, space, flags); KWriterLockReturn(®ion->data.file.node->resizeLock, K_LOCK_SHARED); KMutexAcquire(®ion->data.mapMutex); if (error != ES_SUCCESS) { KernelLog(LOG_ERROR, "Memory", "mapped file read error", "MMHandlePageFault - Could not read from file %x. Error: %d.\n", region->data.file.node, error); return false; } if (copyOnWrite) { goto doCopyOnWrite; } return true; } else if (region->flags & MM_REGION_NORMAL) { if (!(region->flags & MM_REGION_NO_COMMIT_TRACKING)) { if (!region->data.normal.commit.Contains(offsetIntoRegion >> K_PAGE_BITS)) { KernelLog(LOG_ERROR, "Memory", "outside commit range", "MMHandlePageFault - Attempting to access uncommitted memory in reserved region.\n"); return false; } } MMArchMapPage(space, MMPhysicalAllocate(needZeroPages), address, flags); if (zeroPage) EsMemoryZero((void *) address, K_PAGE_SIZE); return true; } else if (region->flags & MM_REGION_GUARD) { KernelLog(LOG_ERROR, "Memory", "guard page hit", "MMHandlePageFault - Guard page hit!\n"); return false; } else { KernelLog(LOG_ERROR, "Memory", "cannot fault in region", "MMHandlePageFault - Unsupported region type (flags: %x).\n", region->flags); return false; } } MMRegion *MMReserve(MMSpace *space, size_t bytes, unsigned flags, uintptr_t forcedAddress, bool generateGuardPages) { // TODO Handling EsHeapAllocate failures. MMRegion *outputRegion = nullptr; size_t pagesNeeded = ((bytes + K_PAGE_SIZE - 1) & ~(K_PAGE_SIZE - 1)) / K_PAGE_SIZE; if (!pagesNeeded) return nullptr; KMutexAssertLocked(&space->reserveMutex); if (space == coreMMSpace) { if (mmCoreRegionCount == MM_CORE_REGIONS_COUNT) { return nullptr; } if (forcedAddress) { KernelPanic("MMReserve - Using a forced address in coreMMSpace.\n"); } { uintptr_t newRegionCount = mmCoreRegionCount + 1; uintptr_t commitPagesNeeded = newRegionCount * sizeof(MMRegion) / K_PAGE_SIZE + 1; while (mmCoreRegionArrayCommit < commitPagesNeeded) { if (!MMCommit(K_PAGE_SIZE, true)) return nullptr; mmCoreRegionArrayCommit++; } } for (uintptr_t i = 0; i < mmCoreRegionCount; i++) { MMRegion *region = mmCoreRegions + i; if (!region->core.used && region->pageCount >= pagesNeeded) { if (region->pageCount > pagesNeeded) { MMRegion *split = mmCoreRegions + mmCoreRegionCount++; EsMemoryCopy(split, region, sizeof(MMRegion)); split->baseAddress += pagesNeeded * K_PAGE_SIZE; split->pageCount -= pagesNeeded; } region->core.used = true; region->pageCount = pagesNeeded; region->flags = flags; EsMemoryZero(®ion->data, sizeof(region->data)); outputRegion = region; goto done; } } } else if (forcedAddress) { AVLItem *item; // EsPrint("reserve forced: %x\n", forcedAddress); // Check for a collision. 
item = TreeFind(&space->usedRegions, MakeShortKey(forcedAddress), TREE_SEARCH_EXACT); if (item) return nullptr; item = TreeFind(&space->usedRegions, MakeShortKey(forcedAddress), TREE_SEARCH_SMALLEST_ABOVE_OR_EQUAL); if (item && item->thisItem->baseAddress < forcedAddress + pagesNeeded * K_PAGE_SIZE) return nullptr; item = TreeFind(&space->usedRegions, MakeShortKey(forcedAddress + pagesNeeded * K_PAGE_SIZE - 1), TREE_SEARCH_LARGEST_BELOW_OR_EQUAL); if (item && item->thisItem->baseAddress + item->thisItem->pageCount * K_PAGE_SIZE > forcedAddress) return nullptr; item = TreeFind(&space->freeRegionsBase, MakeShortKey(forcedAddress), TREE_SEARCH_EXACT); if (item) return nullptr; item = TreeFind(&space->freeRegionsBase, MakeShortKey(forcedAddress), TREE_SEARCH_SMALLEST_ABOVE_OR_EQUAL); if (item && item->thisItem->baseAddress < forcedAddress + pagesNeeded * K_PAGE_SIZE) return nullptr; item = TreeFind(&space->freeRegionsBase, MakeShortKey(forcedAddress + pagesNeeded * K_PAGE_SIZE - 1), TREE_SEARCH_LARGEST_BELOW_OR_EQUAL); if (item && item->thisItem->baseAddress + item->thisItem->pageCount * K_PAGE_SIZE > forcedAddress) return nullptr; // EsPrint("(no collisions)\n"); MMRegion *region = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE); region->baseAddress = forcedAddress; region->pageCount = pagesNeeded; region->flags = flags; TreeInsert(&space->usedRegions, ®ion->itemBase, region, MakeShortKey(region->baseAddress)); EsMemoryZero(®ion->data, sizeof(region->data)); outputRegion = region; } else { #ifdef GUARD_PAGES size_t guardPagesNeeded = generateGuardPages ? 2 : 0; #else size_t guardPagesNeeded = 0; #endif AVLItem *item = TreeFind(&space->freeRegionsSize, MakeShortKey(pagesNeeded + guardPagesNeeded), TREE_SEARCH_SMALLEST_ABOVE_OR_EQUAL); if (!item) { goto done; } MMRegion *region = item->thisItem; TreeRemove(&space->freeRegionsBase, ®ion->itemBase); TreeRemove(&space->freeRegionsSize, ®ion->itemSize); if (region->pageCount > pagesNeeded + guardPagesNeeded) { MMRegion *split = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE); EsMemoryCopy(split, region, sizeof(MMRegion)); split->baseAddress += (pagesNeeded + guardPagesNeeded) * K_PAGE_SIZE; split->pageCount -= (pagesNeeded + guardPagesNeeded); TreeInsert(&space->freeRegionsBase, &split->itemBase, split, MakeShortKey(split->baseAddress)); TreeInsert(&space->freeRegionsSize, &split->itemSize, split, MakeShortKey(split->pageCount), AVL_DUPLICATE_KEYS_ALLOW); } EsMemoryZero(®ion->data, sizeof(region->data)); region->pageCount = pagesNeeded; region->flags = flags; if (guardPagesNeeded) { MMRegion *guardBefore = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE); MMRegion *guardAfter = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE); EsMemoryCopy(guardBefore, region, sizeof(MMRegion)); EsMemoryCopy(guardAfter, region, sizeof(MMRegion)); guardAfter->baseAddress += K_PAGE_SIZE * (pagesNeeded + 1); guardBefore->pageCount = guardAfter->pageCount = 1; guardBefore->flags = guardAfter->flags = MM_REGION_GUARD; region->baseAddress += K_PAGE_SIZE; region->data.normal.guardBefore = guardBefore; region->data.normal.guardAfter = guardAfter; EsMemoryZero(&guardBefore->itemNonGuard, sizeof(guardBefore->itemNonGuard)); EsMemoryZero(&guardAfter->itemNonGuard, sizeof(guardAfter->itemNonGuard)); TreeInsert(&space->usedRegions, &guardBefore->itemBase, guardBefore, MakeShortKey(guardBefore->baseAddress)); TreeInsert(&space->usedRegions, &guardAfter ->itemBase, guardAfter, MakeShortKey(guardAfter->baseAddress)); #if 0 
EsPrint("Guarded region: %x->%x/%x->%x/%x->%x\n", guardBefore->baseAddress, guardBefore->pageCount * K_PAGE_SIZE + guardBefore->baseAddress, region->baseAddress, region->pageCount * K_PAGE_SIZE + region->baseAddress, guardAfter->baseAddress, guardAfter->pageCount * K_PAGE_SIZE + guardAfter->baseAddress); #endif } TreeInsert(&space->usedRegions, ®ion->itemBase, region, MakeShortKey(region->baseAddress)); outputRegion = region; goto done; } done:; // EsPrint("reserve: %x -> %x\n", address, (uintptr_t) address + pagesNeeded * K_PAGE_SIZE); if (outputRegion) { // We've now got an address range for the region. // So we should commit the page tables that will be needed to map it. if (!MMArchCommitPageTables(space, outputRegion)) { // We couldn't commit the leading page tables. // So we'll have to unreserve the region. MMUnreserve(space, outputRegion, false); return nullptr; } if (space != coreMMSpace) { EsMemoryZero(&outputRegion->itemNonGuard, sizeof(outputRegion->itemNonGuard)); outputRegion->itemNonGuard.thisItem = outputRegion; space->usedRegionsNonGuard.InsertEnd(&outputRegion->itemNonGuard); } space->reserve += pagesNeeded; } // EsPrint("Reserve: %x->%x\n", outputRegion->baseAddress, outputRegion->pageCount * K_PAGE_SIZE + outputRegion->baseAddress); return outputRegion; } void MMUnreserve(MMSpace *space, MMRegion *remove, bool unmapPages, bool guardRegion) { // EsPrint("unreserve: %x, %x, %d, %d\n", remove->baseAddress, remove->flags, unmapPages, guardRegion); // EsDefer(EsPrint("unreserve complete\n");); KMutexAssertLocked(&space->reserveMutex); if (pmm.nextRegionToBalance == remove) { // If the balance thread paused while balancing this region, // switch to the next region. pmm.nextRegionToBalance = remove->itemNonGuard.nextItem ? remove->itemNonGuard.nextItem->thisItem : nullptr; pmm.balanceResumePosition = 0; } if (!remove) { KernelPanic("MMUnreserve - Region to remove was null.\n"); } if (remove->flags & MM_REGION_NORMAL) { if (remove->data.normal.guardBefore) MMUnreserve(space, remove->data.normal.guardBefore, false, true); if (remove->data.normal.guardAfter) MMUnreserve(space, remove->data.normal.guardAfter, false, true); } else if ((remove->flags & MM_REGION_GUARD) && !guardRegion) { // You can't free a guard region! // TODO Error. 
KernelLog(LOG_ERROR, "Memory", "attempt to unreserve guard page", "MMUnreserve - Attempting to unreserve a guard page.\n"); return; } if (remove->itemNonGuard.list && !guardRegion) { // EsPrint("Remove item non guard...\n"); remove->itemNonGuard.RemoveFromList(); // EsPrint("Removed.\n"); } if (unmapPages) { MMArchUnmapPages(space, remove->baseAddress, remove->pageCount, ES_FLAGS_DEFAULT); } space->reserve += remove->pageCount; if (space == coreMMSpace) { remove->core.used = false; intptr_t remove1 = -1, remove2 = -1; for (uintptr_t i = 0; i < mmCoreRegionCount && (remove1 != -1 || remove2 != 1); i++) { MMRegion *r = mmCoreRegions + i; if (r->core.used) continue; if (r == remove) continue; if (r->baseAddress == remove->baseAddress + (remove->pageCount << K_PAGE_BITS)) { remove->pageCount += r->pageCount; remove1 = i; } else if (remove->baseAddress == r->baseAddress + (r->pageCount << K_PAGE_BITS)) { remove->pageCount += r->pageCount; remove->baseAddress = r->baseAddress; remove2 = i; } } if (remove1 != -1) { mmCoreRegions[remove1] = mmCoreRegions[--mmCoreRegionCount]; if ((uintptr_t) remove2 == mmCoreRegionCount) remove2 = remove1; } if (remove2 != -1) { mmCoreRegions[remove2] = mmCoreRegions[--mmCoreRegionCount]; } } else { TreeRemove(&space->usedRegions, &remove->itemBase); uintptr_t address = remove->baseAddress; { AVLItem *before = TreeFind(&space->freeRegionsBase, MakeShortKey(address), TREE_SEARCH_LARGEST_BELOW_OR_EQUAL); if (before && before->thisItem->baseAddress + before->thisItem->pageCount * K_PAGE_SIZE == remove->baseAddress) { remove->baseAddress = before->thisItem->baseAddress; remove->pageCount += before->thisItem->pageCount; TreeRemove(&space->freeRegionsBase, before); TreeRemove(&space->freeRegionsSize, &before->thisItem->itemSize); EsHeapFree(before->thisItem, sizeof(MMRegion), K_CORE); } } { AVLItem *after = TreeFind(&space->freeRegionsBase, MakeShortKey(address), TREE_SEARCH_SMALLEST_ABOVE_OR_EQUAL); if (after && remove->baseAddress + remove->pageCount * K_PAGE_SIZE == after->thisItem->baseAddress) { remove->pageCount += after->thisItem->pageCount; TreeRemove(&space->freeRegionsBase, after); TreeRemove(&space->freeRegionsSize, &after->thisItem->itemSize); EsHeapFree(after->thisItem, sizeof(MMRegion), K_CORE); } } TreeInsert(&space->freeRegionsBase, &remove->itemBase, remove, MakeShortKey(remove->baseAddress)); TreeInsert(&space->freeRegionsSize, &remove->itemSize, remove, MakeShortKey(remove->pageCount), AVL_DUPLICATE_KEYS_ALLOW); } } void *MMMapFile(MMSpace *space, FSFile *node, EsFileOffset offset, size_t bytes, int protection, void *baseAddress, size_t zeroedBytes, uint32_t additionalFlags) { if (protection != ES_MEMORY_MAP_OBJECT_READ_ONLY && protection != ES_MEMORY_MAP_OBJECT_COPY_ON_WRITE) { return nullptr; } if (node->directoryEntry->type != ES_NODE_FILE) { return nullptr; } MMRegion *region = nullptr; uint64_t fileHandleFlags = ES_NODE_PREVENT_RESIZE | (protection == ES_MEMORY_MAP_OBJECT_READ_WRITE ? ES_FILE_WRITE_SHARED : ES_FILE_READ_SHARED); bool decommit = false; // Register a handle to the node. // If this successes, past this point we file cannot be resized. if (!OpenHandleToObject(node, KERNEL_OBJECT_NODE, fileHandleFlags)) { return nullptr; } // Acquire the file's cache mutex before the space's reserve mutex, // so that we can handle page faults without locking the reserve mutex. 
KMutexAcquire(&node->cache.cachedSectionsMutex); EsDefer(KMutexRelease(&node->cache.cachedSectionsMutex)); KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); // Check the parameters. uintptr_t offsetIntoPage = offset & (K_PAGE_SIZE - 1); if (node->directoryEntry->totalSize <= offset) { goto fail; } if (offsetIntoPage) bytes += offset & (K_PAGE_SIZE - 1); // Reserve the region. region = MMReserve(space, bytes + zeroedBytes, MM_REGION_FILE | additionalFlags | ((protection == ES_MEMORY_MAP_OBJECT_READ_ONLY || protection == ES_MEMORY_MAP_OBJECT_COPY_ON_WRITE) ? MM_REGION_READ_ONLY : 0) | (protection == ES_MEMORY_MAP_OBJECT_COPY_ON_WRITE ? MM_REGION_COPY_ON_WRITE : 0), baseAddress ? (uintptr_t) baseAddress - offsetIntoPage : 0); if (!region) { goto fail; } // Commit copy on write regions. if (protection == ES_MEMORY_MAP_OBJECT_COPY_ON_WRITE) { if (!MMCommit(bytes + zeroedBytes, false)) { goto fail; } decommit = true; } // Initialise data. region->data.file.node = node; region->data.file.offset = offset - offsetIntoPage; region->data.file.zeroedBytes = zeroedBytes; region->data.file.fileHandleFlags = fileHandleFlags; // Make sure the region is covered by MMCachedSections in the file. // (This is why we needed the file's cache mutex.) // We'll allocate in coreMMSpace as needed. if (!CCSpaceCover(&node->cache, region->data.file.offset, region->data.file.offset + bytes)) { goto fail; } return (uint8_t *) region->baseAddress + offsetIntoPage; fail:; if (region) MMUnreserve(space, region, false /* No pages have been mapped. */); if (decommit) MMDecommit(bytes + zeroedBytes, false); CloseHandleToObject(node, KERNEL_OBJECT_NODE, fileHandleFlags); return nullptr; } void *MMMapShared(MMSpace *space, MMSharedRegion *sharedRegion, uintptr_t offset, size_t bytes, uint32_t additionalFlags, void *baseAddress) { MMRegion *region; OpenHandleToObject(sharedRegion, KERNEL_OBJECT_SHMEM); KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); if (offset & (K_PAGE_SIZE - 1)) bytes += offset & (K_PAGE_SIZE - 1); if (sharedRegion->sizeBytes <= offset) goto fail; if (sharedRegion->sizeBytes < offset + bytes) goto fail; region = MMReserve(space, bytes, MM_REGION_SHARED | additionalFlags, (uintptr_t) baseAddress); if (!region) goto fail; if (!(region->flags & MM_REGION_SHARED)) KernelPanic("MMMapShared - Cannot commit into non-shared region.\n"); if (region->data.shared.region) KernelPanic("MMMapShared - A shared region has already been bound.\n"); region->data.shared.region = sharedRegion; region->data.shared.offset = offset & ~(K_PAGE_SIZE - 1); return (uint8_t *) region->baseAddress + (offset & (K_PAGE_SIZE - 1)); fail:; CloseHandleToObject(sharedRegion, KERNEL_OBJECT_SHMEM); return nullptr; } void MMDecommit(uint64_t bytes, bool fixed) { // EsPrint("De-Commit %d %d\n", bytes, fixed); if (bytes & (K_PAGE_SIZE - 1)) KernelPanic("MMDecommit - Expected multiple of K_PAGE_SIZE bytes.\n"); int64_t pagesNeeded = bytes / K_PAGE_SIZE; KMutexAcquire(&pmm.commitMutex); EsDefer(KMutexRelease(&pmm.commitMutex)); if (fixed) { if (pmm.commitFixed < pagesNeeded) KernelPanic("MMDecommit - Decommitted too many pages.\n"); pmm.commitFixed -= pagesNeeded; } else { if (pmm.commitPageable < pagesNeeded) KernelPanic("MMDecommit - Decommitted too many pages.\n"); pmm.commitPageable -= pagesNeeded; } KernelLog(LOG_VERBOSE, "Memory", "decommit", "Decommit %D%z. Now at %D.\n", bytes, fixed ? 
", fixed" : "", (pmm.commitFixed + pmm.commitPageable) << K_PAGE_BITS); } bool MMCommit(uint64_t bytes, bool fixed) { if (bytes & (K_PAGE_SIZE - 1)) KernelPanic("MMCommit - Expected multiple of K_PAGE_SIZE bytes.\n"); int64_t pagesNeeded = bytes / K_PAGE_SIZE; KMutexAcquire(&pmm.commitMutex); EsDefer(KMutexRelease(&pmm.commitMutex)); if (pmm.commitLimit) { if (fixed) { if (pagesNeeded > pmm.commitFixedLimit - pmm.commitFixed) { KernelLog(LOG_ERROR, "Memory", "failed fixed commit", "Failed fixed commit %d pages (currently %d/%d).\n", pagesNeeded, pmm.commitFixed, pmm.commitFixedLimit); return false; } if (MM_AVAILABLE_PAGES() - pagesNeeded < MM_CRITICAL_AVAILABLE_PAGES_THRESHOLD && !GetCurrentThread()->isPageGenerator) { KernelLog(LOG_ERROR, "Memory", "failed fixed commit", "Failed fixed commit %d as the available pages (%d, %d, %d) would cross the critical threshold, %d.\n.", pagesNeeded, pmm.countZeroedPages, pmm.countFreePages, pmm.countStandbyPages, MM_CRITICAL_AVAILABLE_PAGES_THRESHOLD); return false; } pmm.commitFixed += pagesNeeded; } else { if (pagesNeeded > MM_REMAINING_COMMIT() - (intptr_t) (GetCurrentThread()->isPageGenerator ? 0 : MM_CRITICAL_REMAINING_COMMIT_THRESHOLD)) { KernelLog(LOG_ERROR, "Memory", "failed pageable commit", "Failed pageable commit %d pages (currently %d/%d).\n", pagesNeeded, pmm.commitPageable + pmm.commitFixed, pmm.commitLimit); return false; } pmm.commitPageable += pagesNeeded; } if (MM_OBJECT_CACHE_SHOULD_TRIM()) { KEventSet(&pmm.trimObjectCaches, true); } } else { // We haven't started tracking commit counts yet. } KernelLog(LOG_VERBOSE, "Memory", "commit", "Commit %D%z. Now at %D.\n", bytes, fixed ? ", fixed" : "", (pmm.commitFixed + pmm.commitPageable) << K_PAGE_BITS); return true; } bool MMCommitRange(MMSpace *space, MMRegion *region, uintptr_t pageOffset, size_t pageCount) { KMutexAssertLocked(&space->reserveMutex); if (region->flags & MM_REGION_NO_COMMIT_TRACKING) { KernelPanic("MMCommitRange - Region does not support commit tracking.\n"); } if (pageOffset >= region->pageCount || pageCount > region->pageCount - pageOffset) { KernelPanic("MMCommitRange - Invalid region offset and page count.\n"); } if (~region->flags & MM_REGION_NORMAL) { KernelPanic("MMCommitRange - Cannot commit into non-normal region.\n"); } intptr_t delta = 0; region->data.normal.commit.Set(pageOffset, pageOffset + pageCount, &delta, false); if (delta < 0) { KernelPanic("MMCommitRange - Invalid delta calculation adding %x, %x to %x.\n", pageOffset, pageCount, region); } if (delta == 0) { return true; } { if (!MMCommit(delta * K_PAGE_SIZE, region->flags & MM_REGION_FIXED)) { return false; } region->data.normal.commitPageCount += delta; space->commit += delta; if (region->data.normal.commitPageCount > region->pageCount) { KernelPanic("MMCommitRange - Invalid delta calculation increases region %x commit past page count.\n", region); } } if (!region->data.normal.commit.Set(pageOffset, pageOffset + pageCount, nullptr, true)) { MMDecommit(delta * K_PAGE_SIZE, region->flags & MM_REGION_FIXED); region->data.normal.commitPageCount -= delta; space->commit -= delta; return false; } if (region->flags & MM_REGION_FIXED) { for (uintptr_t i = pageOffset; i < pageOffset + pageCount; i++) { // TODO Don't call into MMHandlePageFault. I don't like MM_HANDLE_PAGE_FAULT_LOCK_ACQUIRED. 
if (!MMHandlePageFault(space, region->baseAddress + i * K_PAGE_SIZE, MM_HANDLE_PAGE_FAULT_LOCK_ACQUIRED)) { KernelPanic("MMCommitRange - Unable to fix pages.\n"); } } } return true; } bool MMDecommitRange(MMSpace *space, MMRegion *region, uintptr_t pageOffset, size_t pageCount) { KMutexAssertLocked(&space->reserveMutex); if (region->flags & MM_REGION_NO_COMMIT_TRACKING) { KernelPanic("MMDecommitRange - Region does not support commit tracking.\n"); } if (pageOffset >= region->pageCount || pageCount > region->pageCount - pageOffset) { KernelPanic("MMDecommitRange - Invalid region offset and page count.\n"); } if (~region->flags & MM_REGION_NORMAL) { KernelPanic("MMDecommitRange - Cannot decommit from non-normal region.\n"); } intptr_t delta = 0; if (!region->data.normal.commit.Clear(pageOffset, pageOffset + pageCount, &delta, true)) { return false; } if (delta > 0) { KernelPanic("MMDecommitRange - Invalid delta calculation removing %x, %x from %x.\n", pageOffset, pageCount, region); } delta = -delta; if (region->data.normal.commitPageCount < (size_t) delta) { KernelPanic("MMDecommitRange - Invalid delta calculation decreases region %x commit below zero.\n", region); } // EsPrint("\tdecommit = %x\n", pagesRemoved); MMDecommit(delta * K_PAGE_SIZE, region->flags & MM_REGION_FIXED); space->commit -= delta; region->data.normal.commitPageCount -= delta; MMArchUnmapPages(space, region->baseAddress + pageOffset * K_PAGE_SIZE, pageCount, MM_UNMAP_PAGES_FREE); return true; } void MMAllowWriteCombiningCaching(MMSpace *space, void *virtualAddress) { if (!space) space = kernelMMSpace; KMutexAcquire(&space->reserveMutex); MMRegion *region = MMFindRegion(space, (uintptr_t) virtualAddress); if (!(region->flags & MM_REGION_NOT_CACHEABLE)) { KernelPanic("MMAllowWriteCombiningCaching - Region was cachable.\n"); } region->flags &= ~MM_REGION_NOT_CACHEABLE; region->flags |= MM_REGION_WRITE_COMBINING; MMArchUnmapPages(space, region->baseAddress, region->pageCount, ES_FLAGS_DEFAULT); KMutexRelease(&space->reserveMutex); for (uintptr_t i = 0; i < region->pageCount; i++) { MMHandlePageFault(space, region->baseAddress + i * K_PAGE_SIZE, ES_FLAGS_DEFAULT); } } size_t MMGetRegionPageCount(MMSpace *space, void *address) { if (!space) space = kernelMMSpace; KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); MMRegion *region = MMFindRegion(space, (uintptr_t) address); return region->pageCount; } void *MMMapPhysical(MMSpace *space, uintptr_t offset, size_t bytes, uint64_t caching) { if (!space) space = kernelMMSpace; uintptr_t offset2 = offset & (K_PAGE_SIZE - 1); offset -= offset2; if (offset2) bytes += K_PAGE_SIZE; MMRegion *region; { KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); region = MMReserve(space, bytes, MM_REGION_PHYSICAL | MM_REGION_FIXED | caching); if (!region) return nullptr; region->data.physical.offset = offset; } for (uintptr_t i = 0; i < region->pageCount; i++) { MMHandlePageFault(space, region->baseAddress + i * K_PAGE_SIZE, ES_FLAGS_DEFAULT); } return (uint8_t *) region->baseAddress + offset2; } bool MMSharedResizeRegion(MMSharedRegion *region, size_t sizeBytes) { KMutexAcquire(®ion->mutex); EsDefer(KMutexRelease(®ion->mutex)); sizeBytes = (sizeBytes + K_PAGE_SIZE - 1) & ~(K_PAGE_SIZE - 1); size_t pages = sizeBytes / K_PAGE_SIZE; size_t oldPages = region->sizeBytes / K_PAGE_SIZE; void *oldData = region->data; void *newData = EsHeapAllocate(pages * sizeof(void *), true, K_CORE); if (!newData && pages) { return false; } if (oldPages > 
pages) { MMDecommit(K_PAGE_SIZE * (oldPages - pages), true); } else if (pages > oldPages) { if (!MMCommit(K_PAGE_SIZE * (pages - oldPages), true)) { EsHeapFree(newData, pages * sizeof(void *), K_CORE); return false; } } region->sizeBytes = sizeBytes; region->data = newData; // The old shared memory region was empty. if (!oldData) return true; if (oldPages > pages) { for (uintptr_t i = pages; i < oldPages; i++) { uintptr_t *addresses = (uintptr_t *) oldData; uintptr_t address = addresses[i]; if (address & MM_SHARED_ENTRY_PRESENT) MMPhysicalFree(address); } } uintptr_t copy = oldPages > pages ? pages : oldPages; EsMemoryCopy(region->data, oldData, sizeof(void *) * copy); EsHeapFree(oldData, oldPages * sizeof(void *), K_CORE); return true; } MMSharedRegion *MMSharedCreateRegion(size_t sizeBytes, bool fixed, uintptr_t below) { if (!sizeBytes) return nullptr; MMSharedRegion *region = (MMSharedRegion *) EsHeapAllocate(sizeof(MMSharedRegion), true, K_CORE); if (!region) return nullptr; region->handles = 1; if (!MMSharedResizeRegion(region, sizeBytes)) { EsHeapFree(region, 0, K_CORE); return nullptr; } if (fixed) { for (uintptr_t i = 0; i < region->sizeBytes >> K_PAGE_BITS; i++) { ((uintptr_t *) region->data)[i] = MMPhysicalAllocate(MM_PHYSICAL_ALLOCATE_ZEROED, 1, 1, below) | MM_SHARED_ENTRY_PRESENT; } } return region; } void MMSharedDestroyRegion(MMSharedRegion *region) { MMSharedResizeRegion(region, 0); // TODO Check leaks. EsHeapFree(region, 0, K_CORE); } void *MMStandardAllocate(MMSpace *space, size_t bytes, uint32_t flags, void *baseAddress, bool commitAll) { if (!space) space = kernelMMSpace; KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); MMRegion *region = MMReserve(space, bytes, flags | MM_REGION_NORMAL, (uintptr_t) baseAddress, true); if (!region) return nullptr; if (commitAll) { if (!MMCommitRange(space, region, 0, region->pageCount)) { MMUnreserve(space, region, false /* No pages have been mapped. 
*/); return nullptr; } } return (void *) region->baseAddress; } bool MMFree(MMSpace *space, void *address, size_t expectedSize, bool userOnly) { if (!space) space = kernelMMSpace; MMSharedRegion *sharedRegionToFree = nullptr; FSFile *nodeToFree = nullptr; uint64_t fileHandleFlags = 0; { KMutexAcquire(&space->reserveMutex); EsDefer(KMutexRelease(&space->reserveMutex)); MMRegion *region = MMFindRegion(space, (uintptr_t) address); if (!region) { return false; } if (userOnly && (~region->flags & MM_REGION_USER)) { KernelLog(LOG_ERROR, "Memory", "attempt to free non-user region", "MMFree - A user process is attempting to free a region %x that is not marked with MM_REGION_USER.\n", region); return false; } if (!KWriterLockTake(®ion->data.pin, K_LOCK_EXCLUSIVE, true /* poll */)) { KernelLog(LOG_ERROR, "Memory", "attempt to free in use region", "MMFree - Attempting to free a region %x that is currently in by a system call.\n", region); return false; } if (region->baseAddress != (uintptr_t) address && (~region->flags & MM_REGION_PHYSICAL /* physical region bases are not page aligned */)) { KernelLog(LOG_ERROR, "Memory", "incorrect base address", "MMFree - Passed the address %x to free region %x, which has baseAddress of %x.\n", address, region, region->baseAddress); return false; } if (expectedSize && (expectedSize + K_PAGE_SIZE - 1) / K_PAGE_SIZE != region->pageCount) { KernelLog(LOG_ERROR, "Memory", "incorrect free size", "MMFree - The region page count is %d, but the expected free size is %d.\n", region->pageCount, (expectedSize + K_PAGE_SIZE - 1) / K_PAGE_SIZE); return false; } bool unmapPages = true; if (region->flags & MM_REGION_NORMAL) { if (!MMDecommitRange(space, region, 0, region->pageCount)) { KernelPanic("MMFree - Could not decommit entire region %x (should not fail).\n", region); } if (region->data.normal.commitPageCount) { KernelPanic("MMFree - After decommiting range covering the entire region (%x), some pages were still commited.\n", region); } region->data.normal.commit.ranges.Free(); unmapPages = false; } else if (region->flags & MM_REGION_SHARED) { sharedRegionToFree = region->data.shared.region; } else if (region->flags & MM_REGION_FILE) { MMArchUnmapPages(space, region->baseAddress, region->pageCount, MM_UNMAP_PAGES_FREE_COPIED | MM_UNMAP_PAGES_BALANCE_FILE); unmapPages = false; FSFile *node = region->data.file.node; EsFileOffset removeStart = RoundDown(region->data.file.offset, (EsFileOffset) K_PAGE_SIZE); EsFileOffset removeEnd = RoundUp(removeStart + (region->pageCount << K_PAGE_BITS) - region->data.file.zeroedBytes, (EsFileOffset) K_PAGE_SIZE); nodeToFree = node; fileHandleFlags = region->data.file.fileHandleFlags; KMutexAcquire(&node->cache.cachedSectionsMutex); CCSpaceUncover(&node->cache, removeStart, removeEnd); KMutexRelease(&node->cache.cachedSectionsMutex); if (region->flags & MM_REGION_COPY_ON_WRITE) { MMDecommit(region->pageCount << K_PAGE_BITS, false); } } else if (region->flags & MM_REGION_PHYSICAL) { } else if (region->flags & MM_REGION_GUARD) { KernelLog(LOG_ERROR, "Memory", "attempt to free guard region", "MMFree - Attempting to free a guard region!\n"); return false; } else { KernelPanic("MMFree - Unsupported region type.\n"); } MMUnreserve(space, region, unmapPages); } if (sharedRegionToFree) CloseHandleToObject(sharedRegionToFree, KERNEL_OBJECT_SHMEM); if (nodeToFree && fileHandleFlags) CloseHandleToObject(nodeToFree, KERNEL_OBJECT_NODE, fileHandleFlags); return true; } bool MMSpaceInitialise(MMSpace *space) { // EsPrint("... 
Committed %d/%d fixed and %d pageable out of a maximum %d.\n", pmm.commitFixed, pmm.commitFixedLimit, pmm.commitPageable, pmm.commitLimit); space->user = true; MMRegion *region = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE); if (!region) { return false; } if (!MMArchInitialiseUserSpace(space, region)) { EsHeapFree(region, sizeof(MMRegion), K_CORE); return false; } TreeInsert(&space->freeRegionsBase, ®ion->itemBase, region, MakeShortKey(region->baseAddress)); TreeInsert(&space->freeRegionsSize, ®ion->itemSize, region, MakeShortKey(region->pageCount), AVL_DUPLICATE_KEYS_ALLOW); return true; } void MMSpaceDestroy(MMSpace *space) { LinkedItem *item = space->usedRegionsNonGuard.firstItem; while (item) { MMRegion *region = item->thisItem; item = item->nextItem; MMFree(space, (void *) region->baseAddress); } while (true) { AVLItem *item = TreeFind(&space->freeRegionsBase, MakeShortKey(0), TREE_SEARCH_SMALLEST_ABOVE_OR_EQUAL); if (!item) break; TreeRemove(&space->freeRegionsBase, &item->thisItem->itemBase); TreeRemove(&space->freeRegionsSize, &item->thisItem->itemSize); EsHeapFree(item->thisItem, sizeof(MMRegion), K_CORE); } MMArchFreeVAS(space); } bool MMUnmapFilePage(uintptr_t frameNumber) { KMutexAssertLocked(&pmm.pageFrameMutex); MMPageFrame *frame = pmm.pageFrames + frameNumber; if (frame->state != MMPageFrame::ACTIVE || !frame->active.references) { KernelPanic("MMUnmapFilePage - Corrupt page frame database (%d/%x).\n", frameNumber, frame); } // Decrease the reference count. frame->active.references--; if (frame->active.references) { return false; } // If there are no more references, then the frame can be moved to the standby or modified list. // EsPrint("Unmap file page: %x\n", frameNumber << K_PAGE_BITS); frame->state = MMPageFrame::STANDBY; pmm.countStandbyPages++; if (*frame->cacheReference != ((frameNumber << K_PAGE_BITS) | MM_SHARED_ENTRY_PRESENT)) { KernelPanic("MMUnmapFilePage - Corrupt shared reference back pointer in frame %x.\n", frame); } frame->list.next = pmm.firstStandbyPage; frame->list.previous = &pmm.firstStandbyPage; if (pmm.firstStandbyPage) pmm.pageFrames[pmm.firstStandbyPage].list.previous = &frame->list.next; if (!pmm.lastStandbyPage) pmm.lastStandbyPage = frameNumber; pmm.firstStandbyPage = frameNumber; MMUpdateAvailablePageCount(true); pmm.countActivePages--; return true; } void MMBalanceThread() { size_t targetAvailablePages = 0; while (true) { #if 1 if (MM_AVAILABLE_PAGES() >= targetAvailablePages) { // Wait for there to be a low number of available pages. KEventWait(&pmm.availableLow); targetAvailablePages = MM_LOW_AVAILABLE_PAGES_THRESHOLD + MM_PAGES_TO_FIND_BALANCE; } #else // Test the balance thread works correctly by running it constantly. targetAvailablePages = MM_AVAILABLE_PAGES() * 2; #endif // EsPrint("--> Balance!\n"); // Find a process to balance. KMutexAcquire(&scheduler.allProcessesMutex); Process *process = pmm.nextProcessToBalance ?: scheduler.allProcesses.firstItem->thisItem; pmm.nextProcessToBalance = process->allItem.nextItem ? process->allItem.nextItem->thisItem : nullptr; OpenHandleToObject(process, KERNEL_OBJECT_PROCESS, ES_FLAGS_DEFAULT); KMutexRelease(&scheduler.allProcessesMutex); // For every memory region... MMSpace *space = process->vmm; ThreadSetTemporaryAddressSpace(space); KMutexAcquire(&space->reserveMutex); LinkedItem *item = pmm.nextRegionToBalance ? 
&pmm.nextRegionToBalance->itemNonGuard : space->usedRegionsNonGuard.firstItem; while (item && MM_AVAILABLE_PAGES() < targetAvailablePages) { MMRegion *region = item->thisItem; // EsPrint("process = %x, region = %x, offset = %x\n", process, region, pmm.balanceResumePosition); KMutexAcquire(®ion->data.mapMutex); bool canResume = false; if (region->flags & MM_REGION_FILE) { canResume = true; MMArchUnmapPages(space, region->baseAddress, region->pageCount, MM_UNMAP_PAGES_BALANCE_FILE, targetAvailablePages - MM_AVAILABLE_PAGES(), &pmm.balanceResumePosition); } else if (region->flags & MM_REGION_CACHE) { // TODO Trim the cache's active sections and cached sections lists. KMutexAcquire(&activeSectionManager.mutex); LinkedItem *item = activeSectionManager.lruList.firstItem; while (item && MM_AVAILABLE_PAGES() < targetAvailablePages) { CCActiveSection *section = item->thisItem; if (section->cache && section->referencedPageCount) CCDereferenceActiveSection(section); item = item->nextItem; } KMutexRelease(&activeSectionManager.mutex); } KMutexRelease(®ion->data.mapMutex); if (MM_AVAILABLE_PAGES() >= targetAvailablePages && canResume) { // We have enough to pause. break; } item = item->nextItem; pmm.balanceResumePosition = 0; } if (item) { // Continue with this region next time. pmm.nextRegionToBalance = item->thisItem; pmm.nextProcessToBalance = process; // EsPrint("will resume at %x\n", pmm.balanceResumePosition); } else { // Go to the next process. pmm.nextRegionToBalance = nullptr; pmm.balanceResumePosition = 0; } KMutexRelease(&space->reserveMutex); ThreadSetTemporaryAddressSpace(nullptr); CloseHandleToObject(process, KERNEL_OBJECT_PROCESS); } } void MMZeroPageThread() { while (true) { KEventWait(&pmm.zeroPageEvent); KEventWait(&pmm.availableNotCritical); bool done = false; while (!done) { uintptr_t pages[PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES]; int i = 0; { KMutexAcquire(&pmm.pageFrameMutex); EsDefer(KMutexRelease(&pmm.pageFrameMutex)); for (; i < PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES; i++) { if (pmm.firstFreePage) { pages[i] = pmm.firstFreePage; MMPhysicalActivatePages(pages[i], 1, ES_FLAGS_DEFAULT); } else { done = true; break; } MMPageFrame *frame = pmm.pageFrames + pages[i]; frame->state = MMPageFrame::ACTIVE; pmm.freeOrZeroedPageBitset.Take(pages[i]); } } for (int j = 0; j < i; j++) pages[j] <<= K_PAGE_BITS; if (i) PMZero(pages, i, false); KMutexAcquire(&pmm.pageFrameMutex); pmm.countActivePages -= i; while (i--) { MMPhysicalInsertZeroedPage(pages[i] >> K_PAGE_BITS); } KMutexRelease(&pmm.pageFrameMutex); } } } #if 0 void PMCheckZeroed(uintptr_t page) { pmm.pmManipulationLock.Acquire(); { MMSpace *vas = coreMMSpace; void *region = pmm.pmManipulationRegion; vas->data.mapMutex.Acquire(); MMArchMapPage(vas, page, (uintptr_t) region, MM_MAP_PAGE_OVERWRITE); vas->data.mapMutex.Release(); pmm.pmManipulationProcessorLock.Acquire(); ProcessorInvalidatePage((uintptr_t) region); for (uintptr_t i = 0; i < K_PAGE_SIZE; i++) { if (((uint8_t *) region)[i]) { KernelPanic("PMCheckZeroed - Supposedly zeroed page %x was not zeroed.\n", page); } } pmm.pmManipulationProcessorLock.Release(); } pmm.pmManipulationLock.Release(); } #endif void PMZeroPartial(uintptr_t page, uintptr_t start, uintptr_t end) { KMutexAcquire(&pmm.pmManipulationLock); MMSpace *vas = coreMMSpace; void *region = pmm.pmManipulationRegion; MMArchMapPage(vas, page, (uintptr_t) region, MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES); KSpinlockAcquire(&pmm.pmManipulationProcessorLock); ProcessorInvalidatePage((uintptr_t) region); 
void PMZero(uintptr_t *pages, size_t pageCount, bool contiguous) {
	KMutexAcquire(&pmm.pmManipulationLock);

	repeat:;
	size_t doCount = pageCount > PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES ? PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES : pageCount;
	pageCount -= doCount;

	{
		MMSpace *vas = coreMMSpace;
		void *region = pmm.pmManipulationRegion;

		for (uintptr_t i = 0; i < doCount; i++) {
			MMArchMapPage(vas, contiguous ? pages[0] + (i << K_PAGE_BITS) : pages[i], 
					(uintptr_t) region + K_PAGE_SIZE * i, MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES);
		}

		KSpinlockAcquire(&pmm.pmManipulationProcessorLock);

		for (uintptr_t i = 0; i < doCount; i++) {
			ProcessorInvalidatePage((uintptr_t) region + i * K_PAGE_SIZE);
		}

		EsMemoryZero(region, doCount * K_PAGE_SIZE);

		KSpinlockRelease(&pmm.pmManipulationProcessorLock);
	}

	if (pageCount) {
		if (!contiguous) pages += PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES;
		goto repeat;
	}

	// if (pageNumbers) EsPrint("w%d\n", pmm.pmManipulationLock.blockedThreads.count);

	KMutexRelease(&pmm.pmManipulationLock);
}

void PMCopy(uintptr_t page, void *_source, size_t pageCount) {
	uint8_t *source = (uint8_t *) _source;
	KMutexAcquire(&pmm.pmManipulationLock);

	repeat:;
	size_t doCount = pageCount > PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES ? PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES : pageCount;
	pageCount -= doCount;

	{
		MMSpace *vas = coreMMSpace;
		void *region = pmm.pmManipulationRegion;

		for (uintptr_t i = 0; i < doCount; i++) {
			MMArchMapPage(vas, page + K_PAGE_SIZE * i, (uintptr_t) region + K_PAGE_SIZE * i, 
					MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES);
		}

		KSpinlockAcquire(&pmm.pmManipulationProcessorLock);

		for (uintptr_t i = 0; i < doCount; i++) {
			ProcessorInvalidatePage((uintptr_t) region + i * K_PAGE_SIZE);
		}

		EsMemoryCopy(region, source, doCount * K_PAGE_SIZE);

		KSpinlockRelease(&pmm.pmManipulationProcessorLock);
	}

	if (pageCount) {
		page += PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE;
		source += PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE;
		goto repeat;
	}

	KMutexRelease(&pmm.pmManipulationLock);
}
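
#if 0
// A minimal usage sketch, not part of the kernel: zero a small contiguous physical run with PMZero.
// The function name, the page count of 4 and the argument annotations are illustrative assumptions;
// only MMPhysicalAllocate, PMZero and MMPhysicalFree as used elsewhere in this file are relied upon.
void ExamplePMZeroContiguous() {
	uintptr_t base = MMPhysicalAllocate(MM_PHYSICAL_ALLOCATE_CAN_FAIL | MM_PHYSICAL_ALLOCATE_COMMIT_NOW, 
			4 /* pages */, 1 /* no particular alignment */, 0 /* no address constraint */);
	if (!base) return;

	// PMZero takes physical byte addresses; with contiguous = true only the first entry is read.
	PMZero(&base, 4, true /* contiguous */);

	MMPhysicalFree(base, false, 4);
}
#endif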
void PMRead(uintptr_t page, void *_source, size_t pageCount) {
	uint8_t *source = (uint8_t *) _source;
	KMutexAcquire(&pmm.pmManipulationLock);

	repeat:;
	size_t doCount = pageCount > PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES ? PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES : pageCount;
	pageCount -= doCount;

	{
		MMSpace *vas = coreMMSpace;
		void *region = pmm.pmManipulationRegion;

		for (uintptr_t i = 0; i < doCount; i++) {
			MMArchMapPage(vas, page + K_PAGE_SIZE * i, (uintptr_t) region + K_PAGE_SIZE * i, 
					MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES);
		}

		KSpinlockAcquire(&pmm.pmManipulationProcessorLock);

		for (uintptr_t i = 0; i < doCount; i++) {
			ProcessorInvalidatePage((uintptr_t) region + i * K_PAGE_SIZE);
		}

		EsMemoryCopy(source, region, doCount * K_PAGE_SIZE);

		KSpinlockRelease(&pmm.pmManipulationProcessorLock);
	}

	if (pageCount) {
		page += PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE;
		source += PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE;
		goto repeat;
	}

	KMutexRelease(&pmm.pmManipulationLock);
}

void *Pool::Add(size_t _elementSize) {
	KMutexAcquire(&mutex);
	EsDefer(KMutexRelease(&mutex));

	if (elementSize && _elementSize != elementSize) KernelPanic("Pool::Add - Pool element size mismatch.\n");
	elementSize = _elementSize;

	void *address;

	if (cacheEntries) {
		address = cache[--cacheEntries];
		EsMemoryZero(address, elementSize);
	} else {
		address = EsHeapAllocate(elementSize, true, K_FIXED);
	}

	return address;
}

void Pool::Remove(void *address) {
	KMutexAcquire(&mutex);
	EsDefer(KMutexRelease(&mutex));

	if (!address) return;

#if 1
	if (cacheEntries == POOL_CACHE_COUNT) {
		EsHeapFree(address, elementSize, K_FIXED);
	} else {
		cache[cacheEntries++] = address;
	}
#else
	EsHeapFree(address, elementSize);
#endif
}

MMRegion *MMFindAndPinRegion(MMSpace *space, uintptr_t address, uintptr_t size) {
	if (address + size < address) {
		return nullptr;
	}

	KMutexAcquire(&space->reserveMutex);
	EsDefer(KMutexRelease(&space->reserveMutex));

	MMRegion *region = MMFindRegion(space, address);

	if (!region) {
		return nullptr;
	}

	if (region->baseAddress > address) {
		return nullptr;
	}

	if (region->baseAddress + region->pageCount * K_PAGE_SIZE < address + size) {
		return nullptr;
	}

	if (!KWriterLockTake(&region->data.pin, K_LOCK_SHARED, true /* poll */)) {
		return nullptr;
	}

	return region;
}

void MMUnpinRegion(MMSpace *space, MMRegion *region) {
	KMutexAcquire(&space->reserveMutex);
	KWriterLockReturn(&region->data.pin, K_LOCK_SHARED);
	KMutexRelease(&space->reserveMutex);
}
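
#if 0
// A minimal usage sketch, not part of the kernel: pin the region backing a user-supplied buffer for
// the duration of a system call, so the region cannot be freed while it is being accessed. The
// function name and the buffer/byteCount parameters are hypothetical; only MMFindAndPinRegion and
// MMUnpinRegion as defined above are used.
bool ExampleAccessUserBuffer(MMSpace *space, uintptr_t buffer, size_t byteCount) {
	MMRegion *region = MMFindAndPinRegion(space, buffer, byteCount);
	if (!region) return false; // Not fully contained in one region, or the region is being freed.

	// ... safely access [buffer, buffer + byteCount) here ...

	MMUnpinRegion(space, region);
	return true;
}
#endif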
bool MMPhysicalAllocateAndMap(size_t sizeBytes, size_t alignmentBytes, size_t maximumBits, bool zeroed, 
		uint64_t caching, uint8_t **_virtualAddress, uintptr_t *_physicalAddress) {
	if (!sizeBytes) sizeBytes = 1;
	if (!alignmentBytes) alignmentBytes = 1;

	bool noBelow = false;

#ifdef ES_BITS_32
	if (!maximumBits || maximumBits >= 32) noBelow = true;
#endif

#ifdef ES_BITS_64
	if (!maximumBits || maximumBits >= 64) noBelow = true;
#endif

	uintptr_t sizePages = (sizeBytes + K_PAGE_SIZE - 1) >> K_PAGE_BITS;

	uintptr_t physicalAddress = MMPhysicalAllocate(MM_PHYSICAL_ALLOCATE_CAN_FAIL | MM_PHYSICAL_ALLOCATE_COMMIT_NOW, 
			sizePages, (alignmentBytes + K_PAGE_SIZE - 1) >> K_PAGE_BITS, noBelow ? 0 : ((size_t) 1 << maximumBits));

	if (!physicalAddress) {
		return false;
	}

	void *virtualAddress = MMMapPhysical(kernelMMSpace, physicalAddress, sizeBytes, caching);

	if (!virtualAddress) {
		MMPhysicalFree(physicalAddress, false, sizePages);
		return false;
	}

	if (zeroed) {
		EsMemoryZero(virtualAddress, sizeBytes);
	}

	*_virtualAddress = (uint8_t *) virtualAddress;
	*_physicalAddress = physicalAddress;

	return true;
}

void MMPhysicalFreeAndUnmap(void *virtualAddress, uintptr_t physicalAddress) {
	KMutexAcquire(&kernelMMSpace->reserveMutex);
	MMRegion *region = MMFindRegion(kernelMMSpace, (uintptr_t) virtualAddress);

	if (!region || (~region->flags & MM_REGION_PHYSICAL) || region->data.physical.offset != physicalAddress) {
		KernelPanic("MMPhysicalFreeAndUnmap - Virtual address %x did not point to a region of physical memory mapping %x.\n", 
				virtualAddress, physicalAddress);
	}

	size_t pageCount = region->pageCount;
	MMUnreserve(kernelMMSpace, region, true);
	KMutexRelease(&kernelMMSpace->reserveMutex);

	MMPhysicalFree(physicalAddress, false, pageCount);
}

MMSpace *MMGetKernelSpace() {
	return kernelMMSpace;
}

MMSpace *MMGetCurrentProcessSpace() {
	return GetCurrentThread()->process->vmm;
}

bool MMFaultRange(uintptr_t address, uintptr_t byteCount, uint32_t flags = ES_FLAGS_DEFAULT) {
	uintptr_t start = address & ~(K_PAGE_SIZE - 1);
	uintptr_t end = (address + byteCount - 1) & ~(K_PAGE_SIZE - 1);

	for (uintptr_t page = start; page <= end; page += K_PAGE_SIZE) {
		if (!MMArchHandlePageFault(page, flags)) {
			return false;
		}
	}

	return true;
}

void MMRemapPhysical(MMSpace *space, const void *virtualAddress, uintptr_t newPhysicalAddress) {
	if (ProcessorAreInterruptsEnabled()) {
		KernelPanic("MMRemapPhysical - Cannot remap address with interrupts enabled (does not invalidate the page on other processors).\n");
	}

	MMArchMapPage(space, newPhysicalAddress, (uintptr_t) virtualAddress, MM_MAP_PAGE_OVERWRITE | MM_MAP_PAGE_NO_NEW_TABLES);
}

void MMObjectCacheInsert(MMObjectCache *cache, MMObjectCacheItem *item) {
	KSpinlockAcquire(&cache->lock);
	cache->items.Insert(item, false /* end */);
	cache->count++;
	__sync_fetch_and_add(&pmm.approximateTotalObjectCacheBytes, cache->averageObjectBytes);

	if (MM_OBJECT_CACHE_SHOULD_TRIM()) {
		KEventSet(&pmm.trimObjectCaches, true);
	}

	KSpinlockRelease(&cache->lock);
}

void MMObjectCacheRemove(MMObjectCache *cache, MMObjectCacheItem *item, bool alreadyLocked) {
	if (!alreadyLocked) KSpinlockAcquire(&cache->lock);
	else KSpinlockAssertLocked(&cache->lock);
	item->Remove();
	cache->count--;
	__sync_fetch_and_sub(&pmm.approximateTotalObjectCacheBytes, cache->averageObjectBytes);
	if (!alreadyLocked) KSpinlockRelease(&cache->lock);
}

MMObjectCacheItem *MMObjectCacheRemoveLRU(MMObjectCache *cache) {
	MMObjectCacheItem *item = nullptr;

	KSpinlockAcquire(&cache->lock);

	if (cache->count) {
		item = cache->items.first;
		MMObjectCacheRemove(cache, item, true);
	}

	KSpinlockRelease(&cache->lock);

	return item;
}

void MMObjectCacheRegister(MMObjectCache *cache, bool (*trim)(MMObjectCache *), size_t averageObjectBytes) {
	KMutexAcquire(&pmm.objectCacheListMutex);
	cache->trim = trim;
	cache->averageObjectBytes = averageObjectBytes;
	cache->item.thisItem = cache;
	pmm.objectCacheList.InsertEnd(&cache->item);
	KMutexRelease(&pmm.objectCacheListMutex);
}
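
#if 0
// A minimal usage sketch, not part of the kernel: a hypothetical cache of ExampleObject structures
// registered with the object cache trimming machinery. The object type, its size estimate and the
// trim policy are illustrative assumptions; only the MMObjectCache* functions defined in this file
// are relied upon.
struct ExampleObject {
	MMObjectCacheItem cacheItem;
	uint8_t payload[256];
};

MMObjectCache exampleObjectCache;

bool ExampleObjectCacheTrim(MMObjectCache *cache) {
	MMObjectCacheItem *item = MMObjectCacheRemoveLRU(cache);
	if (!item) return false; // Nothing left to trim; tell the trim thread to stop.
	ExampleObject *object = EsContainerOf(ExampleObject, cacheItem, item);
	EsHeapFree(object, sizeof(ExampleObject), K_FIXED);
	return true; // The trim thread may call us again.
}

void ExampleObjectCacheInitialise() {
	MMObjectCacheRegister(&exampleObjectCache, ExampleObjectCacheTrim, sizeof(ExampleObject));
}

void ExampleObjectRetire(ExampleObject *object) {
	// Instead of freeing the object immediately, place it on the cache;
	// the trim thread reclaims it later if memory runs low.
	MMObjectCacheInsert(&exampleObjectCache, &object->cacheItem);
}
#endif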
void MMObjectCacheUnregister(MMObjectCache *cache) {
	KMutexAcquire(&pmm.objectCacheListMutex);
	pmm.objectCacheList.Remove(&cache->item);
	KMutexRelease(&pmm.objectCacheListMutex);

	// Wait for any trim threads still using the cache to finish.
	KWriterLockTake(&cache->trimLock, K_LOCK_EXCLUSIVE);
	KWriterLockReturn(&cache->trimLock, K_LOCK_EXCLUSIVE);
}

void MMObjectCacheFlush(MMObjectCache *cache) {
	if (cache->item.list) KernelPanic("MMObjectCacheFlush - Cache %x must be unregistered before flushing.\n", cache);

	// Wait for any trim threads still using the cache to finish.
	KWriterLockTake(&cache->trimLock, K_LOCK_EXCLUSIVE);

	// Trim the cache until it is empty.
	// The trim callback is allowed to increase cache->count,
	// but nobody else should be increasing it once it has been unregistered.
	while (cache->count) cache->trim(cache);

	// Return the trim lock.
	KWriterLockReturn(&cache->trimLock, K_LOCK_EXCLUSIVE);
}

void MMObjectCacheTrimThread() {
	MMObjectCache *cache = nullptr;

	while (true) {
		while (!MM_OBJECT_CACHE_SHOULD_TRIM()) {
			KEventReset(&pmm.trimObjectCaches);
			KEventWait(&pmm.trimObjectCaches);
		}

		KMutexAcquire(&pmm.objectCacheListMutex);

		// TODO Is there a faster way to find the next cache?
		// This needs to work with multiple producers and consumers.
		// And I don't want to put the caches in an array, because then registering a cache could fail.

		MMObjectCache *previousCache = cache;
		cache = nullptr;

		LinkedItem *item = pmm.objectCacheList.firstItem;

		while (item) {
			if (item->thisItem == previousCache && item->nextItem) {
				cache = item->nextItem->thisItem;
				break;
			}

			item = item->nextItem;
		}

		if (!cache && pmm.objectCacheList.firstItem) {
			cache = pmm.objectCacheList.firstItem->thisItem;
		}

		if (cache) {
			KWriterLockTake(&cache->trimLock, K_LOCK_SHARED);
		}

		KMutexRelease(&pmm.objectCacheListMutex);

		if (!cache) {
			continue;
		}

		for (uintptr_t i = 0; i < MM_OBJECT_CACHE_TRIM_GROUP_COUNT; i++) {
			if (!cache->trim(cache)) {
				break;
			}
		}

		KWriterLockReturn(&cache->trimLock, K_LOCK_SHARED);
	}
}

void MMSpaceOpenReference(MMSpace *space) {
	if (space == kernelMMSpace) {
		return;
	}

	if (space->referenceCount < 1) {
		KernelPanic("MMSpaceOpenReference - Space %x has invalid reference count.\n", space);
	}

	__sync_fetch_and_add(&space->referenceCount, 1);
}

void MMSpaceCloseReference(MMSpace *space) {
	if (space == kernelMMSpace) {
		return;
	}

	if (space->referenceCount < 1) {
		KernelPanic("MMSpaceCloseReference - Space %x has invalid reference count.\n", space);
	}

	if (__sync_fetch_and_sub(&space->referenceCount, 1) > 1) {
		return;
	}

	KRegisterAsyncTask(&space->removeAsyncTask, [] (KAsyncTask *task) { 
		MMSpace *space = EsContainerOf(MMSpace, removeAsyncTask, task);
		MMArchFinalizeVAS(space);
		scheduler.mmSpacePool.Remove(space);
	});
}

void MMInitialise() {
	{
		// Initialise coreMMSpace and kernelMMSpace.

		mmCoreRegions[0].core.used = false;
		mmCoreRegionCount = 1;
		MMArchInitialise();

		MMRegion *region = (MMRegion *) EsHeapAllocate(sizeof(MMRegion), true, K_CORE);
		region->baseAddress = MM_KERNEL_SPACE_START;
		region->pageCount = MM_KERNEL_SPACE_SIZE / K_PAGE_SIZE;
		TreeInsert(&kernelMMSpace->freeRegionsBase, &region->itemBase, region, MakeShortKey(region->baseAddress));
		TreeInsert(&kernelMMSpace->freeRegionsSize, &region->itemSize, region, MakeShortKey(region->pageCount), AVL_DUPLICATE_KEYS_ALLOW);
	}

	{
		// Initialise physical memory management.

		KMutexAcquire(&kernelMMSpace->reserveMutex);
		pmm.pmManipulationRegion = (void *) MMReserve(kernelMMSpace, 
				PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE, ES_FLAGS_DEFAULT)->baseAddress;
		KMutexRelease(&kernelMMSpace->reserveMutex);

		// 1 extra for the top page, then round up so the page bitset is byte-aligned.
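		// For example, assuming 4 KiB pages (K_PAGE_BITS == 12): if the highest physical address is
		// 0xFFFFF000 (the last page below 4 GiB), the count is (0xFFFFF000 + 0x8000) >> 12 = 0x100007
		// page frame entries, covering every frame of RAM plus a little padding for the bitset.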
		uintptr_t pageFrameDatabaseCount = (MMArchGetPhysicalMemoryHighest() + (K_PAGE_SIZE << 3)) >> K_PAGE_BITS;
		pmm.pageFrames = (MMPageFrame *) MMStandardAllocate(kernelMMSpace, pageFrameDatabaseCount * sizeof(MMPageFrame), MM_REGION_FIXED);
		pmm.freeOrZeroedPageBitset.Initialise(pageFrameDatabaseCount, true);

		pmm.pageFrameDatabaseCount = pageFrameDatabaseCount; // Only set this after the database is ready, or it may be accessed mid-allocation!

		MMPhysicalInsertFreePagesStart();
		uint64_t commitLimit = MMArchPopulatePageFrameDatabase();
		MMPhysicalInsertFreePagesEnd();
		pmm.pageFrameDatabaseInitialised = true;

		pmm.commitLimit = pmm.commitFixedLimit = commitLimit;
		KernelLog(LOG_INFO, "Memory", "pmm initialised", "MMInitialise - PMM initialised with a fixed commit limit of %d pages.\n", pmm.commitLimit);
	}

	{
		// Initialise file cache.

		CCInitialise();
	}

	{
		// Create threads.

		pmm.zeroPageEvent.autoReset = true;
		MMCommit(PHYSICAL_MEMORY_MANIPULATION_REGION_PAGES * K_PAGE_SIZE, true);
		pmm.zeroPageThread = ThreadSpawn("MMZero", (uintptr_t) MMZeroPageThread, 0, SPAWN_THREAD_LOW_PRIORITY);
		ThreadSpawn("MMBalance", (uintptr_t) MMBalanceThread, 0, ES_FLAGS_DEFAULT)->isPageGenerator = true;
		ThreadSpawn("MMObjTrim", (uintptr_t) MMObjectCacheTrimThread, 0, ES_FLAGS_DEFAULT);
	}

	{
		// Create the global data shared region, and the API table region.

		mmAPITableRegion = MMSharedCreateRegion(0xF000, false, 0);
		mmGlobalDataRegion = MMSharedCreateRegion(sizeof(GlobalData), false, 0);
		mmGlobalData = (GlobalData *) MMMapShared(kernelMMSpace, mmGlobalDataRegion, 0, sizeof(GlobalData), MM_REGION_FIXED);
		MMFaultRange((uintptr_t) mmGlobalData, sizeof(GlobalData), MM_HANDLE_PAGE_FAULT_FOR_SUPERVISOR);
	}
}

#endif
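
#if 0
// A minimal usage sketch, not part of the kernel: allocate a page of physically contiguous, zeroed
// memory and map it into the kernel address space, as a driver might for a hardware descriptor ring.
// The function name and the size, alignment and caching arguments are illustrative assumptions; only
// MMPhysicalAllocateAndMap and MMPhysicalFreeAndUnmap as defined above are used.
bool ExampleAllocateDescriptorRing() {
	uint8_t *virtualAddress;
	uintptr_t physicalAddress;

	if (!MMPhysicalAllocateAndMap(K_PAGE_SIZE, K_PAGE_SIZE /* alignment */, 0 /* no address limit */, 
			true /* zeroed */, ES_FLAGS_DEFAULT /* assumed default caching */, &virtualAddress, &physicalAddress)) {
		return false;
	}

	// ... give physicalAddress to the device and access the ring through virtualAddress ...

	MMPhysicalFreeAndUnmap(virtualAddress, physicalAddress);
	return true;
}
#endif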