essence-os/desktop/posix.cpp

827 lines
27 KiB
C++

// This file is part of the Essence operating system.
// It is released under the terms of the MIT license -- see LICENSE.md.
// Written by: nakst.
#define ES_API
#define ES_FORWARD
#include <essence.h>
#ifdef ENABLE_POSIX_SUBSYSTEM
#define ARRAY_DEFINITIONS_ONLY
#include <shared/array.cpp>
extern "C" void *ProcessorTLSRead(uintptr_t offset);
extern "C" void ProcessorTLSWrite(uintptr_t offset, void *value);
extern ptrdiff_t tlsStorageOffset;
bool NodeFindMountPoint(const char *prefix, size_t prefixBytes, EsMountPoint *result, bool mutexTaken);
EsError NodeOpen(const char *path, size_t pathBytes, uint32_t flags, _EsNodeInformation *node);
EsProcessStartupInformation *ProcessGetStartupInformation();
#define _POSIX_SOURCE
#define _GNU_SOURCE
#define __NEED_struct_iovec
#define __NEED_sigset_t
#define __NEED_struct_timespec
#define __NEED_time_t
#include <limits.h>
#include <bits/syscall.h>
#include <bits/alltypes.h>
#include <signal.h>
#include <sys/sysinfo.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <bits/ioctl.h>
#include <fcntl.h>
#include <dirent.h>
#include <unistd.h>
#include <poll.h>
#include <sys/utsname.h>
#include <errno.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <sched.h>
#include <elf.h>
struct ChildProcess {
uint64_t id;
EsHandle handle;
};
char *workingDirectory;
Array<ChildProcess> childProcesses;
Array<void *> _argv;
EsHandle posixMountPointBase;
EsMutex posixMountPointBaseMutex;
#ifdef ES_ARCH_X86_64
Elf64_Phdr *tlsHeader;
#endif
#ifdef DEBUG_BUILD
double syscallTimeSpent[1024];
uint64_t syscallCallCount[1024];
#endif
const char *syscallNames[] = {
"read", "write", "open", "close", "stat", "fstat", "lstat", "poll",
"lseek", "mmap", "mprotect", "munmap", "brk", "rt_sigaction", "rt_sigprocmask", "rt_sigreturn",
"ioctl", "pread64", "pwrite64", "readv", "writev", "access", "pipe", "select",
"sched_yield", "mremap", "msync", "mincore", "madvise", "shmget", "shmat", "shmctl",
"dup", "dup2", "pause", "nanosleep", "getitimer", "alarm", "setitimer", "getpid",
"sendfile", "socket", "connect", "accept", "sendto", "recvfrom", "sendmsg", "recvmsg",
"shutdown", "bind", "listen", "getsockname", "getpeername", "socketpair", "setsockopt", "getsockopt",
"clone", "fork", "vfork", "execve", "exit", "wait4", "kill", "uname",
"semget", "semop", "semctl", "shmdt", "msgget", "msgsnd", "msgrcv", "msgctl",
"fcntl", "flock", "fsync", "fdatasync", "truncate", "ftruncate", "getdents", "getcwd",
"chdir", "fchdir", "rename", "mkdir", "rmdir", "creat", "link", "unlink",
"symlink", "readlink", "chmod", "fchmod", "chown", "fchown", "lchown", "umask",
"gettimeofday", "getrlimit", "getrusage", "sysinfo", "times", "ptrace", "getuid", "syslog",
"getgid", "setuid", "setgid", "geteuid", "getegid", "setpgid", "getppid", "getpgrp",
"setsid", "setreuid", "setregid", "getgroups", "setgroups", "setresuid", "getresuid", "setresgid",
"getresgid", "getpgid", "setfsuid", "setfsgid", "getsid", "capget", "capset", "rt_sigpending",
"rt_sigtimedwait", "rt_sigqueueinfo", "rt_sigsuspend", "sigaltstack", "utime", "mknod", "uselib", "personality",
"ustat", "statfs", "fstatfs", "sysfs", "getpriority", "setpriority", "sched_setparam", "sched_getparam",
"sched_setscheduler", "sched_getscheduler", "sched_get_priority_max", "sched_get_priority_min", "sched_rr_get_interval", "mlock", "munlock", "mlockall",
"munlockall", "vhangup", "modify_ldt", "pivot_root", "_sysctl", "prctl", "arch_prctl", "adjtimex",
"setrlimit", "chroot", "sync", "acct", "settimeofday", "mount", "umount2", "swapon",
"swapoff", "reboot", "sethostname", "setdomainname", "iopl", "ioperm", "create_module", "init_module",
"delete_module", "get_kernel_syms", "query_module", "quotactl", "nfsservctl", "getpmsg", "putpmsg", "afs_syscall",
"tuxcall", "security", "gettid", "readahead", "setxattr", "lsetxattr", "fsetxattr", "getxattr",
"lgetxattr", "fgetxattr", "listxattr", "llistxattr", "flistxattr", "removexattr", "lremovexattr", "fremovexattr",
"tkill", "time", "futex", "sched_setaffinity", "sched_getaffinity", "set_thread_area", "io_setup", "io_destroy",
"io_getevents", "io_submit", "io_cancel", "get_thread_area", "lookup_dcookie", "epoll_create", "epoll_ctl_old", "epoll_wait_old",
"remap_file_pages", "getdents64", "set_tid_address", "restart_syscall", "semtimedop", "fadvise64", "timer_create", "timer_settime",
"timer_gettime", "timer_getoverrun", "timer_delete", "clock_settime", "clock_gettime", "clock_getres", "clock_nanosleep", "exit_group",
"epoll_wait", "epoll_ctl", "tgkill", "utimes", "vserver", "mbind", "set_mempolicy", "get_mempolicy",
"mq_open", "mq_unlink", "mq_timedsend", "mq_timedreceive", "mq_notify", "mq_getsetattr", "kexec_load", "waitid",
"add_key", "request_key", "keyctl", "ioprio_set", "ioprio_get", "inotify_init", "inotify_add_watch", "inotify_rm_watch",
"migrate_pages", "openat", "mkdirat", "mknodat", "fchownat", "futimesat", "newfstatat", "unlinkat",
"renameat", "linkat", "symlinkat", "readlinkat", "fchmodat", "faccessat", "pselect6", "ppoll",
"unshare", "set_robust_list", "get_robust_list", "splice", "tee", "sync_file_range", "vmsplice", "move_pages",
"utimensat", "epoll_pwait", "signalfd", "timerfd_create", "eventfd", "fallocate", "timerfd_settime", "timerfd_gettime",
"accept4", "signalfd4", "eventfd2", "epoll_create1", "dup3", "pipe2", "inotify_init1", "preadv",
"pwritev", "rt_tgsigqueueinfo", "perf_event_open", "recvmmsg", "fanotify_init", "fanotify_mark", "prlimit64", "name_to_handle_at",
"open_by_handle_at", "clock_adjtime", "syncfs", "sendmmsg", "setns", "getcpu", "process_vm_readv", "process_vm_writev",
"kcmp", "finit_module", "sched_setattr", "sched_getattr", "renameat2", "seccomp", "getrandom", "memfd_create",
"kexec_file_load", "bpf", "execveat", "userfaultfd", "membarrier", "mlock2", "copy_file_range", "preadv2",
"pwritev2", "pkey_mprotect", "pkey_alloc", "pkey_free", "statx",
};
extern "C" void ProcessorCheckStackAlignment();
char *EsPOSIXConvertPath(const char *path, size_t *outNameLength, bool addPOSIXMountPointPrefix) {
const char *posixNames[2] = { path[0] != '/' ? workingDirectory : nullptr, path };
size_t posixNameLengths[2] = { path[0] != '/' ? EsCStringLength(workingDirectory) : 0, EsCStringLength(path) };
char *name = (char *) EsHeapAllocate(posixNameLengths[0] + posixNameLengths[1] + (addPOSIXMountPointPrefix ? 7 : 0) + 2 /* space for / and NUL; see chdir */, true);
if (!name) return nullptr;
size_t nameLength = 0;
if (addPOSIXMountPointPrefix) name += 7;
for (uintptr_t i = 0; i < 2; i++) {
while (posixNameLengths[i]) {
const char *entry = posixNames[i];
size_t entryLength = 0;
while (posixNameLengths[i]) {
posixNameLengths[i]--;
posixNames[i]++;
if (entry[entryLength] == '/') break;
entryLength++;
}
if (!entryLength || (entryLength == 1 && entry[0] == '.')) {
// Ignore.
} else if (entryLength == 2 && entry[0] == '.' && entry[1] == '.' && nameLength) {
while (name[--nameLength] != '/');
} else {
name[nameLength++] = '/';
EsMemoryCopy(name + nameLength, entry, entryLength);
nameLength += entryLength;
}
}
}
if (!nameLength) {
nameLength++;
name[0] = '/';
}
if (addPOSIXMountPointPrefix) {
name -= 7;
nameLength += 7;
EsMemoryCopy(name, "|POSIX:", 7);
}
if (outNameLength) *outNameLength = nameLength;
name[nameLength] = 0;
return name;
}
long EsPOSIXSystemCall(long n, long a1, long a2, long a3, long a4, long a5, long a6) {
#ifdef DEBUG_BUILD
ProcessorCheckStackAlignment();
#endif
if ((uintptr_t) n < sizeof(syscallNames) / sizeof(syscallNames[0])) {
// EsPrint(":: %z %x %x %x\n", syscallNames[n], a1, a2, a3);
}
if (!posixMountPointBase) {
EsMutexAcquire(&posixMountPointBaseMutex);
if (!posixMountPointBase) {
_EsNodeInformation node;
EsAssert(ES_SUCCESS == NodeOpen(EsLiteral("|POSIX:/"), ES_NODE_DIRECTORY | ES__NODE_DIRECTORY_WRITE, &node));
posixMountPointBase = node.handle;
}
EsMutexRelease(&posixMountPointBaseMutex);
}
long returnValue = 0;
_EsPOSIXSyscall syscall = { n, a1, a2, a3, a4, a5, a6 };
#ifdef DEBUG_BUILD
double startTime = EsTimeStampMs();
static double processStartTime = 0;
if (!processStartTime) {
processStartTime = startTime;
}
if (n == SYS_exit_group) {
double processExecutionTime = startTime - processStartTime;
EsPrint("=== System call performance ===\n");
int array[sizeof(syscallNames) / sizeof(syscallNames[0])];
for (uintptr_t i = 0; i < sizeof(array) / sizeof(array[0]); i++) {
array[i] = i;
}
EsCRTqsort(array, sizeof(array) / sizeof(array[0]), sizeof(array[0]), [] (const void *_left, const void *_right) {
int left = *(int *) _left, right = *(int *) _right;
if (syscallTimeSpent[left] > syscallTimeSpent[right]) return -1;
if (syscallTimeSpent[left] < syscallTimeSpent[right]) return 1;
return 0;
});
double total = 0;
for (uintptr_t i = 0; i < sizeof(array) / sizeof(array[0]); i++) {
if (!syscallTimeSpent[array[i]]) break;
EsPrint("%z - %Fms - %d calls\n", syscallNames[array[i]], syscallTimeSpent[array[i]], syscallCallCount[array[i]]);
total += syscallTimeSpent[array[i]];
}
EsPrint("Total time in system calls: %Fms\n", total);
EsPrint("Total run time of process: %Fms\n", processExecutionTime);
}
#endif
switch (n) {
case SYS_open: {
size_t pathBytes;
char *path = EsPOSIXConvertPath((const char *) a1, &pathBytes, false);
syscall.arguments[0] = (long) path;
syscall.arguments[4] = (long) posixMountPointBase;
syscall.arguments[6] = (long) pathBytes;
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
// EsPrint("SYS_open '%s' with handle %d\n", pathBytes, path, returnValue);
EsHeapFree(path);
} break;
case SYS_vfork: {
long result = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
if (result > 0) {
EsHandle handle = result;
ChildProcess pid = { EsProcessGetID(handle), handle };
childProcesses.Add(pid);
returnValue = pid.id;
}
} break;
case SYS_pipe: {
syscall.index = SYS_pipe2;
syscall.arguments[1] = 0;
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
} break;
case SYS_close: {
// EsPrint("SYS_close handle %d\n", a1);
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
} break;
case SYS_pipe2:
case SYS_writev:
case SYS_fcntl:
case SYS_dup2:
case SYS_write:
case SYS_readv:
case SYS_lseek:
case SYS_read:
case SYS_fstat:
case SYS_sysinfo:
case SYS_getdents64:
case SYS_exit_group:
case SYS_ioctl: {
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
} break;
case SYS_chdir: {
char *simplified = EsPOSIXConvertPath((const char *) a1, nullptr, false);
EsHeapFree(workingDirectory);
size_t oldLength = EsCStringLength(simplified);
simplified[oldLength] = '/';
simplified[oldLength + 1] = 0;
workingDirectory = simplified;
} break;
case SYS_getpid: {
// Run the system call directly, so that the kernel can handle the vfork()'d case.
EsObjectID id;
EsSyscall(ES_SYSCALL_THREAD_GET_ID, ES_CURRENT_PROCESS, (uintptr_t) &id, 0, 0);
returnValue = id;
} break;
case SYS_gettid: {
returnValue = EsThreadGetID(ES_CURRENT_THREAD);
} break;
case SYS_getcwd: {
size_t bytes = EsCStringLength(workingDirectory) + 1;
char *destination = (char *) a1;
if (bytes > (size_t) a2) {
returnValue = -ERANGE;
} else {
EsMemoryCopy(destination, workingDirectory, bytes);
if (workingDirectory[bytes - 2] == '/' && bytes > 2) destination[bytes - 2] = 0;
returnValue = a1;
}
} break;
case SYS_getppid:
case SYS_getuid:
case SYS_getgid:
case SYS_getegid:
case SYS_geteuid: {
// TODO.
} break;
case SYS_getrusage: {
// TODO.
struct rusage *buffer = (struct rusage *) a2;
EsMemoryZero(buffer, sizeof(struct rusage));
} break;
case SYS_unlink: {
_EsNodeInformation node;
node.handle = posixMountPointBase;
size_t pathBytes;
char *path = EsPOSIXConvertPath((const char *) a1, &pathBytes, false);
EsError error = EsSyscall(ES_SYSCALL_NODE_OPEN, (uintptr_t) path, pathBytes, ES_NODE_FAIL_IF_NOT_FOUND | ES_FILE_WRITE, (uintptr_t) &node);
EsHeapFree(path);
if (error == ES_ERROR_FILE_DOES_NOT_EXIST) returnValue = -ENOENT;
else if (error == ES_ERROR_PATH_NOT_TRAVERSABLE) returnValue = -ENOTDIR;
else if (error == ES_ERROR_OPERATION_BLOCKED) returnValue = -EBUSY;
else if (error == ES_ERROR_HARDWARE_FAILURE || error == ES_ERROR_CORRUPT_DATA) returnValue = -EIO;
else if (error != ES_SUCCESS) returnValue = -EACCES;
else {
error = EsSyscall(ES_SYSCALL_NODE_DELETE, node.handle, 0, 0, 0);
EsHandleClose(node.handle);
if (error == ES_ERROR_HARDWARE_FAILURE || error == ES_ERROR_CORRUPT_DATA) returnValue = -EIO;
else if (error != ES_SUCCESS) returnValue = -EACCES;
}
} break;
case SYS_truncate: {
_EsNodeInformation node;
node.handle = posixMountPointBase;
size_t pathBytes;
char *path = EsPOSIXConvertPath((const char *) a1, &pathBytes, false);
EsError error = EsSyscall(ES_SYSCALL_NODE_OPEN, (uintptr_t) path, pathBytes, ES_NODE_FAIL_IF_NOT_FOUND | ES_FILE_WRITE, (uintptr_t) &node);
EsHeapFree(path);
if (error == ES_ERROR_FILE_DOES_NOT_EXIST) returnValue = -ENOENT;
else if (error == ES_ERROR_PATH_NOT_TRAVERSABLE) returnValue = -ENOTDIR;
else if (error == ES_ERROR_OPERATION_BLOCKED) returnValue = -EBUSY;
else if (error == ES_ERROR_HARDWARE_FAILURE || error == ES_ERROR_CORRUPT_DATA) returnValue = -EIO;
else if (error != ES_SUCCESS) returnValue = -EACCES;
else if (node.type == ES_NODE_DIRECTORY) { returnValue = -EISDIR; EsHandleClose(node.handle); }
else {
EsError error = EsFileResize(node.handle, a2);
EsHandleClose(node.handle);
if (error == ES_ERROR_HARDWARE_FAILURE || error == ES_ERROR_CORRUPT_DATA) returnValue = -EIO;
else if (error != ES_SUCCESS) returnValue = -EACCES;
}
} break;
case SYS_execve: {
// NOTE We can't use EsHeapAllocate since the system call never returns.
size_t pathBytes;
char *_path = EsPOSIXConvertPath((const char *) a1, &pathBytes, false);
char *path = (char *) __builtin_alloca(pathBytes);
EsMemoryCopy(path, _path, pathBytes);
EsHeapFree(_path);
char **argv = (char **) a2;
char **envp = (char **) a3;
size_t environmentSize = 2;
for (uintptr_t i = 0; argv[i]; i++) environmentSize += EsCStringLength(argv[i]) + 1;
for (uintptr_t i = 0; envp[i]; i++) environmentSize += EsCStringLength(envp[i]) + 1;
bool environmentContainsWorkingDirectory = false;
for (uintptr_t i = 0; envp[i]; i++) {
if (0 == EsMemoryCompare("PWD=", envp[i], 4)) {
environmentContainsWorkingDirectory = true;
break;
}
}
if (!environmentContainsWorkingDirectory) {
environmentSize += 4 + EsCStringLength(workingDirectory) + 1;
}
char newEnvironment[environmentSize];
char *position = newEnvironment;
EsMemoryZero(newEnvironment, environmentSize);
for (uintptr_t i = 0; argv[i]; i++) {
size_t length = EsCStringLength(argv[i]) + 1;
EsMemoryCopy(position, argv[i], length);
position += length;
}
position++;
for (uintptr_t i = 0; envp[i]; i++) {
size_t length = EsCStringLength(envp[i]) + 1;
EsMemoryCopy(position, envp[i], length);
position += length;
}
if (!environmentContainsWorkingDirectory) {
size_t length = 4 + EsCStringLength(workingDirectory) + 1;
EsMemoryCopy(position, "PWD=", 4);
EsMemoryCopy(position + 4, workingDirectory, EsCStringLength(workingDirectory) + 1);
position += length;
}
syscall.arguments[0] = (long) path;
syscall.arguments[1] = (long) pathBytes;
syscall.arguments[2] = (long) newEnvironment;
syscall.arguments[3] = (long) environmentSize;
syscall.arguments[4] = (long) posixMountPointBase;
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
} break;
case SYS_access: {
// We don't support file permissions yet, so just check the file exists.
int fd = EsPOSIXSystemCall(SYS_open, a1, O_PATH, 0, 0, 0, 0);
if (fd < 0) returnValue = fd;
else {
returnValue = 0;
EsPOSIXSystemCall(SYS_close, fd, 0, 0, 0, 0, 0);
}
} break;
case SYS_faccessat: {
if (*(char *) a2 == '/') {
// We don't support file permissions yet, so just check the file exists.
int fd = EsPOSIXSystemCall(SYS_open, a2, O_PATH, 0, 0, 0, 0);
if (fd < 0) returnValue = fd;
else {
returnValue = 0;
EsPOSIXSystemCall(SYS_close, fd, 0, 0, 0, 0, 0);
}
} else {
EsPanic("Unsupported relative faccessat.\n");
}
} break;
case SYS_lstat:
case SYS_stat: {
int fd = EsPOSIXSystemCall(SYS_open, a1, O_PATH, 0, 0, 0, 0);
if (fd < 0) returnValue = fd;
else {
returnValue = EsPOSIXSystemCall(SYS_fstat, fd, a2, 0, 0, 0, 0);
EsPOSIXSystemCall(SYS_close, fd, 0, 0, 0, 0, 0);
}
} break;
case SYS_readlink: {
if (0 == EsMemoryCompare((void *) a1, EsLiteral("/proc/self/fd/"))) {
// The process is trying to get the path of a file descriptor.
syscall.index = ES_POSIX_SYSCALL_GET_POSIX_FD_PATH;
syscall.arguments[0] = EsCRTatoi((char *) a1 + EsCStringLength("/proc/self/fd/"));
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
} else {
// We don't support symbolic links, so the output is the same as the input.
int length = EsCStringLength((char *) a1);
EsMemoryZero((void *) a2, a3);
EsMemoryCopy((void *) a2, (void *) a1, length > a3 ? a3 : length);
returnValue = length > a3 ? a3 : length;
}
} break;
case SYS_set_tid_address: {
// TODO Support set_child_tid and clear_child_tid addresses.
returnValue = EsThreadGetID(ES_CURRENT_THREAD);
} break;
case SYS_brk: {
returnValue = -1;
} break;
case SYS_mremap: {
returnValue = -ENOMEM;
} break;
case SYS_mmap: {
bool read = a3 & PROT_READ, write = a3 & PROT_WRITE, none = a3 == PROT_NONE;
if (a4 & MAP_FIXED) {
returnValue = -ENOMEM;
} else if ((a4 == (MAP_ANON | MAP_PRIVATE)) && (a5 == -1) && (a6 == 0) && ((read && write) || none)) {
returnValue = (long) EsMemoryReserve(a2, ES_MEMORY_PROTECTION_READ_WRITE, none ? 0 : ES_MEMORY_RESERVE_COMMIT_ALL);
} else {
EsPanic("Unsupported mmap [%x, %x, %x, %x, %x, %x]\n", a1, a2, a3, a4, a5, a6);
}
} break;
case SYS_munmap: {
void *address = (void *) a1;
size_t length = (size_t) a2;
if (length == 0 || ((uintptr_t) address & (ES_PAGE_SIZE - 1))) {
returnValue = -EINVAL;
} else {
EsMemoryUnreserve(address, length);
}
} break;
case SYS_mprotect: {
void *address = (void *) a1;
size_t length = (size_t) a2;
int protection = (int) a3;
if (protection == (PROT_READ | PROT_WRITE)) {
returnValue = EsMemoryCommit(address, length) ? 0 : -ENOMEM;
} else if (protection == 0) {
returnValue = EsMemoryDecommit(address, length) ? 0 : -ENOMEM;
} else {
EsPanic("Unsupported mprotect [%x, %x, %x, %x, %x, %x]\n", a1, a2, a3, a4, a5, a6);
}
} break;
case SYS_prlimit64: {
// You can't access other process's resources.
if (a1 && a1 != (long) EsProcessGetID(ES_CURRENT_PROCESS)) {
returnValue = -EPERM;
break;
}
struct rlimit *newLimit = (struct rlimit *) a3;
if (newLimit && a2 != RLIMIT_STACK) {
returnValue = -EPERM;
break;
}
struct rlimit *limit = (struct rlimit *) a4;
if (a2 == RLIMIT_STACK) {
size_t current, maximum;
EsError error = EsSyscall(ES_SYSCALL_THREAD_STACK_SIZE, ES_CURRENT_THREAD,
(uintptr_t) &current, (uintptr_t) &maximum, newLimit ? newLimit->rlim_cur : 0);
if (limit) {
limit->rlim_cur = current;
limit->rlim_max = maximum;
}
if (error != ES_SUCCESS) returnValue = -EINVAL;
} else if (a2 == RLIMIT_AS) {
if (limit) limit->rlim_cur = limit->rlim_max = RLIM_INFINITY;
} else if (a2 == RLIMIT_RSS) {
if (limit) limit->rlim_cur = limit->rlim_max = 0x10000000; // 256MB. This value is fake. TODO
} else if (a2 == RLIMIT_NOFILE) {
if (limit) limit->rlim_cur = limit->rlim_max = 1048576;
} else {
EsPanic("Unsupported prlimit64 [%x]\n", a2);
}
} break;
case SYS_setitimer:
case SYS_madvise:
case SYS_umask:
case SYS_chmod:
case SYS_rt_sigaction:
case SYS_rt_sigprocmask: {
// TODO Support signals.
// Ignore.
} break;
case SYS_clock_gettime: {
// We'll ignore the clockid_t in a1, since we don't have proper timekeeping yet.
struct timespec *tp = (struct timespec *) a2;
double timeStampMs = EsTimeStampMs();
uint64_t ns = timeStampMs * 1e6;
tp->tv_sec = ns / 1000000000;
tp->tv_nsec = ns % 1000000000;
} break;
case SYS_wait4: {
if ((a3 & ~3) || a4 || a1 < -1 || !a1) {
EsPanic("Unsupported wait4 [%x/%x/%x/%x]\n", a1, a2, a3, a4);
}
int *wstatus = (int *) a2;
int options = a3;
bool foundChild = false;
uintptr_t childIndex = 0;
if (a1 > 0) {
for (uintptr_t i = 0; i < childProcesses.Length(); i++) {
if (childProcesses[i].id == (uint64_t) a1) {
foundChild = true;
childIndex = i;
break;
}
}
} else if (a1 == -1) {
foundChild = childProcesses.Length();
}
if (!foundChild) {
returnValue = -ECHILD;
} else {
returnValue = 0;
if (~options & 1 /* WNOHANG */) {
if (a1 == -1) {
EsHandle *handles = (EsHandle *) __builtin_alloca(childProcesses.Length() * sizeof(EsHandle));
for (uintptr_t i = 0; i < childProcesses.Length(); i++) {
handles[i] = childProcesses[i].handle;
}
EsWait(handles, childProcesses.Length(), ES_WAIT_NO_TIMEOUT);
} else {
EsWaitSingle(childProcesses[childIndex].handle);
}
}
for (uintptr_t i = 0; i < childProcesses.Length(); i++) {
if (a1 > 0 && childProcesses[i].id != (uint64_t) a1) {
continue;
}
EsHandle handle = childProcesses[i].handle;
EsProcessState state;
EsProcessGetState(handle, &state);
if (state.flags & ES_PROCESS_STATE_ALL_THREADS_TERMINATED) {
returnValue = childProcesses[i].id;
*wstatus = (EsProcessGetExitStatus(handle) & 0xFF) << 8;
EsHandleClose(handle);
childProcesses.Delete(i);
break;
}
}
}
} break;
case SYS_sched_getaffinity: {
// TODO Getting the correct number of CPUs.
// TODO Getting the affinity for other processes.
cpu_set_t *set = (cpu_set_t *) a3;
EsCRTmemset(set, 0, a2);
CPU_SET(0, set);
} break;
case SYS_mkdir: {
size_t pathBytes;
char *path = EsPOSIXConvertPath((const char *) a1, &pathBytes, true);
EsError error = EsPathCreate(path, pathBytes, ES_NODE_DIRECTORY, false);
if (error == ES_ERROR_INSUFFICIENT_RESOURCES) returnValue = -ENOMEM;
else if (error == ES_ERROR_ALREADY_EXISTS) returnValue = -EEXIST;
else if (error == ES_ERROR_PATH_NOT_TRAVERSABLE) returnValue = -ENOENT;
else if (error == ES_ERROR_PATH_NOT_WITHIN_MOUNTED_VOLUME) returnValue = -ENOENT;
else if (error == ES_ERROR_FILE_ON_READ_ONLY_VOLUME) returnValue = -EPERM;
EsHeapFree(path);
} break;
case SYS_uname: {
struct utsname *buffer = (struct utsname *) a1;
EsCRTstrcpy(buffer->sysname, "Essence");
EsCRTstrcpy(buffer->release, "0.0.0");
EsCRTstrcpy(buffer->version, "0.0.0");
EsCRTstrcpy(buffer->machine, "Unknown");
} break;
case SYS_setpgid: {
if (a1 < 0) {
returnValue = -EINVAL;
} else {
EsHandle process = EsProcessOpen(a1);
if (process != ES_INVALID_HANDLE) {
syscall.arguments[0] = process;
returnValue = EsSyscall(ES_SYSCALL_POSIX, (uintptr_t) &syscall, 0, 0, 0);
EsHandleClose(process);
} else {
returnValue = -ESRCH;
}
}
} break;
case SYS_rename: {
size_t oldPathBytes;
char *oldPath = EsPOSIXConvertPath((const char *) a1, &oldPathBytes, true);
size_t newPathBytes;
char *newPath = EsPOSIXConvertPath((const char *) a2, &newPathBytes, true);
EsError error = EsPathMove(oldPath, oldPathBytes, newPath, newPathBytes);
EsHeapFree(oldPath);
EsHeapFree(newPath);
// TODO More return values.
if (error == ES_ERROR_FILE_DOES_NOT_EXIST) returnValue = -ENOENT;
else if (error == ES_ERROR_PATH_NOT_TRAVERSABLE) returnValue = -ENOTDIR;
else if (error == ES_ERROR_OPERATION_BLOCKED) returnValue = -EBUSY;
else if (error == ES_ERROR_HARDWARE_FAILURE || error == ES_ERROR_CORRUPT_DATA) returnValue = -EIO;
else if (error != ES_SUCCESS) returnValue = -EACCES;
} break;
case -1000: {
// Update thread local storage:
void *apiTLS = ProcessorTLSRead(tlsStorageOffset);
EsSyscall(ES_SYSCALL_THREAD_SET_TLS, a1, 0, 0, 0);
tlsStorageOffset = -a2;
ProcessorTLSWrite(tlsStorageOffset, apiTLS);
} break;
default: {
EsPanic("Unknown linux syscall %d = %z.\nArguments: %x, %x, %x, %x, %x, %x\n",
n, syscallNames[n], a1, a2, a3, a4, a5, a6);
} break;
}
#ifdef DEBUG_BUILD
if ((uintptr_t) n < sizeof(syscallTimeSpent) / sizeof(syscallTimeSpent[0])) {
double endTime = EsTimeStampMs();
syscallTimeSpent[n] += endTime - startTime;
syscallCallCount[n]++;
// EsPrint(":: %z %x %x %x -> %x; %Fms\n", syscallNames[n], a1, a2, a3, returnValue, endTime - startTime);
}
#endif
return returnValue;
}
void EsPOSIXInitialise(int *argc, char ***argv) {
EsProcessStartupInformation *startupInformation = ProcessGetStartupInformation();
// Get the arguments and environment.
EsHandle environmentHandle = startupInformation->data.subsystemData;
char *environmentBuffer = (char *) "./application\0\0LANG=en_US.UTF-8\0PWD=/\0HOME=/\0PATH=/Applications/POSIX/bin\0TMPDIR=/Applications/POSIX/tmp\0\0";
if (environmentHandle) {
EsAssert(startupInformation->data.subsystemID == ES_SUBSYSTEM_ID_POSIX);
environmentBuffer = (char *) EsHeapAllocate(ARG_MAX, false);
EsConstantBufferRead((EsHandle) environmentHandle, environmentBuffer);
EsHandleClose((EsHandle) environmentHandle);
}
// Extract the arguments and environment variables.
uintptr_t position = 0;
char *start = environmentBuffer;
*argc = 0;
for (int i = 0; i < 2; i++) {
while (position < ARG_MAX) {
if (!environmentBuffer[position]) {
_argv.Add(start);
start = environmentBuffer + position + 1;
if (i == 0) {
*argc = *argc + 1;
}
if (!environmentBuffer[position + 1]) {
start = environmentBuffer + position + 2;
_argv.Add(nullptr);
break;
}
}
position++;
}
position += 2;
}
// Copy the working directory string.
for (uintptr_t i = *argc + 1; i < _argv.Length(); i++) {
if (_argv[i] && 0 == EsMemoryCompare("PWD=", _argv[i], 4)) {
size_t length = EsCStringLength((char *) _argv[i]) - 4;
workingDirectory = (char *) EsHeapAllocate(length + 2, false);
workingDirectory[length] = 0, workingDirectory[length + 1] = 0;
EsMemoryCopy(workingDirectory, (char *) _argv[i] + 4, length);
if (workingDirectory[length - 1] != '/') workingDirectory[length] = '/';
}
}
// Add the auxillary vectors.
#ifdef ES_ARCH_X86_64
tlsHeader = (Elf64_Phdr *) EsHeapAllocate(sizeof(Elf64_Phdr), true);
tlsHeader->p_type = PT_TLS;
tlsHeader->p_flags = 4 /* read */;
tlsHeader->p_vaddr = startupInformation->tlsImageStart;
tlsHeader->p_filesz = startupInformation->tlsImageBytes;
tlsHeader->p_memsz = startupInformation->tlsBytes;
tlsHeader->p_align = 8;
_argv.Add((void *) AT_PHNUM);
_argv.Add((void *) 1);
_argv.Add((void *) AT_PHENT);
_argv.Add((void *) sizeof(Elf64_Phdr));
_argv.Add((void *) AT_PHDR);
_argv.Add((void *) tlsHeader);
#else
#error "no architecture TLS support"
#endif
_argv.Add((void *) AT_PAGESZ);
_argv.Add((void *) ES_PAGE_SIZE);
_argv.Add(nullptr);
// Return argv.
*argv = (char **) _argv.array;
}
void POSIXCleanup() {
_argv.Free();
childProcesses.Free();
EsHeapFree(workingDirectory);
EsHeapFree(tlsHeader);
}
#endif