feat: nearly complete POSIX.1-2008 compliant terminal interface implementation
[lunaix-os.git] / lunaix-os / kernel / process / sched.c
index f58710ee347387eee1629214b8b3c270c25b1a62..bec2faef33af8cc4591949602359bc89d7bd0335 100644 (file)
@@ -1,11 +1,13 @@
-#include <arch/x86/interrupts.h>
-#include <arch/x86/tss.h>
+#include <sys/abi.h>
+#include <sys/interrupts.h>
+#include <sys/mm/mempart.h>
 
-#include <hal/apic.h>
-#include <hal/cpu.h>
+#include <hal/intc.h>
+#include <sys/cpu.h>
 
+#include <lunaix/fs/taskfs.h>
 #include <lunaix/mm/cake.h>
-#include <lunaix/mm/kalloc.h>
+#include <lunaix/mm/mmap.h>
 #include <lunaix/mm/pmm.h>
 #include <lunaix/mm/valloc.h>
 #include <lunaix/mm/vmm.h>
 #include <lunaix/syscall.h>
 #include <lunaix/syslog.h>
 
+#include <klibc/string.h>
+
 volatile struct proc_info* __current;
 
+static struct proc_info dummy_proc;
+
 struct proc_info dummy;
 
 struct scheduler sched_ctx;
@@ -27,23 +33,45 @@ struct cake_pile* proc_pile;
 
 LOG_MODULE("SCHED")
 
+void
+sched_init_dummy();
+
 void
 sched_init()
 {
-    // size_t pg_size = ROUNDUP(sizeof(struct proc_info) * MAX_PROCESS, 0x1000);
-
-    // for (size_t i = 0; i <= pg_size; i += 4096) {
-    //     uintptr_t pa = pmm_alloc_page(KERNEL_PID, PP_FGPERSIST);
-    //     vmm_set_mapping(
-    //       PD_REFERENCED, PROC_START + i, pa, PG_PREM_RW, VMAP_NULL);
-    // }
-
     proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0);
     cake_set_constructor(proc_pile, cake_ctor_zeroing);
 
-    sched_ctx = (struct scheduler){ ._procs = vzalloc(PROC_TABLE_SIZE),
-                                    .ptable_len = 0,
-                                    .procs_index = 0 };
+    sched_ctx = (struct scheduler){
+        ._procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0};
+
+    // TODO initialize dummy_proc
+    sched_init_dummy();
+}
+
+#define DUMMY_STACK_SIZE 2048
+
+void
+sched_init_dummy()
+{
+    // This surely needs to be simplified or encapsulated!
+    // It is a living nightmare!
+
+    extern void my_dummy();
+    static char dummy_stack[DUMMY_STACK_SIZE] __attribute__((aligned(16)));
+
+    ptr_t stktop = (ptr_t)dummy_stack + DUMMY_STACK_SIZE;
+
+    dummy_proc = (struct proc_info){};
+
+    proc_init_transfer(&dummy_proc, stktop, (ptr_t)my_dummy, TRANSFER_IE);
+
+    dummy_proc.page_table = cpu_ldvmspace();
+    dummy_proc.state = PS_READY;
+    dummy_proc.parent = &dummy_proc;
+    dummy_proc.pid = KERNEL_PID;
+
+    __current = &dummy_proc;
 }
 
 void
@@ -51,34 +79,34 @@ run(struct proc_info* proc)
 {
     proc->state = PS_RUNNING;
 
-    /*
-        将tss.esp0设置为上次调度前的esp值。
-        当处理信号时,上下文信息是不会恢复的,而是保存在用户栈中,然后直接跳转进位于用户空间的sig_wrapper进行
-          信号的处理。当用户自定义的信号处理函数返回时,sigreturn的系统调用才开始进行上下文的恢复(或者说是进行
-          另一次调度。
-        由于这中间没有进行地址空间的交换,所以第二次跳转使用的是同一个内核栈,而之前默认tss.esp0的值是永远指向最顶部
-        这样一来就有可能会覆盖更早的上下文信息(比如嵌套的信号捕获函数)
-    */
-    tss_update_esp(proc->intr_ctx.registers.esp);
-
-    apic_done_servicing();
-
-    asm volatile("pushl %0\n"
-                 "jmp switch_to\n" ::"r"(proc)
-                 : "memory"); // kernel/asm/x86/interrupt.S
+    intc_notify_eos(0);
+    switch_context(proc);
 }
 
 int
 can_schedule(struct proc_info* proc)
 {
-    if (__SIGTEST(proc->sig_pending, _SIGCONT)) {
-        __SIGCLEAR(proc->sig_pending, _SIGSTOP);
-    } else if (__SIGTEST(proc->sig_pending, _SIGSTOP)) {
+    if (!proc) {
+        return 0;
+    }
+
+    struct sighail* sh = &proc->sigctx;
+
+    if ((proc->state & PS_PAUSED)) {
+        return !!(sh->sig_pending & ~1);
+    }
+    if ((proc->state & PS_BLOCKED)) {
+        return sigset_test(sh->sig_pending, _SIGINT);
+    }
+
+    if (sigset_test(sh->sig_pending, _SIGCONT)) {
+        sigset_clear(sh->sig_pending, _SIGSTOP);
+    } else if (sigset_test(sh->sig_pending, _SIGSTOP)) {
         // 如果进程受到SIGSTOP,则该进程不给予调度。
         return 0;
     }
 
-    return 1;
+    return (proc->state == PS_READY);
 }
 
 void
@@ -86,10 +114,10 @@ check_sleepers()
 {
     struct proc_info* leader = sched_ctx._procs[0];
     struct proc_info *pos, *n;
-    time_t now = clock_systime();
+    time_t now = clock_systime() / 1000;
     llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers)
     {
-        if (PROC_TERMINATED(pos->state)) {
+        if (proc_terminated(pos)) {
             goto del;
         }
 
@@ -103,7 +131,7 @@ check_sleepers()
 
         if (atime && now >= atime) {
             pos->sleep.alarm_time = 0;
-            __SIGSET(pos->sig_pending, _SIGALRM);
+            proc_setsignal(pos, _SIGALRM);
         }
 
         if (!wtime && !atime) {
@@ -125,6 +153,7 @@ schedule()
     struct proc_info* next;
     int prev_ptr = sched_ctx.procs_index;
     int ptr = prev_ptr;
+    int found = 0;
 
     if (!(__current->state & ~PS_RUNNING)) {
         __current->state = PS_READY;
@@ -133,19 +162,21 @@ schedule()
     check_sleepers();
 
     // round-robin scheduler
-redo:
     do {
         ptr = (ptr + 1) % sched_ctx.ptable_len;
         next = sched_ctx._procs[ptr];
-    } while (!next || (next->state != PS_READY && ptr != prev_ptr));
 
-    sched_ctx.procs_index = ptr;
+        if (!(found = can_schedule(next))) {
+            if (ptr == prev_ptr) {
+                next = &dummy_proc;
+                goto done;
+            }
+        }
+    } while (!found);
 
-    if (!can_schedule(next)) {
-        // 如果该进程不给予调度,则尝试重新选择
-        goto redo;
-    }
+    sched_ctx.procs_index = ptr;
 
+done:
     run(next);
 }
 
@@ -153,7 +184,7 @@ void
 sched_yieldk()
 {
     cpu_enable_interrupt();
-    cpu_int(LUNAIX_SCHED);
+    cpu_trap_sched();
 }
 
 __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
@@ -162,32 +193,40 @@ __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
         return 0;
     }
 
+    time_t systime = clock_systime() / 1000;
+
     if (__current->sleep.wakeup_time) {
-        return (__current->sleep.wakeup_time - clock_systime()) / 1000U;
+        return (__current->sleep.wakeup_time - systime);
     }
 
     struct proc_info* root_proc = sched_ctx._procs[0];
-    __current->sleep.wakeup_time = clock_systime() + seconds * 1000;
-    llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+    __current->sleep.wakeup_time = systime + seconds;
+
+    if (llist_empty(&__current->sleep.sleepers)) {
+        llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+    }
+
+    store_retval(seconds);
 
-    __current->intr_ctx.registers.eax = seconds;
-    __current->state = PS_BLOCKED;
+    block_current();
     schedule();
+
+    return 0;
 }
 
 __DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
 {
     time_t prev_ddl = __current->sleep.alarm_time;
-    time_t now = clock_systime();
+    time_t now = clock_systime() / 1000;
 
-    __current->sleep.alarm_time = seconds ? now + seconds * 1000 : 0;
+    __current->sleep.alarm_time = seconds ? now + seconds : 0;
 
     struct proc_info* root_proc = sched_ctx._procs[0];
     if (llist_empty(&__current->sleep.sleepers)) {
         llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
     }
 
-    return prev_ddl ? (prev_ddl - now) / 1000 : 0;
+    return prev_ddl ? (prev_ddl - now) : 0;
 }
 
 __DEFINE_LXSYSCALL1(void, exit, int, status)
@@ -252,7 +291,6 @@ repeat:
     goto repeat;
 
 done:
-    status_flags |= PEXITSIG * (proc->sig_inprogress != 0);
     if (status) {
         *status = proc->exit_code | status_flags;
     }
@@ -278,15 +316,18 @@ alloc_process()
 
     proc->state = PS_CREATED;
     proc->pid = i;
+    proc->mm.pid = i;
     proc->created = clock_systime();
     proc->pgid = proc->pid;
     proc->fdtable = vzalloc(sizeof(struct v_fdtable));
 
-    llist_init_head(&proc->mm.regions.head);
+    llist_init_head(&proc->mm.regions);
     llist_init_head(&proc->tasks);
     llist_init_head(&proc->children);
     llist_init_head(&proc->grp_member);
     llist_init_head(&proc->sleep.sleepers);
+
+    iopoll_init(&proc->pollctx);
     waitq_init(&proc->waitqueue);
 
     sched_ctx._procs[i] = proc;
@@ -317,7 +358,7 @@ commit_process(struct proc_info* process)
 
 // from <kernel/process.c>
 extern void
-__del_pagetable(pid_t pid, uintptr_t mount_point);
+__del_pagetable(pid_t pid, ptr_t mount_point);
 
 pid_t
 destroy_process(pid_t pid)
@@ -325,32 +366,47 @@ destroy_process(pid_t pid)
     int index = pid;
     if (index <= 0 || index > sched_ctx.ptable_len) {
         __current->k_status = EINVAL;
-        return;
+        return -1;
     }
+
     struct proc_info* proc = sched_ctx._procs[index];
     sched_ctx._procs[index] = 0;
 
     llist_delete(&proc->siblings);
+    llist_delete(&proc->grp_member);
+    llist_delete(&proc->tasks);
+    llist_delete(&proc->sleep.sleepers);
+
+    iopoll_free(pid, &proc->pollctx);
+
+    taskfs_invalidate(pid);
+
+    if (proc->cwd) {
+        vfs_unref_dnode(proc->cwd);
+    }
 
     for (size_t i = 0; i < VFS_MAX_FD; i++) {
         struct v_fd* fd = proc->fdtable->fds[i];
-        if (fd)
-            vfs_close(fd->file);
+        if (fd) {
+            vfs_pclose(fd->file, pid);
+            vfs_free_fd(fd);
+        }
     }
 
     vfree(proc->fdtable);
 
+    vmm_mount_pd(VMS_MOUNT_1, proc->page_table);
+
     struct mm_region *pos, *n;
-    llist_for_each(pos, n, &proc->mm.regions.head, head)
+    llist_for_each(pos, n, &proc->mm.regions, head)
     {
-        vfree(pos);
+        mem_sync_pages(VMS_MOUNT_1, pos, pos->start, pos->end - pos->start, 0);
+        region_release(pos);
     }
 
-    vmm_mount_pd(PD_MOUNT_1, proc->page_table);
-
-    __del_pagetable(pid, PD_MOUNT_1);
+    __del_pagetable(pid, VMS_MOUNT_1);
 
-    vmm_unmount_pd(PD_MOUNT_1);
+    vmm_unmount_pd(VMS_MOUNT_1);
 
     cake_release(proc_pile, proc);
 
@@ -363,7 +419,7 @@ terminate_proc(int exit_code)
     __current->state = PS_TERMNAT;
     __current->exit_code = exit_code;
 
-    __SIGSET(__current->parent->sig_pending, _SIGCHLD);
+    proc_setsignal(__current->parent, _SIGCHLD);
 }
 
 struct proc_info*
@@ -388,5 +444,5 @@ orphaned_proc(pid_t pid)
 
     // 如果其父进程的状态是terminated 或 destroy中的一种
     // 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程
-    return PROC_TERMINATED(parent->state) || parent->created > proc->created;
+    return proc_terminated(parent) || parent->created > proc->created;
 }
\ No newline at end of file