feat: (devfs) a new filesystem for device exposure.
[lunaix-os.git] / lunaix-os / kernel / sched.c
index ab04e66a05290977a923c0d96d61285394728bb1..f8f00d40a094d62c31976cd4d54ef577673ba6de 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <lunaix/mm/kalloc.h>
 #include <lunaix/mm/pmm.h>
+#include <lunaix/mm/valloc.h>
 #include <lunaix/mm/vmm.h>
 #include <lunaix/process.h>
 #include <lunaix/sched.h>
@@ -21,8 +22,6 @@ volatile struct proc_info* __current;
 
 struct proc_info dummy;
 
-extern void __proc_table;
-
 struct scheduler sched_ctx;
 
 LOG_MODULE("SCHED")
@@ -35,10 +34,10 @@ sched_init()
     for (size_t i = 0; i <= pg_size; i += 4096) {
         uintptr_t pa = pmm_alloc_page(KERNEL_PID, PP_FGPERSIST);
         vmm_set_mapping(
-          PD_REFERENCED, &__proc_table + i, pa, PG_PREM_RW, VMAP_NULL);
+          PD_REFERENCED, PROC_START + i, pa, PG_PREM_RW, VMAP_NULL);
     }
 
-    sched_ctx = (struct scheduler){ ._procs = (struct proc_info*)&__proc_table,
+    sched_ctx = (struct scheduler){ ._procs = (struct proc_info*)PROC_START,
                                     .ptable_len = 0,
                                     .procs_index = 0 };
 }
@@ -46,17 +45,68 @@ sched_init()
 void
 run(struct proc_info* proc)
 {
-    if (!(__current->state & ~PROC_RUNNING)) {
-        __current->state = PROC_STOPPED;
-    }
-    proc->state = PROC_RUNNING;
+    proc->state = PS_RUNNING;
+
+    /*
+        run(): mark proc as running, reload tss.esp0, then jump to switch_to with proc pushed as its argument.
+        tss.esp0 is set to the esp value from before the last reschedule. While a signal is handled, the context is
+          not restored: it stays on the user stack and control jumps straight into the user-space sig_wrapper. Only
+          when the user's handler returns does the sigreturn syscall restore it (in effect, another reschedule).
+        No address-space switch happens in between, so the second jump reuses the same kernel stack, whereas
+        tss.esp0 used to always point at the very top -- which could overwrite older contexts (e.g. nested handlers).
+    */
+    tss_update_esp(proc->intr_ctx.registers.esp);
 
-    // FIXME: 这里还是得再考虑一下。
-    // tss_update_esp(__current->intr_ctx.esp);
     apic_done_servicing();
 
     asm volatile("pushl %0\n"
-                 "jmp switch_to\n" ::"r"(proc)); // kernel/asm/x86/interrupt.S
+                 "jmp switch_to\n" ::"r"(proc)
+                 : "memory"); // kernel/asm/x86/interrupt.S
+}
+
+int
+can_schedule(struct proc_info* proc)
+{
+    // A pending SIGCONT cancels a pending SIGSTOP, so the process may run.
+    if (__SIGTEST(proc->sig_pending, _SIGCONT)) {
+        __SIGCLEAR(proc->sig_pending, _SIGSTOP);
+        return 1;
+    }
+
+    // Otherwise the process is schedulable unless SIGSTOP is still pending.
+    return !(__SIGTEST(proc->sig_pending, _SIGSTOP));
+}
+
+void
+check_sleepers() // expire the sleep/alarm deadlines of every process on the sleepers list
+{
+    struct proc_info* leader = &sched_ctx._procs[0]; // process slot 0 holds the head of the sleepers list
+    struct proc_info *pos, *n;
+    time_t now = clock_systime();
+    llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers)
+    {
+        if (PROC_TERMINATED(pos->state)) {
+            goto del; // terminated processes are simply unlinked
+        }
+        // Snapshot both deadlines; a value of 0 means "not armed".
+        time_t wtime = pos->sleep.wakeup_time;
+        time_t atime = pos->sleep.alarm_time;
+        // sleep() deadline reached: clear it and put the process back in the state schedule() picks.
+        if (wtime && now >= wtime) {
+            pos->sleep.wakeup_time = 0;
+            pos->state = PS_STOPPED;
+        }
+        // alarm() deadline reached: clear it and flag SIGALRM as pending.
+        if (atime && now >= atime) {
+            pos->sleep.alarm_time = 0;
+            __SIGSET(pos->sig_pending, _SIGALRM);
+        }
+        // Tested on the snapshots taken above, so a process whose deadline expired in this
+        if (!wtime && !atime) { // pass is only unlinked on a later pass.
+        del:
+            llist_delete(&pos->sleep.sleepers);
+        }
+    }
+}
 
 void
@@ -71,46 +121,74 @@ schedule()
     struct proc_info* next;
     int prev_ptr = sched_ctx.procs_index;
     int ptr = prev_ptr;
+
+    if (!(__current->state & ~PS_RUNNING)) {
+        __current->state = PS_STOPPED;
+    }
+
+    check_sleepers();
+
     // round-robin scheduler
+redo:
     do {
         ptr = (ptr + 1) % sched_ctx.ptable_len;
         next = &sched_ctx._procs[ptr];
-    } while (next->state != PROC_STOPPED && ptr != prev_ptr);
+    } while (next->state != PS_STOPPED && ptr != prev_ptr);
 
     sched_ctx.procs_index = ptr;
 
+    if (!can_schedule(next)) {
+        // This process may not be scheduled right now; try to pick another one.
+        goto redo;
+    }
+
     run(next);
 }
 
-static void
-proc_timer_callback(struct proc_info* proc)
+void
+sched_yieldk() // give up the CPU from kernel code (used by _wait() while polling its children)
 {
-    proc->timer = NULL;
-    proc->state = PROC_STOPPED;
+    cpu_int(LUNAIX_SCHED); // NOTE(review): presumably raises the LUNAIX_SCHED interrupt to enter the scheduler -- confirm
 }
 
 __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
 {
-    // FIXME: sleep的实现或许需要改一下。专门绑一个计时器好像没有必要……
     if (!seconds) {
         return 0;
     }
 
-    if (__current->timer) {
-        return __current->timer->counter / timer_context()->running_frequency;
+    if (__current->sleep.wakeup_time) {
+        return (__current->sleep.wakeup_time - clock_systime()) / 1000U;
     }
 
-    struct lx_timer* timer =
-      timer_run_second(seconds, proc_timer_callback, __current, 0);
-    __current->timer = timer;
+    __current->sleep.wakeup_time = clock_systime() + seconds * 1000;
+    llist_append(&sched_ctx._procs[0].sleep.sleepers,
+                 &__current->sleep.sleepers);
+
     __current->intr_ctx.registers.eax = seconds;
-    __current->state = PROC_BLOCKED;
+    __current->state = PS_BLOCKED;
     schedule();
 }
 
+__DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
+{
+    // Arms (or, for seconds == 0, disarms) the caller's alarm deadline and
+    // returns the seconds that remained on the previously armed alarm, if any.
+    time_t old_deadline = __current->sleep.alarm_time;
+    time_t current = clock_systime();
+    __current->sleep.alarm_time = seconds ? current + seconds * 1000 : 0;
+    // Queue the caller on the sleepers list unless it is already linked in.
+    if (llist_empty(&__current->sleep.sleepers)) {
+        llist_append(&sched_ctx._procs[0].sleep.sleepers,
+                     &__current->sleep.sleepers);
+    }
+    return old_deadline ? (old_deadline - current) / 1000 : 0;
+}
+
 __DEFINE_LXSYSCALL1(void, exit, int, status)
 {
     terminate_proc(status);
+    schedule(); // terminate_proc() no longer reschedules itself, so switch away from the dead process here
 }
 
 __DEFINE_LXSYSCALL(void, yield)
@@ -131,6 +209,11 @@ __DEFINE_LXSYSCALL3(pid_t, waitpid, pid_t, pid, int*, status, int, options)
     return _wait(pid, status, options);
 }
 
+__DEFINE_LXSYSCALL(int, geterrno)
+{
+    return __current->k_status; // status code last recorded on the calling process (e.g. EINVAL)
+}
+
 pid_t
 _wait(pid_t wpid, int* status, int options)
 {
@@ -142,17 +225,16 @@ _wait(pid_t wpid, int* status, int options)
     }
 
     wpid = wpid ? wpid : -__current->pgid;
-    cpu_enable_interrupt();
 repeat:
     llist_for_each(proc, n, &__current->children, siblings)
     {
         if (!~wpid || proc->pid == wpid || proc->pgid == -wpid) {
-            if (proc->state == PROC_TERMNAT && !options) {
-                status_flags |= PROCTERM;
+            if (proc->state == PS_TERMNAT && !options) {
+                status_flags |= PEXITTERM;
                 goto done;
             }
-            if (proc->state == PROC_STOPPED && (options & WUNTRACED)) {
-                status_flags |= PROCSTOP;
+            if (proc->state == PS_STOPPED && (options & WUNTRACED)) {
+                status_flags |= PEXITSTOP;
                 goto done;
             }
         }
@@ -161,59 +243,68 @@ repeat:
         return 0;
     }
     // 放弃当前的运行机会
-    sched_yield();
+    sched_yieldk();
     goto repeat;
 
 done:
-    cpu_disable_interrupt();
-    *status = (proc->exit_code & 0xffff) | status_flags;
+    status_flags |= PEXITSIG * (proc->sig_inprogress != 0);
+    if (status) {
+        *status = proc->exit_code | status_flags;
+    }
     return destroy_process(proc->pid);
 }
 
-pid_t
-alloc_pid()
+struct proc_info*
+alloc_process() // reserve a process-table slot and return it initialised in PS_CREATED state
 {
     pid_t i = 0;
-    for (;
-         i < sched_ctx.ptable_len && sched_ctx._procs[i].state != PROC_DESTROY;
+    for (; i < sched_ctx.ptable_len && sched_ctx._procs[i].state != PS_DESTROY;
          i++)
         ;
 
     if (i == MAX_PROCESS) {
         panick("Panic in Ponyville shimmer!");
     }
-    return i;
-}
-
-void
-push_process(struct proc_info* process)
-{
-    int index = process->pid;
-    if (index < 0 || index > sched_ctx.ptable_len) {
-        __current->k_status = LXINVLDPID;
-        return;
-    }
 
-    if (index == sched_ctx.ptable_len) {
+    if (i == sched_ctx.ptable_len) { // no destroyed slot to reuse: extend the table by one entry
         sched_ctx.ptable_len++;
     }
 
-    sched_ctx._procs[index] = *process;
+    struct proc_info* proc = &sched_ctx._procs[i];
+    memset(proc, 0, sizeof(*proc)); // wipe whatever a previous owner of the slot left behind
 
-    process = &sched_ctx._procs[index];
+    proc->state = PS_CREATED;
+    proc->pid = i; // the pid doubles as the index into the process table
+    proc->created = clock_systime();
+    proc->pgid = proc->pid; // process group id defaults to the process's own pid
+    proc->fdtable = vzalloc(sizeof(struct v_fdtable)); // NOTE(review): result is not checked for failure
 
-    // make sure the reference is relative to process table
-    llist_init_head(&process->children);
-    llist_init_head(&process->grp_member);
+    llist_init_head(&proc->mm.regions.head);
+    llist_init_head(&proc->children);
+    llist_init_head(&proc->grp_member);
+    llist_init_head(&proc->sleep.sleepers);
+    // The slot stays in PS_CREATED until commit_process() links it to a parent and marks it PS_STOPPED.
+    return proc;
+}
+
+void
+commit_process(struct proc_info* process) // link a PS_CREATED process to its parent and mark it PS_STOPPED
+{
+    assert(process == &sched_ctx._procs[process->pid]);
+    // Only a process still in PS_CREATED state can be committed.
+    if (process->state != PS_CREATED) {
+        __current->k_status = EINVAL;
+        return;
+    }
 
     // every process is the child of first process (pid=1)
-    if (process->parent) {
-        llist_append(&process->parent->children, &process->siblings);
-    } else {
-        process->parent = &sched_ctx._procs[0];
+    if (!process->parent) {
+        process->parent = &sched_ctx._procs[1];
     }
 
-    process->state = PROC_STOPPED;
+    llist_append(&process->parent->children, &process->siblings);
+    // PS_STOPPED is the state schedule() looks for when picking the next process.
+    process->state = PS_STOPPED;
 }
 
 // from <kernel/process.c>
@@ -225,26 +316,32 @@ destroy_process(pid_t pid)
 {
     int index = pid;
     if (index <= 0 || index > sched_ctx.ptable_len) {
-        __current->k_status = LXINVLDPID;
+        __current->k_status = EINVAL;
         return;
     }
     struct proc_info* proc = &sched_ctx._procs[index];
-    proc->state = PROC_DESTROY;
+    proc->state = PS_DESTROY;
     llist_delete(&proc->siblings);
 
-    if (proc->mm.regions) {
-        struct mm_region *pos, *n;
-        llist_for_each(pos, n, &proc->mm.regions->head, head)
-        {
-            lxfree(pos);
-        }
+    for (size_t i = 0; i < VFS_MAX_FD; i++) {
+        struct v_fd* fd = proc->fdtable->fds[i];
+        if (fd)
+            vfs_close(fd->file);
+    }
+
+    vfree(proc->fdtable);
+
+    struct mm_region *pos, *n;
+    llist_for_each(pos, n, &proc->mm.regions.head, head)
+    {
+        vfree(pos);
     }
 
-    vmm_mount_pd(PD_MOUNT_2, proc->page_table);
+    vmm_mount_pd(PD_MOUNT_1, proc->page_table);
 
-    __del_pagetable(pid, PD_MOUNT_2);
+    __del_pagetable(pid, PD_MOUNT_1);
 
-    vmm_unmount_pd(PD_MOUNT_2);
+    vmm_unmount_pd(PD_MOUNT_1);
 
     return pid;
 }
@@ -252,10 +349,10 @@ destroy_process(pid_t pid)
 void
 terminate_proc(int exit_code)
 {
-    __current->state = PROC_TERMNAT;
+    __current->state = PS_TERMNAT;
     __current->exit_code = exit_code;
 
-    schedule();
+    __SIGSET(__current->parent->sig_pending, _SIGCHLD); // flag SIGCHLD on the parent; rescheduling is left to the caller
 }
 
 struct proc_info*
@@ -280,5 +377,5 @@ orphaned_proc(pid_t pid)
 
     // 如果其父进程的状态是terminated 或 destroy中的一种
     // 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程
-    return (parent->state & PROC_TERMMASK) || parent->created > proc->created;
+    return PROC_TERMINATED(parent->state) || parent->created > proc->created;
 }
\ No newline at end of file