X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/blobdiff_plain/9b8e0c494de6b447b44454112748f702dffec90d..0fd474df7001837bde53da0e42e83081827c9641:/lunaix-os/kernel/process/sched.c diff --git a/lunaix-os/kernel/process/sched.c b/lunaix-os/kernel/process/sched.c index 1c6bd97..112741c 100644 --- a/lunaix-os/kernel/process/sched.c +++ b/lunaix-os/kernel/process/sched.c @@ -1,16 +1,15 @@ -#include -#include +#include +#include -#include -#include +#include #include #include -#include #include #include #include #include +#include #include #include #include @@ -18,117 +17,145 @@ #include #include #include +#include +#include -volatile struct proc_info* __current; +#include -static struct proc_info dummy_proc; +struct thread empty_thread_obj; -struct proc_info dummy; +volatile struct proc_info* __current = NULL; +volatile struct thread* current_thread = &empty_thread_obj; struct scheduler sched_ctx; -struct cake_pile* proc_pile; +struct cake_pile *proc_pile ,*thread_pile; -LOG_MODULE("SCHED") +#define root_process (sched_ctx.procs[1]) -void -sched_init_dummy(); +LOG_MODULE("SCHED") void sched_init() { proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0); + thread_pile = cake_new_pile("thread", sizeof(struct thread), 1, 0); cake_set_constructor(proc_pile, cake_ctor_zeroing); + cake_set_constructor(thread_pile, cake_ctor_zeroing); - sched_ctx = (struct scheduler){ ._procs = vzalloc(PROC_TABLE_SIZE), - .ptable_len = 0, - .procs_index = 0 }; - - // TODO initialize dummy_proc - sched_init_dummy(); + sched_ctx = (struct scheduler){ + .procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0}; + + llist_init_head(&sched_ctx.sleepers); } -#define DUMMY_STACK_SIZE 2048 - void -sched_init_dummy() +run(struct thread* thread) { - // This surely need to be simplified or encapsulated! - // It is a living nightmare! - - extern void my_dummy(); - static char dummy_stack[DUMMY_STACK_SIZE] __attribute__((aligned(16))); - - // memset to 0 - dummy_proc = (struct proc_info){}; - dummy_proc.intr_ctx = (isr_param){ - .registers = { .ds = KDATA_SEG, - .es = KDATA_SEG, - .fs = KDATA_SEG, - .gs = KDATA_SEG, - .esp = (void*)dummy_stack + DUMMY_STACK_SIZE - 20 }, - .cs = KCODE_SEG, - .eip = (void*)my_dummy, - .ss = KDATA_SEG, - .eflags = cpu_reflags() | 0x0200 - }; - - *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 4]) = dummy_proc.intr_ctx.eflags; - *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 8]) = KCODE_SEG; - *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 12]) = dummy_proc.intr_ctx.eip; - - dummy_proc.page_table = cpu_rcr3(); - dummy_proc.state = PS_READY; - dummy_proc.parent = &dummy_proc; - dummy_proc.pid = KERNEL_PID; - - __current = &dummy_proc; + thread->state = PS_RUNNING; + thread->process->state = PS_RUNNING; + thread->process->th_active = thread; + + procvm_mount_self(vmspace(thread->process)); + set_current_executing(thread); + + switch_context(); + + fail("unexpected return from switching"); } +/* + Currently, we do not allow self-destorying thread, doing + so will eliminate current kernel stack which is disaster. + A compromise solution is to perform a regular scan and + clean-up on these thread, in the preemptible kernel thread. +*/ + void -run(struct proc_info* proc) +cleanup_detached_threads() { - proc->state = PS_RUNNING; - - /* - 将tss.esp0设置为上次调度前的esp值。 - 当处理信号时,上下文信息是不会恢复的,而是保存在用户栈中,然后直接跳转进位于用户空间的sig_wrapper进行 - 信号的处理。当用户自定义的信号处理函数返回时,sigreturn的系统调用才开始进行上下文的恢复(或者说是进行 - 另一次调度。 - 由于这中间没有进行地址空间的交换,所以第二次跳转使用的是同一个内核栈,而之前默认tss.esp0的值是永远指向最顶部 - 这样一来就有可能会覆盖更早的上下文信息(比如嵌套的信号捕获函数) - */ - tss_update_esp(proc->intr_ctx.registers.esp); - - apic_done_servicing(); - - asm volatile("pushl %0\n" - "jmp switch_to\n" ::"r"(proc) - : "memory"); // kernel/asm/x86/interrupt.S + // XXX may be a lock on sched_context will ben the most appropriate? + cpu_disable_interrupt(); + + int i = 0; + struct thread *pos, *n; + llist_for_each(pos, n, sched_ctx.threads, sched_sibs) { + if (likely(!proc_terminated(pos) || !thread_detached(pos))) { + continue; + } + + struct proc_mm* mm = vmspace(pos->process); + + procvm_mount(mm); + destory_thread(pos); + procvm_unmount(mm); + + i++; + } + + if (i) { + INFO("cleaned %d terminated detached thread(s)", i); + } + + cpu_enable_interrupt(); } -int -can_schedule(struct proc_info* proc) +bool +can_schedule(struct thread* thread) { - if (__SIGTEST(proc->sig_pending, _SIGCONT)) { - __SIGCLEAR(proc->sig_pending, _SIGSTOP); - } else if (__SIGTEST(proc->sig_pending, _SIGSTOP)) { - // 如果进程受到SIGSTOP,则该进程不给予调度。 + if (!thread) { return 0; } - return 1; + if (proc_terminated(thread)) { + return false; + } + + if (preempt_check_stalled(thread)) { + thread_flags_set(thread, TH_STALLED); + return true; + } + + if (unlikely(kernel_process(thread->process))) { + // a kernel process is always runnable + return thread->state == PS_READY; + } + + struct sigctx* sh = &thread->sigctx; + + if ((thread->state & PS_PAUSED)) { + return !!(sh->sig_pending & ~1); + } + + if ((thread->state & PS_BLOCKED)) { + return sigset_test(sh->sig_pending, _SIGINT); + } + + if (sigset_test(sh->sig_pending, _SIGSTOP)) { + // If one thread is experiencing SIGSTOP, then we know + // all other threads are also SIGSTOP (as per POSIX-2008.1) + // In which case, the entire process is stopped. + thread->state = PS_STOPPED; + return false; + } + + if (sigset_test(sh->sig_pending, _SIGCONT)) { + thread->state = PS_READY; + } + + return (thread->state == PS_READY) \ + && proc_runnable(thread->process); } void check_sleepers() { - struct proc_info* leader = sched_ctx._procs[0]; - struct proc_info *pos, *n; - time_t now = clock_systime(); - llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers) + struct thread *pos, *n; + time_t now = clock_systime() / 1000; + + llist_for_each(pos, n, &sched_ctx.sleepers, sleep.sleepers) { - if (PROC_TERMINATED(pos->state)) { + if (proc_terminated(pos)) { goto del; } @@ -142,7 +169,7 @@ check_sleepers() if (atime && now >= atime) { pos->sleep.alarm_time = 0; - __SIGSET(pos->sig_pending, _SIGALRM); + thread_setsignal(pos, _SIGALRM); } if (!wtime && !atime) { @@ -155,51 +182,46 @@ check_sleepers() void schedule() { - if (!sched_ctx.ptable_len) { - return; - } + assert(sched_ctx.ptable_len && sched_ctx.ttable_len); // 上下文切换相当的敏感!我们不希望任何的中断打乱栈的顺序…… - cpu_disable_interrupt(); - struct proc_info* next; - int prev_ptr = sched_ctx.procs_index; - int ptr = prev_ptr; + no_preemption(); - if (!(__current->state & ~PS_RUNNING)) { + if (!(current_thread->state & ~PS_RUNNING)) { + current_thread->state = PS_READY; __current->state = PS_READY; + } + procvm_unmount_self(vmspace(__current)); check_sleepers(); // round-robin scheduler -redo: + + struct thread* current = current_thread; + struct thread* to_check = current; + do { - ptr = (ptr + 1) % sched_ctx.ptable_len; - next = sched_ctx._procs[ptr]; - } while (!next || (next->state != PS_READY && ptr != prev_ptr)); + to_check = list_next(to_check, struct thread, sched_sibs); - sched_ctx.procs_index = ptr; + if (can_schedule(to_check)) { + break; + } - if (next->state != PS_READY) { - // schedule the dummy process if we're out of choice - next = &dummy_proc; - goto done; - } + if (to_check == current) { + // FIXME do something less leathal here + fail("Ran out of threads!") + goto done; + } - if (!can_schedule(next)) { - // 如果该进程不给予调度,则尝试重新选择 - goto redo; - } + } while (1); + + sched_ctx.procs_index = to_check->process->pid; done: - run(next); -} + run(to_check); -void -sched_yieldk() -{ - cpu_enable_interrupt(); - cpu_int(LUNAIX_SCHED); + fail("unexpected return from scheduler"); } __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds) @@ -208,38 +230,45 @@ __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds) return 0; } - if (__current->sleep.wakeup_time) { - return (__current->sleep.wakeup_time - clock_systime()) / 1000U; + time_t systime = clock_systime() / 1000; + struct haybed* bed = ¤t_thread->sleep; + + if (bed->wakeup_time) { + return (bed->wakeup_time - systime); } - struct proc_info* root_proc = sched_ctx._procs[0]; - __current->sleep.wakeup_time = clock_systime() + seconds * 1000; - llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers); + bed->wakeup_time = systime + seconds; + + if (llist_empty(&bed->sleepers)) { + llist_append(&sched_ctx.sleepers, &bed->sleepers); + } - __current->intr_ctx.registers.eax = seconds; + store_retval(seconds); - block_current(); + block_current_thread(); schedule(); + + return 0; } __DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds) { - time_t prev_ddl = __current->sleep.alarm_time; - time_t now = clock_systime(); + struct haybed* bed = ¤t_thread->sleep; + time_t prev_ddl = bed->alarm_time; + time_t now = clock_systime() / 1000; - __current->sleep.alarm_time = seconds ? now + seconds * 1000 : 0; + bed->alarm_time = seconds ? now + seconds : 0; - struct proc_info* root_proc = sched_ctx._procs[0]; - if (llist_empty(&__current->sleep.sleepers)) { - llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers); + if (llist_empty(&bed->sleepers)) { + llist_append(&sched_ctx.sleepers, &bed->sleepers); } - return prev_ddl ? (prev_ddl - now) / 1000 : 0; + return prev_ddl ? (prev_ddl - now) : 0; } __DEFINE_LXSYSCALL1(void, exit, int, status) { - terminate_proc(status); + terminate_current(status); schedule(); } @@ -263,7 +292,7 @@ __DEFINE_LXSYSCALL3(pid_t, waitpid, pid_t, pid, int*, status, int, options) __DEFINE_LXSYSCALL(int, geterrno) { - return __current->k_status; + return current_thread->syscall_ret; } pid_t @@ -277,6 +306,7 @@ _wait(pid_t wpid, int* status, int options) } wpid = wpid ? wpid : -__current->pgid; + repeat: llist_for_each(proc, n, &__current->children, siblings) { @@ -295,94 +325,189 @@ repeat: return 0; } // 放弃当前的运行机会 - sched_yieldk(); + yield_current(); goto repeat; done: - status_flags |= PEXITSIG * (proc->sig_inprogress != 0); if (status) { - *status = proc->exit_code | status_flags; + *status = PEXITNUM(status_flags, proc->exit_code); } return destroy_process(proc->pid); } -struct proc_info* -alloc_process() -{ +static inline pid_t +get_free_pid() { pid_t i = 0; - for (; i < sched_ctx.ptable_len && sched_ctx._procs[i]; i++) + + for (; i < sched_ctx.ptable_len && sched_ctx.procs[i]; i++) ; + + if (unlikely(i == MAX_PROCESS)) { + fail("Panic in Ponyville shimmer!"); + } - if (i == MAX_PROCESS) { - panick("Panic in Ponyville shimmer!"); + return i; +} + +struct thread* +alloc_thread(struct proc_info* process) { + if (process->thread_count >= MAX_THREAD_PP) { + return NULL; } + + struct thread* th = cake_grab(thread_pile); + + th->process = process; + th->created = clock_systime(); + + // FIXME we need a better tid allocation method! + th->tid = th->created; + th->tid = (th->created ^ ((ptr_t)th)) % MAX_THREAD_PP; + + th->state = PS_CREATED; + + llist_init_head(&th->sleep.sleepers); + llist_init_head(&th->sched_sibs); + llist_init_head(&th->proc_sibs); + waitq_init(&th->waitqueue); + + return th; +} + +struct proc_info* +alloc_process() +{ + pid_t i = get_free_pid(); if (i == sched_ctx.ptable_len) { sched_ctx.ptable_len++; } struct proc_info* proc = cake_grab(proc_pile); + if (!proc) { + return NULL; + } proc->state = PS_CREATED; proc->pid = i; proc->created = clock_systime(); proc->pgid = proc->pid; + + proc->root = vfs_sysroot; + + proc->sigreg = vzalloc(sizeof(struct sigregistry)); proc->fdtable = vzalloc(sizeof(struct v_fdtable)); - proc->fxstate = - vzalloc_dma(512); // FXSAVE需要十六位对齐地址,使用DMA块(128位对齐) - llist_init_head(&proc->mm.regions); + proc->mm = procvm_create(proc); + llist_init_head(&proc->tasks); llist_init_head(&proc->children); llist_init_head(&proc->grp_member); - llist_init_head(&proc->sleep.sleepers); - waitq_init(&proc->waitqueue); + llist_init_head(&proc->threads); + + iopoll_init(&proc->pollctx); - sched_ctx._procs[i] = proc; + sched_ctx.procs[i] = proc; return proc; } void -commit_process(struct proc_info* process) -{ - assert(process == sched_ctx._procs[process->pid]); +commit_thread(struct thread* thread) { + struct proc_info* process = thread->process; - if (process->state != PS_CREATED) { - __current->k_status = EINVAL; - return; + assert(process && !proc_terminated(process)); + + llist_append(&process->threads, &thread->proc_sibs); + + if (sched_ctx.threads) { + llist_append(sched_ctx.threads, &thread->sched_sibs); + } else { + sched_ctx.threads = &thread->sched_sibs; } + sched_ctx.ttable_len++; + process->thread_count++; + thread->state = PS_READY; +} + +void +commit_process(struct proc_info* process) +{ + assert(process == sched_ctx.procs[process->pid]); + assert(process->state == PS_CREATED); + // every process is the child of first process (pid=1) if (!process->parent) { - process->parent = sched_ctx._procs[1]; + if (likely(!kernel_process(process))) { + process->parent = root_process; + } else { + process->parent = process; + } + } else { + assert(!proc_terminated(process->parent)); + } + + if (sched_ctx.proc_list) { + llist_append(sched_ctx.proc_list, &process->tasks); + } else { + sched_ctx.proc_list = &process->tasks; } llist_append(&process->parent->children, &process->siblings); - llist_append(&sched_ctx._procs[0]->tasks, &process->tasks); process->state = PS_READY; } -// from -extern void -__del_pagetable(pid_t pid, uintptr_t mount_point); +void +destory_thread(struct thread* thread) +{ + cake_ensure_valid(thread); + + struct proc_info* proc = thread->process; -pid_t -destroy_process(pid_t pid) + llist_delete(&thread->sched_sibs); + llist_delete(&thread->proc_sibs); + llist_delete(&thread->sleep.sleepers); + waitq_cancel_wait(&thread->waitqueue); + + thread_release_mem(thread); + + proc->thread_count--; + sched_ctx.ttable_len--; + + cake_release(thread_pile, thread); +} + +static void +orphan_children(struct proc_info* proc) { - int index = pid; - if (index <= 0 || index > sched_ctx.ptable_len) { - __current->k_status = EINVAL; - return; + struct proc_info *root; + struct proc_info *pos, *n; + + root = root_process; + + llist_for_each(pos, n, &proc->children, siblings) { + pos->parent = root; + llist_append(&root->children, &pos->siblings); } - struct proc_info* proc = sched_ctx._procs[index]; - sched_ctx._procs[index] = 0; +} + +void +delete_process(struct proc_info* proc) +{ + pid_t pid = proc->pid; + struct proc_mm* mm = vmspace(proc); + + assert(pid); // long live the pid0 !! + + sched_ctx.procs[pid] = NULL; llist_delete(&proc->siblings); llist_delete(&proc->grp_member); llist_delete(&proc->tasks); - llist_delete(&proc->sleep.sleepers); + + iopoll_free(proc); taskfs_invalidate(pid); @@ -390,6 +515,10 @@ destroy_process(pid_t pid) vfs_unref_dnode(proc->cwd); } + if (proc->cmd) { + vfree(proc->cmd); + } + for (size_t i = 0; i < VFS_MAX_FD; i++) { struct v_fd* fd = proc->fdtable->fds[i]; if (fd) { @@ -399,33 +528,85 @@ destroy_process(pid_t pid) } vfree(proc->fdtable); - vfree_dma(proc->fxstate); - vmm_mount_pd(VMS_MOUNT_1, proc->page_table); + signal_free_registry(proc->sigreg); - struct mm_region *pos, *n; - llist_for_each(pos, n, &proc->mm.regions, head) - { - mem_sync_pages(VMS_MOUNT_1, pos, pos->start, pos->end - pos->start, 0); - vfree(pos); + procvm_mount(mm); + + struct thread *pos, *n; + llist_for_each(pos, n, &proc->threads, proc_sibs) { + // terminate and destory all thread unconditionally + destory_thread(pos); } - __del_pagetable(pid, VMS_MOUNT_1); + orphan_children(proc); - vmm_unmount_pd(VMS_MOUNT_1); + procvm_unmount_release(mm); cake_release(proc_pile, proc); +} + +pid_t +destroy_process(pid_t pid) +{ + int index = pid; + if (index <= 0 || index > sched_ctx.ptable_len) { + syscall_result(EINVAL); + return -1; + } + + struct proc_info* proc = sched_ctx.procs[index]; + delete_process(proc); return pid; } +static void +terminate_proc_only(struct proc_info* proc, int exit_code) { + assert(proc->pid != 0); + + proc->state = PS_TERMNAT; + proc->exit_code = exit_code; + + proc_setsignal(proc->parent, _SIGCHLD); +} + void -terminate_proc(int exit_code) -{ - __current->state = PS_TERMNAT; - __current->exit_code = exit_code; +terminate_thread(struct thread* thread, ptr_t val) { + thread->exit_val = val; + thread->state = PS_TERMNAT; + + struct proc_info* proc = thread->process; + if (proc->thread_count == 1) { + terminate_proc_only(thread->process, 0); + } +} + +void +terminate_current_thread(ptr_t val) { + terminate_thread(current_thread, val); +} + +void +terminate_proccess(struct proc_info* proc, int exit_code) { + assert(!kernel_process(proc)); - __SIGSET(__current->parent->sig_pending, _SIGCHLD); + if (proc->pid == 1) { + fail("Attempt to kill init"); + } + + terminate_proc_only(proc, exit_code); + + struct thread *pos, *n; + llist_for_each(pos, n, &proc->threads, proc_sibs) { + pos->state = PS_TERMNAT; + } +} + +void +terminate_current(int exit_code) +{ + terminate_proccess(__current, exit_code); } struct proc_info* @@ -435,7 +616,7 @@ get_process(pid_t pid) if (index < 0 || index > sched_ctx.ptable_len) { return NULL; } - return sched_ctx._procs[index]; + return sched_ctx.procs[index]; } int @@ -445,10 +626,10 @@ orphaned_proc(pid_t pid) return 0; if (pid >= sched_ctx.ptable_len) return 0; - struct proc_info* proc = sched_ctx._procs[pid]; + struct proc_info* proc = sched_ctx.procs[pid]; struct proc_info* parent = proc->parent; // 如果其父进程的状态是terminated 或 destroy中的一种 // 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程 - return PROC_TERMINATED(parent->state) || parent->created > proc->created; + return proc_terminated(parent) || parent->created > proc->created; } \ No newline at end of file