-#include <arch/x86/interrupts.h>
-#include <arch/x86/tss.h>
+#include <sys/abi.h>
+#include <sys/mm/mempart.h>
-#include <hal/apic.h>
-#include <hal/cpu.h>
+#include <sys/cpu.h>
#include <lunaix/fs/taskfs.h>
#include <lunaix/mm/cake.h>
-#include <lunaix/mm/kalloc.h>
+#include <lunaix/mm/mmap.h>
#include <lunaix/mm/pmm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/procvm.h>
#include <lunaix/process.h>
#include <lunaix/sched.h>
#include <lunaix/signal.h>
#include <lunaix/status.h>
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
+#include <lunaix/hart_state.h>
+#include <lunaix/kpreempt.h>
-volatile struct proc_info* __current;
+#include <lunaix/generic/isrm.h>
+
+#include <klibc/string.h>
-static struct proc_info dummy_proc;
+// Placeholder thread object; it is the initial target of current_thread.
+struct thread empty_thread_obj;
-struct proc_info dummy;
+// Presumably the process that is currently executing -- it has no
+// initializer here and is assigned outside this view.
+volatile struct proc_info* __current;
+volatile struct thread* current_thread = &empty_thread_obj;
struct scheduler sched_ctx;
-struct cake_pile* proc_pile;
+// Cake piles from which struct proc_info / struct thread objects are
+// grabbed; both are created in sched_init().
+struct cake_pile *proc_pile ,*thread_pile;
-LOG_MODULE("SCHED")
+// Slot 1 of the scheduler's process table.
+#define root_process (sched_ctx.procs[1])
-void
-sched_init_dummy();
+LOG_MODULE("SCHED")
+/**
+ * Initialise the scheduler's global state.
+ *
+ * Creates the "proc" and "thread" cake piles (both with the zeroing
+ * constructor), resets sched_ctx with a process table obtained from
+ * vzalloc(PROC_TABLE_SIZE), and initialises the sleeper list head.
+ */
void
sched_init()
{
proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0);
+ thread_pile = cake_new_pile("thread", sizeof(struct thread), 1, 0);
cake_set_constructor(proc_pile, cake_ctor_zeroing);
+ cake_set_constructor(thread_pile, cake_ctor_zeroing);
- sched_ctx = (struct scheduler){ ._procs = vzalloc(PROC_TABLE_SIZE),
- .ptable_len = 0,
- .procs_index = 0 };
-
- // TODO initialize dummy_proc
- sched_init_dummy();
+ // Fields not named in the initializer (e.g. the thread list) start zeroed.
+ sched_ctx = (struct scheduler){
+ .procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0};
+
+ llist_init_head(&sched_ctx.sleepers);
}
+/**
+ * Switch execution to the given thread.
+ *
+ * Marks the thread and its owning process as PS_RUNNING, records the
+ * thread as the process's active thread, calls procvm_mount_self() on the
+ * process's vmspace, makes the thread the current executing one and then
+ * calls switch_context(). Control is not expected to come back: reaching
+ * the statement after switch_context() is reported through fail().
+ *
+ * @param thread thread to run; thread and thread->process are
+ *               dereferenced without a NULL check.
+ */
void
-sched_init_dummy()
+run(struct thread* thread)
{
- // This surely need to be simplified or encapsulated!
- // It is a living nightmare!
-
- extern void my_dummy();
- static char dummy_stack[1024] __attribute__((aligned(16)));
-
- // memset to 0
- dummy_proc = (struct proc_info){};
- dummy_proc.intr_ctx =
- (isr_param){ .registers = { .ds = KDATA_SEG,
- .es = KDATA_SEG,
- .fs = KDATA_SEG,
- .gs = KDATA_SEG,
- .esp = (void*)dummy_stack + 1004 },
- .cs = KCODE_SEG,
- .eip = (void*)my_dummy,
- .ss = KDATA_SEG,
- .eflags = cpu_reflags() | 0x0200 };
-
- *(u32_t*)(&dummy_stack[1020]) = dummy_proc.intr_ctx.eflags;
- *(u32_t*)(&dummy_stack[1016]) = KCODE_SEG;
- *(u32_t*)(&dummy_stack[1012]) = dummy_proc.intr_ctx.eip;
-
- dummy_proc.page_table = cpu_rcr3();
- dummy_proc.state = PS_READY;
- dummy_proc.parent = &dummy_proc;
-
- __current = &dummy_proc;
+ // Both the thread and its process are flagged as running.
+ thread->state = PS_RUNNING;
+ thread->process->state = PS_RUNNING;
+ thread->process->th_active = thread;
+
+ procvm_mount_self(vmspace(thread->process));
+ set_current_executing(thread);
+
+ switch_context();
+
+ // Only reached if switch_context() returns, which is treated as fatal.
+ fail("unexpected return from switching");
}
+/*
+ Currently, we do not allow a self-destroying thread: doing
+ so would eliminate the current kernel stack, which is a disaster.
+ A compromise solution is to perform a regular scan and
+ clean-up of these threads, in the preemptible kernel thread.
+*/
+
+/**
+ * Scan the scheduler's thread list and destroy every thread that is both
+ * terminated and detached.
+ *
+ * Each such thread is destroyed between procvm_mount() and
+ * procvm_unmount() on its process's vmspace. The whole scan runs with
+ * interrupts disabled; interrupts are re-enabled unconditionally before
+ * returning. When at least one thread was reclaimed, the count is logged.
+ */
void
-run(struct proc_info* proc)
+cleanup_detached_threads()
{
- proc->state = PS_RUNNING;
-
- /*
- 将tss.esp0设置为上次调度前的esp值。
- 当处理信号时,上下文信息是不会恢复的,而是保存在用户栈中,然后直接跳转进位于用户空间的sig_wrapper进行
- 信号的处理。当用户自定义的信号处理函数返回时,sigreturn的系统调用才开始进行上下文的恢复(或者说是进行
- 另一次调度。
- 由于这中间没有进行地址空间的交换,所以第二次跳转使用的是同一个内核栈,而之前默认tss.esp0的值是永远指向最顶部
- 这样一来就有可能会覆盖更早的上下文信息(比如嵌套的信号捕获函数)
- */
- tss_update_esp(proc->intr_ctx.registers.esp);
-
- apic_done_servicing();
-
- asm volatile("pushl %0\n"
- "jmp switch_to\n" ::"r"(proc)
- : "memory"); // kernel/asm/x86/interrupt.S
+ // XXX maybe a lock on sched_context would be the most appropriate?
+ cpu_disable_interrupt();
+
+ // number of threads destroyed during this scan
+ int i = 0;
+ struct thread *pos, *n;
+ llist_for_each(pos, n, sched_ctx.threads, sched_sibs) {
+ // Skip anything that is still alive or not detached (the common case).
+ if (likely(!proc_terminated(pos) || !thread_detached(pos))) {
+ continue;
+ }
+
+ struct proc_mm* mm = vmspace(pos->process);
+
+ procvm_mount(mm);
+ destory_thread(pos);
+ procvm_unmount(mm);
+
+ i++;
+ }
+
+ if (i) {
+ INFO("cleaned %d terminated detached thread(s)", i);
+ }
+
+ cpu_enable_interrupt();
}
+/**
+ * Decide whether a thread may be picked by the scheduler.
+ *
+ * Checks are applied in this order:
+ *  - NULL or terminated thread: not schedulable.
+ *  - preempt_check_stalled() reports true: TH_STALLED is set on the
+ *    thread and it is schedulable.
+ *  - thread of a kernel process: schedulable iff its state is PS_READY.
+ *  - PS_PAUSED: schedulable iff any pending-signal bit other than bit 0
+ *    is set.
+ *  - PS_BLOCKED: schedulable iff _SIGINT is pending.
+ *  - _SIGSTOP pending: state becomes PS_STOPPED, not schedulable.
+ *  - _SIGCONT pending: state becomes PS_READY.
+ *  - otherwise: schedulable iff state is PS_READY and the owning process
+ *    satisfies proc_runnable().
+ *
+ * Note that this predicate has side effects: it may set TH_STALLED and
+ * may rewrite thread->state.
+ *
+ * @param thread candidate thread, may be NULL.
+ * @return true if the thread can be scheduled.
+ */
-int
-can_schedule(struct proc_info* proc)
+bool
+can_schedule(struct thread* thread)
{
- if (__SIGTEST(proc->sig_pending, _SIGCONT)) {
- __SIGCLEAR(proc->sig_pending, _SIGSTOP);
- } else if (__SIGTEST(proc->sig_pending, _SIGSTOP)) {
- // 如果进程受到SIGSTOP,则该进程不给予调度。
+ if (!thread) {
return 0;
}
- return 1;
+ if (proc_terminated(thread)) {
+ return false;
+ }
+
+ if (preempt_check_stalled(thread)) {
+ thread_flags_set(thread, TH_STALLED);
+ return true;
+ }
+
+ if (unlikely(kernel_process(thread->process))) {
+ // a kernel process is always runnable
+ // (signal state is not consulted; only PS_READY is required)
+ return thread->state == PS_READY;
+ }
+
+ struct sigctx* sh = &thread->sigctx;
+
+ if ((thread->state & PS_PAUSED)) {
+ // any pending bit except bit 0 wakes a paused thread
+ return !!(sh->sig_pending & ~1);
+ }
+
+ if ((thread->state & PS_BLOCKED)) {
+ return sigset_test(sh->sig_pending, _SIGINT);
+ }
+
+ if (sigset_test(sh->sig_pending, _SIGSTOP)) {
+ // If one thread is experiencing SIGSTOP, then we know
+ // all other threads are also SIGSTOP (as per POSIX-2008.1)
+ // In which case, the entire process is stopped.
+ thread->state = PS_STOPPED;
+ return false;
+ }
+
+ if (sigset_test(sh->sig_pending, _SIGCONT)) {
+ thread->state = PS_READY;
+ }
+
+ return (thread->state == PS_READY) \
+ && proc_runnable(thread->process);
}
void
check_sleepers()
{
- struct proc_info* leader = sched_ctx._procs[0];
- struct proc_info *pos, *n;
- time_t now = clock_systime();
- llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers)
+ struct thread *pos, *n;
+ time_t now = clock_systime() / 1000;
+
+ llist_for_each(pos, n, &sched_ctx.sleepers, sleep.sleepers)
{
- if (PROC_TERMINATED(pos->state)) {
+ if (proc_terminated(pos)) {
goto del;
}
if (atime && now >= atime) {
pos->sleep.alarm_time = 0;
- __SIGSET(pos->sig_pending, _SIGALRM);
+ thread_setsignal(pos, _SIGALRM);
}
if (!wtime && !atime) {
+/**
+ * Pick the next schedulable thread in round-robin order and switch to it.
+ *
+ * After calling no_preemption(), the current thread and process are put
+ * back to PS_READY when the thread has no state bit other than PS_RUNNING
+ * set, procvm_unmount_self() is called on the current process's vmspace
+ * and the sleepers are checked. The scheduler's thread list is then
+ * walked, starting with the thread after the current one, until
+ * can_schedule() accepts a candidate. If the walk arrives back at the
+ * current thread and it is not schedulable either, fail() is invoked.
+ *
+ * The function is not expected to return: run() switches away, and
+ * falling through is reported via fail().
+ */
void
schedule()
{
- if (!sched_ctx.ptable_len) {
- return;
- }
+ assert(sched_ctx.ptable_len && sched_ctx.ttable_len);
// 上下文切换相当的敏感!我们不希望任何的中断打乱栈的顺序……
- cpu_disable_interrupt();
- struct proc_info* next;
- int prev_ptr = sched_ctx.procs_index;
- int ptr = prev_ptr;
+ // (English: context switching is very sensitive; no interrupt may
+ // disturb the stack layout while it is in progress.)
+ no_preemption();
- if (!(__current->state & ~PS_RUNNING)) {
+ if (!(current_thread->state & ~PS_RUNNING)) {
+ current_thread->state = PS_READY;
__current->state = PS_READY;
+
}
+ procvm_unmount_self(vmspace(__current));
check_sleepers();
// round-robin scheduler
-redo:
+
+ struct thread* current = current_thread;
+ struct thread* to_check = current;
+
do {
- ptr = (ptr + 1) % sched_ctx.ptable_len;
- next = sched_ctx._procs[ptr];
- } while (!next || (next->state != PS_READY && ptr != prev_ptr));
+ to_check = list_next(to_check, struct thread, sched_sibs);
- sched_ctx.procs_index = ptr;
+ if (can_schedule(to_check)) {
+ break;
+ }
- if (next->state != PS_READY) {
- // schedule the dummy process if we're out of choice
- next = &dummy_proc;
- goto done;
- }
+ // Wrapped around to the starting thread without finding a candidate.
+ if (to_check == current) {
+ // FIXME do something less lethal here
+ fail("Ran out of threads!");
+ goto done;
+ }
- if (!can_schedule(next)) {
- // 如果该进程不给予调度,则尝试重新选择
- goto redo;
- }
+ } while (1);
+
+ sched_ctx.procs_index = to_check->process->pid;
done:
- run(next);
-}
+ isrm_notify_eos(0);
+ run(to_check);
-void
-sched_yieldk()
-{
- cpu_enable_interrupt();
- cpu_int(LUNAIX_SCHED);
+ fail("unexpected return from scheduler");
}
+// sleep(seconds): put the calling thread to sleep.
+// Visible behaviour in this hunk: if a wake-up time is already set on the
+// current thread, the remaining time (wakeup_time - now) is returned;
+// otherwise wakeup_time is set to now + seconds, the thread is queued on
+// the scheduler's sleeper list (unless already linked), `seconds` is
+// stored as the return value, the thread is blocked and schedule() runs.
+// NOTE(review): times are clock_systime() / 1000, presumably seconds
+// derived from milliseconds -- confirm against clock_systime().
__DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
return 0;
}
- if (__current->sleep.wakeup_time) {
- return (__current->sleep.wakeup_time - clock_systime()) / 1000U;
+ time_t systime = clock_systime() / 1000;
+ struct haybed* bed = &current_thread->sleep;
+
+ if (bed->wakeup_time) {
+ return (bed->wakeup_time - systime);
}
- struct proc_info* root_proc = sched_ctx._procs[0];
- __current->sleep.wakeup_time = clock_systime() + seconds * 1000;
- llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+ bed->wakeup_time = systime + seconds;
+
+ // Only link the thread into the sleeper list if it is not there yet.
+ if (llist_empty(&bed->sleepers)) {
+ llist_append(&sched_ctx.sleepers, &bed->sleepers);
+ }
- __current->intr_ctx.registers.eax = seconds;
+ store_retval(seconds);
- block_current();
+ block_current_thread();
schedule();
+
+ return 0;
}
+// alarm(seconds): arm (or, with seconds == 0, disarm) the calling thread's
+// alarm. The new deadline is now + seconds, where now is
+// clock_systime() / 1000. The thread is linked into the scheduler's
+// sleeper list if it is not already on it.
+// Returns the time left on the previously set alarm (prev deadline - now),
+// or 0 when no alarm was set.
__DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
{
- time_t prev_ddl = __current->sleep.alarm_time;
- time_t now = clock_systime();
+ struct haybed* bed = &current_thread->sleep;
+ time_t prev_ddl = bed->alarm_time;
+ time_t now = clock_systime() / 1000;
- __current->sleep.alarm_time = seconds ? now + seconds * 1000 : 0;
+ bed->alarm_time = seconds ? now + seconds : 0;
- struct proc_info* root_proc = sched_ctx._procs[0];
- if (llist_empty(&__current->sleep.sleepers)) {
- llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+ if (llist_empty(&bed->sleepers)) {
+ llist_append(&sched_ctx.sleepers, &bed->sleepers);
}
- return prev_ddl ? (prev_ddl - now) / 1000 : 0;
+ return prev_ddl ? (prev_ddl - now) : 0;
}
+// exit(status): terminate the current process with the given status via
+// terminate_current(), then call schedule().
__DEFINE_LXSYSCALL1(void, exit, int, status)
{
- terminate_proc(status);
+ terminate_current(status);
schedule();
}
+// geterrno(): return the syscall_ret value stored on the current thread.
__DEFINE_LXSYSCALL(int, geterrno)
{
- return __current->k_status;
+ return current_thread->syscall_ret;
}
pid_t
}
wpid = wpid ? wpid : -__current->pgid;
+
repeat:
llist_for_each(proc, n, &__current->children, siblings)
{
return 0;
}
// 放弃当前的运行机会
- sched_yieldk();
+ yield_current();
goto repeat;
done:
- status_flags |= PEXITSIG * (proc->sig_inprogress != 0);
if (status) {
- *status = proc->exit_code | status_flags;
+ *status = PEXITNUM(status_flags, proc->exit_code);
}
return destroy_process(proc->pid);
}
+/**
+ * Find a free slot in the process table.
+ *
+ * Scans sched_ctx.procs from index 0 and stops at the first empty slot
+ * below ptable_len; if every slot is occupied the result is ptable_len
+ * itself. Panics when the result equals MAX_PROCESS.
+ *
+ * @return the index (pid) of the free slot.
+ */
-struct proc_info*
-alloc_process()
-{
+static inline pid_t
+get_free_pid() {
pid_t i = 0;
- for (; i < sched_ctx.ptable_len && sched_ctx._procs[i]; i++)
+
+ for (; i < sched_ctx.ptable_len && sched_ctx.procs[i]; i++)
;
-
- if (i == MAX_PROCESS) {
+
+ if (unlikely(i == MAX_PROCESS)) {
panick("Panic in Ponyville shimmer!");
}
+ return i;
+}
+
+/**
+ * Allocate and initialise a new thread object owned by `process`.
+ *
+ * The thread is grabbed from thread_pile, stamped with its creation time,
+ * given a tid derived from that time and its own address (modulo
+ * MAX_THREAD_PP), put in PS_CREATED state, and has its list heads and
+ * wait queue initialised. It is NOT linked into the process or the
+ * scheduler here; commit_thread() does that.
+ *
+ * @param process owning process; dereferenced without a NULL check.
+ * @return the new thread, or NULL when the process already has
+ *         MAX_THREAD_PP threads or the pile allocation fails.
+ */
+struct thread*
+alloc_thread(struct proc_info* process) {
+ if (process->thread_count >= MAX_THREAD_PP) {
+ return NULL;
+ }
+
+ struct thread* th = cake_grab(thread_pile);
+ // Mirror alloc_process(): a failed grab is reported to the caller.
+ if (!th) {
+ return NULL;
+ }
+
+ th->process = process;
+ th->created = clock_systime();
+
+ // FIXME we need a better tid allocation method!
+ th->tid = (th->created ^ ((ptr_t)th)) % MAX_THREAD_PP;
+
+ th->state = PS_CREATED;
+
+ llist_init_head(&th->sleep.sleepers);
+ llist_init_head(&th->sched_sibs);
+ llist_init_head(&th->proc_sibs);
+ waitq_init(&th->waitqueue);
+
+ return th;
+}
+
+/**
+ * Allocate a new process object and register it in the process table.
+ *
+ * A free pid is obtained from get_free_pid() (growing ptable_len when the
+ * pid is one past the current end). The process is grabbed from
+ * proc_pile, set to PS_CREATED, becomes its own process-group leader
+ * (pgid == pid), and gets a signal registry, fd table, vm space, list
+ * heads and poll context before being stored in sched_ctx.procs[pid].
+ *
+ * @return the new process, or NULL if the pile allocation fails.
+ */
+struct proc_info*
+alloc_process()
+{
+ pid_t i = get_free_pid();
+
if (i == sched_ctx.ptable_len) {
sched_ctx.ptable_len++;
}
struct proc_info* proc = cake_grab(proc_pile);
+ if (!proc) {
+ return NULL;
+ }
proc->state = PS_CREATED;
proc->pid = i;
proc->created = clock_systime();
proc->pgid = proc->pid;
+
+ // NOTE(review): the vzalloc()/procvm_create() results below are used
+ // without a failure check.
+ proc->sigreg = vzalloc(sizeof(struct sigregistry));
proc->fdtable = vzalloc(sizeof(struct v_fdtable));
- proc->fxstate =
- vzalloc_dma(512); // FXSAVE需要十六位对齐地址,使用DMA块(128位对齐)
- llist_init_head(&proc->mm.regions.head);
+ proc->mm = procvm_create(proc);
+
llist_init_head(&proc->tasks);
llist_init_head(&proc->children);
llist_init_head(&proc->grp_member);
- llist_init_head(&proc->sleep.sleepers);
- waitq_init(&proc->waitqueue);
+ llist_init_head(&proc->threads);
+
+ iopoll_init(&proc->pollctx);
- sched_ctx._procs[i] = proc;
+ sched_ctx.procs[i] = proc;
return proc;
}
+/**
+ * Make a thread visible to its process and to the scheduler.
+ *
+ * Appends the thread to its process's thread list and to the scheduler's
+ * thread list (the first committed thread becomes the list anchor
+ * sched_ctx.threads), bumps both thread counters and marks the thread
+ * PS_READY. Asserts that the owning process exists and is not terminated.
+ *
+ * @param thread thread to commit; thread->process must be set.
+ */
void
-commit_process(struct proc_info* process)
-{
- assert(process == sched_ctx._procs[process->pid]);
+commit_thread(struct thread* thread) {
+ struct proc_info* process = thread->process;
- if (process->state != PS_CREATED) {
- __current->k_status = EINVAL;
- return;
+ assert(process && !proc_terminated(process));
+
+ llist_append(&process->threads, &thread->proc_sibs);
+
+ // The scheduler list has no dedicated head: the first thread's node
+ // serves as the anchor.
+ if (sched_ctx.threads) {
+ llist_append(sched_ctx.threads, &thread->sched_sibs);
+ } else {
+ sched_ctx.threads = &thread->sched_sibs;
}
+ sched_ctx.ttable_len++;
+ process->thread_count++;
+ thread->state = PS_READY;
+}
+
+/**
+ * Finish the creation of a process and make it schedulable.
+ *
+ * Asserts that the process sits in its own table slot and is still in
+ * PS_CREATED. A process without a parent is adopted by root_process,
+ * except a kernel process, which becomes its own parent; an explicit
+ * parent must not be terminated. The process is then linked into the
+ * scheduler's process list (the first one becomes the list anchor) and
+ * into its parent's children list, and marked PS_READY.
+ *
+ * @param process process previously returned by alloc_process().
+ */
+void
+commit_process(struct proc_info* process)
+{
+ assert(process == sched_ctx.procs[process->pid]);
+ assert(process->state == PS_CREATED);
+
// every process is the child of first process (pid=1)
if (!process->parent) {
- process->parent = sched_ctx._procs[1];
+ if (likely(!kernel_process(process))) {
+ process->parent = root_process;
+ } else {
+ process->parent = process;
+ }
+ } else {
+ assert(!proc_terminated(process->parent));
+ }
+
+ if (sched_ctx.proc_list) {
+ llist_append(sched_ctx.proc_list, &process->tasks);
+ } else {
+ sched_ctx.proc_list = &process->tasks;
}
llist_append(&process->parent->children, &process->siblings);
- llist_append(&sched_ctx._procs[0]->tasks, &process->tasks);
process->state = PS_READY;
}
-// from <kernel/process.c>
-extern void
-__del_pagetable(pid_t pid, uintptr_t mount_point);
+/**
+ * Unlink a thread from every list it is on and release it.
+ *
+ * Removes the thread from the scheduler list, its process's thread list
+ * and the sleeper list, cancels its wait-queue entry, releases its memory
+ * through thread_release_mem(), decrements both thread counters and
+ * returns the object to thread_pile.
+ *
+ * NOTE(review): if this thread's sched_sibs node is the one
+ * sched_ctx.threads points at, that anchor is not updated here -- confirm
+ * callers never destroy the anchor thread.
+ *
+ * @param thread thread to destroy; must not be used afterwards.
+ */
+void
+destory_thread(struct thread* thread)
+{
+ cake_ensure_valid(thread);
+
+ struct proc_info* proc = thread->process;
+
+ llist_delete(&thread->sched_sibs);
+ llist_delete(&thread->proc_sibs);
+ llist_delete(&thread->sleep.sleepers);
+ waitq_cancel_wait(&thread->waitqueue);
-pid_t
-destroy_process(pid_t pid)
+ thread_release_mem(thread);
+
+ proc->thread_count--;
+ sched_ctx.ttable_len--;
+
+ cake_release(thread_pile, thread);
+}
+
+/**
+ * Hand all children of `proc` over to root_process.
+ *
+ * Every child gets root_process as its parent and is appended to the
+ * root's children list. The children are not unlinked from
+ * proc->children first; the loop relies on the saved next pointer `n`.
+ */
+static void
+orphan_children(struct proc_info* proc)
{
- int index = pid;
- if (index <= 0 || index > sched_ctx.ptable_len) {
- __current->k_status = EINVAL;
- return;
+ struct proc_info *root;
+ struct proc_info *pos, *n;
+
+ root = root_process;
+
+ llist_for_each(pos, n, &proc->children, siblings) {
+ pos->parent = root;
+ llist_append(&root->children, &pos->siblings);
}
- struct proc_info* proc = sched_ctx._procs[index];
- sched_ctx._procs[index] = 0;
+}
+
+/**
+ * Tear down a process and free everything it owns.
+ *
+ * Steps visible in this patch: clear the process-table slot, unlink the
+ * process from its sibling/group/task lists, free its poll context,
+ * invalidate its taskfs entry, drop the cwd reference, free the command
+ * string and fd table, free the signal registry, destroy all of its
+ * threads with the process's vmspace mounted, re-parent its children to
+ * root_process, release the vmspace and finally return the object to
+ * proc_pile. Some of the body lies outside the hunks shown here.
+ *
+ * @param proc process to delete; pid 0 is rejected by assertion.
+ */
+void
+delete_process(struct proc_info* proc)
+{
+ pid_t pid = proc->pid;
+ struct proc_mm* mm = vmspace(proc);
+
+ assert(pid); // long live the pid0 !!
+
+ sched_ctx.procs[pid] = NULL;
llist_delete(&proc->siblings);
llist_delete(&proc->grp_member);
llist_delete(&proc->tasks);
- llist_delete(&proc->sleep.sleepers);
+
+ iopoll_free(proc);
taskfs_invalidate(pid);
vfs_unref_dnode(proc->cwd);
}
+ if (proc->cmd) {
+ vfree(proc->cmd);
+ }
+
for (size_t i = 0; i < VFS_MAX_FD; i++) {
struct v_fd* fd = proc->fdtable->fds[i];
if (fd) {
}
vfree(proc->fdtable);
- vfree_dma(proc->fxstate);
- struct mm_region *pos, *n;
- llist_for_each(pos, n, &proc->mm.regions.head, head)
- {
- vfree(pos);
- }
+ signal_free_registry(proc->sigreg);
- vmm_mount_pd(PD_MOUNT_1, proc->page_table);
+ procvm_mount(mm);
+
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &proc->threads, proc_sibs) {
+ // terminate and destroy all threads unconditionally
+ destory_thread(pos);
+ }
- __del_pagetable(pid, PD_MOUNT_1);
+ orphan_children(proc);
- vmm_unmount_pd(PD_MOUNT_1);
+ procvm_unmount_release(mm);
cake_release(proc_pile, proc);
+}
+
+/**
+ * Destroy the process with the given pid.
+ *
+ * @param pid pid of the process to destroy; must name an occupied slot
+ *            in the range [1, ptable_len).
+ * @return pid on success; -1 with the syscall result set to EINVAL when
+ *         the pid is out of range or the slot is empty.
+ */
+pid_t
+destroy_process(pid_t pid)
+{
+ int index = pid;
+ // Slot 0 is never destroyable and ptable_len is one past the last slot.
+ if (index <= 0 || index >= sched_ctx.ptable_len) {
+ syscall_result(EINVAL);
+ return -1;
+ }
+
+ struct proc_info* proc = sched_ctx.procs[index];
+ // An empty slot must not reach delete_process(), which dereferences it.
+ if (!proc) {
+ syscall_result(EINVAL);
+ return -1;
+ }
+
+ delete_process(proc);
return pid;
}
+/**
+ * Mark a process (but none of its threads) as terminated.
+ *
+ * Sets the state to PS_TERMNAT, records the exit code and raises
+ * _SIGCHLD on the parent. Asserts that the process is not pid 0.
+ */
+static void
+terminate_proc_only(struct proc_info* proc, int exit_code) {
+ assert(proc->pid != 0);
+
+ proc->state = PS_TERMNAT;
+ proc->exit_code = exit_code;
+
+ proc_setsignal(proc->parent, _SIGCHLD);
+}
+
+/**
+ * Terminate a single thread.
+ *
+ * Stores the exit value and marks the thread PS_TERMNAT. When the thread
+ * is the only one counted on its process, the process itself is
+ * terminated too, with exit code 0.
+ *
+ * @param thread thread to terminate.
+ * @param val    value recorded in thread->exit_val.
+ */
void
-terminate_proc(int exit_code)
-{
- __current->state = PS_TERMNAT;
- __current->exit_code = exit_code;
+terminate_thread(struct thread* thread, ptr_t val) {
+ thread->exit_val = val;
+ thread->state = PS_TERMNAT;
+
+ struct proc_info* proc = thread->process;
+ // Last remaining thread: take the whole process down with it.
+ if (proc->thread_count == 1) {
+ terminate_proc_only(thread->process, 0);
+ }
+}
+
+/** Terminate the currently running thread, recording `val` as its exit value. */
+void
+terminate_current_thread(ptr_t val) {
+ terminate_thread(current_thread, val);
+}
+
+/**
+ * Terminate a user process together with all of its threads.
+ *
+ * Asserts that the target is not a kernel process and panics when it is
+ * pid 1. The process is marked terminated with the given exit code (its
+ * parent receives _SIGCHLD) and every thread on its thread list is set to
+ * PS_TERMNAT. Nothing is freed here.
+ */
+void
+terminate_proccess(struct proc_info* proc, int exit_code) {
+ assert(!kernel_process(proc));
- __SIGSET(__current->parent->sig_pending, _SIGCHLD);
+ if (proc->pid == 1) {
+ panick("Attempt to kill init");
+ }
+
+ terminate_proc_only(proc, exit_code);
+
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &proc->threads, proc_sibs) {
+ pos->state = PS_TERMNAT;
+ }
+}
+
+/** Terminate the current process (__current) with the given exit code. */
+void
+terminate_current(int exit_code)
+{
+ terminate_proccess(__current, exit_code);
}
struct proc_info*
if (index < 0 || index > sched_ctx.ptable_len) {
return NULL;
}
- return sched_ctx._procs[index];
+ return sched_ctx.procs[index];
}
int
return 0;
if (pid >= sched_ctx.ptable_len)
return 0;
- struct proc_info* proc = sched_ctx._procs[pid];
+ struct proc_info* proc = sched_ctx.procs[pid];
struct proc_info* parent = proc->parent;
// 如果其父进程的状态是terminated 或 destroy中的一种
// 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程
- return PROC_TERMINATED(parent->state) || parent->created > proc->created;
+ return proc_terminated(parent) || parent->created > proc->created;
}
\ No newline at end of file