From: Minep Date: Sun, 23 Jul 2023 13:35:18 +0000 (+0100) Subject: refactor: Optimize the signal context overhead X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/commitdiff_plain/04ee277abd77fc51c5ab02ee58790a859607ea24?hp=946c3fdd31300074cc78841795bd47af908ddddb refactor: Optimize the signal context overhead refactor: remove kernel memory overhead for saving x87 context by saving it into user stack --- diff --git a/lunaix-os/.prettierignore b/lunaix-os/.prettierignore new file mode 100644 index 0000000..b2eb281 --- /dev/null +++ b/lunaix-os/.prettierignore @@ -0,0 +1 @@ +*.S.inc \ No newline at end of file diff --git a/lunaix-os/includes/arch/x86/i386_asm.h b/lunaix-os/includes/arch/x86/i386_asm.h index 5fec5b3..b6e8d05 100644 --- a/lunaix-os/includes/arch/x86/i386_asm.h +++ b/lunaix-os/includes/arch/x86/i386_asm.h @@ -7,4 +7,6 @@ #define UDATA_SEG 0x23 #define TSS_SEG 0x28 +#define stack_alignment 0xfffffff0 + #endif /* __LUNAIX_I386_ASM_H */ diff --git a/lunaix-os/includes/arch/x86/interrupt.S.inc b/lunaix-os/includes/arch/x86/interrupt.S.inc new file mode 100644 index 0000000..2d95fac --- /dev/null +++ b/lunaix-os/includes/arch/x86/interrupt.S.inc @@ -0,0 +1,90 @@ + +#define regsize 4 + +/* stack layout: saved interrupt context */ + .struct 0 +idepth: + .struct idepth + regsize +ieax: + .struct ieax + regsize +iebx: + .struct iebx + regsize +iecx: + .struct iecx + regsize +iedx: + .struct iedx + regsize +iedi: + .struct iedi + regsize +iebp: + .struct iebp + regsize +iesi: + .struct iesi + regsize +ids: + .struct ids + regsize +ies: + .struct ies + regsize +ifs: + .struct ifs + regsize +igs: + .struct igs + regsize +iexecp: +iesp: + .struct iesp + regsize +isave_prev: + .struct isave_prev + regsize +ivec: + .struct ivec + regsize +iecode: + .struct iecode + regsize +ieip: + .struct ieip + regsize +ics: + .struct ics + regsize +ieflags: + .struct ieflags + regsize +iuesp: + .struct iuesp + regsize +iuss: + + +/* stack layout: execution (flow-control) state 
context */ + .struct 0 +exsave_prev: + .struct exsave_prev + regsize +exvec: + .struct exvec + regsize +execode: + .struct execode + regsize +exeip: + .struct exeip + regsize +excs: + .struct excs + regsize +exeflags: + .struct exeflags + regsize +exuesp: + .struct exuesp + regsize +exuss: + +/* struct layout: critical section of struct proc_info */ + .struct 0 +proc_pid: + .struct proc_pid + regsize +proc_parent: + .struct proc_parent + regsize +proc_intr_ctx: + .struct proc_intr_ctx + regsize +proc_ustack_top: + .struct proc_ustack_top + regsize +proc_page_table: + .struct proc_page_table + regsize +proc_fxstate: + +/* struct layout: proc_sig */ + .struct 0 +psig_signum: + .struct psig_signum + regsize +psig_sigact: + .struct psig_sigact + regsize +psig_sighand: + .struct psig_sighand + regsize +psig_saved_ictx: \ No newline at end of file diff --git a/lunaix-os/includes/lunaix/common.h b/lunaix-os/includes/lunaix/common.h index c9ef9e4..ac0b4d4 100644 --- a/lunaix-os/includes/lunaix/common.h +++ b/lunaix-os/includes/lunaix/common.h @@ -13,6 +13,7 @@ #define KSTACK_SIZE MEM_1MB #define KSTACK_START (USER_START - KSTACK_SIZE) #define KSTACK_TOP ((USER_START - 1) & ~0xf) +#define within_kstack(addr) (KSTACK_START <= (addr) && (addr) <= KSTACK_TOP) #define KERNEL_MM_BASE 0xC0000000 diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index bb55f02..7d385a4 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -41,18 +41,12 @@ #define PS_GrBP (PS_PAUSED | PS_BLOCKED) #define PS_GrDT (PS_TERMNAT | PS_DESTROY) -#define PROC_TERMINATED(state) ((state)&PS_GrDT) -#define PROC_HANGED(state) ((state)&PS_BLOCKED) -#define PROC_RUNNABLE(state) ((state)&PS_PAUSED) +#define proc_terminated(proc) (((proc)->state) & PS_GrDT) +#define proc_hanged(proc) (((proc)->state) & PS_BLOCKED) +#define proc_runnable(proc) (((proc)->state) & PS_PAUSED) #define PROC_FINPAUSE 1 -struct proc_sigstate -{ - isr_param 
proc_regs; - char fxstate[512] __attribute__((aligned(16))); -}; - struct sigact { struct sigact* prev; @@ -75,7 +69,7 @@ struct proc_sig int sig_num; void* sigact; void* sighand; - struct proc_sigstate prev_context; + isr_param* saved_ictx; } __attribute__((packed)); struct proc_info @@ -93,7 +87,6 @@ struct proc_info isr_param* intr_ctx; // offset = 8 ptr_t ustack_top; // offset = 84 -> 56 -> 60 -> 12 ptr_t page_table; // offset = 88 -> 60 -> 64 -> 16 - void* fxstate; // offset = 92 -> 64 -> 68 -> 20 /* ---- critical section end ---- */ diff --git a/lunaix-os/kernel/asm/x86/interrupt.S b/lunaix-os/kernel/asm/x86/interrupt.S index 3007d45..80dee1a 100644 --- a/lunaix-os/kernel/asm/x86/interrupt.S +++ b/lunaix-os/kernel/asm/x86/interrupt.S @@ -3,6 +3,8 @@ #include #include #include +#include + #define __ASM_INTR_DIAGNOSIS #ifdef __ASM_INTR_DIAGNOSIS @@ -30,85 +32,6 @@ */ -#define regsize 4 - -/* stack layout: saved interrupt context */ - .struct 0 -idepth: - .struct idepth + regsize -ieax: - .struct ieax + regsize -iebx: - .struct iebx + regsize -iecx: - .struct iecx + regsize -iedx: - .struct iedx + regsize -iedi: - .struct iedi + regsize -iebp: - .struct iebp + regsize -iesi: - .struct iesi + regsize -ids: - .struct ids + regsize -ies: - .struct ies + regsize -ifs: - .struct ifs + regsize -igs: - .struct igs + regsize -iesp: - .struct iesp + regsize -isave_prev: - .struct isave_prev + regsize -ivec: - .struct ivec + regsize -iecode: - .struct iecode + regsize -ieip: - .struct ieip + regsize -ics: - .struct ics + regsize -ieflags: - .struct ieflags + regsize -iuesp: - .struct iuesp + regsize -iuss: - - -/* stack layout: execution (flow-control) state context */ - .struct 0 -exsave_prev: - .struct exsave_prev + regsize -exvec: - .struct exvec + regsize -execode: - .struct execode + regsize -exeip: - .struct exeip + regsize -excs: - .struct excs + regsize -exeflags: - .struct exeflags + regsize -exuesp: - .struct exuesp + regsize -exuss: - -/* struct layout: 
critical section of struct proc_info */ - .struct 0 -proc_pid: - .struct proc_pid + regsize -proc_parent: - .struct proc_parent + regsize -proc_intr_ctx: - .struct proc_intr_ctx + regsize -proc_ustack_top: - .struct proc_ustack_top + regsize -proc_page_table: - .struct proc_page_table + regsize -proc_fxstate: - .section .text .global interrupt_wrapper interrupt_wrapper: @@ -168,7 +91,8 @@ proc_fxstate: andl $0x3, %eax /* 判断 RPL */ jz 1f - movw $KDATA_SEG, %ax /* 如果从用户模式转来,则切换至内核数据段 */ + /* crossing the user/kernel boundary */ + movw $KDATA_SEG, %ax movw %ax, %gs movw %ax, %fs movw %ax, %ds @@ -176,18 +100,21 @@ proc_fxstate: movl __current, %eax - # FIXME: Save x87 context to user stack, rather than kernel's memory. - # 保存x87FPU的状态 - movl proc_fxstate(%eax), %ebx - fxsave (%ebx) - # 保存用户栈顶指针。因为我们允许同级中断的产生,所以需要该手段跟踪用户栈的地址。 movl iuesp(%esp), %ebx # 取出esp movl %ebx, proc_ustack_top(%eax) # 存入__current->ustack_top + # Save x87 context to user stack, rather than kernel's memory. + # XXX: what will happen if we triggered a page fault during fxsave? + movl iuesp(%esp), %eax + andl $stack_alignment, %eax + subl $512, %eax + fxsave (%eax) + + /* kernel space same-level switch */ 1: movl %esp, %eax - andl $0xfffffff0, %esp + andl $stack_alignment, %esp subl $16, %esp movl %eax, (%esp) @@ -205,12 +132,19 @@ proc_fxstate: movl exeip(%eax), %eax movl %eax, (debug_resv + 4) # eip #endif - movl __current, %eax - movl proc_fxstate(%eax), %eax + // movl __current, %eax + // movl proc_fxstate(%eax), %eax - test %eax, %eax # do we have stored x87 context? + // test %eax, %eax # do we have stored x87 context? 
+ + movl ics(%esp), %eax + andl $3, %eax jz 1f - fxrstor (%eax) + + movl iuesp(%esp), %eax + andl $stack_alignment, %eax + subl $512, %eax + fxrstor (%eax) 1: popl %eax # discard isr_param::depth @@ -276,16 +210,16 @@ proc_fxstate: # 我们已经处在了新的地址空间,为了避免影响其先前的栈布局 # 需要使用一个临时的栈空间 movl $tmp_stack, %esp - - # 更新 tss - movl proc_intr_ctx(%ebx), %eax # proc->intr_ctx - movl iesp(%eax), %eax # intr_ctx->esp - movl %eax, (tss_esp0_off + _tss) call signal_dispatch # kernel/signal.c test %eax, %eax # do we have signal to handle? jz 1f + + # 更新 tss + movl proc_intr_ctx(%ebx), %ecx # __current->intr_ctx + movl %ecx, (tss_esp0_off + _tss) + jmp handle_signal 1: movl proc_intr_ctx(%ebx), %eax @@ -296,16 +230,16 @@ proc_fxstate: # 注意1:任何对proc_sig的布局改动,都须及时的保证这里的一致性! # 注意2:handle_signal在调用之前,须确保proc_sig已经写入用户栈! # arg1 in %eax: addr of proc_sig structure in user stack - leal 12(%eax), %ebx # %ebx = &proc_sig->prev_context + movl psig_saved_ictx(%eax), %ebx # %ebx = &proc_sig->saved_ictx - pushl $UDATA_SEG # proc_sig->prev_context.proc_regs.ss + pushl $UDATA_SEG pushl %eax # esp - movl 48(%ebx), %ebx - pushl 68(%ebx) # proc_sig->prev_context.proc_regs.execp->eflags + movl iexecp(%ebx), %ebx + pushl exeflags(%ebx) # proc_sig->saved_ictx->execp->eflags pushl $UCODE_SEG # cs - pushl 4(%eax) # %eip = proc_sig->sigact + pushl psig_sigact(%eax) # %eip = proc_sig->sigact movw $UDATA_SEG, %cx # switch data seg to user mode movw %cx, %es diff --git a/lunaix-os/kernel/k_init.c b/lunaix-os/kernel/k_init.c index 8c40b6b..2dc39e6 100644 --- a/lunaix-os/kernel/k_init.c +++ b/lunaix-os/kernel/k_init.c @@ -165,11 +165,6 @@ spawn_proc0() proc0->intr_ctx = isrp; - // 加载x87默认配置 - asm volatile("fninit\n" - "fxsave (%%eax)" ::"a"(proc0->fxstate) - : "memory"); - // 向调度器注册进程。 commit_process(proc0); diff --git a/lunaix-os/kernel/process/process.c b/lunaix-os/kernel/process/process.c index 4e8fa0a..63fc7ad 100644 --- a/lunaix-os/kernel/process/process.c +++ b/lunaix-os/kernel/process/process.c @@ -211,8 
+211,6 @@ dup_proc() pcb->intr_ctx = __current->intr_ctx; pcb->parent = __current; - memcpy(pcb->fxstate, __current->fxstate, 512); - if (__current->cwd) { pcb->cwd = __current->cwd; vfs_ref_dnode(pcb->cwd); diff --git a/lunaix-os/kernel/process/sched.c b/lunaix-os/kernel/process/sched.c index 1587848..acf04b7 100644 --- a/lunaix-os/kernel/process/sched.c +++ b/lunaix-os/kernel/process/sched.c @@ -142,7 +142,7 @@ check_sleepers() time_t now = clock_systime(); llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers) { - if (PROC_TERMINATED(pos->state)) { + if (proc_terminated(pos)) { goto del; } @@ -343,8 +343,6 @@ alloc_process() proc->created = clock_systime(); proc->pgid = proc->pid; proc->fdtable = vzalloc(sizeof(struct v_fdtable)); - proc->fxstate = - vzalloc_dma(512); // FXSAVE需要十六位对齐地址,使用DMA块(128位对齐) llist_init_head(&proc->mm.regions); llist_init_head(&proc->tasks); @@ -415,7 +413,6 @@ destroy_process(pid_t pid) } vfree(proc->fdtable); - vfree_dma(proc->fxstate); vmm_mount_pd(VMS_MOUNT_1, proc->page_table); @@ -466,5 +463,5 @@ orphaned_proc(pid_t pid) // 如果其父进程的状态是terminated 或 destroy中的一种 // 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程 - return PROC_TERMINATED(parent->state) || parent->created > proc->created; + return proc_terminated(parent) || parent->created > proc->created; } \ No newline at end of file diff --git a/lunaix-os/kernel/process/signal.c b/lunaix-os/kernel/process/signal.c index 15c650d..d00e757 100644 --- a/lunaix-os/kernel/process/signal.c +++ b/lunaix-os/kernel/process/signal.c @@ -4,6 +4,9 @@ #include #include #include +#include + +LOG_MODULE("SIG") #include @@ -11,6 +14,13 @@ extern struct scheduler sched_ctx; /* kernel/sched.c */ #define UNMASKABLE (sigset(SIGKILL) | sigset(SIGTERM)) #define TERMSIG (sigset(SIGSEGV) | sigset(SIGINT) | UNMASKABLE) +#define CORE (sigset(SIGSEGV)) + +static inline void +signal_terminate(int errcode) +{ + terminate_proc(errcode | PEXITSIG); +} // Referenced in kernel/asm/x86/interrupt.S void* @@ -38,7 +48,7 @@ 
signal_dispatch() if (!action->sa_actor) { if (sigset_test(TERMSIG, sig_selected)) { - terminate_proc(sig_selected | PEXITSIG); + signal_terminate(sig_selected); schedule(); // never return } @@ -55,35 +65,11 @@ signal_dispatch() struct proc_sig* sigframe = (struct proc_sig*)((ustack - sizeof(struct proc_sig)) & ~0xf); - /* - 这是一个相当恶心的坑。 - 问题是出在原本的sigframe->prev_context = __current->intr_ctx的上面 - 这个语句会被gcc在编译时,用更加高效的 rep movsl 来代替。 - - 由于我们采用按需分页,所以在很多情况下,用户栈实际被分配的空间不允许我们进行完整的 - 注入,而需要走page fault handler进行动态分页。 - - 竞态条件就出现在这里! - - 假若我们的__current->intr_ctx注入了一半,然后产生page-fault中断, - 那么这就会导致我们的__current->intr_ctx被这个page-fault中断导致的 - 上下文信息覆盖。那么当page-fault handler成功分配了一个页,返回, - 拷贝也就得以进行。遗憾的是,只不过这次拷贝的内容和前面的拷贝是没有任何的关系 - (因为此时的intr_ctx已经不是之前的intr_ctx了!) - 而这就会导致我们保存在信号上下文中的进程上下文信息不完整,从而在soft_iret时 - 触发#GP。 - - 解决办法就是先吧intr_ctx拷贝到一个静态分配的区域里,然后再注入到用户栈。 - */ - static volatile struct proc_sigstate __temp_save; - __temp_save.proc_regs = *__current->intr_ctx; - memcpy(__temp_save.fxstate, __current->fxstate, 512); - sigframe->sig_num = sig_selected; - sigframe->sigact = action->sa_actor; sigframe->sighand = action->sa_handler; - sigframe->prev_context = __temp_save; + + sigframe->saved_ictx = __current->intr_ctx; action->prev = prev_working; psig->inprogress = action; @@ -135,7 +121,7 @@ send_grp: } send_single: - if (PROC_TERMINATED(proc->state)) { + if (proc_terminated(proc)) { __current->k_status = EINVAL; return -1; } @@ -148,9 +134,7 @@ send_single: __DEFINE_LXSYSCALL1(int, sigreturn, struct proc_sig, *sig_ctx) { - memcpy(__current->fxstate, sig_ctx->prev_context.fxstate, 512); - // FIXME: Interrupt context is exposed to user space! 
- *__current->intr_ctx = sig_ctx->prev_context.proc_regs; + __current->intr_ctx = sig_ctx->saved_ictx; struct sigact* current = __current->sigctx.inprogress; if (current) { @@ -160,6 +144,24 @@ __DEFINE_LXSYSCALL1(int, sigreturn, struct proc_sig, *sig_ctx) __current->sigctx.inprogress = NULL; } + if (proc_terminated(__current)) { + __current->exit_code |= PEXITSIG; + } else if (sigset_test(CORE, sig_ctx->sig_num)) { + signal_terminate(sig_ctx->sig_num); + } + + ptr_t ictx = (ptr_t)__current->intr_ctx; + + /* + Ensure our restored context is within the kernel stack + + This prevents the user from forging their own context, which would let arbitrary + code be executed at supervisor level + */ + if (!within_kstack(ictx)) { + signal_terminate(SIGSEGV); + } + schedule(); // never reach! diff --git a/lunaix-os/usr/init/init.c b/lunaix-os/usr/init/init.c index 0ed0d0b..b3eaae5 100644 --- a/lunaix-os/usr/init/init.c +++ b/lunaix-os/usr/init/init.c @@ -35,7 +35,7 @@ main(int argc, const char** argv) waitpid(pid, &err, 0); - if (err) { + if (WEXITSTATUS(err)) { printf("shell exit abnormally (%d)", err); } diff --git a/lunaix-os/usr/libc/arch/i386/crt0.S b/lunaix-os/usr/libc/arch/i386/crt0.S index 28aa6ba..eec9ee9 100644 --- a/lunaix-os/usr/libc/arch/i386/crt0.S +++ b/lunaix-os/usr/libc/arch/i386/crt0.S @@ -10,6 +10,7 @@ .global _start _start: xorl %eax, %eax + fninit call main 1: diff --git a/lunaix-os/usr/signal_demo/signal_demo.c b/lunaix-os/usr/signal_demo/signal_demo.c index b20f4ad..352d0de 100644 --- a/lunaix-os/usr/signal_demo/signal_demo.c +++ b/lunaix-os/usr/signal_demo/signal_demo.c @@ -24,7 +24,7 @@ sigalrm_handler(int signum) printf("I, pid %d, have received an alarm!\n", pid); } -void +int main() { signal(SIGCHLD, sigchild_handler); @@ -72,4 +72,6 @@ main() } printf("done\n"); + + return 0; } \ No newline at end of file