From: Minep Date: Sun, 23 Jul 2023 10:39:16 +0000 (+0100) Subject: refactor: Optimize the context switch overhead X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/commitdiff_plain/946c3fdd31300074cc78841795bd47af908ddddb refactor: Optimize the context switch overhead --- diff --git a/lunaix-os/includes/arch/x86/i386_abi.h b/lunaix-os/includes/arch/x86/i386_abi.h index 8804bc9..8bc8cd2 100644 --- a/lunaix-os/includes/arch/x86/i386_abi.h +++ b/lunaix-os/includes/arch/x86/i386_abi.h @@ -1,9 +1,9 @@ #ifndef __LUNAIX_I386ABI_H #define __LUNAIX_I386ABI_H -#define store_retval(retval) __current->intr_ctx.registers.eax = (retval) +#define store_retval(retval) __current->intr_ctx->registers.eax = (retval) -#define store_retval_to(proc, retval) (proc)->intr_ctx.registers.eax = (retval) +#define store_retval_to(proc, retval) (proc)->intr_ctx->registers.eax = (retval) #define j_usr(sp, pc) \ asm volatile("movw %0, %%ax\n" \ diff --git a/lunaix-os/includes/arch/x86/interrupts.h b/lunaix-os/includes/arch/x86/interrupts.h index 6d3c4b1..0d7c815 100644 --- a/lunaix-os/includes/arch/x86/interrupts.h +++ b/lunaix-os/includes/arch/x86/interrupts.h @@ -8,24 +8,25 @@ struct exec_param; +struct regcontext +{ + reg32 eax; + reg32 ebx; + reg32 ecx; + reg32 edx; + reg32 edi; + reg32 ebp; + reg32 esi; + reg32 ds; + reg32 es; + reg32 fs; + reg32 gs; +} __attribute__((packed)); + typedef struct { unsigned int depth; - struct - { - reg32 eax; - reg32 ebx; - reg32 ecx; - reg32 edx; - reg32 edi; - reg32 ebp; - reg32 esi; - reg32 ds; - reg32 es; - reg32 fs; - reg32 gs; - } __attribute__((packed)) registers; - + struct regcontext registers; union { reg32 esp; @@ -35,7 +36,7 @@ typedef struct struct exec_param { - isr_param saved_prev_ctx; + isr_param* saved_prev_ctx; u32_t vector; u32_t err_code; u32_t eip; diff --git a/lunaix-os/includes/arch/x86/tss.h b/lunaix-os/includes/arch/x86/tss.h index c2a2535..931ff26 100644 --- a/lunaix-os/includes/arch/x86/tss.h +++ b/lunaix-os/includes/arch/x86/tss.h @@ -1,7 +1,10 @@ #ifndef __LUNAIX_TSS_H #define __LUNAIX_TSS_H -#include +#define tss_esp0_off 4 + +#ifndef __ASM__ +#include struct x86_tss { u32_t link; @@ -12,5 +15,6 @@ struct x86_tss void tss_update_esp(u32_t esp0); +#endif #endif /* __LUNAIX_TSS_H */ diff --git a/lunaix-os/includes/hal/cpu.h b/lunaix-os/includes/hal/cpu.h index 694d678..3b77c7e 100644 --- a/lunaix-os/includes/hal/cpu.h +++ b/lunaix-os/includes/hal/cpu.h @@ -137,4 +137,10 @@ cpu_rdmsr(u32_t msr_idx, u32_t* reg_high, u32_t* reg_low); void cpu_wrmsr(u32_t msr_idx, u32_t reg_high, u32_t reg_low); +static inline void +cpu_ldvmspace(ptr_t vms) +{ + cpu_lcr3(vms); +} + #endif \ No newline at end of file diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index a8ac5d8..bb55f02 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -90,10 +90,10 @@ struct proc_info pid_t pid; // offset = 0 struct proc_info* parent; // offset = 4 - isr_param intr_ctx; // offset = 8 - ptr_t ustack_top; // offset = 84 -> 56 -> 60 - ptr_t page_table; // offset = 88 -> 60 -> 64 - void* fxstate; // offset = 92 -> 64 -> 68 + isr_param* intr_ctx; // offset = 8 + ptr_t ustack_top; // offset = 84 -> 56 -> 60 -> 12 + ptr_t page_table; // offset = 88 -> 60 -> 64 -> 16 + void* fxstate; // offset = 92 -> 64 -> 68 -> 20 /* ---- critical section end ---- */ @@ -170,7 +170,7 @@ pid_t destroy_process(pid_t pid); void -setup_proc_mem(struct proc_info* proc, ptr_t kstack_from); +copy_kernel_stack(struct proc_info* proc, ptr_t kstack_from); /** * @brief 复制当前进程(LunaixOS的类 fork (unix) 实现) diff --git a/lunaix-os/kernel/asm/x86/interrupt.S b/lunaix-os/kernel/asm/x86/interrupt.S index 602f793..3007d45 100644 --- a/lunaix-os/kernel/asm/x86/interrupt.S +++ b/lunaix-os/kernel/asm/x86/interrupt.S @@ -1,6 +1,7 @@ #define __ASM__ #include #include +#include #include #define __ASM_INTR_DIAGNOSIS @@ -29,25 +30,104 @@ */ +#define regsize 4 + +/* stack layout: saved interrupt context */ + .struct 0 +idepth: + .struct idepth + regsize +ieax: + .struct ieax + regsize +iebx: + .struct iebx + regsize +iecx: + .struct iecx + regsize +iedx: + .struct iedx + regsize +iedi: + .struct iedi + regsize +iebp: + .struct iebp + regsize +iesi: + .struct iesi + regsize +ids: + .struct ids + regsize +ies: + .struct ies + regsize +ifs: + .struct ifs + regsize +igs: + .struct igs + regsize +iesp: + .struct iesp + regsize +isave_prev: + .struct isave_prev + regsize +ivec: + .struct ivec + regsize +iecode: + .struct iecode + regsize +ieip: + .struct ieip + regsize +ics: + .struct ics + regsize +ieflags: + .struct ieflags + regsize +iuesp: + .struct iuesp + regsize +iuss: + + +/* stack layout: execution (flow-control) state context */ + .struct 0 +exsave_prev: + .struct exsave_prev + regsize +exvec: + .struct exvec + regsize +execode: + .struct execode + regsize +exeip: + .struct exeip + regsize +excs: + .struct excs + regsize +exeflags: + .struct exeflags + regsize +exuesp: + .struct exuesp + regsize +exuss: + +/* struct layout: critical section of struct proc_info */ + .struct 0 +proc_pid: + .struct proc_pid + regsize +proc_parent: + .struct proc_parent + regsize +proc_intr_ctx: + .struct proc_intr_ctx + regsize +proc_ustack_top: + .struct proc_ustack_top + regsize +proc_page_table: + .struct proc_page_table + regsize +proc_fxstate: + .section .text .global interrupt_wrapper interrupt_wrapper: /* Stack layout (layout of struct isr_param) - msa: [ss] > 76 - [esp] > 72 - eflags > 68 - cs > 64 - eip > 60 - err_code > 56 - vector > offset = 52 + msa: [ss] > 76 -> 28 + [esp] > 72 -> 24 + eflags > 68 -> 20 + cs > 64 -> 16 + eip > 60 -> 12 + err_code > 56 -> 8 + vector > offset = 52 -> 4 [saved_prev_ctx] > offset = 0 --- - esp + esp > 12 * 4 = 48 gs fs es - ds > offset = 7 * 4 = 28 + 4 + ds > offset = 8 * 4 = 32 esi ebp edi @@ -62,7 +142,7 @@ */ cld - subl $52, %esp + subl $4, %esp pushl %esp subl $16, %esp @@ -80,11 +160,11 @@ pushl %eax movl __current, %eax - movl 8(%eax), %eax + movl proc_intr_ctx(%eax), %eax incl %eax pushl %eax # nested intr: current depth - movl 116(%esp), %eax /* 取出 %cs */ + movl ics(%esp), %eax /* 取出 %cs */ andl $0x3, %eax /* 判断 RPL */ jz 1f @@ -98,12 +178,12 @@ # FIXME: Save x87 context to user stack, rather than kernel's memory. # 保存x87FPU的状态 - movl 68(%eax), %ebx + movl proc_fxstate(%eax), %ebx fxsave (%ebx) # 保存用户栈顶指针。因为我们允许同级中断的产生,所以需要该手段跟踪用户栈的地址。 - movl 124(%esp), %ebx # 取出esp - movl %ebx, 60(%eax) # 存入__current->ustack_top + movl iuesp(%esp), %ebx # 取出esp + movl %ebx, proc_ustack_top(%eax) # 存入__current->ustack_top 1: movl %esp, %eax @@ -121,16 +201,17 @@ #ifdef __ASM_INTR_DIAGNOSIS movl %eax, (debug_resv + 8) - movl 48(%esp), %eax - movl 60(%eax), %eax + movl iesp(%esp), %eax + movl exeip(%eax), %eax movl %eax, (debug_resv + 4) # eip #endif movl __current, %eax - movl 68(%eax), %eax + movl proc_fxstate(%eax), %eax test %eax, %eax # do we have stored x87 context? jz 1f - fxrstor (%eax) + fxrstor (%eax) + 1: popl %eax # discard isr_param::depth popl %eax @@ -150,20 +231,10 @@ movl %eax, tmp_store movl __current, %eax + + # nested intr: restore saved context - popl 8(%eax) # depth - popl 12(%eax) # eax - popl 16(%eax) # ebx - popl 20(%eax) # ecx - popl 24(%eax) # edx - popl 28(%eax) # edi - popl 32(%eax) # ebp - popl 36(%eax) # esi - popl 40(%eax) # ds - popl 44(%eax) # es - popl 48(%eax) # fs - popl 52(%eax) # gs - popl 56(%eax) # esp + popl proc_intr_ctx(%eax) addl $8, %esp @@ -180,8 +251,9 @@ shll $3, %eax addl $12, %eax addl %esp, %eax - movl %eax, (_tss + 4) + movl %eax, (_tss + tss_esp0_off) movl tmp_store, %eax + iret .global switch_to @@ -191,8 +263,8 @@ popl %ebx # next movl __current, %eax - movl 64(%eax), %ecx # __current->pagetable - movl 64(%ebx), %eax # next->pagetable + movl proc_page_table(%eax), %ecx # __current->pagetable + movl proc_page_table(%ebx), %eax # next->pagetable cmpl %ecx, %eax # if(next->pagtable != __current->pagetable) { jz 1f @@ -204,13 +276,19 @@ # 我们已经处在了新的地址空间,为了避免影响其先前的栈布局 # 需要使用一个临时的栈空间 movl $tmp_stack, %esp + + # 更新 tss + movl proc_intr_ctx(%ebx), %eax # proc->intr_ctx + movl iesp(%eax), %eax # intr_ctx->esp + movl %eax, (tss_esp0_off + _tss) + call signal_dispatch # kernel/signal.c test %eax, %eax # do we have signal to handle? jz 1f jmp handle_signal 1: - leal 8(%ebx), %eax + movl proc_intr_ctx(%ebx), %eax jmp soft_iret .global handle_signal diff --git a/lunaix-os/kernel/asm/x86/interrupts.c b/lunaix-os/kernel/asm/x86/interrupts.c index 3fb24cb..9f86cf2 100644 --- a/lunaix-os/kernel/asm/x86/interrupts.c +++ b/lunaix-os/kernel/asm/x86/interrupts.c @@ -20,9 +20,9 @@ void intr_handler(isr_param* param) { param->execp->saved_prev_ctx = __current->intr_ctx; - __current->intr_ctx = *param; + __current->intr_ctx = param; - volatile struct exec_param* execp = __current->intr_ctx.execp; + volatile struct exec_param* execp = __current->intr_ctx->execp; if (execp->vector <= 255) { isr_cb subscriber = isrm_get(execp->vector); diff --git a/lunaix-os/kernel/asm/x86/tss.c b/lunaix-os/kernel/asm/x86/tss.c index 45c01ee..603eca6 100644 --- a/lunaix-os/kernel/asm/x86/tss.c +++ b/lunaix-os/kernel/asm/x86/tss.c @@ -1,12 +1,7 @@ #include #include +#include volatile struct x86_tss _tss = { .link = 0, .esp0 = KSTACK_TOP, - .ss0 = KDATA_SEG }; - -void -tss_update_esp(u32_t esp0) -{ - _tss.esp0 = esp0; -} \ No newline at end of file + .ss0 = KDATA_SEG }; \ No newline at end of file diff --git a/lunaix-os/kernel/exe/exec.c b/lunaix-os/kernel/exe/exec.c index 3c4e253..a846126 100644 --- a/lunaix-os/kernel/exe/exec.c +++ b/lunaix-os/kernel/exe/exec.c @@ -220,7 +220,7 @@ __DEFINE_LXSYSCALL3(int, // we will jump to new entry point (_u_start) upon syscall's // return so execve 'will not return' from the perspective of it's invoker - volatile struct exec_param* execp = __current->intr_ctx.execp; + volatile struct exec_param* execp = __current->intr_ctx->execp; execp->esp = container.stack_top; execp->eip = container.exe.entry; diff --git a/lunaix-os/kernel/k_init.c b/lunaix-os/kernel/k_init.c index 71adb30..8c40b6b 100644 --- a/lunaix-os/kernel/k_init.c +++ b/lunaix-os/kernel/k_init.c @@ -126,10 +126,6 @@ spawn_proc0() * 目前的解决方案是2 */ - proc0->intr_ctx = (isr_param){ .registers = { .ds = KDATA_SEG, - .es = KDATA_SEG, - .fs = KDATA_SEG, - .gs = KDATA_SEG } }; proc0->parent = proc0; // 方案1:必须在读取eflags之后禁用。否则当进程被调度时,中断依然是关闭的! @@ -141,7 +137,7 @@ spawn_proc0() proc0->page_table = vmm_dup_vmspace(proc0->pid); // 直接切换到新的拷贝,进行配置。 - cpu_lcr3(proc0->page_table); + cpu_ldvmspace(proc0->page_table); // 为内核创建一个专属栈空间。 for (size_t i = 0; i < (KSTACK_SIZE >> PG_SIZE_BITS); i++) { @@ -155,12 +151,19 @@ spawn_proc0() struct exec_param* execp = (struct exec_param*)(KSTACK_TOP - sizeof(struct exec_param)); + isr_param* isrp = (isr_param*)((ptr_t)execp - sizeof(isr_param)); *execp = (struct exec_param){ .cs = KCODE_SEG, .eip = (ptr_t)__proc0, .ss = KDATA_SEG, .eflags = cpu_reflags() }; - proc0->intr_ctx.execp = execp; + *isrp = (isr_param){ .registers = { .ds = KDATA_SEG, + .es = KDATA_SEG, + .fs = KDATA_SEG, + .gs = KDATA_SEG }, + .execp = execp }; + + proc0->intr_ctx = isrp; // 加载x87默认配置 asm volatile("fninit\n" diff --git a/lunaix-os/kernel/process/process.c b/lunaix-os/kernel/process/process.c index 533c577..4e8fa0a 100644 --- a/lunaix-os/kernel/process/process.c +++ b/lunaix-os/kernel/process/process.c @@ -221,7 +221,13 @@ dup_proc() __copy_fdtable(pcb); region_copy(&__current->mm, &pcb->mm); - setup_proc_mem(pcb, VMS_SELF); + /* + * store the return value for forked process. + * this will be implicit carried over after kernel stack is copied. + */ + store_retval(0); + + copy_kernel_stack(pcb, VMS_SELF); // 根据 mm_region 进一步配置页表 @@ -240,9 +246,6 @@ dup_proc() vmm_unmount_pd(VMS_MOUNT_1); - // 正如同fork,返回两次。 - store_retval_to(pcb, 0); - commit_process(pcb); return pcb->pid; @@ -251,7 +254,7 @@ dup_proc() extern void __kernel_end; void -setup_proc_mem(struct proc_info* proc, ptr_t usedMnt) +copy_kernel_stack(struct proc_info* proc, ptr_t usedMnt) { // copy the entire kernel page table pid_t pid = proc->pid; @@ -279,9 +282,5 @@ setup_proc_mem(struct proc_info* proc, ptr_t usedMnt) *ppte = (p & 0xfff) | ppa; } - // 我们不需要分配内核的区域,因为所有的内核代码和数据段只能通过系统调用来访问,任何非法的访问 - // 都会导致eip落在区域外面,从而segmentation fault. - - // 至于其他的区域我们暂时没有办法知道,因为那需要知道用户程序的信息。我们留到之后在处理。 proc->page_table = pt_copy; } \ No newline at end of file diff --git a/lunaix-os/kernel/process/sched.c b/lunaix-os/kernel/process/sched.c index 5789927..1587848 100644 --- a/lunaix-os/kernel/process/sched.c +++ b/lunaix-os/kernel/process/sched.c @@ -1,5 +1,5 @@ +#include #include -#include #include #include @@ -63,6 +63,8 @@ sched_init_dummy() struct exec_param* execp = (void*)dummy_stack + DUMMY_STACK_SIZE - sizeof(struct exec_param); + isr_param* isrp = (void*)execp - sizeof(isr_param); + *execp = (struct exec_param){ .cs = KCODE_SEG, .eflags = cpu_reflags() | 0x0200, @@ -70,13 +72,15 @@ sched_init_dummy() .ss = KDATA_SEG, }; + *isrp = (isr_param){ .registers = { .ds = KDATA_SEG, + .es = KDATA_SEG, + .fs = KDATA_SEG, + .gs = KDATA_SEG }, + .execp = execp }; + // memset to 0 dummy_proc = (struct proc_info){}; - dummy_proc.intr_ctx = (isr_param){ .registers = { .ds = KDATA_SEG, - .es = KDATA_SEG, - .fs = KDATA_SEG, - .gs = KDATA_SEG }, - .execp = execp }; + dummy_proc.intr_ctx = isrp; dummy_proc.page_table = cpu_rcr3(); dummy_proc.state = PS_READY; @@ -99,7 +103,6 @@ run(struct proc_info* proc) 由于这中间没有进行地址空间的交换,所以第二次跳转使用的是同一个内核栈,而之前默认tss.esp0的值是永远指向最顶部 这样一来就有可能会覆盖更早的上下文信息(比如嵌套的信号捕获函数) */ - tss_update_esp(proc->intr_ctx.esp); apic_done_servicing(); @@ -226,7 +229,7 @@ __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds) llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers); } - __current->intr_ctx.registers.eax = seconds; + store_retval(seconds); block_current(); schedule(); diff --git a/lunaix-os/kernel/process/signal.c b/lunaix-os/kernel/process/signal.c index a8ec239..15c650d 100644 --- a/lunaix-os/kernel/process/signal.c +++ b/lunaix-os/kernel/process/signal.c @@ -76,7 +76,7 @@ signal_dispatch() 解决办法就是先吧intr_ctx拷贝到一个静态分配的区域里,然后再注入到用户栈。 */ static volatile struct proc_sigstate __temp_save; - __temp_save.proc_regs = __current->intr_ctx; + __temp_save.proc_regs = *__current->intr_ctx; memcpy(__temp_save.fxstate, __current->fxstate, 512); sigframe->sig_num = sig_selected; @@ -149,7 +149,8 @@ send_single: __DEFINE_LXSYSCALL1(int, sigreturn, struct proc_sig, *sig_ctx) { memcpy(__current->fxstate, sig_ctx->prev_context.fxstate, 512); - __current->intr_ctx = sig_ctx->prev_context.proc_regs; + // FIXME: Interrupt context is exposed to user space! + *__current->intr_ctx = sig_ctx->prev_context.proc_regs; struct sigact* current = __current->sigctx.inprogress; if (current) {