From 2e21eb2f39dd80aa166216381d5d402be943686e Mon Sep 17 00:00:00 2001 From: Minep Date: Tue, 31 May 2022 00:23:05 +0100 Subject: [PATCH 1/1] Kernel address space isolation and make the kernel heap global to all processes. And featuring initd! --- lunaix-os/includes/lunaix/mm/page.h | 4 +- lunaix-os/includes/lunaix/mm/vmm.h | 18 +++++++- lunaix-os/includes/lunaix/process.h | 3 +- lunaix-os/kernel/asm/x86/interrupts.c | 26 ++++++++---- lunaix-os/kernel/asm/x86/pfault.c | 4 ++ lunaix-os/kernel/asm/x86/prologue.S | 4 +- lunaix-os/kernel/asm/x86/syscall.S | 18 ++++++-- lunaix-os/kernel/k_init.c | 59 ++++++++++++++++++--------- lunaix-os/kernel/lxinit.c | 31 ++------------ lunaix-os/kernel/mm/cow.c | 13 +++--- lunaix-os/kernel/mm/kalloc.c | 28 ++++++++----- lunaix-os/kernel/mm/vmm.c | 13 ++++++ lunaix-os/kernel/process.c | 37 +++++++++++++---- lunaix-os/kernel/sched.c | 6 +-- lunaix-os/link/linker.ld | 2 +- 15 files changed, 175 insertions(+), 91 deletions(-) diff --git a/lunaix-os/includes/lunaix/mm/page.h b/lunaix-os/includes/lunaix/mm/page.h index a6b3162..4b6a1a1 100644 --- a/lunaix-os/includes/lunaix/mm/page.h +++ b/lunaix-os/includes/lunaix/mm/page.h @@ -95,11 +95,13 @@ typedef struct extern void __pg_mount_point; -/* 三个页挂载点: 用于临时创建&编辑页表 */ +/* 三个页挂载点,一个页目录挂载点: 用于临时创建&编辑页表 */ +#define PD_MOUNT 0xAFC00000 #define PG_MOUNT_1 (&__pg_mount_point) #define PG_MOUNT_2 (&__pg_mount_point + 0x1000) #define PG_MOUNT_3 (&__pg_mount_point + 0x2000) +#define PG_MOUNT_4 (&__pg_mount_point + 0x3000) #endif /* __LUNAIX_PAGE_H */ diff --git a/lunaix-os/includes/lunaix/mm/vmm.h b/lunaix-os/includes/lunaix/mm/vmm.h index 682f8d0..2e9f72f 100644 --- a/lunaix-os/includes/lunaix/mm/vmm.h +++ b/lunaix-os/includes/lunaix/mm/vmm.h @@ -119,6 +119,22 @@ vmm_lookup(void* va); * @return void* 包含虚拟页副本的物理页地址。 * */ -void* vmm_dup_page(void* va); +void* vmm_dup_page(pid_t pid, void* pa); + +/** + * @brief 挂载另一个虚拟地址空间至当前虚拟地址空间 + * + * @param pde 页目录的物理地址 + * @return void* + */ +void* +vmm_mount_pd(void* pde); + +/** + * @brief 卸载已挂载的虚拟地址空间 + * + */ +void* +vmm_unmount_pd(); #endif /* __LUNAIX_VMM_H */ diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index 25b1d60..efa1166 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -18,7 +18,6 @@ struct proc_mm { - heap_context_t k_heap; heap_context_t u_heap; struct mm_region* region; }; @@ -50,6 +49,8 @@ void push_process(struct proc_info* process); void destroy_process(pid_t pid); +void* dup_pagetable(pid_t pid); + /** * @brief 复制当前进程(LunaixOS的类 fork (unix) 实现) * diff --git a/lunaix-os/kernel/asm/x86/interrupts.c b/lunaix-os/kernel/asm/x86/interrupts.c index f6b5538..8330bc2 100644 --- a/lunaix-os/kernel/asm/x86/interrupts.c +++ b/lunaix-os/kernel/asm/x86/interrupts.c @@ -5,6 +5,7 @@ #include #include #include +#include LOG_MODULE("intr") @@ -29,6 +30,7 @@ intr_set_fallback_handler(int_subscriber subscribers) { fallback = subscribers; } +extern x86_page_table* __kernel_ptd; void intr_handler(isr_param* param) @@ -37,25 +39,29 @@ intr_handler(isr_param* param) // kprintf(KDEBUG "%p", param->registers.esp); // } __current->intr_ctx = *param; + + cpu_lcr3(__kernel_ptd); + + isr_param *lparam = &__current->intr_ctx; - if (param->vector <= 255) { - int_subscriber subscriber = subscribers[param->vector]; + if (lparam->vector <= 255) { + int_subscriber subscriber = subscribers[lparam->vector]; if (subscriber) { - subscriber(param); + subscriber(lparam); goto done; } } if (fallback) { - fallback(param); + fallback(lparam); goto done; } kprint_panic("INT %u: (%x) [%p: %p] Unknown", - param->vector, - param->err_code, - param->cs, - param->eip); + lparam->vector, + lparam->err_code, + lparam->cs, + lparam->eip); done: @@ -65,10 +71,12 @@ done: // for all external interrupts except the spurious interrupt // this is required by Intel Manual Vol.3A, section 10.8.1 & 10.8.5 - if (param->vector >= EX_INTERRUPT_BEGIN && param->vector != APIC_SPIV_IV) { + if (lparam->vector >= EX_INTERRUPT_BEGIN && lparam->vector != APIC_SPIV_IV) { apic_done_servicing(); } + cpu_lcr3(__current->page_table); + *param = __current->intr_ctx; return; diff --git a/lunaix-os/kernel/asm/x86/pfault.c b/lunaix-os/kernel/asm/x86/pfault.c index c5ec07f..63a40a3 100644 --- a/lunaix-os/kernel/asm/x86/pfault.c +++ b/lunaix-os/kernel/asm/x86/pfault.c @@ -2,6 +2,9 @@ #include #include #include +#include + +static void kprintf(const char* fmt, ...) { va_list args; va_start(args, fmt); __kprintf("PFAULT", fmt, args); va_end(args); } extern void __print_panic_msg(const char* msg, const isr_param* param); @@ -14,6 +17,7 @@ intr_routine_page_fault (const isr_param* param) goto done; } + kprintf("%p", pg_fault_ptr); __print_panic_msg("Page fault", param); done: diff --git a/lunaix-os/kernel/asm/x86/prologue.S b/lunaix-os/kernel/asm/x86/prologue.S index ddd2652..ef559bb 100644 --- a/lunaix-os/kernel/asm/x86/prologue.S +++ b/lunaix-os/kernel/asm/x86/prologue.S @@ -61,7 +61,9 @@ call _kernel_init - /* _kernel_init 永不返回 */ + movl $KSTACK_TOP, %esp + + call _kernel_post_init 1: hlt diff --git a/lunaix-os/kernel/asm/x86/syscall.S b/lunaix-os/kernel/asm/x86/syscall.S index 2ea22d4..25e153f 100644 --- a/lunaix-os/kernel/asm/x86/syscall.S +++ b/lunaix-os/kernel/asm/x86/syscall.S @@ -6,12 +6,17 @@ 注意,这里的顺序非常重要。每个系统调用在这个地址表里的索引等于其调用号。 */ syscall_table: + 1: .dc.l 0 .dc.l dup_proc .dc.l schedule .dc.l terminate_process .dc.l _syscall_sbrk .dc.l _syscall_brk + 2: + .rept __SYSCALL_MAX - (2b - 1b)/4 + .dc.l 0 + .endr .global syscall_hndlr @@ -24,7 +29,13 @@ movl (%ebp), %eax cmpl $__SYSCALL_MAX, %eax - jb 1f + jae 2f + + shll $2, %eax + addl $syscall_table, %eax + cmpl $0, (%eax) + jne 1f + 2: neg %eax popl %ebp ret @@ -35,14 +46,13 @@ pushl 12(%ebp) /* edx - #3 arg */ pushl 8(%ebp) /* ecx - #2 arg */ pushl 4(%ebp) /* ebx - #1 arg */ - shll $2, %eax - addl $syscall_table, %eax - + call (%eax) addl $24, %esp popl %ebp + ret diff --git a/lunaix-os/kernel/k_init.c b/lunaix-os/kernel/k_init.c index bbd505d..c119676 100644 --- a/lunaix-os/kernel/k_init.c +++ b/lunaix-os/kernel/k_init.c @@ -39,6 +39,10 @@ extern uint8_t __init_hhk_end; // Set remotely by kernel/asm/x86/prologue.S multiboot_info_t* _k_init_mb_info; +x86_page_table* __kernel_ptd; + +struct proc_info tmp; + LOG_MODULE("BOOT"); extern void _lxinit_main(); @@ -62,6 +66,14 @@ _kernel_pre_init() { tty_init((void*)VGA_BUFFER_PADDR); tty_set_theme(VGA_COLOR_WHITE, VGA_COLOR_BLACK); + + __kernel_ptd = cpu_rcr3(); + + tmp = (struct proc_info) { + .page_table = __kernel_ptd + }; + + __current = &tmp; } void @@ -81,8 +93,6 @@ _kernel_init() { kprintf(KINFO "[MM] Allocated %d pages for stack start at %p\n", KSTACK_SIZE>>PG_SIZE_BITS, KSTACK_START); sched_init(); - - spawn_lxinit(); } /** @@ -91,32 +101,43 @@ _kernel_init() { */ void spawn_lxinit() { struct proc_info kinit; - uint32_t* kstack = (uint32_t*)KSTACK_TOP - 4 * 5; memset(&kinit, 0, sizeof(kinit)); - kinit.page_table = (void*) cpu_rcr3(); kinit.parent = -1; kinit.pid = 1; kinit.intr_ctx = (isr_param) { - .registers.esp = kstack, + .registers.esp = KSTACK_TOP - 20, .cs = KCODE_SEG, - .eip = (void*)_kernel_post_init, + .eip = (void*)_lxinit_main, .ss = KDATA_SEG, .eflags = cpu_reflags() }; - - /* - 因为schedule从设计上是需要在中断环境中执行的 - 可是我们需要在这里手动调用 schedule,从而使我们的init能够被执行。 - 所以需要模拟中断产生时的栈里内容。 - */ - kstack[2] = kinit.intr_ctx.eip; - kstack[3] = kinit.intr_ctx.cs; - kstack[4] = kinit.intr_ctx.eflags; - + kinit.page_table = dup_pagetable(kinit.pid); + + // Ok... 准备fork进我们的init进程 + /* + 这里是一些栈的设置,因为我们将切换到一个新的地址空间里,并且使用一个全新的栈。 + 让iret满意! + */ + asm volatile( + "movl %%cr3, %%eax\n" + "movl %%esp, %%ebx\n" + "movl %0, %%cr3\n" + "movl %1, %%esp\n" + "pushf\n" + "pushl %2\n" + "pushl %3\n" + "pushl $0\n" + "pushl $0\n" + "movl %%eax, %%cr3\n" + "movl %%ebx, %%esp\n" + ::"r"(kinit.page_table), "i"(KSTACK_TOP), "i"(KCODE_SEG), "r"(kinit.intr_ctx.eip) + :"%eax", "%ebx", "memory" + ); + + // 向调度器注册进程,然后这里阻塞等待调度器调度就好了。 push_process(&kinit); - - schedule(); + } void @@ -155,7 +176,7 @@ _kernel_post_init() { vmm_unmap_page(KERNEL_PID, (void*)(i << PG_SIZE_BITS)); } - _lxinit_main(); + spawn_lxinit(); spin(); } diff --git a/lunaix-os/kernel/lxinit.c b/lunaix-os/kernel/lxinit.c index 7decff4..ec3425c 100644 --- a/lunaix-os/kernel/lxinit.c +++ b/lunaix-os/kernel/lxinit.c @@ -35,10 +35,6 @@ _lxinit_main() kprintf(KINFO "Forked %d\n", pid); } - // FIXME: 这里fork会造成下面lxmalloc产生Heap corruption,需要实现COW和加入mutex - // fork(); - - char buf[64]; kprintf(KINFO "Hello higher half kernel world!\nWe are now running in virtual " @@ -50,35 +46,14 @@ _lxinit_main() void* k_start = vmm_v2p(&__kernel_start); kprintf(KINFO "The kernel's base address mapping: %p->%p\n", &__kernel_start, k_start); - // test malloc & free - - uint8_t** arr = (uint8_t**)lxmalloc(10 * sizeof(uint8_t*)); - - for (size_t i = 0; i < 10; i++) { - arr[i] = (uint8_t*)lxmalloc((i + 1) * 2); - } - - for (size_t i = 0; i < 10; i++) { - lxfree(arr[i]); - } - - uint8_t* big_ = lxmalloc(8192); - big_[0] = 123; - big_[1] = 23; - big_[2] = 3; - - kprintf(KINFO "%u, %u, %u\n", big_[0], big_[1], big_[2]); - - // good free - lxfree(arr); - lxfree(big_); - - // timer_run_second(1, test_timer, NULL, TIMER_MODE_PERIODIC); + // no lxmalloc here! This can only be used within kernel, but here, we are in a dedicated process! + // any access to kernel method must be done via syscall struct kdb_keyinfo_pkt keyevent; while (1) { if (!kbd_recv_key(&keyevent)) { + // yield(); continue; } if ((keyevent.state & KBD_KEY_FPRESSED) && (keyevent.keycode & 0xff00) <= KEYPAD) { diff --git a/lunaix-os/kernel/mm/cow.c b/lunaix-os/kernel/mm/cow.c index 30cead9..85dcd49 100644 --- a/lunaix-os/kernel/mm/cow.c +++ b/lunaix-os/kernel/mm/cow.c @@ -1,16 +1,19 @@ #include -void* vmm_dup_page(void* va) { - void* new_ppg = pmm_alloc_page(KERNEL_PID, 0); - vmm_fmap_page(KERNEL_PID, PG_MOUNT_3, new_ppg, PG_PREM_RW); +void* vmm_dup_page(pid_t pid, void* pa) { + void* new_ppg = pmm_alloc_page(pid, 0); + vmm_fmap_page(pid, PG_MOUNT_3, new_ppg, PG_PREM_RW); + vmm_fmap_page(pid, PG_MOUNT_4, pa, PG_PREM_RW); asm volatile ( "movl %1, %%edi\n" + "movl %2, %%esi\n" "rep movsl\n" - :: "c"(1024), "r"(PG_MOUNT_3), "S"((uintptr_t)va) - : "memory", "%edi"); + :: "c"(1024), "r"(PG_MOUNT_3), "r"(PG_MOUNT_4) + : "memory", "%edi", "%esi"); vmm_unset_mapping(PG_MOUNT_3); + vmm_unset_mapping(PG_MOUNT_4); return new_ppg; } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/kalloc.c b/lunaix-os/kernel/mm/kalloc.c index 91905b4..4d9a5d1 100644 --- a/lunaix-os/kernel/mm/kalloc.c +++ b/lunaix-os/kernel/mm/kalloc.c @@ -57,27 +57,32 @@ lx_grow_heap(heap_context_t* heap, size_t sz); Note: the brk always point to the beginning of epilogue. */ +static heap_context_t kheap; + int kalloc_init() { - heap_context_t* kheap = &__current->mm.k_heap; - kheap->start = &__kernel_heap_start; - kheap->brk = NULL; - kheap->max_addr = (void*)KSTACK_START; + kheap.start = &__kernel_heap_start; + kheap.brk = NULL; + kheap.max_addr = (void*)KSTACK_START; - if (!dmm_init(kheap)) { + if (!dmm_init(&kheap)) { return 0; } - SW(kheap->start, PACK(4, M_ALLOCATED)); - SW(kheap->start + WSIZE, PACK(0, M_ALLOCATED)); - kheap->brk += WSIZE; + SW(kheap.start, PACK(4, M_ALLOCATED)); + SW(kheap.start + WSIZE, PACK(0, M_ALLOCATED)); + kheap.brk += WSIZE; - return lx_grow_heap(kheap, HEAP_INIT_SIZE) != NULL; + return lx_grow_heap(&kheap, HEAP_INIT_SIZE) != NULL; } void* lxmalloc(size_t size) { - return lx_malloc_internal(&__current->mm.k_heap, size); + mutex_lock(&kheap.lock); + void* r = lx_malloc_internal(&kheap, size); + mutex_unlock(&kheap.lock); + + return r; } void* @@ -102,6 +107,7 @@ lxfree(void* ptr) { if (!ptr) { return; } + mutex_lock(&kheap.lock); uint8_t* chunk_ptr = (uint8_t*)ptr - WSIZE; uint32_t hdr = LW(chunk_ptr); @@ -122,6 +128,8 @@ lxfree(void* ptr) { SW(next_hdr, LW(next_hdr) | M_PREV_FREE); coalesce(chunk_ptr); + + mutex_unlock(&kheap.lock); } diff --git a/lunaix-os/kernel/mm/vmm.c b/lunaix-os/kernel/mm/vmm.c index f791cc3..40c8be5 100644 --- a/lunaix-os/kernel/mm/vmm.c +++ b/lunaix-os/kernel/mm/vmm.c @@ -255,4 +255,17 @@ void* vmm_v2p(void* va) { return (void*)vmm_lookup(va).pa; +} + +void* +vmm_mount_pd(void* pde) { + x86_page_table* l1pt = (x86_page_table*)L1_BASE_VADDR; + l1pt->entry[(PD_MOUNT >> 22)] = NEW_L1_ENTRY(PG_PREM_RW, pde); + return PD_MOUNT; +} + +void* +vmm_unmount_pd() { + x86_page_table* l1pt = (x86_page_table*)L1_BASE_VADDR; + l1pt->entry[(PD_MOUNT >> 22)] = 0; } \ No newline at end of file diff --git a/lunaix-os/kernel/process.c b/lunaix-os/kernel/process.c index be7d423..de6eaf6 100644 --- a/lunaix-os/kernel/process.c +++ b/lunaix-os/kernel/process.c @@ -6,12 +6,10 @@ LOG_MODULE("PROC") -void dup_proc() { - pid_t pid = alloc_pid(); - +void* __dup_pagetable(pid_t pid, uintptr_t mount_point) { void* ptd_pp = pmm_alloc_page(pid, PP_FGPERSIST); x86_page_table* ptd = vmm_fmap_page(pid, PG_MOUNT_1, ptd_pp, PG_PREM_RW); - x86_page_table* pptd = (x86_page_table*) L1_BASE_VADDR; + x86_page_table* pptd = (x86_page_table*) (mount_point | (0x3FF << 12)); for (size_t i = 0; i < PG_MAX_ENTRIES - 1; i++) { @@ -21,7 +19,7 @@ void dup_proc() { continue; } - x86_page_table* ppt = (x86_page_table*) L2_VADDR(i); + x86_page_table* ppt = (x86_page_table*) (mount_point | (i << 12)); void* pt_pp = pmm_alloc_page(pid, PP_FGPERSIST); x86_page_table* pt = vmm_fmap_page(pid, PG_MOUNT_2, pt_pp, PG_PREM_RW); @@ -36,7 +34,7 @@ void dup_proc() { // FIXME: 根据 mm_region 将读共享的页(如堆)标为只读,而私有的页(如栈),则复制;而写共享的页则无需更改flags if (va >= KSTACK_START) { - void* ppa = vmm_dup_page(va); + void* ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(ppte)); ppte = ppte & 0xfff | (uintptr_t)ppa; } pt->entry[j] = ppte; @@ -50,16 +48,39 @@ void dup_proc() { ptd->entry[PG_MAX_ENTRIES - 1] = NEW_L1_ENTRY(T_SELF_REF_PERM, ptd_pp); + return ptd_pp; +} + +void* dup_pagetable(pid_t pid) { + return __dup_pagetable(pid, L2_BASE_VADDR); +} + +void dup_proc() { + pid_t pid = alloc_pid(); + + /* + FIXME: Problematic! It should mount the page table of process then copy it. + The current implementation copy the CURRENTLY loaded pgt. + However, dup_pagetable is designed to copy current loaded pgt. + + */ + + void* mnt_pt = vmm_mount_pd(__current->page_table); + + void* pg = __dup_pagetable(pid, mnt_pt); + + vmm_unmount_pd(); + struct proc_info pcb = (struct proc_info) { .created = clock_systime(), .pid = pid, .mm = __current->mm, - .page_table = ptd_pp, + .page_table = pg, .intr_ctx = __current->intr_ctx, .parent_created = __current->created }; - // 正如同fork一样,返回两次。 + // 正如同fork,返回两次。 pcb.intr_ctx.registers.eax = 0; __current->intr_ctx.registers.eax = pid; diff --git a/lunaix-os/kernel/sched.c b/lunaix-os/kernel/sched.c index ed2cc74..f4fb021 100644 --- a/lunaix-os/kernel/sched.c +++ b/lunaix-os/kernel/sched.c @@ -32,8 +32,6 @@ void sched_init() { .ptable_len = 0, .procs_index = 0 }; - - __current = &dummy; } void schedule() { @@ -61,7 +59,9 @@ void schedule() { apic_done_servicing(); - asm volatile ("pushl %0\n jmp soft_iret\n"::"r"(&__current->intr_ctx): "memory"); + asm volatile ( + "pushl %0\n" + "jmp soft_iret\n"::"r"(&__current->intr_ctx): "memory"); } pid_t alloc_pid() { diff --git a/lunaix-os/link/linker.ld b/lunaix-os/link/linker.ld index 2589aeb..ce236c0 100644 --- a/lunaix-os/link/linker.ld +++ b/lunaix-os/link/linker.ld @@ -62,7 +62,7 @@ SECTIONS { __kernel_end = ALIGN(4K); __pg_mount_point = ALIGN(4K); - . += 12K; + . += 16K; __proc_table = ALIGN(4K); . += 128M; __kernel_heap_start = ALIGN(4K); /* 内核结束的地方即堆开始的地方 */ -- 2.27.0