From: Minep Date: Mon, 6 Jun 2022 12:08:54 +0000 (+0100) Subject: feat: No more kernel page table switching upon interrupt. X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/commitdiff_plain/86a9cfeadbbe180abf6c8bd8c64c86957fdacfeb?ds=sidebyside feat: No more kernel page table switching upon interrupt. feat: 'wait' syscall implementation feat: use linked list for chaining children process. feat: Now the allocated memory can be recycled upon process destroy. feat: Interruptible syscall. refactor: correct the sbrk and brk naming. refactor: process initialization. fix: TLB caching stall the PTE rewrite which cause fork() fail randomly. fix: Add new process state: CREATE to avoid race during process creation. chores: annotations on future improvements & fixes. --- diff --git a/lunaix-os/bochs.cfg b/lunaix-os/bochs.cfg index 1e9f946..0011df5 100644 --- a/lunaix-os/bochs.cfg +++ b/lunaix-os/bochs.cfg @@ -4,4 +4,6 @@ memory: guest=1024, host=1024 clock: sync=realtime, time0=utc, rtc_sync=1 +display_library: x, options="gui_debug" + boot: cdrom \ No newline at end of file diff --git a/lunaix-os/bx_enh_dbg.ini b/lunaix-os/bx_enh_dbg.ini new file mode 100644 index 0000000..c9f2bf5 --- /dev/null +++ b/lunaix-os/bx_enh_dbg.ini @@ -0,0 +1,26 @@ +# bx_enh_dbg_ini +SeeReg[0] = TRUE +SeeReg[1] = TRUE +SeeReg[2] = TRUE +SeeReg[3] = TRUE +SeeReg[4] = FALSE +SeeReg[5] = FALSE +SeeReg[6] = FALSE +SeeReg[7] = FALSE +SingleCPU = FALSE +ShowIOWindows = TRUE +ShowButtons = TRUE +SeeRegColors = TRUE +ignoreNxtT = TRUE +ignSSDisasm = TRUE +UprCase = 1 +DumpInAsciiMode = 3 +isLittleEndian = TRUE +DefaultAsmLines = 512 +DumpWSIndex = 2 +DockOrder = 0x123 +ListWidthPix[0] = 485 +ListWidthPix[1] = 667 +ListWidthPix[2] = 764 +MainWindow = 0, 0, 1283, 500 +FontName = Normal diff --git a/lunaix-os/flags.h b/lunaix-os/flags.h new file mode 100644 index 0000000..90a5a5a --- /dev/null +++ b/lunaix-os/flags.h @@ -0,0 +1,10 @@ +#ifndef __LUNAIX_FLAGS_H +#define __LUNAIX_FLAGS_H + +/* + Uncomment below to force LunaixOS use kernel page table when context switch to kernel space + NOTE: This will make the kernel global. +*/ +// #define USE_KERNEL_PG + +#endif /* __LUNAIX_FLAGS_H */ diff --git a/lunaix-os/includes/arch/x86/idt.h b/lunaix-os/includes/arch/x86/idt.h index 294cb27..9b48eea 100644 --- a/lunaix-os/includes/arch/x86/idt.h +++ b/lunaix-os/includes/arch/x86/idt.h @@ -1,6 +1,8 @@ #ifndef __LUNAIX_IDT_H #define __LUNAIX_IDT_H -#define IDT_ATTR(dpl) ((0x70 << 5) | (dpl & 3) << 13 | 1 << 15) +#define IDT_TRAP 0x78 +#define IDT_INTERRUPT 0x70 +#define IDT_ATTR(dpl, type) (((type) << 5) | ((dpl & 3) << 13) | (1 << 15)) void _init_idt(); diff --git a/lunaix-os/includes/arch/x86/tss.h b/lunaix-os/includes/arch/x86/tss.h index 3310aa6..d8256f7 100644 --- a/lunaix-os/includes/arch/x86/tss.h +++ b/lunaix-os/includes/arch/x86/tss.h @@ -9,6 +9,6 @@ struct x86_tss { uint8_t __padding[94]; } __attribute__((packed)); -void tss_update(uint32_t ss0, uint32_t esp0); +void tss_update_esp(uint32_t esp0); #endif /* __LUNAIX_TSS_H */ diff --git a/lunaix-os/includes/lunaix/ds/llist.h b/lunaix-os/includes/lunaix/ds/llist.h index effee95..dd2a192 100644 --- a/lunaix-os/includes/lunaix/ds/llist.h +++ b/lunaix-os/includes/lunaix/ds/llist.h @@ -58,6 +58,10 @@ llist_delete(struct llist_header* elem) { elem->next = elem; } +static inline int llist_empty(struct llist_header* elem) { + return elem->next == elem; +} + /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. diff --git a/lunaix-os/includes/lunaix/lunistd.h b/lunaix-os/includes/lunaix/lunistd.h index 12faab5..a7ac03b 100644 --- a/lunaix-os/includes/lunaix/lunistd.h +++ b/lunaix-os/includes/lunaix/lunistd.h @@ -18,4 +18,6 @@ __LXSYSCALL1(void, _exit, int, status) __LXSYSCALL1(unsigned int, sleep, unsigned int, seconds) +__LXSYSCALL1(pid_t, wait, int*, status); + #endif /* __LUNAIX_UNISTD_H */ diff --git a/lunaix-os/includes/lunaix/mm/dmm.h b/lunaix-os/includes/lunaix/mm/dmm.h index 793ab3f..171c84a 100644 --- a/lunaix-os/includes/lunaix/mm/dmm.h +++ b/lunaix-os/includes/lunaix/mm/dmm.h @@ -36,9 +36,10 @@ int dmm_init(heap_context_t* heap); int -lxsbrk(heap_context_t* heap, void* addr); +lxbrk(heap_context_t* heap, void* addr); + void* -lxbrk(heap_context_t* heap, size_t size); +lxsbrk(heap_context_t* heap, size_t size); void* lx_malloc_internal(heap_context_t* heap, size_t size); diff --git a/lunaix-os/includes/lunaix/mm/mm.h b/lunaix-os/includes/lunaix/mm/mm.h index 8318eaa..04f2f02 100644 --- a/lunaix-os/includes/lunaix/mm/mm.h +++ b/lunaix-os/includes/lunaix/mm/mm.h @@ -31,6 +31,7 @@ typedef struct #define REGION_WSHARED 0x2 #define REGION_PERM_MASK 0x1c + #define REGION_READ (1 << 2) #define REGION_WRITE (1 << 3) #define REGION_EXEC (1 << 4) diff --git a/lunaix-os/includes/lunaix/mm/page.h b/lunaix-os/includes/lunaix/mm/page.h index 5755a46..35a1b47 100644 --- a/lunaix-os/includes/lunaix/mm/page.h +++ b/lunaix-os/includes/lunaix/mm/page.h @@ -33,18 +33,18 @@ #define PG_PRESENT (0x1) #define PG_WRITE (0x1 << 1) #define PG_ALLOW_USER (0x1 << 2) -#define PG_WRITE_THROUGHT (1 << 3) +#define PG_WRITE_THROUGH (1 << 3) #define PG_DISABLE_CACHE (1 << 4) #define PG_PDE_4MB (1 << 7) -#define NEW_L1_ENTRY(flags, pt_addr) (PG_ALIGN(pt_addr) | ((flags) & 0xfff)) +#define NEW_L1_ENTRY(flags, pt_addr) (PG_ALIGN(pt_addr) | (((flags) | PG_WRITE_THROUGH) & 0xfff)) #define NEW_L2_ENTRY(flags, pg_addr) (PG_ALIGN(pg_addr) | ((flags) & 0xfff)) #define V_ADDR(pd, pt, offset) ((pd) << 22 | (pt) << 12 | (offset)) #define P_ADDR(ppn, offset) ((ppn << 12) | (offset)) -#define PG_ENTRY_FLAGS(entry) (entry & 0xFFFU) -#define PG_ENTRY_ADDR(entry) (entry & ~0xFFFU) +#define PG_ENTRY_FLAGS(entry) ((entry) & 0xFFFU) +#define PG_ENTRY_ADDR(entry) ((entry) & ~0xFFFU) #define HAS_FLAGS(entry, flags) ((PG_ENTRY_FLAGS(entry) & (flags)) == flags) #define CONTAINS_FLAGS(entry, flags) (PG_ENTRY_FLAGS(entry) & (flags)) @@ -55,7 +55,7 @@ #define PG_PREM_URW PG_PRESENT | PG_WRITE | PG_ALLOW_USER // 用于对PD进行循环映射,因为我们可能需要对PD进行频繁操作,我们在这里禁用TLB缓存 -#define T_SELF_REF_PERM PG_PREM_RW | PG_DISABLE_CACHE +#define T_SELF_REF_PERM PG_PREM_RW | PG_DISABLE_CACHE | PG_WRITE_THROUGH // 页目录的虚拟基地址,可以用来访问到各个PDE @@ -107,5 +107,6 @@ extern void __pg_mount_point; #define PD_REFERENCED L2_BASE_VADDR #define CURPROC_PTE(vpn) (&((x86_page_table*)(PD_MOUNT_1 | (((vpn) & 0xffc00) << 2)))->entry[(vpn) & 0x3ff]) +#define PTE_MOUNTED(mnt, vpn) (((x86_page_table*)((mnt) | (((vpn) & 0xffc00) << 2)))->entry[(vpn) & 0x3ff]) #endif /* __LUNAIX_PAGE_H */ diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index d9a5bb1..76ecacc 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -15,8 +15,10 @@ #define PROC_RUNNING 1 #define PROC_TERMNAT 2 #define PROC_DESTROY 4 -#define PROC_SPOILED 8 -#define PROC_BLOCKED 16 +#define PROC_BLOCKED 8 +#define PROC_CREATED 16 + +#define PROC_TERMMASK 0x6 struct proc_mm { @@ -28,6 +30,8 @@ struct proc_info { pid_t pid; struct proc_info* parent; isr_param intr_ctx; + struct llist_header siblings; + struct llist_header children; struct proc_mm mm; void* page_table; time_t created; @@ -37,11 +41,13 @@ struct proc_info { struct lx_timer* timer; }; -extern struct proc_info* __current; +extern volatile struct proc_info* __current; pid_t alloc_pid(); +void init_proc(struct proc_info *pcb); + /** * @brief 向系统发布一个进程,使其可以被调度。 * @@ -49,7 +55,7 @@ pid_t alloc_pid(); */ void push_process(struct proc_info* process); -void destroy_process(pid_t pid); +pid_t destroy_process(pid_t pid); void setup_proc_mem(struct proc_info* proc, uintptr_t kstack_from); @@ -57,7 +63,7 @@ void setup_proc_mem(struct proc_info* proc, uintptr_t kstack_from); * @brief 复制当前进程(LunaixOS的类 fork (unix) 实现) * */ -void dup_proc(); +pid_t dup_proc(); /** * @brief 创建新进程(LunaixOS的类 CreateProcess (Windows) 实现) diff --git a/lunaix-os/includes/lunaix/sched.h b/lunaix-os/includes/lunaix/sched.h index 618c324..b20a33a 100644 --- a/lunaix-os/includes/lunaix/sched.h +++ b/lunaix-os/includes/lunaix/sched.h @@ -1,7 +1,7 @@ #ifndef __LUNAIX_SCHEDULER_H #define __LUNAIX_SCHEDULER_H -#define SCHED_TIME_SLICE 200 +#define SCHED_TIME_SLICE 300 struct scheduler { struct proc_info* _procs; diff --git a/lunaix-os/includes/lunaix/syscall.h b/lunaix-os/includes/lunaix/syscall.h index ee63c40..7faa7de 100644 --- a/lunaix-os/includes/lunaix/syscall.h +++ b/lunaix-os/includes/lunaix/syscall.h @@ -11,6 +11,7 @@ #define __SYSCALL_getppid 6 #define __SYSCALL_sleep 7 #define __SYSCALL__exit 8 +#define __SYSCALL_wait 9 #define __SYSCALL_MAX 0x100 diff --git a/lunaix-os/kernel/asm/x86/idt.c b/lunaix-os/kernel/asm/x86/idt.c index 9b2f0c6..6c7246c 100644 --- a/lunaix-os/kernel/asm/x86/idt.c +++ b/lunaix-os/kernel/asm/x86/idt.c @@ -7,30 +7,42 @@ uint64_t _idt[IDT_ENTRY]; uint16_t _idt_limit = sizeof(_idt) - 1; -void _set_idt_entry(uint32_t vector, uint16_t seg_selector, void (*isr)(), uint8_t dpl) { +static inline void _set_idt_entry(uint32_t vector, uint16_t seg_selector, void (*isr)(), uint8_t dpl, uint8_t type) { uintptr_t offset = (uintptr_t)isr; - _idt[vector] = (offset & 0xffff0000) | IDT_ATTR(dpl); + _idt[vector] = (offset & 0xffff0000) | IDT_ATTR(dpl, type); _idt[vector] <<= 32; _idt[vector] |= (seg_selector << 16) | (offset & 0x0000ffff); } +void _set_idt_intr_entry(uint32_t vector, uint16_t seg_selector, void (*isr)(), uint8_t dpl) { + _set_idt_entry(vector, seg_selector, isr, dpl, IDT_INTERRUPT); +} + +void _set_idt_trap_entry(uint32_t vector, uint16_t seg_selector, void (*isr)(), uint8_t dpl) { + _set_idt_entry(vector, seg_selector, isr, dpl, IDT_TRAP); +} + void _init_idt() { // CPU defined interrupts - _set_idt_entry(FAULT_DIVISION_ERROR, 0x08, _asm_isr0, 0); - _set_idt_entry(FAULT_GENERAL_PROTECTION, 0x08, _asm_isr13, 0); - _set_idt_entry(FAULT_PAGE_FAULT, 0x08, _asm_isr14, 0); + _set_idt_intr_entry(FAULT_DIVISION_ERROR, 0x08, _asm_isr0, 0); + _set_idt_intr_entry(FAULT_GENERAL_PROTECTION, 0x08, _asm_isr13, 0); + _set_idt_intr_entry(FAULT_PAGE_FAULT, 0x08, _asm_isr14, 0); - _set_idt_entry(APIC_ERROR_IV, 0x08, _asm_isr250, 0); - _set_idt_entry(APIC_LINT0_IV, 0x08, _asm_isr251, 0); - _set_idt_entry(APIC_SPIV_IV, 0x08, _asm_isr252, 0); - _set_idt_entry(APIC_TIMER_IV, 0x08, _asm_isr253, 0); - _set_idt_entry(PC_KBD_IV, 0x08, _asm_isr201, 0); + _set_idt_intr_entry(APIC_ERROR_IV, 0x08, _asm_isr250, 0); + _set_idt_intr_entry(APIC_LINT0_IV, 0x08, _asm_isr251, 0); + _set_idt_intr_entry(APIC_SPIV_IV, 0x08, _asm_isr252, 0); + _set_idt_intr_entry(APIC_TIMER_IV, 0x08, _asm_isr253, 0); + _set_idt_intr_entry(PC_KBD_IV, 0x08, _asm_isr201, 0); - _set_idt_entry(RTC_TIMER_IV, 0x08, _asm_isr210, 0); + _set_idt_intr_entry(RTC_TIMER_IV, 0x08, _asm_isr210, 0); // system defined interrupts - _set_idt_entry(LUNAIX_SYS_PANIC, 0x08, _asm_isr32, 0); - _set_idt_entry(LUNAIX_SYS_CALL, 0x08, _asm_isr33, 0); + _set_idt_intr_entry(LUNAIX_SYS_PANIC, 0x08, _asm_isr32, 0); + + // syscall is a trap gate (recall: trap does NOT clear IF flag upon interruption) + // XXX: this should be fine, as our design of context switch support interruptible syscall + // FIXME: This may cause nasty concurrency bug! We should 'lockify' our code! + _set_idt_trap_entry(LUNAIX_SYS_CALL, 0x08, _asm_isr33, 3); } \ No newline at end of file diff --git a/lunaix-os/kernel/asm/x86/interrupts.c b/lunaix-os/kernel/asm/x86/interrupts.c index 09bd1eb..55d7e9b 100644 --- a/lunaix-os/kernel/asm/x86/interrupts.c +++ b/lunaix-os/kernel/asm/x86/interrupts.c @@ -36,22 +36,20 @@ extern x86_page_table* __kernel_ptd; void intr_handler(isr_param* param) { - // if (param->vector == LUNAIX_SYS_CALL) { - // kprintf(KDEBUG "%p", param->registers.esp); - // } __current->intr_ctx = *param; - + +#ifdef USE_KERNEL_PT cpu_lcr3(__kernel_ptd); - // 将当前进程的页目录挂载到内核地址空间里(页目录挂载点#1),方便访问。 vmm_mount_pd(PD_MOUNT_1, __current->page_table); +#endif isr_param *lparam = &__current->intr_ctx; if (lparam->vector <= 255) { int_subscriber subscriber = subscribers[lparam->vector]; if (subscriber) { - subscriber(lparam); + subscriber(param); goto done; } } @@ -68,20 +66,14 @@ intr_handler(isr_param* param) lparam->eip); done: - - // if (__current->state != PROC_RUNNING) { - // schedule(); - // } - // for all external interrupts except the spurious interrupt // this is required by Intel Manual Vol.3A, section 10.8.1 & 10.8.5 if (lparam->vector >= EX_INTERRUPT_BEGIN && lparam->vector != APIC_SPIV_IV) { apic_done_servicing(); } +#ifdef USE_KERNEL_PT cpu_lcr3(__current->page_table); - - *param = __current->intr_ctx; - +#endif return; } \ No newline at end of file diff --git a/lunaix-os/kernel/asm/x86/pfault.c b/lunaix-os/kernel/asm/x86/pfault.c index f07c74f..c16385a 100644 --- a/lunaix-os/kernel/asm/x86/pfault.c +++ b/lunaix-os/kernel/asm/x86/pfault.c @@ -27,10 +27,9 @@ intr_routine_page_fault (const isr_param* param) goto segv_term; } - if (param->eip == ptr && !(hit_region->attr & REGION_EXEC)) { - // Attempt to execute non-executable page - goto segv_term; - } + // if (param->eip == ptr && !(hit_region->attr & REGION_EXEC)) { + // goto segv_term; + // } x86_pte_t* pte = CURPROC_PTE(ptr >> 12); if (*pte & PG_PRESENT) { @@ -41,22 +40,27 @@ intr_routine_page_fault (const isr_param* param) *pte = (*pte & 0xFFF) | pa | PG_WRITE; return; } - else { - // impossible cases or accessing privileged page - goto segv_term; - } - } else { - if (!(*pte)) { - // Invalid location - goto segv_term; - } - // page not present, bring it from disk or somewhere else - __print_panic_msg("WIP page fault route", param); - while (1); + // impossible cases or accessing privileged page + goto segv_term; + } + + if (!(*pte)) { + // Invalid location + goto segv_term; + } + uintptr_t loc = *pte & ~0xfff; + // a writable page, not present, pte attr is not null and no indication of cached page -> a new page need to be alloc + if ((hit_region->attr & REGION_WRITE) && (*pte & 0xfff) && !loc) { + uintptr_t pa = pmm_alloc_page(__current->pid, 0); + *pte = *pte | pa | PG_PRESENT; + return; } + // page not present, bring it from disk or somewhere else + __print_panic_msg("WIP page fault route", param); + while (1); segv_term: - kprintf(KERROR "(pid: %d) Segmentation fault on %p\n", __current->pid, ptr); + kprintf(KERROR "(pid: %d) Segmentation fault on %p (%p:%p)\n", __current->pid, ptr, param->cs, param->eip); terminate_proc(LXSEGFAULT); // should not reach } \ No newline at end of file diff --git a/lunaix-os/kernel/asm/x86/syscall.S b/lunaix-os/kernel/asm/x86/syscall.S index 5ecfd01..1d5b935 100644 --- a/lunaix-os/kernel/asm/x86/syscall.S +++ b/lunaix-os/kernel/asm/x86/syscall.S @@ -8,14 +8,15 @@ syscall_table: 1: .long 0 - .long __lxsys_fork + .long __lxsys_fork /* 1 */ .long __lxsys_yield .long __lxsys_sbrk .long __lxsys_brk - .long __lxsys_getpid + .long __lxsys_getpid /* 5 */ .long __lxsys_getppid .long __lxsys_sleep .long __lxsys_exit + .long __lxsys_wait /* 9 */ 2: .rept __SYSCALL_MAX - (2b - 1b)/4 .long 0 @@ -26,11 +27,9 @@ .section .text syscall_hndlr: pushl %ebp - movl %esp, %ebp - addl $0x8, %ebp - movl (%ebp), %ebp + movl 8(%esp), %ebp - movl (%ebp), %eax + movl (%ebp), %eax /* eax: call code as well as the return value from syscall */ cmpl $__SYSCALL_MAX, %eax jae 2f @@ -43,16 +42,18 @@ popl %ebp ret 1: - pushl 24(%ebp) /* esi - #6 arg */ - pushl 20(%ebp) /* ebp - #5 arg */ - pushl 16(%ebp) /* edi - #4 arg */ - pushl 12(%ebp) /* edx - #3 arg */ - pushl 8(%ebp) /* ecx - #2 arg */ - pushl 4(%ebp) /* ebx - #1 arg */ + pushl 24(%ebp) /* esi - #6 arg */ + pushl 20(%ebp) /* ebp - #5 arg */ + pushl 16(%ebp) /* edi - #4 arg */ + pushl 12(%ebp) /* edx - #3 arg */ + pushl 8(%ebp) /* ecx - #2 arg */ + pushl 4(%ebp) /* ebx - #1 arg */ call (%eax) - addl $24, %esp + movl %eax, (%ebp) /* save the return value */ + + addl $24, %esp /* remove the parameters from stack */ popl %ebp diff --git a/lunaix-os/kernel/asm/x86/tss.c b/lunaix-os/kernel/asm/x86/tss.c index 8946107..c09db9e 100644 --- a/lunaix-os/kernel/asm/x86/tss.c +++ b/lunaix-os/kernel/asm/x86/tss.c @@ -7,7 +7,6 @@ struct x86_tss _tss = { .ss0 = KDATA_SEG }; -void tss_update(uint32_t ss0, uint32_t esp0) { +void tss_update_esp(uint32_t esp0) { _tss.esp0 = esp0; - _tss.ss0 = ss0; } \ No newline at end of file diff --git a/lunaix-os/kernel/k_init.c b/lunaix-os/kernel/k_init.c index 5fdc9cb..863f574 100644 --- a/lunaix-os/kernel/k_init.c +++ b/lunaix-os/kernel/k_init.c @@ -102,9 +102,7 @@ _kernel_init() { void spawn_lxinit() { struct proc_info kinit; - memset(&kinit, 0, sizeof(kinit)); - kinit.parent = (void*)0; - kinit.pid = 1; + init_proc(&kinit); kinit.intr_ctx = (isr_param) { .registers.esp = KSTACK_TOP - 20, .cs = KCODE_SEG, diff --git a/lunaix-os/kernel/lxinit.c b/lunaix-os/kernel/lxinit.c index b2a9d9b..e6ca67b 100644 --- a/lunaix-os/kernel/lxinit.c +++ b/lunaix-os/kernel/lxinit.c @@ -14,12 +14,36 @@ extern uint8_t __kernel_start; LOG_MODULE("INIT") -void -test_timer(void* payload); +// #define FORK_BOMB_DEMO +#define WAIT_DEMO void _lxinit_main() { +#ifdef FORK_BOMB_DEMO + // fork炸弹 + for (;;) { + pid_t p; + if ((p = fork())) { + kprintf(KDEBUG "Forked %d\n", p); + } + } +#endif + +#ifdef WAIT_DEMO + // 测试wait + kprintf("I am parent, going to fork my child and wait.\n"); + if (!fork()) { + kprintf("I am child, going to sleep for 2 seconds\n"); + sleep(2); + kprintf("I am child, I am about to terminated\n"); + _exit(1); + } + int status; + pid_t child = wait(&status); + kprintf("I am parent, my child (%d) terminated with code: %d.\n", child, status); +#endif + // 这里是就是LunaixOS的第一个进程了! for (size_t i = 0; i < 10; i++) { @@ -27,9 +51,10 @@ _lxinit_main() if (!(pid = fork())) { sleep(i); if (i == 3) { - i = *(int*)0x400000; + i = *(int*)0xdeadc0de; // seg fault! } tty_put_char('0'+i); + tty_put_char('\n'); _exit(0); } kprintf(KINFO "Forked %d\n", pid); @@ -37,15 +62,11 @@ _lxinit_main() char buf[64]; - kprintf(KINFO "Hello higher half kernel world!\nWe are now running in virtual " - "address space!\n\n"); + kprintf(KINFO "Hello processes!\n"); cpu_get_brand(buf); kprintf("CPU: %s\n\n", buf); - void* k_start = vmm_v2p(&__kernel_start); - kprintf(KINFO "The kernel's base address mapping: %p->%p\n", &__kernel_start, k_start); - // no lxmalloc here! This can only be used within kernel, but here, we are in a dedicated process! // any access to kernel method must be done via syscall @@ -64,18 +85,4 @@ _lxinit_main() spin(); -} - -static datetime_t datetime; - -void test_timer(void* payload) { - clock_walltime(&datetime); - - kprintf(KWARN "%u/%02u/%02u %02u:%02u:%02u\r", - datetime.year, - datetime.month, - datetime.day, - datetime.hour, - datetime.minute, - datetime.second); } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/dmm.c b/lunaix-os/kernel/mm/dmm.c index b4b1d0d..d582625 100644 --- a/lunaix-os/kernel/mm/dmm.c +++ b/lunaix-os/kernel/mm/dmm.c @@ -23,18 +23,18 @@ #include -__DEFINE_LXSYSCALL1(int, sbrk, void*, addr) { +__DEFINE_LXSYSCALL1(int, sbrk, size_t, size) { heap_context_t* uheap = &__current->mm.u_heap; mutex_lock(&uheap->lock); - int r = lxsbrk(uheap, addr); + void* r = lxsbrk(uheap, size); mutex_unlock(&uheap->lock); return r; } -__DEFINE_LXSYSCALL1(void*, brk, size_t, size) { +__DEFINE_LXSYSCALL1(void*, brk, void*, addr) { heap_context_t* uheap = &__current->mm.u_heap; mutex_lock(&uheap->lock); - void* r = lxbrk(uheap, size); + int r = lxbrk(uheap, addr); mutex_unlock(&uheap->lock); return r; } @@ -51,13 +51,13 @@ dmm_init(heap_context_t* heap) } int -lxsbrk(heap_context_t* heap, void* addr) +lxbrk(heap_context_t* heap, void* addr) { - return lxbrk(heap, addr - heap->brk) != NULL; + return -(lxsbrk(heap, addr - heap->brk) == (void*)-1); } void* -lxbrk(heap_context_t* heap, size_t size) +lxsbrk(heap_context_t* heap, size_t size) { if (size == 0) { return heap->brk; @@ -72,6 +72,7 @@ lxbrk(heap_context_t* heap, size_t size) // any invalid situations if (next >= heap->max_addr || next < current_brk) { __current->k_status = LXINVLDPTR; + return (void*)-1; } uintptr_t diff = PG_ALIGN(next) - PG_ALIGN(current_brk); diff --git a/lunaix-os/kernel/mm/kalloc.c b/lunaix-os/kernel/mm/kalloc.c index 4d9a5d1..e970c1d 100644 --- a/lunaix-os/kernel/mm/kalloc.c +++ b/lunaix-os/kernel/mm/kalloc.c @@ -249,7 +249,7 @@ lx_grow_heap(heap_context_t* heap, size_t sz) void* start; // The "+ WSIZE" capture the overhead for epilogue marker - if (!(start = lxbrk(heap, sz + WSIZE))) { + if (!(start = lxsbrk(heap, sz + WSIZE))) { return NULL; } sz = ROUNDUP(sz, BOUNDARY); diff --git a/lunaix-os/kernel/mm/pmm.c b/lunaix-os/kernel/mm/pmm.c index 3bf2128..ad80dbf 100644 --- a/lunaix-os/kernel/mm/pmm.c +++ b/lunaix-os/kernel/mm/pmm.c @@ -97,7 +97,6 @@ pmm_alloc_page(pid_t owner, pp_attr_t attr) while (!good_page_found && pg_lookup_ptr < upper_lim) { pm = &pm_table[pg_lookup_ptr]; - // skip the fully occupied chunk, reduce # of iterations if (!pm->ref_counts) { *pm = (struct pp_struct) { .attr = attr, @@ -105,6 +104,7 @@ pmm_alloc_page(pid_t owner, pp_attr_t attr) .ref_counts = 1 }; good_page_found = pg_lookup_ptr << 12; + break; } else { pg_lookup_ptr++; @@ -129,8 +129,8 @@ pmm_free_page(pid_t owner, void* page) { struct pp_struct* pm = &pm_table[(intptr_t)page >> 12]; - // Oops, double free! - if (!(pm->ref_counts)) { + // Is this a MMIO mapping or double free? + if (((intptr_t)page >> 12) >= max_pg || !(pm->ref_counts)) { return 0; } @@ -149,7 +149,7 @@ int pmm_ref_page(pid_t owner, void* page) { } struct pp_struct* pm = &pm_table[ppn]; - if (!pm->ref_counts) { + if (ppn >= max_pg || !pm->ref_counts) { return 0; } diff --git a/lunaix-os/kernel/mm/vmm.c b/lunaix-os/kernel/mm/vmm.c index dcd2f27..5e407ff 100644 --- a/lunaix-os/kernel/mm/vmm.c +++ b/lunaix-os/kernel/mm/vmm.c @@ -260,7 +260,8 @@ vmm_v2p(void* va) void* vmm_mount_pd(uintptr_t mnt, void* pde) { x86_page_table* l1pt = (x86_page_table*)L1_BASE_VADDR; - l1pt->entry[(mnt >> 22)] = NEW_L1_ENTRY(PG_PREM_RW, pde); + l1pt->entry[(mnt >> 22)] = NEW_L1_ENTRY(T_SELF_REF_PERM, pde); + cpu_invplg(mnt); return mnt; } @@ -268,4 +269,5 @@ void* vmm_unmount_pd(uintptr_t mnt) { x86_page_table* l1pt = (x86_page_table*)L1_BASE_VADDR; l1pt->entry[(mnt >> 22)] = 0; + cpu_invplg(mnt); } \ No newline at end of file diff --git a/lunaix-os/kernel/process.c b/lunaix-os/kernel/process.c index 29cff62..5112ae1 100644 --- a/lunaix-os/kernel/process.c +++ b/lunaix-os/kernel/process.c @@ -6,6 +6,7 @@ #include #include #include +#include LOG_MODULE("PROC") @@ -29,7 +30,7 @@ void* __dup_pagetable(pid_t pid, uintptr_t mount_point) { for (size_t j = 0; j < PG_MAX_ENTRIES; j++) { x86_pte_t pte = ppt->entry[j]; - pmm_ref_page(pid, pte & ~0xfff); + pmm_ref_page(pid, PG_ENTRY_ADDR(pte)); pt->entry[j] = pte; } @@ -41,12 +42,39 @@ void* __dup_pagetable(pid_t pid, uintptr_t mount_point) { return ptd_pp; } +void __del_pagetable(pid_t pid, uintptr_t mount_point) { + x86_page_table* pptd = (x86_page_table*) (mount_point | (0x3FF << 12)); + + for (size_t i = 0; i < PG_MAX_ENTRIES - 1; i++) + { + x86_pte_t ptde = pptd->entry[i]; + if (!ptde || !(ptde & PG_PRESENT)) { + continue; + } + + x86_page_table* ppt = (x86_page_table*) (mount_point | (i << 12)); + + for (size_t j = 0; j < PG_MAX_ENTRIES; j++) + { + x86_pte_t pte = ppt->entry[j]; + // free the 4KB data page + if ((pte & PG_PRESENT)) { + pmm_free_page(pid,PG_ENTRY_ADDR(pte)); + } + } + // free the L2 page table + pmm_free_page(pid, PG_ENTRY_ADDR(ptde)); + } + // free the L1 directory + pmm_free_page(pid, PG_ENTRY_ADDR(pptd->entry[PG_MAX_ENTRIES - 1])); +} + void* dup_pagetable(pid_t pid) { return __dup_pagetable(pid, PD_REFERENCED); } -__DEFINE_LXSYSCALL(void, fork) { - dup_proc(); +__DEFINE_LXSYSCALL(pid_t, fork) { + return dup_proc(); } __DEFINE_LXSYSCALL(pid_t, getpid) { @@ -57,57 +85,71 @@ __DEFINE_LXSYSCALL(pid_t, getppid) { return __current->parent->pid; } -void dup_proc() { - pid_t pid = alloc_pid(); +void init_proc(struct proc_info *pcb) { + memset(pcb, 0, sizeof(*pcb)); + + pcb->pid = alloc_pid(); + pcb->created = clock_systime(); + pcb->state = PROC_CREATED; +} - struct proc_info pcb = (struct proc_info) { - .created = clock_systime(), - .pid = pid, - .mm = __current->mm, - .intr_ctx = __current->intr_ctx, - .parent = __current - }; +pid_t dup_proc() { + struct proc_info pcb; + init_proc(&pcb); + pcb.mm = __current->mm; + pcb.intr_ctx = __current->intr_ctx; + pcb.parent = __current; +#ifdef USE_KERNEL_PG setup_proc_mem(&pcb, PD_MOUNT_1); //挂载点#1是当前进程的页表 +#else + setup_proc_mem(&pcb, PD_REFERENCED); +#endif // 根据 mm_region 进一步配置页表 - if (__current->mm.regions) { - struct mm_region *pos, *n; - llist_for_each(pos, n, &__current->mm.regions->head, head) { - region_add(&pcb, pos->start, pos->end, pos->attr); - - // 如果写共享,则不作处理。 - if ((pos->attr & REGION_WSHARED)) { - continue; - } + if (!__current->mm.regions) { + goto not_copy; + } + + struct mm_region *pos, *n; + llist_for_each(pos, n, &__current->mm.regions->head, head) { + region_add(&pcb, pos->start, pos->end, pos->attr); + + // 如果写共享,则不作处理。 + if ((pos->attr & REGION_WSHARED)) { + continue; + } - uintptr_t start_vpn = PG_ALIGN(pos->start) >> 12; - uintptr_t end_vpn = PG_ALIGN(pos->end) >> 12; - for (size_t i = start_vpn; i < end_vpn; i++) - { - x86_pte_t *curproc = &((x86_page_table*)(PD_MOUNT_1 | ((i & 0xffc00) << 2)))->entry[i & 0x3ff]; - x86_pte_t *newproc = &((x86_page_table*)(PD_MOUNT_2 | ((i & 0xffc00) << 2)))->entry[i & 0x3ff]; - - if (pos->attr == REGION_RSHARED) { - // 如果读共享,则将两者的都标注为只读,那么任何写入都将会应用COW策略。 - *curproc = *curproc & ~PG_WRITE; - *newproc = *newproc & ~PG_WRITE; - } - else { - // 如果是私有页,则将该页从新进程中移除。 - *newproc = 0; - } + uintptr_t start_vpn = PG_ALIGN(pos->start) >> 12; + uintptr_t end_vpn = PG_ALIGN(pos->end) >> 12; + for (size_t i = start_vpn; i < end_vpn; i++) + { + x86_pte_t *curproc = &PTE_MOUNTED(PD_MOUNT_1, i); + x86_pte_t *newproc = &PTE_MOUNTED(PD_MOUNT_2, i); + cpu_invplg(curproc); + cpu_invplg(newproc); + + if (pos->attr == REGION_RSHARED) { + // 如果读共享,则将两者的都标注为只读,那么任何写入都将会应用COW策略。 + *curproc = *curproc & ~PG_WRITE; + *newproc = *newproc & ~PG_WRITE; + } + else { + // 如果是私有页,则将该页从新进程中移除。 + *newproc = 0; } } } +not_copy: vmm_unmount_pd(PD_MOUNT_2); // 正如同fork,返回两次。 pcb.intr_ctx.registers.eax = 0; - __current->intr_ctx.registers.eax = pid; push_process(&pcb); + + return pcb.pid; } extern void __kernel_end; @@ -122,9 +164,20 @@ void setup_proc_mem(struct proc_info* proc, uintptr_t usedMnt) { // copy the kernel stack for (size_t i = KSTACK_START >> 12; i <= KSTACK_TOP >> 12; i++) { - x86_pte_t *ppte = &((x86_page_table*)(PD_MOUNT_2 | ((i & 0xffc00) << 2)))->entry[i & 0x3ff]; - void* ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(*ppte)); - *ppte = (*ppte & 0xfff) | (uintptr_t)ppa; + volatile x86_pte_t *ppte = &PTE_MOUNTED(PD_MOUNT_2, i); + + /* + This is a fucking nightmare, the TLB caching keep the rewrite to PTE from updating. + Even the Nightmare Moon the Evil is far less nasty than this. + It took me hours of debugging to figure this out. + + In the name of Celestia our glorious goddess, I will fucking HATE the TLB for the rest of my LIFE! + */ + cpu_invplg(ppte); + + x86_pte_t p = *ppte; + void* ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(p)); + *ppte = (p & 0xfff) | (uintptr_t)ppa; } // 我们不需要分配内核的区域,因为所有的内核代码和数据段只能通过系统调用来访问,任何非法的访问 diff --git a/lunaix-os/kernel/sched.c b/lunaix-os/kernel/sched.c index f5c2110..758b157 100644 --- a/lunaix-os/kernel/sched.c +++ b/lunaix-os/kernel/sched.c @@ -1,8 +1,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -12,7 +14,8 @@ #define MAX_PROCESS 512 -struct proc_info* __current; +volatile struct proc_info* __current; + struct proc_info dummy; extern void __proc_table; @@ -41,9 +44,17 @@ void run(struct proc_info* proc) { } proc->state = PROC_RUNNING; - __current = proc; + // FIXME: 这里还是得再考虑一下。 + // tss_update_esp(__current->intr_ctx.esp); - cpu_lcr3(__current->page_table); + if (__current->page_table != proc->page_table) { + __current = proc; + cpu_lcr3(__current->page_table); + // from now on, the we are in the kstack of another process + } + else { + __current = proc; + } apic_done_servicing(); @@ -67,7 +78,8 @@ void schedule() { } while(next->state != PROC_STOPPED && ptr != prev_ptr); sched_ctx.procs_index = ptr; - + + run(next); } @@ -77,6 +89,7 @@ static void proc_timer_callback(struct proc_info* proc) { } __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds) { + // FIXME: sleep的实现或许需要改一下。专门绑一个计时器好像没有必要…… if (!seconds) { return 0; } @@ -99,13 +112,32 @@ __DEFINE_LXSYSCALL(void, yield) { schedule(); } +__DEFINE_LXSYSCALL1(pid_t, wait, int*, status) { + pid_t cur = __current->pid; + struct proc_info *proc, *n; + if (llist_empty(&__current->children)) { + return -1; + } +repeat: + llist_for_each(proc, n, &__current->children, siblings) { + if (proc->state == PROC_TERMNAT) { + goto done; + } + } + // FIXME: 除了循环,也许有更高效的办法…… (在这里进行schedule,需要重写context switch!) + goto repeat; + +done: + *status = proc->exit_code; + return destroy_process(proc->pid); +} + pid_t alloc_pid() { pid_t i = 0; for (; i < sched_ctx.ptable_len && sched_ctx._procs[i].state != PROC_DESTROY; i++); if (i == MAX_PROCESS) { - __current->k_status = LXPROCFULL; - return -1; + panick("Process table is full"); } return i + 1; } @@ -121,27 +153,54 @@ void push_process(struct proc_info* process) { sched_ctx.ptable_len++; } - // every process is the parent of first process (pid=1) - process->parent = process->parent ? process->parent : &sched_ctx._procs; - process->state = PROC_STOPPED; - sched_ctx._procs[index] = *process; + + process = &sched_ctx._procs[index]; + + // make sure the address is in the range of process table + llist_init_head(&process->children); + // every process is the child of first process (pid=1) + if (process->parent) { + llist_append(&process->parent->children, &process->siblings); + } + else { + process->parent = &sched_ctx._procs[0]; + } + + process->state = PROC_STOPPED; } -void destroy_process(pid_t pid) { +// from +extern void __del_pagetable(pid_t pid, uintptr_t mount_point); + +pid_t destroy_process(pid_t pid) { int index = pid - 1; if (index <= 0 || index > sched_ctx.ptable_len) { __current->k_status = LXINVLDPID; return; } + struct proc_info *proc = &sched_ctx._procs[index]; + proc->state = PROC_DESTROY; + llist_delete(&proc->siblings); + + if (proc->mm.regions) { + struct mm_region *pos, *n; + llist_for_each(pos, n, &proc->mm.regions->head, head) { + lxfree(pos); + } + } + + vmm_mount_pd(PD_MOUNT_2, proc->page_table); + + __del_pagetable(pid, PD_MOUNT_2); - sched_ctx._procs[index].state = PROC_DESTROY; + vmm_unmount_pd(PD_MOUNT_2); - // TODO: recycle the physical pages used by page tables + return pid; } void terminate_proc(int exit_code) { - __current->state = exit_code < 0 ? PROC_SPOILED : PROC_TERMNAT; + __current->state = PROC_TERMNAT; __current->exit_code = exit_code; schedule(); @@ -161,7 +220,7 @@ int orphaned_proc(pid_t pid) { struct proc_info* proc = &sched_ctx._procs[pid-1]; struct proc_info* parent = proc->parent; - // 如果其父进程的状态是terminated, spoiled 或 destroy中的一种 + // 如果其父进程的状态是terminated 或 destroy中的一种 // 或者其父进程是在该进程之后创建的,那么该进程为孤儿进程 - return (parent->state & 0xe) || parent->created > proc->created; + return (parent->state & PROC_TERMMASK) || parent->created > proc->created; } \ No newline at end of file diff --git a/lunaix-os/makefile b/lunaix-os/makefile index 58bc9d7..0666dc4 100644 --- a/lunaix-os/makefile +++ b/lunaix-os/makefile @@ -47,8 +47,8 @@ all-debug: clean $(BUILD_DIR)/$(OS_ISO) @${TOOLCHAIN}/i686-elf-objdump -S $(BIN_DIR)/$(OS_BIN) > $(BUILD_DIR)/kdump.txt clean: - @rm -rf $(BUILD_DIR) - @sleep 1 + @rm -rf $(BUILD_DIR) || exit 1 + @sleep 2 run: $(BUILD_DIR)/$(OS_ISO) @qemu-system-i386 -cdrom $(BUILD_DIR)/$(OS_ISO) -monitor telnet::$(QEMU_MON_PORT),server,nowait &