refactor: add an async read/write variant to device ops, allowing async io to be...
[lunaix-os.git] / lunaix-os / kernel / process / process.c
index 1647e10dd8bc24765a4d9bc50283c3bc66416f50..bdd904917d2d2a8c6dd7b0032cf5971be8b70648 100644 (file)
@@ -1,6 +1,6 @@
 #include <klibc/string.h>
 #include <lunaix/clock.h>
-#include <lunaix/common.h>
+#include <lunaix/mm/mmap.h>
 #include <lunaix/mm/pmm.h>
 #include <lunaix/mm/region.h>
 #include <lunaix/mm/valloc.h>
 #include <lunaix/syscall.h>
 #include <lunaix/syslog.h>
 
+#include <sys/abi.h>
+#include <sys/mm/mempart.h>
+
 LOG_MODULE("PROC")
 
-void*
-__dup_pagetable(pid_t pid, uintptr_t mount_point)
+ptr_t
+__dup_pagetable(pid_t pid, ptr_t mount_point)
 {
-    void* ptd_pp = pmm_alloc_page(pid, PP_FGPERSIST);
+    ptr_t ptd_pp = pmm_alloc_page(pid, PP_FGPERSIST);
     vmm_set_mapping(VMS_SELF, PG_MOUNT_1, ptd_pp, PG_PREM_RW, VMAP_NULL);
 
-    x86_page_table* ptd = PG_MOUNT_1;
+    x86_page_table* ptd = (x86_page_table*)PG_MOUNT_1;
     x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
 
-    size_t kspace_l1inx = L1_INDEX(KERNEL_MM_BASE);
+    size_t kspace_l1inx = L1_INDEX(KERNEL_EXEC);
 
     for (size_t i = 0; i < PG_MAX_ENTRIES - 1; i++) {
 
@@ -35,11 +38,11 @@ __dup_pagetable(pid_t pid, uintptr_t mount_point)
         }
 
         // 复制L2页表
-        void* pt_pp = pmm_alloc_page(pid, PP_FGPERSIST);
+        ptr_t pt_pp = pmm_alloc_page(pid, PP_FGPERSIST);
         vmm_set_mapping(VMS_SELF, PG_MOUNT_2, pt_pp, PG_PREM_RW, VMAP_NULL);
 
         x86_page_table* ppt = (x86_page_table*)(mount_point | (i << 12));
-        x86_page_table* pt = PG_MOUNT_2;
+        x86_page_table* pt = (x86_page_table*)PG_MOUNT_2;
 
         for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
             x86_pte_t pte = ppt->entry[j];
@@ -47,7 +50,7 @@ __dup_pagetable(pid_t pid, uintptr_t mount_point)
             pt->entry[j] = pte;
         }
 
-        ptd->entry[i] = (uintptr_t)pt_pp | PG_ENTRY_FLAGS(ptde);
+        ptd->entry[i] = (ptr_t)pt_pp | PG_ENTRY_FLAGS(ptde);
     }
 
     ptd->entry[PG_MAX_ENTRIES - 1] = NEW_L1_ENTRY(T_SELF_REF_PERM, ptd_pp);
@@ -56,12 +59,12 @@ __dup_pagetable(pid_t pid, uintptr_t mount_point)
 }
 
 void
-__del_pagetable(pid_t pid, uintptr_t mount_point)
+__del_pagetable(pid_t pid, ptr_t mount_point)
 {
     x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
 
     // only remove user address space
-    for (size_t i = 0; i < L1_INDEX(KERNEL_MM_BASE); i++) {
+    for (size_t i = 0; i < L1_INDEX(KERNEL_EXEC); i++) {
         x86_pte_t ptde = pptd->entry[i];
         if (!ptde || !(ptde & PG_PRESENT)) {
             continue;
@@ -83,7 +86,7 @@ __del_pagetable(pid_t pid, uintptr_t mount_point)
     pmm_free_page(pid, PG_ENTRY_ADDR(pptd->entry[PG_MAX_ENTRIES - 1]));
 }
 
-void*
+ptr_t
 vmm_dup_vmspace(pid_t pid)
 {
     return __dup_pagetable(pid, VMS_SELF);
@@ -134,6 +137,12 @@ __DEFINE_LXSYSCALL2(int, setpgid, pid_t, pid, pid_t, pgid)
     return 0;
 }
 
+void
+__stack_copied(struct mm_region* region)
+{
+    mm_index((void**)&region->proc_vms->stack, region);
+}
+
 void
 init_proc_user_space(struct proc_info* pcb)
 {
@@ -141,35 +150,41 @@ init_proc_user_space(struct proc_info* pcb)
 
     /*---  分配用户栈  ---*/
 
-    struct mm_region* stack_vm;
-
-    stack_vm = region_create_range(
-      USTACK_END, USTACK_SIZE, REGION_RW | REGION_RSHARED | REGION_ANON);
-    // 注册用户栈区域
-    region_add(&pcb->mm.regions, stack_vm);
-
-    // 预留地址空间,具体物理页将由Page Fault Handler按需分配。
-    for (uintptr_t i = PG_ALIGN(USTACK_END); i < USTACK_TOP; i += PG_SIZE) {
-        vmm_set_mapping(VMS_MOUNT_1, i, 0, PG_ALLOW_USER | PG_WRITE, VMAP_NULL);
+    struct mm_region* mapped;
+    struct mmap_param param = { .vms_mnt = VMS_MOUNT_1,
+                                .pvms = &pcb->mm,
+                                .mlen = USR_STACK_SIZE,
+                                .proct = PROT_READ | PROT_WRITE,
+                                .flags = MAP_ANON | MAP_PRIVATE | MAP_FIXED,
+                                .type = REGION_TYPE_STACK };
+
+    int status = 0;
+    if ((status = mem_map(NULL, &mapped, USR_STACK, NULL, &param))) {
+        kprintf(KFATAL "fail to alloc user stack: %d", status);
     }
 
+    mapped->region_copied = __stack_copied;
+    mm_index((void**)&pcb->mm.stack, mapped);
+
     // TODO other uspace initialization stuff
 
     vmm_unmount_pd(VMS_MOUNT_1);
 }
 
 void
-__mark_region(uintptr_t start_vpn, uintptr_t end_vpn, int attr)
+__mark_region(ptr_t start_vpn, ptr_t end_vpn, int attr)
 {
     for (size_t i = start_vpn; i <= end_vpn; i++) {
         x86_pte_t* curproc = &PTE_MOUNTED(VMS_SELF, i);
         x86_pte_t* newproc = &PTE_MOUNTED(VMS_MOUNT_1, i);
-        cpu_invplg(newproc);
+
+        cpu_flush_page((ptr_t)newproc);
 
         if ((attr & REGION_MODE_MASK) == REGION_RSHARED) {
             // 如果读共享,则将两者的都标注为只读,那么任何写入都将会应用COW策略。
-            cpu_invplg(curproc);
-            cpu_invplg(i << 12);
+            cpu_flush_page((ptr_t)curproc);
+            cpu_flush_page((ptr_t)(i << 12));
+
             *curproc = *curproc & ~PG_WRITE;
             *newproc = *newproc & ~PG_WRITE;
         } else {
@@ -194,21 +209,24 @@ pid_t
 dup_proc()
 {
     struct proc_info* pcb = alloc_process();
-    pcb->mm.u_heap = __current->mm.u_heap;
     pcb->intr_ctx = __current->intr_ctx;
     pcb->parent = __current;
 
-    memcpy(pcb->fxstate, __current->fxstate, 512);
-
     if (__current->cwd) {
         pcb->cwd = __current->cwd;
         vfs_ref_dnode(pcb->cwd);
     }
 
     __copy_fdtable(pcb);
-    region_copy(&__current->mm.regions, &pcb->mm.regions);
+    region_copy_mm(&__current->mm, &pcb->mm);
 
-    setup_proc_mem(pcb, VMS_SELF);
+    /*
+     *  store the return value for forked process.
+     *  this will be implicit carried over after kernel stack is copied.
+     */
+    store_retval(0);
+
+    copy_kernel_stack(pcb, VMS_SELF);
 
     // 根据 mm_region 进一步配置页表
 
@@ -220,34 +238,31 @@ dup_proc()
             continue;
         }
 
-        uintptr_t start_vpn = pos->start >> 12;
-        uintptr_t end_vpn = pos->end >> 12;
+        ptr_t start_vpn = pos->start >> 12;
+        ptr_t end_vpn = pos->end >> 12;
         __mark_region(start_vpn, end_vpn, pos->attr);
     }
 
     vmm_unmount_pd(VMS_MOUNT_1);
 
-    // 正如同fork,返回两次。
-    pcb->intr_ctx.registers.eax = 0;
-
     commit_process(pcb);
 
     return pcb->pid;
 }
 
-extern void __kernel_end;
+extern void __kexec_end;
 
 void
-setup_proc_mem(struct proc_info* proc, uintptr_t usedMnt)
+copy_kernel_stack(struct proc_info* proc, ptr_t usedMnt)
 {
     // copy the entire kernel page table
     pid_t pid = proc->pid;
-    void* pt_copy = __dup_pagetable(pid, usedMnt);
+    ptr_t pt_copy = __dup_pagetable(pid, usedMnt);
 
     vmm_mount_pd(VMS_MOUNT_1, pt_copy); // 将新进程的页表挂载到挂载点#2
 
     // copy the kernel stack
-    for (size_t i = KSTACK_START >> 12; i <= KSTACK_TOP >> 12; i++) {
+    for (size_t i = KERNEL_STACK >> 12; i <= KERNEL_STACK_END >> 12; i++) {
         volatile x86_pte_t* ppte = &PTE_MOUNTED(VMS_MOUNT_1, i);
 
         /*
@@ -258,17 +273,13 @@ setup_proc_mem(struct proc_info* proc, uintptr_t usedMnt)
             In the name of Celestia our glorious goddess, I will fucking HATE
            the TLB for the rest of my LIFE!
         */
-        cpu_invplg(ppte);
+        cpu_flush_page((ptr_t)ppte);
 
         x86_pte_t p = *ppte;
-        void* ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(p));
+        ptr_t ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(p));
         pmm_free_page(pid, PG_ENTRY_ADDR(p));
-        *ppte = (p & 0xfff) | (uintptr_t)ppa;
+        *ppte = (p & 0xfff) | ppa;
     }
 
-    // 我们不需要分配内核的区域,因为所有的内核代码和数据段只能通过系统调用来访问,任何非法的访问
-    // 都会导致eip落在区域外面,从而segmentation fault.
-
-    // 至于其他的区域我们暂时没有办法知道,因为那需要知道用户程序的信息。我们留到之后在处理。
     proc->page_table = pt_copy;
 }
\ No newline at end of file