X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/blobdiff_plain/69777bdcab284335651a8002e2896f3862fa423d..28c176b668c841a3b7fb093faccf0efa39257603:/lunaix-os/kernel/mm/procvm.c

diff --git a/lunaix-os/kernel/mm/procvm.c b/lunaix-os/kernel/mm/procvm.c
index ea49624..5b7afad 100644
--- a/lunaix-os/kernel/mm/procvm.c
+++ b/lunaix-os/kernel/mm/procvm.c
@@ -1,8 +1,7 @@
 #include
 #include
 #include
-#include
-#include
+#include
 #include
 #include
 
@@ -23,6 +22,13 @@ procvm_create(struct proc_info* proc) {
     return mm;
 }
 
+static inline unsigned int
+__ptep_advancement(struct leaflet* leaflet, int level)
+{
+    size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT;
+    return (1 << (leaflet_order(leaflet) % shifts)) - 1;
+}
+
 static ptr_t
 vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
 {
@@ -32,14 +38,50 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
     pte_t* ptep_kernel = mkl0tep(mkptep_va(src_mnt, KERNEL_RESIDENT));
 
     // Build the self-reference on dest vms
-    pte_t* ptep_sms = mkptep_va(VMS_SELF, (ptr_t)ptep_dest);
-    pte_t* ptep_ssm = mkptep_va(VMS_SELF, (ptr_t)ptep_sms);
+
+    /*
+     * -- What the heck are ptep_ssm and ptep_sms? --
+     *
+     * ptep_dest points to the page table itself, which is mounted
+     * at dest_mnt (or simply mnt):
+     *      mnt -> self -> self -> self -> L0TE@offset
+     *
+     * ptep_sms shallows the recursion chain:
+     *      self -> mnt -> self -> self -> L0TE@self
+     *
+     * ptep_ssm shallows the recursion chain:
+     *      self -> self -> mnt -> self -> L0TE@self
+     *
+     * Now, here is the problem: back on x86_32, translation is
+     * a depth-3 recursion:
+     *      L0T -> LFT -> Page
+     *
+     * So ptep_ssm will terminate at mnt and give us a leaf
+     * slot for allocating a fresh page table for mnt:
+     *      self -> self -> L0TE@mnt
+     *
+     * But on x86_64, translation has two extra steps:
+     *      L0T -> L1T -> L2T -> LFT -> Page
+     *
+     * so we must continue pushing down:
+     *
+     * ptep_sssms shallows the recursion chain:
+     *      self -> self -> self -> mnt -> L0TE@self
+     *
+     * ptep_ssssm shallows the recursion chain:
+     *      self -> self -> self -> self -> L0TE@mnt
+     *
+     * Note: PML4: 2 extra steps
+     *       PML5: 3 extra steps
+     */
+    pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
+    pte_t* ptep_sms = mkl1tep_va(VMS_SELF, dest_mnt) + VMS_SELF_L0TI;
 
     pte_t pte_sms = mkpte_prot(KERNEL_DATA);
-    pte_sms = vmm_alloc_page(ptep_ssm, pte_sms);
+    pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
 
     set_pte(ptep_sms, pte_sms);
-    cpu_flush_page((ptr_t)dest_mnt);
+    tlb_flush_kernel((ptr_t)dest_mnt);
+    tlb_flush_kernel((ptr_t)ptep_sms);
 
     if (only_kernel) {
         ptep = ptep_kernel;
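
A note on the chain diagrams in the comment above: the arithmetic behind
recursive page-table mounting can be reproduced in ordinary C. The sketch
below is illustrative only and is not lunaix-os code: it assumes classic
4-level x86_64 recursive paging (9 bits per level, 4 KiB pages), SELF is a
hypothetical stand-in for the VMS_SELF L0 slot, and canon() models the
sign extension of bit 47. Each extra "self" step folded into an address
makes the hardware walk terminate one level higher, which is exactly why
x86_64 needs the deeper ptep_sssms/ptep_ssssm chains.

    #include <stdint.h>
    #include <stdio.h>

    #define SELF 510UL   /* hypothetical recursive L0 slot */

    static uint64_t canon(uint64_t va)
    {
        /* sign-extend bit 47 to form a canonical address */
        return (va & (1UL << 47)) ? (va | 0xffff000000000000UL) : va;
    }

    /* VA of the LFT entry (the PTE) mapping 'va': one self step. */
    static uint64_t lftep_va(uint64_t va)
    {
        return canon((SELF << 39) | ((va >> 9) & 0x7ffffffff8UL));
    }

    /* VA of the L2T entry mapping 'va': two self steps, so the walk
     * terminates one level higher than for the LFT entry. */
    static uint64_t l2tep_va(uint64_t va)
    {
        return canon((SELF << 39) | (SELF << 30) | ((va >> 18) & 0x3ffffff8UL));
    }

    int main(void)
    {
        uint64_t va = 0x00007f0000400000UL;
        printf("LFT entry at %#lx\n", lftep_va(va));
        printf("L2T entry at %#lx\n", l2tep_va(va));
        return 0;
    }

With SELF = 510, every LFT entry becomes visible through the window at
0xffffff0000000000; replacing one SELF index with the index of a mount
slot, as vmscpy does, aims the same trick at a foreign page table.
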
@@ -50,10 +92,11 @@
     }
 
     int level = 0;
+    struct leaflet* leaflet;
+
     while (ptep < ptep_kernel) {
         pte_t pte = *ptep;
-        ptr_t pa = pte_paddr(pte);
 
         if (pte_isnull(pte)) {
             goto cont;
@@ -61,12 +104,18 @@
 
         if (pt_last_level(level) || pte_huge(pte)) {
             set_pte(ptep_dest, pte);
-
-            if (pte_isloaded(pte))
-                pmm_ref_page(pa);
+
+            if (pte_isloaded(pte)) {
+                leaflet = pte_leaflet(pte);
+                assert(leaflet_refcount(leaflet));
+
+                if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
+                    leaflet_borrow(leaflet);
+                }
+            }
         }
         else if (!pt_last_level(level)) {
-            vmm_alloc_page(ptep_dest, pte);
+            alloc_kpage_at(ptep_dest, pte, 0);
 
             ptep = ptep_step_into(ptep);
             ptep_dest = ptep_step_into(ptep_dest);
@@ -76,7 +125,7 @@
     }
 
 cont:
-    if (ptep_vfn(ptep) == MAX_PTEN - 1) {
+    while (ptep_vfn(ptep) == MAX_PTEN - 1) {
         assert(level > 0);
         ptep = ptep_step_out(ptep);
         ptep_dest = ptep_step_out(ptep_dest);
@@ -92,28 +141,43 @@
     assert(ptep_dest == ptepd_kernel);
 
     // Carry over the kernel (exclude last two entry)
-    while (ptep_vfn(ptep) < MAX_PTEN - 2) {
+    unsigned int i = ptep_vfn(ptep);
+    while (i++ < MAX_PTEN) {
         pte_t pte = *ptep;
+
+        if (l0tep_impile_vmnts(ptep)) {
+            goto _cont;
+        }
+
         assert(!pte_isnull(pte));
 
+        // Ensure it is a next-level page table;
+        // we MAY relax this later to allow the kernel
+        // to have huge leaflets mapped at L0T
+        leaflet = pte_leaflet_aligned(pte);
+        assert(leaflet_order(leaflet) == 0);
+
         set_pte(ptep_dest, pte);
-        pmm_ref_page(pte_paddr(pte));
-
+        leaflet_borrow(leaflet);
+
+    _cont:
         ptep++;
         ptep_dest++;
     }
 
-    return pte_paddr(*(ptep_dest + 1));
+    return pte_paddr(pte_sms);
 }
 
-static void optimize("O0")
+static void
 vmsfree(ptr_t vm_mnt)
 {
+    struct leaflet* leaflet;
     pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0));
+    pte_t* ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
     pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT));
 
     int level = 0;
-    volatile pte_t* ptep = ptep_head;
+    pte_t* ptep = ptep_head;
 
     while (ptep < ptep_kernel) {
         pte_t pte = *ptep;
@@ -130,21 +194,29 @@ vmsfree(ptr_t vm_mnt)
             continue;
         }
 
-        if (pte_isloaded(pte))
-            pmm_free_any(pa);
+        if (pte_isloaded(pte)) {
+            leaflet = pte_leaflet_aligned(pte);
+            leaflet_return(leaflet);
+
+            ptep += __ptep_advancement(leaflet, level);
+        }
 
 cont:
-        if (ptep_vfn(ptep) == MAX_PTEN - 1) {
+        while (ptep_vfn(ptep) == MAX_PTEN - 1) {
            ptep = ptep_step_out(ptep);
-            pmm_free_any(pte_paddr(pte_at(ptep)));
+            leaflet = pte_leaflet_aligned(pte_at(ptep));
+
+            assert(leaflet_order(leaflet) == 0);
+            leaflet_return(leaflet);
+
            level--;
         }
 
         ptep++;
     }
 
-    ptr_t self_pa = pte_paddr(ptep_head[MAX_PTEN - 1]);
-    pmm_free_any(self_pa);
+    leaflet = pte_leaflet_aligned(pte_at(ptep_self));
+    leaflet_return(leaflet);
 }
 
 static inline void
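
The hunks above replace the old pmm_ref_page/pmm_free_any pairing with
leaflet_borrow/leaflet_return. The toy model below is a minimal sketch of
the invariant those calls maintain, with struct leaflet reduced to a bare
reference count (the real lunaix-os leaflet also carries order and mapping
state): vmscpy borrows every loaded page it shares into the new vms,
vmsfree returns every page it unmaps, and the physical page is handed back
to the allocator only when the last holder returns it.

    #include <assert.h>

    /* toy stand-in: the real struct leaflet lives in lunaix-os */
    struct toy_leaflet { int refs; };

    static void leaflet_borrow(struct toy_leaflet* l) { l->refs++; }

    static void leaflet_return(struct toy_leaflet* l)
    {
        assert(l->refs > 0);
        if (--l->refs == 0) {
            /* the real kernel would hand the page back to the
             * allocator at this point */
        }
    }

    int main(void)
    {
        struct toy_leaflet page = { .refs = 1 }; /* mapped by parent vms */

        leaflet_borrow(&page);   /* vmscpy: child now shares the page  */
        leaflet_return(&page);   /* vmsfree: child tears down its vms  */
        assert(page.refs == 1);  /* parent's mapping is unaffected     */

        leaflet_return(&page);   /* parent exits: refs hit 0, page freed */
        return 0;
    }

This also explains the leaflet_ppfn(leaflet) == pte_ppfn(pte) guard in
vmscpy: for a multi-page leaflet, only the PTE pointing at the leaflet's
head frame takes the borrow, so the count tracks sharers, not mappings.
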
@@ -251,7 +323,7 @@ procvm_mount_self(struct proc_mm* mm)
 void
 procvm_unmount_self(struct proc_mm* mm)
 {
-    assert(mm->vm_mnt == VMS_SELF);
+    assert(active_vms(mm->vm_mnt));
 
     mm->vm_mnt = 0;
 }
@@ -263,7 +335,7 @@ procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
     ptr_t vm_mnt = mm->vm_mnt;
     assert(vm_mnt);
 
-    pfn_t size_pn = pfn(size + MEM_PAGE);
+    pfn_t size_pn = pfn(size + PAGE_SIZE);
     assert(size_pn < REMOTEVM_MAX_PAGES);
 
     struct mm_region* region = region_get(&mm->regions, remote_base);
@@ -272,9 +344,9 @@ procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
     rvmctx->vms_mnt = vm_mnt;
     rvmctx->page_cnt = size_pn;
 
-    remote_base = va_align(remote_base);
+    remote_base = page_aligned(remote_base);
     rvmctx->remote = remote_base;
-    rvmctx->local_mnt = PG_MOUNT_4_END + 1;
+    rvmctx->local_mnt = PG_MOUNT_VAR;
 
     pte_t* rptep = mkptep_va(vm_mnt, remote_base);
     pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
@@ -288,7 +360,7 @@ procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
             continue;
         }
 
-        ptr_t pa = pmm_alloc_page(0);
+        ptr_t pa = ppage_addr(pmm_alloc_normal(0));
         set_pte(lptep, mkpte(pa, KERNEL_DATA));
         set_pte(rptep, mkpte(pa, pattr));
     }
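
One detail in procvm_enter_remote worth spelling out is the page math.
remote_base is rounded down by page_aligned(), so a buffer that starts
mid-page can spill one page past pfn(size); taking pfn(size + PAGE_SIZE)
covers that overhang. The standalone check below models PAGE_SIZE, pfn()
and page_aligned() under a 4 KiB page assumption; these definitions are
stand-ins, not the lunaix-os headers.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE       4096UL
    #define pfn(x)          ((x) / PAGE_SIZE)
    #define page_aligned(x) ((x) & ~(PAGE_SIZE - 1))

    int main(void)
    {
        uintptr_t remote_base = 0x00400123UL;  /* unaligned on purpose */
        size_t    size        = 8192;          /* two pages' worth     */

        /* The copy touches bytes 0x400123..0x402122, i.e. three
         * distinct pages once the base is rounded down, and
         * pfn(size + PAGE_SIZE) reserves exactly that many. */
        printf("window base %#lx, pages reserved %lu\n",
               page_aligned(remote_base), pfn(size + PAGE_SIZE));
        return 0;
    }

The loop in the final hunk then installs each reserved page twice, once at
the kernel's local mount (KERNEL_DATA, writable by the caller) and once at
the remote address in the target vms, so bytes written locally appear in
the target process without switching address spaces.
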