#include <lunaix/mm/procvm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/region.h>
-#include <lunaix/mm/pmm.h>
-#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>
-#include <sys/mm/mempart.h>
+#include <sys/mm/mm_defs.h>
#include <klibc/string.h>
struct proc_mm*
procvm_create(struct proc_info* proc) {
- struct proc_mm* mm = valloc(sizeof(struct proc_mm));
+ struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));
assert(mm);
return mm;
}
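+/*
+ * Number of additional PTEs at the given level that are covered by
+ * this leaflet beyond the one currently being visited, allowing a
+ * table walker to skip siblings backed by the same physical allocation.
+ */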
+static inline unsigned int
+__ptep_advancement(struct leaflet* leaflet, int level)
+{
+ size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT;
+ return (1 << (leaflet_order(leaflet) % shifts)) - 1;
+}
static ptr_t
-__dup_vmspace(ptr_t mount_point, bool only_kernel)
+vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
{
- ptr_t ptd_pp = pmm_alloc_page(PP_FGPERSIST);
- vmm_set_mapping(VMS_SELF, PG_MOUNT_1, ptd_pp, PG_PREM_RW, VMAP_NULL);
+ pte_t* ptep_dest = mkl0tep(mkptep_va(dest_mnt, 0));
+ pte_t* ptep = mkl0tep(mkptep_va(src_mnt, 0));
+ pte_t* ptepd_kernel = mkl0tep(mkptep_va(dest_mnt, KERNEL_RESIDENT));
+ pte_t* ptep_kernel = mkl0tep(mkptep_va(src_mnt, KERNEL_RESIDENT));
+
+ // Build the self-reference entry in the dest vms
+
+ /*
+ * -- What the heck are ptep_ssm and ptep_sms ? --
+ *
+ * ptep_dest points to the page table itself, which is mounted
+ * at dest_mnt (or simply mnt):
+ * mnt -> self -> self -> self -> L0TE@offset
+ *
+ * ptep_sms shallows the recursion chain by one level:
+ * self -> mnt -> self -> self -> L0TE@self
+ *
+ * ptep_ssm shallows it by one more level:
+ * self -> self -> mnt -> self -> L0TE@self
+ *
+ * Now, here is the problem: back on x86_32, the translation is
+ * a depth-3 recursion:
+ * L0T -> LFT -> Page
+ *
+ * So ptep_ssm will terminate at mnt and give us a leaf
+ * slot in which to allocate a fresh page table for mnt:
+ * self -> self -> L0TE@mnt
+ *
+ * but on x86_64 the translation has two extra steps:
+ * L0T -> L1T -> L2T -> LFT -> Page
+ *
+ * So we must keep pushing down....
+ * ptep_sssms shallows the recursion chain:
+ * self -> self -> self -> mnt -> L0TE@self
+ *
+ * ptep_ssssm shallows the recursion chain:
+ * self -> self -> self -> self -> L0TE@mnt
+ *
+ * Note: PML4: 2 extra steps
+ * PML5: 3 extra steps
+ */
+ pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
+ pte_t* ptep_sms = mkl1tep_va(VMS_SELF, dest_mnt) + VMS_SELF_L0TI;
+ pte_t pte_sms = mkpte_prot(KERNEL_DATA);
+
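+ // Allocate the new root page table at the dest_mnt slot and install
+ // its self-reference entry.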
+ pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
+ set_pte(ptep_sms, pte_sms);
+
+ tlb_flush_kernel((ptr_t)dest_mnt);
+ tlb_flush_kernel((ptr_t)ptep_sms);
+
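+ // A kernel-only copy starts directly at the kernel-resident region;
+ // a full copy skips only the very first L0 entry.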
+ if (only_kernel) {
+ ptep = ptep_kernel;
+ ptep_dest += ptep_vfn(ptep_kernel);
+ } else {
+ ptep++;
+ ptep_dest++;
+ }
- x86_page_table* ptd = (x86_page_table*)PG_MOUNT_1;
- x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
+ int level = 0;
+ struct leaflet* leaflet;
- size_t kspace_l1inx = L1_INDEX(KERNEL_EXEC);
- size_t i = 1; // skip first 4MiB, to avoid bring other thread's stack
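+
+ // Walk the source tables depth-first and mirror every user mapping
+ // into the destination vm space, up to the kernel-resident boundary.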
+ while (ptep < ptep_kernel)
+ {
+ pte_t pte = *ptep;
+
+ if (pte_isnull(pte)) {
+ goto cont;
+ }
+
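+ // Leaf or huge mapping: share it by copying the PTE and borrowing
+ // the backing leaflet (the borrow is taken once, at its head page).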
+ if (pt_last_level(level) || pte_huge(pte)) {
+ set_pte(ptep_dest, pte);
+
+ if (pte_isloaded(pte)) {
+ leaflet = pte_leaflet(pte);
+ assert(leaflet_refcount(leaflet));
+
+ if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
+ leaflet_borrow(leaflet);
+ }
+ }
+ }
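+ // Intermediate level: allocate a table page for dest and descend
+ // into both source and destination in lockstep.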
+ else if (!pt_last_level(level)) {
+ alloc_kpage_at(ptep_dest, pte, 0);
- ptd->entry[0] = 0;
- if (only_kernel) {
- i = kspace_l1inx;
- memset(ptd, 0, PG_SIZE);
+ ptep = ptep_step_into(ptep);
+ ptep_dest = ptep_step_into(ptep_dest);
+ level++;
+
+ continue;
+ }
+
+ cont:
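+ // When the end of a table is reached, pop back out until there is
+ // a sibling left to visit at some upper level.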
+ while (ptep_vfn(ptep) == MAX_PTEN - 1) {
+ assert(level > 0);
+ ptep = ptep_step_out(ptep);
+ ptep_dest = ptep_step_out(ptep_dest);
+ level--;
+ }
+
+ ptep++;
+ ptep_dest++;
+ }
+
+ // Ensure we have stepped all the way back to the L0T
+ assert(!level);
+ assert(ptep_dest == ptepd_kernel);
+
+ // Carry over the kernel portion (excluding the last two entries)
+ unsigned int i = ptep_vfn(ptep);
+ while (i++ < MAX_PTEN) {
+ pte_t pte = *ptep;
+
+ if (l0tep_impile_vmnts(ptep)) {
+ goto _cont;
+ }
+
+ assert(!pte_isnull(pte));
+
+ // Ensure it is a next-level page table.
+ // We MAY relax this later to allow the kernel
+ // to have huge leaflets mapped at L0T.
+ leaflet = pte_leaflet_aligned(pte);
+ assert(leaflet_order(leaflet) == 0);
+
+ set_pte(ptep_dest, pte);
+ leaflet_borrow(leaflet);
+
+ _cont:
+ ptep++;
+ ptep_dest++;
}
- for (; i < PG_MAX_ENTRIES - 1; i++) {
+ return pte_paddr(pte_sms);
+}
+
+static void
+vmsfree(ptr_t vm_mnt)
+{
+ struct leaflet* leaflet;
+ pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0));
+ pte_t* ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
+ pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT));
+
+ int level = 0;
+ pte_t* ptep = ptep_head;
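+
+ // Walk the user portion depth-first, returning every mapped leaflet
+ // and every intermediate table page on the way back out.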
+ while (ptep < ptep_kernel)
+ {
+ pte_t pte = *ptep;
+ ptr_t pa = pte_paddr(pte);
+
+ if (pte_isnull(pte)) {
+ goto cont;
+ }
+
+ if (!pt_last_level(level) && !pte_huge(pte)) {
+ ptep = ptep_step_into(ptep);
+ level++;
- x86_pte_t ptde = pptd->entry[i];
- // L1 entries that are empty or not present are copied over as-is.
- // The kernel address space is shared directly.
- if (!ptde || i >= kspace_l1inx || !(ptde & PG_PRESENT)) {
- ptd->entry[i] = ptde;
continue;
}
- // Copy the L2 page table
- ptr_t pt_pp = pmm_alloc_page(PP_FGPERSIST);
- vmm_set_mapping(VMS_SELF, PG_MOUNT_2, pt_pp, PG_PREM_RW, VMAP_NULL);
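+ // Leaf mapping: return our reference and skip any sibling PTEs
+ // covered by the same leaflet.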
+ if (pte_isloaded(pte)) {
+ leaflet = pte_leaflet_aligned(pte);
+ leaflet_return(leaflet);
- x86_page_table* ppt = (x86_page_table*)(mount_point | (i << 12));
- x86_page_table* pt = (x86_page_table*)PG_MOUNT_2;
+ ptep += __ptep_advancement(leaflet, level);
+ }
- for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
- x86_pte_t pte = ppt->entry[j];
- pmm_ref_page(PG_ENTRY_ADDR(pte));
- pt->entry[j] = pte;
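+
+ // On leaving a fully-scanned table, return the table page itself.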
+ cont:
+ while (ptep_vfn(ptep) == MAX_PTEN - 1) {
+ ptep = ptep_step_out(ptep);
+ leaflet = pte_leaflet_aligned(pte_at(ptep));
+
+ assert(leaflet_order(leaflet) == 0);
+ leaflet_return(leaflet);
+
+ level--;
}
- ptd->entry[i] = (ptr_t)pt_pp | PG_ENTRY_FLAGS(ptde);
+ ptep++;
}
- ptd->entry[PG_MAX_ENTRIES - 1] = NEW_L1_ENTRY(T_SELF_REF_PERM, ptd_pp);
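+
+ // Finally, release the root table itself through its self-reference.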
+ leaflet = pte_leaflet_aligned(pte_at(ptep_self));
+ leaflet_return(leaflet);
+}
- return ptd_pp;
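+/*
+ * Record the given vm space as a guest mounted under the currently
+ * running process's vm space (if there is one).
+ */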
+static inline void
+__attach_to_current_vms(struct proc_mm* guest_mm)
+{
+ struct proc_mm* mm_current = vmspace(__current);
+ if (mm_current) {
+ assert(!mm_current->guest_mm);
+ mm_current->guest_mm = guest_mm;
+ }
}
+static inline void
+__detach_from_current_vms(struct proc_mm* guest_mm)
+{
+ struct proc_mm* mm_current = vmspace(__current);
+ if (mm_current) {
+ assert(mm_current->guest_mm == guest_mm);
+ mm_current->guest_mm = NULL;
+ }
+}
+
+
void
-procvm_dup(struct proc_info* proc) {
- struct proc_mm* mm = vmspace(proc);
- struct proc_mm* mm_current = vmspace(__current);
-
- mm->heap = mm_current->heap;
- mm->vmroot = __dup_vmspace(VMS_SELF, false);
+procvm_dupvms_mount(struct proc_mm* mm) {
+ assert(__current);
+ assert(!mm->vm_mnt);
+
+ struct proc_mm* mm_current = vmspace(__current);
+
+ __attach_to_current_vms(mm);
- region_copy_mm(mm_current, mm);
+ mm->heap = mm_current->heap;
+ mm->vm_mnt = VMS_MOUNT_1;
+ mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, false);
+
+ region_copy_mm(mm_current, mm);
}
void
-procvm_init_clean(struct proc_info* proc)
+procvm_mount(struct proc_mm* mm)
{
- struct proc_mm* mm = vmspace(proc);
- mm->vmroot = __dup_vmspace(VMS_SELF, true);
-}
+ assert(!mm->vm_mnt);
+ assert(mm->vmroot);
+ vms_mount(VMS_MOUNT_1, mm->vmroot);
-static void
-__delete_vmspace(ptr_t vm_mnt)
-{
- x86_page_table* pptd = (x86_page_table*)(vm_mnt | (0x3FF << 12));
+ __attach_to_current_vms(mm);
- // only remove user address space
- for (size_t i = 0; i < L1_INDEX(KERNEL_EXEC); i++) {
- x86_pte_t ptde = pptd->entry[i];
- if (!ptde || !(ptde & PG_PRESENT)) {
- continue;
- }
+ mm->vm_mnt = VMS_MOUNT_1;
+}
- x86_page_table* ppt = (x86_page_table*)(vm_mnt | (i << 12));
+void
+procvm_unmount(struct proc_mm* mm)
+{
+ assert(mm->vm_mnt);
- for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
- x86_pte_t pte = ppt->entry[j];
- // free the 4KB data page
- if ((pte & PG_PRESENT)) {
- pmm_free_page(PG_ENTRY_ADDR(pte));
- }
- }
- // free the L2 page table
- pmm_free_page(PG_ENTRY_ADDR(ptde));
+ vms_unmount(VMS_MOUNT_1);
+ struct proc_mm* mm_current = vmspace(__current);
+ if (mm_current) {
+ mm_current->guest_mm = NULL;
}
- // free the L1 directory
- pmm_free_page(PG_ENTRY_ADDR(pptd->entry[PG_MAX_ENTRIES - 1]));
+
+ mm->vm_mnt = 0;
}
void
-procvm_cleanup(ptr_t vm_mnt, struct proc_info* proc) {
+procvm_initvms_mount(struct proc_mm* mm)
+{
+ assert(!mm->vm_mnt);
+
+ __attach_to_current_vms(mm);
+
+ mm->vm_mnt = VMS_MOUNT_1;
+ mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, true);
+}
+
+void
+procvm_unmount_release(struct proc_mm* mm) {
+ ptr_t vm_mnt = mm->vm_mnt;
struct mm_region *pos, *n;
- llist_for_each(pos, n, vmregions(proc), head)
+ llist_for_each(pos, n, &mm->regions, head)
{
mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);
region_release(pos);
}
- vfree(proc->mm);
+ vmsfree(vm_mnt);
+ vms_unmount(vm_mnt);
+
+ __detach_from_current_vms(mm);
+ vfree(mm);
+}
+
+void
+procvm_mount_self(struct proc_mm* mm)
+{
+ assert(!mm->vm_mnt);
+ assert(!mm->guest_mm);
+
+ mm->vm_mnt = VMS_SELF;
+}
+
+void
+procvm_unmount_self(struct proc_mm* mm)
+{
+ assert(active_vms(mm->vm_mnt));
- __delete_vmspace(vm_mnt);
+ mm->vm_mnt = 0;
}
ptr_t
procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
- ptr_t vm_mnt, ptr_t remote_base, size_t size)
+ ptr_t remote_base, size_t size)
{
- ptr_t size_pn = PN(size + MEM_PAGE);
+ ptr_t vm_mnt = mm->vm_mnt;
+ assert(vm_mnt);
+
+ pfn_t size_pn = pfn(size + PAGE_SIZE);
assert(size_pn < REMOTEVM_MAX_PAGES);
struct mm_region* region = region_get(&mm->regions, remote_base);
rvmctx->vms_mnt = vm_mnt;
rvmctx->page_cnt = size_pn;
- remote_base = PG_ALIGN(remote_base);
+ remote_base = page_aligned(remote_base);
rvmctx->remote = remote_base;
- rvmctx->local_mnt = PG_MOUNT_4_END + 1;
+ rvmctx->local_mnt = PG_MOUNT_VAR;
+
+ pte_t* rptep = mkptep_va(vm_mnt, remote_base);
+ pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
+ unsigned int pattr = region_pteprot(region);
- v_mapping m;
- unsigned int pattr = region_ptattr(region);
- ptr_t raddr = remote_base, lmnt = rvmctx->local_mnt;
- for (size_t i = 0; i < size_pn; i++, lmnt += MEM_PAGE, raddr += MEM_PAGE)
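+ // Mirror each remote page into the local mount window; pages that
+ // are not yet present in the remote vms get a fresh physical page
+ // mapped on both sides.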
+ for (size_t i = 0; i < size_pn; i++, lptep++, rptep++)
{
- if (vmm_lookupat(vm_mnt, raddr, &m) && PG_IS_PRESENT(m.flags)) {
- vmm_set_mapping(VMS_SELF, lmnt, m.pa, PG_PREM_RW, 0);
+ pte_t pte = vmm_tryptep(rptep, PAGE_SIZE);
+ if (pte_isloaded(pte)) {
+ set_pte(lptep, pte);
continue;
}
- ptr_t pa = pmm_alloc_page(0);
- vmm_set_mapping(VMS_SELF, lmnt, pa, PG_PREM_RW, 0);
- vmm_set_mapping(vm_mnt, raddr, pa, pattr, 0);
+ ptr_t pa = ppage_addr(pmm_alloc_normal(0));
+ set_pte(lptep, mkpte(pa, KERNEL_DATA));
+ set_pte(rptep, mkpte(pa, pattr));
}
return vm_mnt;
}
ptr_t offset = remote_dest - rvmctx->remote;
- if (PN(offset + sz) >= rvmctx->page_cnt) {
+ if (pfn(offset + sz) >= rvmctx->page_cnt) {
return -1;
}
}
void
-procvm_exit_remote_transaction(struct remote_vmctx* rvmctx)
+procvm_exit_remote(struct remote_vmctx* rvmctx)
{
- ptr_t lmnt = rvmctx->local_mnt;
- for (size_t i = 0; i < rvmctx->page_cnt; i++, lmnt += MEM_PAGE)
- {
- vmm_del_mapping(VMS_SELF, lmnt);
- }
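+ // Tear down the local mappings set up by procvm_enter_remote.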
+ pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
+ vmm_unset_ptes(lptep, rvmctx->page_cnt);
}
\ No newline at end of file