#include <lunaix/mm/procvm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/region.h>
-#include <lunaix/mm/pmm.h>
-#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>
return mm;
}
+static inline unsigned int
+__ptep_advancement(struct leaflet* leaflet, int level)
+{ // Extra PTE slots to skip for `leaflet` at `level`; vmsfree adds the result to its walking ptep.
+ size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT; // modulus: LEVEL_SHIFT times (MAX_LEVEL - level - 1), clamped to at least one LEVEL_SHIFT
+ return (1 << (leaflet_order(leaflet) % shifts)) - 1; // 2^(order mod shifts) - 1. NOTE(review): the shift is done in int; confirm the exponent stays below 31
+}
+
static ptr_t
vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
{
pte_t* ptep_kernel = mkl0tep(mkptep_va(src_mnt, KERNEL_RESIDENT));
// Build the self-reference on dest vms
- pte_t* ptep_sms = mkptep_va(VMS_SELF, (ptr_t)ptep_dest);
- pte_t* ptep_ssm = mkptep_va(VMS_SELF, (ptr_t)ptep_sms);
- pte_t pte_sms = mkpte_prot(KERNEL_DATA);
- pte_sms = vmm_alloc_page(ptep_ssm, pte_sms);
+ /*
+ * -- What the heck are ptep_ssm and ptep_sms ? --
+ *
+ * ptep_dest points to the pagetable itself that is mounted
+ * at dest_mnt (or simply mnt):
+ * mnt -> self -> self -> self -> L0TE@offset
+ *
+ * ptep_sms shallowed the recursion chain:
+ * self -> mnt -> self -> self -> L0TE@self
+ *
+ * ptep_ssm shallowed the recursion chain:
+ * self -> self -> mnt -> self -> L0TE@self
+ *
+ * Now, here is the problem: back on x86_32, the translation is
+ * a depth-3 recursion:
+ * L0T -> LFT -> Page
+ *
+ * So ptep_ssm will terminate at mnt and give us a leaf
+ * slot in which to allocate a fresh page table for mnt:
+ * self -> self -> L0TE@mnt
+ *
+ * but on x86_64 the translation has two extra steps:
+ * L0T -> L1T -> L2T -> LFT -> Page
+ *
+ * So we must continue pushing down....
+ * ptep_sssms shallowed the recursion chain:
+ * self -> self -> self -> mnt -> L0TE@self
+ *
+ * ptep_ssssm shallowed the recursion chain:
+ * self -> self -> self -> self -> L0TE@mnt
+ *
+ * Note: PML4: 2 extra steps
+ * PML5: 3 extra steps
+ */
+ pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
+ pte_t* ptep_sms = mkl1tep_va(VMS_SELF, dest_mnt) + VMS_SELF_L0TI;
+ pte_t pte_sms = mkpte_prot(KERNEL_PGTAB);
+
+ pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
set_pte(ptep_sms, pte_sms);
- cpu_flush_page((ptr_t)dest_mnt);
+ tlb_flush_kernel((ptr_t)dest_mnt);
+ tlb_flush_kernel((ptr_t)ptep_sms);
if (only_kernel) {
ptep = ptep_kernel;
}
int level = 0;
+ struct leaflet* leaflet;
+
while (ptep < ptep_kernel)
{
pte_t pte = *ptep;
- ptr_t pa = pte_paddr(pte);
if (pte_isnull(pte)) {
goto cont;
if (pt_last_level(level) || pte_huge(pte)) {
set_pte(ptep_dest, pte);
-
- if (pte_isloaded(pte))
- pmm_ref_page(pa);
+
+ if (pte_isloaded(pte)) {
+ leaflet = pte_leaflet(pte);
+ assert(leaflet_refcount(leaflet));
+
+ if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
+ leaflet_borrow(leaflet);
+ }
+ }
}
else if (!pt_last_level(level)) {
- vmm_alloc_page(ptep_dest, pte);
+ alloc_kpage_at(ptep_dest, pte, 0);
ptep = ptep_step_into(ptep);
ptep_dest = ptep_step_into(ptep_dest);
}
cont:
- if (ptep_vfn(ptep) == MAX_PTEN - 1) {
+ while (ptep_vfn(ptep) == MAX_PTEN - 1) {
assert(level > 0);
ptep = ptep_step_out(ptep);
ptep_dest = ptep_step_out(ptep_dest);
assert(ptep_dest == ptepd_kernel);
// Carry over the kernel (exclude last two entry)
- while (ptep_vfn(ptep) < MAX_PTEN - 2) {
+ unsigned int i = ptep_vfn(ptep);
+ while (i++ < MAX_PTEN) {
pte_t pte = *ptep;
+
+ if (l0tep_implie_vmnts(ptep)) {
+ goto _cont;
+ }
+
assert(!pte_isnull(pte));
+ // Ensure it is a next level pagetable,
+ // we MAY relax this later allow kernel
+ // to have huge leaflet mapped at L0T
+ leaflet = pte_leaflet_aligned(pte);
+ assert(leaflet_order(leaflet) == 0);
+
set_pte(ptep_dest, pte);
- pmm_ref_page(pte_paddr(pte));
-
+ leaflet_borrow(leaflet);
+
+ _cont:
ptep++;
ptep_dest++;
}
- return pte_paddr(*(ptep_dest + 1));
+ return pte_paddr(pte_sms);
}
-static void optimize("O0")
+static void
vmsfree(ptr_t vm_mnt)
{
+ struct leaflet* leaflet;
pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0));
+ pte_t* ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT));
int level = 0;
- volatile pte_t* ptep = ptep_head;
+ pte_t* ptep = ptep_head;
while (ptep < ptep_kernel)
{
pte_t pte = *ptep;
continue;
}
- if (pte_isloaded(pte))
- pmm_free_any(pa);
+ if (pte_isloaded(pte)) {
+ leaflet = pte_leaflet_aligned(pte);
+ leaflet_return(leaflet);
+
+ ptep += __ptep_advancement(leaflet, level);
+ }
cont:
- if (ptep_vfn(ptep) == MAX_PTEN - 1) {
+ while (ptep_vfn(ptep) == MAX_PTEN - 1) {
ptep = ptep_step_out(ptep);
- pmm_free_any(pte_paddr(pte_at(ptep)));
+ leaflet = pte_leaflet_aligned(pte_at(ptep));
+
+ assert(leaflet_order(leaflet) == 0);
+ leaflet_return(leaflet);
+
level--;
}
ptep++;
}
- ptr_t self_pa = pte_paddr(ptep_head[MAX_PTEN - 1]);
- pmm_free_any(self_pa);
+ leaflet = pte_leaflet_aligned(pte_at(ptep_self));
+ leaflet_return(leaflet);
}
static inline void
void
procvm_mount(struct proc_mm* mm)
{
+ // if current mm is already active
+ if (active_vms(mm->vm_mnt)) {
+ return;
+ }
+
+ // we are double mounting
assert(!mm->vm_mnt);
assert(mm->vmroot);
void
procvm_unmount(struct proc_mm* mm)
{
+ if (active_vms(mm->vm_mnt)) {
+ return;
+ }
+
assert(mm->vm_mnt);
-
vms_unmount(VMS_MOUNT_1);
+
struct proc_mm* mm_current = vmspace(__current);
if (mm_current) {
mm_current->guest_mm = NULL;
procvm_mount_self(struct proc_mm* mm)
{
assert(!mm->vm_mnt);
- assert(!mm->guest_mm);
mm->vm_mnt = VMS_SELF;
}
void
procvm_unmount_self(struct proc_mm* mm)
{
- assert(mm->vm_mnt == VMS_SELF);
+ assert(active_vms(mm->vm_mnt)); // replaces the exact VMS_SELF comparison; presumably active_vms() holds when vm_mnt is the live address space - confirm
mm->vm_mnt = 0;
}
ptr_t vm_mnt = mm->vm_mnt;
assert(vm_mnt);
- pfn_t size_pn = pfn(size + MEM_PAGE);
+ pfn_t size_pn = pfn(size + PAGE_SIZE);
assert(size_pn < REMOTEVM_MAX_PAGES);
struct mm_region* region = region_get(&mm->regions, remote_base);
rvmctx->vms_mnt = vm_mnt;
rvmctx->page_cnt = size_pn;
- remote_base = va_align(remote_base);
+ remote_base = page_aligned(remote_base);
rvmctx->remote = remote_base;
- rvmctx->local_mnt = PG_MOUNT_4_END + 1;
+ rvmctx->local_mnt = PG_MOUNT_VAR;
pte_t* rptep = mkptep_va(vm_mnt, remote_base);
pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
- unsigned int pattr = region_pteprot(region);
+
+ pte_t pte, rpte = null_pte;
+ rpte = region_tweakpte(region, rpte);
for (size_t i = 0; i < size_pn; i++)
{
- pte_t pte = vmm_tryptep(rptep, PAGE_SIZE);
+ pte = vmm_tryptep(rptep, PAGE_SIZE);
if (pte_isloaded(pte)) {
set_pte(lptep, pte);
continue;
}
- ptr_t pa = pmm_alloc_page(0);
+ ptr_t pa = ppage_addr(pmm_alloc_normal(0));
set_pte(lptep, mkpte(pa, KERNEL_DATA));
- set_pte(rptep, mkpte(pa, pattr));
+ set_pte(rptep, pte_setpaddr(rpte, pa));
}
return vm_mnt;