#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>
+#include <lunaix/syslog.h>
-#include <sys/mm/mm_defs.h>
+#include <asm/mm_defs.h>
#include <klibc/string.h>
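+
+/*
+ * Thin wrappers around page-table page allocation/release. The `level`
+ * argument is currently unused; presumably reserved so a per-level
+ * tracing hook (cf. the new syslog.h include) can be attached later
+ * without touching every call site.
+ */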
+#define alloc_pagetable_trace(ptep, pte, ord, level) \
+ ({ \
+ alloc_kpage_at(ptep, pte, ord); \
+ })
+
+#define free_pagetable_trace(ptep, pte, level) \
+ ({ \
+ struct leaflet* leaflet = pte_leaflet_aligned(pte); \
+ assert(leaflet_order(leaflet) == 0); \
+ leaflet_return(leaflet); \
+ set_pte(ptep, null_pte); \
+ })
+
struct proc_mm*
procvm_create(struct proc_info* proc) {
struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));
return (1 << (leaflet_order(leaflet) % shifts)) - 1;
}
-static ptr_t
-vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
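+/*
+ * Walk the page-table hierarchy along `va` from the top level down,
+ * stopping at the last level, an unmapped hole, or a huge mapping.
+ * When `alloc` is set, missing intermediate tables are allocated on
+ * the dest side along the way. Returns the deepest level reached.
+ */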
+static inline int
+__descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc)
+{
+ pte_t *dest, *src, pte;
+
+ int i = 0;
+ while (!pt_last_level(i))
+ {
+ dest = mklntep_va(i, dest_mnt, va);
+ src = mklntep_va(i, src_mnt, va);
+ pte = pte_at(src);
+
+ if (!pte_isloaded(pte) || pte_huge(pte)) {
+ break;
+ }
+
+ if (alloc && pte_isnull(pte_at(dest))) {
+ alloc_pagetable_trace(dest, pte, 0, i);
+ }
+
+ i++;
+ }
+
+ return i;
+}
+
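+/*
+ * Copy a single leaf mapping. The backing leaflet is borrowed
+ * (refcounted) only when the pte points at the leaflet's head page,
+ * so a multi-page leaflet is borrowed once rather than once per pte.
+ */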
+static inline void
+copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level)
+{
+ struct leaflet* leaflet;
+
+ set_pte(dest, pte);
+
+ if (!pte_isloaded(pte)) {
+ return;
+ }
+
+ leaflet = pte_leaflet(pte);
+ assert(leaflet_refcount(leaflet));
+
+ if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
+ leaflet_borrow(leaflet);
+ }
+}
+
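+/*
+ * Copy an intermediate entry: allocate a fresh table for dest rather
+ * than aliasing the source table, so the two vm spaces can diverge.
+ */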
+static inline void
+copy_root(pte_t* dest, pte_t* src, pte_t pte, int level)
+{
+ alloc_pagetable_trace(dest, pte, 0, level);
+}
+
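+/*
+ * Copy all mappings within `region` from the source vms into the
+ * mounted destination vms, allocating intermediate tables as needed.
+ */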
+static void
+vmrcpy(ptr_t dest_mnt, ptr_t src_mnt, struct mm_region* region)
+{
+ pte_t *src, *dest;
+ ptr_t loc;
+ int level;
+
+ loc = region->start;
+ src = mkptep_va(src_mnt, loc);
+ dest = mkptep_va(dest_mnt, loc);
+
+ level = __descend(dest_mnt, src_mnt, loc, true);
+
+ while (loc < region->end)
+ {
+ pte_t pte = *src;
+
+ if (pte_isnull(pte)) {
+ goto cont;
+ }
+
+ if (pt_last_level(level) || pte_huge(pte)) {
+ copy_leaf(dest, src, pte, level);
+ goto cont;
+ }
+
+ copy_root(dest, src, pte, level);
+
+ src = ptep_step_into(src);
+ dest = ptep_step_into(dest);
+ level++;
+
+ continue;
+
+ cont:
+ loc += lnt_page_size(level);
+ while (ptep_vfn(src) == MAX_PTEN - 1) {
+ assert(level > 0);
+ src = ptep_step_out(src);
+ dest = ptep_step_out(dest);
+ level--;
+ }
+
+ src++;
+ dest++;
+ }
+}
+
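+/*
+ * Tear down all mappings within `region` under the mounted vms,
+ * returning leaf leaflets and freeing fully-traversed page tables on
+ * the way back out.
+ */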
+static void
+vmrfree(ptr_t vm_mnt, struct mm_region* region)
{
- pte_t* ptep_dest = mkl0tep(mkptep_va(dest_mnt, 0));
- pte_t* ptep = mkl0tep(mkptep_va(src_mnt, 0));
- pte_t* ptepd_kernel = mkl0tep(mkptep_va(dest_mnt, KERNEL_RESIDENT));
- pte_t* ptep_kernel = mkl0tep(mkptep_va(src_mnt, KERNEL_RESIDENT));
+ pte_t *src, *end;
+ ptr_t loc;
+ int level;
+ struct leaflet* leaflet;
+
+ loc = region->start;
+ src = mkptep_va(vm_mnt, region->start);
+ end = mkptep_va(vm_mnt, region->end);
+
+ level = __descend(vm_mnt, vm_mnt, loc, false);
+
+ while (src < end)
+ {
+ pte_t pte = *src;
+
+ if (pte_isnull(pte)) {
+ goto cont;
+ }
+
+ if (!pt_last_level(level) && !pte_huge(pte)) {
+ src = ptep_step_into(src);
+ level++;
+
+ continue;
+ }
+
+ set_pte(src, null_pte);
+
+ if (pte_isloaded(pte)) {
+ leaflet = pte_leaflet_aligned(pte);
+ leaflet_return(leaflet);
+
+ src += __ptep_advancement(leaflet, level);
+ }
+
+ cont:
+ while (ptep_vfn(src) == MAX_PTEN - 1) {
+ src = ptep_step_out(src);
+ free_pagetable_trace(src, pte_at(src), level);
+
+ level--;
+ }
+
+ src++;
+ }
+}
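+
+/*
+ * Clone a vm space into dest_mm (or, when src_mm is NULL, set up a
+ * fresh one): install the self-reference, copy each region of src_mm,
+ * then link the shared kernel part.
+ */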
+static void
+vmscpy(struct proc_mm* dest_mm, struct proc_mm* src_mm)
+{
// Build the self-reference on dest vms
/*
* Note: PML4: 2 extra steps
* PML5: 3 extra steps
*/
+
+ ptr_t dest_mnt, src_mnt;
+
+ dest_mnt = dest_mm->vm_mnt;
+ assert(dest_mnt);
+
pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
- pte_t* ptep_sms = mkl1tep_va(VMS_SELF, dest_mnt) + VMS_SELF_L0TI;
- pte_t pte_sms = mkpte_prot(KERNEL_DATA);
+ pte_t* ptep_smx = mkl1tep_va(VMS_SELF, dest_mnt);
+ pte_t pte_sms = mkpte_prot(KERNEL_PGTAB);
- pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
- set_pte(ptep_sms, pte_sms);
+ pte_sms = alloc_pagetable_trace(ptep_ssm, pte_sms, 0, 0);
+ set_pte(&ptep_smx[VMS_SELF_L0TI], pte_sms);
tlb_flush_kernel((ptr_t)dest_mnt);
- tlb_flush_kernel((ptr_t)ptep_sms);
-
- if (only_kernel) {
- ptep = ptep_kernel;
- ptep_dest += ptep_vfn(ptep_kernel);
- } else {
- ptep++;
- ptep_dest++;
+
+ if (!src_mm) {
+ goto done;
}
- int level = 0;
- struct leaflet* leaflet;
+ src_mnt = src_mm->vm_mnt;
- while (ptep < ptep_kernel)
+ struct mm_region *pos, *n;
+ llist_for_each(pos, n, &src_mm->regions, head)
{
- pte_t pte = *ptep;
-
- if (pte_isnull(pte)) {
- goto cont;
- }
-
- if (pt_last_level(level) || pte_huge(pte)) {
- set_pte(ptep_dest, pte);
-
- if (pte_isloaded(pte)) {
- leaflet = pte_leaflet(pte);
- assert(leaflet_refcount(leaflet));
-
- if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
- leaflet_borrow(leaflet);
- }
- }
- }
- else if (!pt_last_level(level)) {
- alloc_kpage_at(ptep_dest, pte, 0);
+ vmrcpy(dest_mnt, src_mnt, pos);
+ }
- ptep = ptep_step_into(ptep);
- ptep_dest = ptep_step_into(ptep_dest);
- level++;
+done:;
+ procvm_link_kernel(dest_mnt);
+
+ dest_mm->vmroot = pte_paddr(pte_sms);
+}
- continue;
- }
-
- cont:
- while (ptep_vfn(ptep) == MAX_PTEN - 1) {
- assert(level > 0);
- ptep = ptep_step_out(ptep);
- ptep_dest = ptep_step_out(ptep_dest);
- level--;
- }
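+/*
+ * Recursively release any page-table pages still referenced under the
+ * vms, skipping entries that implement the vm mount windows. Region
+ * mappings must have been freed beforehand (see vmsfree).
+ */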
+static void
+__purge_vms_residual(struct proc_mm* mm, int level, ptr_t va)
+{
+ pte_t *ptep, pte;
+ ptr_t _va;
- ptep++;
- ptep_dest++;
+ if (level >= MAX_LEVEL) {
+ return;
}
- // Ensure we step back to L0T
- assert(!level);
- assert(ptep_dest == ptepd_kernel);
-
- // Carry over the kernel (exclude last two entry)
- unsigned int i = ptep_vfn(ptep);
- while (i++ < MAX_PTEN) {
- pte_t pte = *ptep;
+ ptep = mklntep_va(level, mm->vm_mnt, va);
- if (l0tep_impile_vmnts(ptep)) {
- goto _cont;
+ for (unsigned i = 0; i < LEVEL_SIZE; i++, ptep++)
+ {
+ pte = pte_at(ptep);
+ if (pte_isnull(pte) || !pte_isloaded(pte)) {
+ continue;
}
- assert(!pte_isnull(pte));
-
- // Ensure it is a next level pagetable,
- // we MAY relax this later allow kernel
- // to have huge leaflet mapped at L0T
- leaflet = pte_leaflet_aligned(pte);
- assert(leaflet_order(leaflet) == 0);
+ if (lntep_implie_vmnts(ptep, lnt_page_size(level))) {
+ continue;
+ }
- set_pte(ptep_dest, pte);
- leaflet_borrow(leaflet);
-
- _cont:
- ptep++;
- ptep_dest++;
+ _va = va + (i * lnt_page_size(level));
+ __purge_vms_residual(mm, level + 1, _va);
+
+ set_pte(ptep, null_pte);
+ leaflet_return(pte_leaflet_aligned(pte));
}
-
- return pte_paddr(pte_sms);
}
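+/*
+ * Release an entire vm space: region mappings first, then the kernel
+ * linkage, residual intermediate tables, and finally the
+ * self-reference table itself.
+ */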
static void
-vmsfree(ptr_t vm_mnt)
+vmsfree(struct proc_mm* mm)
{
struct leaflet* leaflet;
- pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0));
- pte_t* ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
- pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT));
+ struct mm_region *pos, *n;
+ ptr_t vm_mnt;
+ pte_t* ptep_self;
+
+ vm_mnt = mm->vm_mnt;
+ ptep_self = mkl0tep_va(vm_mnt, VMS_SELF);
- int level = 0;
- pte_t* ptep = ptep_head;
- while (ptep < ptep_kernel)
+ // first pass: free region mappings
+ llist_for_each(pos, n, &mm->regions, head)
{
- pte_t pte = *ptep;
- ptr_t pa = pte_paddr(pte);
-
- if (pte_isnull(pte)) {
- goto cont;
- }
-
- if (!pt_last_level(level) && !pte_huge(pte)) {
- ptep = ptep_step_into(ptep);
- level++;
-
- continue;
- }
-
- if (pte_isloaded(pte)) {
- leaflet = pte_leaflet_aligned(pte);
- leaflet_return(leaflet);
-
- ptep += __ptep_advancement(leaflet, level);
- }
+ vmrfree(vm_mnt, pos);
+ }
- cont:
- while (ptep_vfn(ptep) == MAX_PTEN - 1) {
- ptep = ptep_step_out(ptep);
- leaflet = pte_leaflet_aligned(pte_at(ptep));
-
- assert(leaflet_order(leaflet) == 0);
- leaflet_return(leaflet);
-
- level--;
- }
+ procvm_unlink_kernel(vm_mnt);
- ptep++;
- }
+ // free up all allocated tables on intermediate levels
+ __purge_vms_residual(mm, 0, 0);
- leaflet = pte_leaflet_aligned(pte_at(ptep_self));
- leaflet_return(leaflet);
+ free_pagetable_trace(ptep_self, pte_at(ptep_self), 0);
}
static inline void
}
}
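+/* Public entry: tear down the mappings of a single region. */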
+void
+procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region)
+{
+ vmrfree(vm_mnt, region);
+}
void
procvm_dupvms_mount(struct proc_mm* mm) {
mm->heap = mm_current->heap;
mm->vm_mnt = VMS_MOUNT_1;
- mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, false);
+ vmscpy(mm, mm_current);
region_copy_mm(mm_current, mm);
}
void
procvm_mount(struct proc_mm* mm)
{
+ // the vms is already active, nothing to mount
+ if (active_vms(mm->vm_mnt)) {
+ return;
+ }
+
+ // otherwise, a non-null mount point means we are double mounting
assert(!mm->vm_mnt);
assert(mm->vmroot);
void
procvm_unmount(struct proc_mm* mm)
{
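+ // an active vms is not mounted through a mount point, nothing to undo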
+ if (active_vms(mm->vm_mnt)) {
+ return;
+ }
+
assert(mm->vm_mnt);
-
vms_unmount(VMS_MOUNT_1);
+
struct proc_mm* mm_current = vmspace(__current);
if (mm_current) {
mm_current->guest_mm = NULL;
__attach_to_current_vms(mm);
mm->vm_mnt = VMS_MOUNT_1;
- mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, true);
+ vmscpy(mm, NULL);
}
void
procvm_unmount_release(struct proc_mm* mm) {
ptr_t vm_mnt = mm->vm_mnt;
struct mm_region *pos, *n;
+
llist_for_each(pos, n, &mm->regions, head)
{
mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);
+ }
+
+ vmsfree(mm);
+
+ llist_for_each(pos, n, &mm->regions, head)
+ {
region_release(pos);
}
- vfree(mm);
- vmsfree(vm_mnt);
vms_unmount(vm_mnt);
+ vfree(mm);
__detach_from_current_vms(mm);
}
procvm_mount_self(struct proc_mm* mm)
{
assert(!mm->vm_mnt);
- assert(!mm->guest_mm);
mm->vm_mnt = VMS_SELF;
}
pte_t* rptep = mkptep_va(vm_mnt, remote_base);
pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
- unsigned int pattr = region_pteprot(region);
+
+ pte_t pte, rpte = null_pte;
+ rpte = region_tweakpte(region, rpte);
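+ // walk both mappings: alias pages already present in the remote vms,
+ // otherwise allocate a frame and install it on both sides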
for (size_t i = 0; i < size_pn; i++)
{
- pte_t pte = vmm_tryptep(rptep, PAGE_SIZE);
+ pte = vmm_tryptep(rptep, PAGE_SIZE);
if (pte_isloaded(pte)) {
set_pte(lptep, pte);
continue;
ptr_t pa = ppage_addr(pmm_alloc_normal(0));
set_pte(lptep, mkpte(pa, KERNEL_DATA));
- set_pte(rptep, mkpte(pa, pattr));
+ set_pte(rptep, pte_setpaddr(rpte, pa));
}
return vm_mnt;