#include <lunaix/mm/procvm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/region.h>
#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>

#include <sys/mm/mm_defs.h>

#include <klibc/string.h>
procvm_create(struct proc_info* proc) {
    struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));

    llist_init_head(&mm->regions);
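
/*
 * Extra PTE slots spanned by a leaflet at the given table level, beyond the
 * entry just visited; lets a table walk skip over slots that a large
 * mapping already covers.
 */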
static inline unsigned int
__ptep_advancement(struct leaflet* leaflet, int level)
{
    size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT;
    return (1 << (leaflet_order(leaflet) % shifts)) - 1;
}
vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel)
{
    pte_t* ptep_dest = mkl0tep(mkptep_va(dest_mnt, 0));
    pte_t* ptep = mkl0tep(mkptep_va(src_mnt, 0));
    pte_t* ptepd_kernel = mkl0tep(mkptep_va(dest_mnt, KERNEL_RESIDENT));
    pte_t* ptep_kernel = mkl0tep(mkptep_va(src_mnt, KERNEL_RESIDENT));
    // Build the self-reference on dest vms

    /*
     * -- What the heck are ptep_ssm and ptep_sms? --
     *
     * ptep_dest points to the page table itself, which is mounted
     * at dest_mnt (or simply mnt):
     *      mnt -> self -> self -> self -> L0TE@offset
     *
     * ptep_sms shifts the mount one hop down the recursion chain:
     *      self -> mnt -> self -> self -> L0TE@self
     *
     * ptep_ssm shifts the mount another hop down:
     *      self -> self -> mnt -> self -> L0TE@self
     *
     * Now, here is the problem: back on x86_32, the translation is
     * only a depth-3 recursion, so ptep_ssm will terminate at mnt
     * and give us a leaf slot for allocating a fresh page table
     * for mnt:
     *      self -> self -> L0TE@mnt
     *
     * but on x86_64 the translation has two extra steps:
     *      L0T -> L1T -> L2T -> LFT -> Page
     *
     * so we must keep pushing the mount down.
     * ptep_sssms shifts it yet another hop:
     *      self -> self -> self -> mnt -> L0TE@self
     *
     * ptep_ssssm shifts it once more, terminating at the L0 entry
     * of mnt:
     *      self -> self -> self -> self -> L0TE@mnt
     *
     * Note: PML4 (4-level paging): 2 extra steps
     */
    pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
    pte_t* ptep_sms = mkl1tep_va(VMS_SELF, dest_mnt) + VMS_SELF_L0TI;
    pte_t pte_sms = mkpte_prot(KERNEL_PGTAB);
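
    // Allocate the fresh root table through the ssm slot, then plant the
    // self-reference into that new root through the sms slot.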
    pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
    set_pte(ptep_sms, pte_sms);

    tlb_flush_kernel((ptr_t)dest_mnt);
    tlb_flush_kernel((ptr_t)ptep_sms);

    ptep_dest += ptep_vfn(ptep_kernel);

    struct leaflet* leaflet;
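
    // Walk the source tables below the kernel region: leaf and huge
    // mappings are copied and their leaflets borrowed, while non-leaf
    // entries get a freshly allocated page table on the destination side.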
    while (ptep < ptep_kernel)
    {
        if (pte_isnull(pte)) {

        if (pt_last_level(level) || pte_huge(pte)) {
            set_pte(ptep_dest, pte);

            if (pte_isloaded(pte)) {
                leaflet = pte_leaflet(pte);
                assert(leaflet_refcount(leaflet));

                if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
                    leaflet_borrow(leaflet);

        else if (!pt_last_level(level)) {
            alloc_kpage_at(ptep_dest, pte, 0);

            ptep = ptep_step_into(ptep);
            ptep_dest = ptep_step_into(ptep_dest);
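
        // Reached the end of the current table: pop back up to the parent
        // level (possibly several levels at once).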
        while (ptep_vfn(ptep) == MAX_PTEN - 1) {
            ptep = ptep_step_out(ptep);
            ptep_dest = ptep_step_out(ptep_dest);

    // Ensure we step back to L0T
    assert(ptep_dest == ptepd_kernel);
    // Carry over the kernel mappings (excluding the last two entries)
    unsigned int i = ptep_vfn(ptep);
    while (i++ < MAX_PTEN) {

        if (l0tep_implie_vmnts(ptep)) {

        assert(!pte_isnull(pte));

        // Ensure it is a next-level page table; we MAY relax this later
        // to allow the kernel to have huge leaflets mapped at L0T.
        leaflet = pte_leaflet_aligned(pte);
        assert(leaflet_order(leaflet) == 0);

        set_pte(ptep_dest, pte);
        leaflet_borrow(leaflet);

    return pte_paddr(pte_sms);
}
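
/*
 * Tear down a mounted virtual memory space: walk the user half, returning
 * every loaded leaflet and intermediate page table, then release the root
 * through its self-reference entry.
 */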
vmsfree(ptr_t vm_mnt)
{
    struct leaflet* leaflet;
    pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0));
    pte_t* ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
    pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT));

    pte_t* ptep = ptep_head;
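
    // Release the user portion: step into intermediate tables, return each
    // loaded leaflet, and skip slots already covered by large mappings.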
    while (ptep < ptep_kernel)
    {
        ptr_t pa = pte_paddr(pte);

        if (pte_isnull(pte)) {

        if (!pt_last_level(level) && !pte_huge(pte)) {
            ptep = ptep_step_into(ptep);

        if (pte_isloaded(pte)) {
            leaflet = pte_leaflet_aligned(pte);
            leaflet_return(leaflet);

        ptep += __ptep_advancement(leaflet, level);

        while (ptep_vfn(ptep) == MAX_PTEN - 1) {
            ptep = ptep_step_out(ptep);
            leaflet = pte_leaflet_aligned(pte_at(ptep));

            assert(leaflet_order(leaflet) == 0);
            leaflet_return(leaflet);

    leaflet = pte_leaflet_aligned(pte_at(ptep_self));
    leaflet_return(leaflet);
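
/*
 * Record (or clear) the mm being mounted as the guest of the current
 * process's own mm, so double mounting can be caught by the asserts below.
 */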
__attach_to_current_vms(struct proc_mm* guest_mm)
{
    struct proc_mm* mm_current = vmspace(__current);

    assert(!mm_current->guest_mm);
    mm_current->guest_mm = guest_mm;
}

__detach_from_current_vms(struct proc_mm* guest_mm)
{
    struct proc_mm* mm_current = vmspace(__current);

    assert(mm_current->guest_mm == guest_mm);
    mm_current->guest_mm = NULL;
}
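
/*
 * Duplicate the current address space into mm: copy the page tables with
 * vmscpy() and clone the region list, leaving the copy mounted at
 * VMS_MOUNT_1.
 */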
procvm_dupvms_mount(struct proc_mm* mm) {
    struct proc_mm* mm_current = vmspace(__current);

    __attach_to_current_vms(mm);

    mm->heap = mm_current->heap;
    mm->vm_mnt = VMS_MOUNT_1;
    mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, false);

    region_copy_mm(mm_current, mm);
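
/*
 * Mount an existing vmroot at VMS_MOUNT_1, guarding against the case where
 * the space is already active or already mounted.
 */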
procvm_mount(struct proc_mm* mm)
{
    // if current mm is already active
    if (active_vms(mm->vm_mnt)) {

    // we are double mounting
    vms_mount(VMS_MOUNT_1, mm->vmroot);

    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
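
/*
 * Undo procvm_mount(): unmount VMS_MOUNT_1 and drop the guest link from the
 * current process's mm.
 */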
procvm_unmount(struct proc_mm* mm)
{
    if (active_vms(mm->vm_mnt)) {

    vms_unmount(VMS_MOUNT_1);

    struct proc_mm* mm_current = vmspace(__current);

    mm_current->guest_mm = NULL;
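
/*
 * Build a fresh space containing only the kernel mappings (vmscpy with
 * only_kernel set) and leave it mounted at VMS_MOUNT_1.
 */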
procvm_initvms_mount(struct proc_mm* mm)
{
    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
    mm->vmroot = vmscpy(VMS_MOUNT_1, VMS_SELF, true);
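
/*
 * Sync every region's pages back to their backing store before the mounted
 * space is torn down, then detach it from the current process.
 */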
procvm_unmount_release(struct proc_mm* mm) {
    ptr_t vm_mnt = mm->vm_mnt;
    struct mm_region *pos, *n;
    llist_for_each(pos, n, &mm->regions, head)
    {
        mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);

    __detach_from_current_vms(mm);
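
/*
 * Mounting a space onto itself: nothing to map, simply record VMS_SELF as
 * the mount point.
 */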
procvm_mount_self(struct proc_mm* mm)
{
    mm->vm_mnt = VMS_SELF;
}

procvm_unmount_self(struct proc_mm* mm)
{
    assert(active_vms(mm->vm_mnt));
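
/*
 * Prepare a remote-copy window: for every page of [remote_base,
 * remote_base + size) in the target space, ensure a physical page is
 * present (allocating one when it is not) and alias it at PG_MOUNT_VAR in
 * the current space so it can be written locally.
 */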
procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
                    ptr_t remote_base, size_t size)
{
    ptr_t vm_mnt = mm->vm_mnt;

    pfn_t size_pn = pfn(size + PAGE_SIZE);
    assert(size_pn < REMOTEVM_MAX_PAGES);

    struct mm_region* region = region_get(&mm->regions, remote_base);
    assert(region && region_contains(region, remote_base + size));

    rvmctx->vms_mnt = vm_mnt;
    rvmctx->page_cnt = size_pn;

    remote_base = page_aligned(remote_base);
    rvmctx->remote = remote_base;
    rvmctx->local_mnt = PG_MOUNT_VAR;

    pte_t* rptep = mkptep_va(vm_mnt, remote_base);
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);

    pte_t pte, rpte = null_pte;
    rpte = region_tweakpte(region, rpte);

    for (size_t i = 0; i < size_pn; i++)
    {
        pte = vmm_tryptep(rptep, PAGE_SIZE);
        if (pte_isloaded(pte)) {

        ptr_t pa = ppage_addr(pmm_alloc_normal(0));
        set_pte(lptep, mkpte(pa, KERNEL_DATA));
        set_pte(rptep, pte_setpaddr(rpte, pa));
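
/*
 * Copy sz bytes from a local kernel buffer into the remote window set up by
 * procvm_enter_remote(), rejecting destinations outside the mapped range.
 */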
procvm_copy_remote_transaction(struct remote_vmctx* rvmctx,
                               ptr_t remote_dest, void* local_src, size_t sz)
{
    if (remote_dest < rvmctx->remote) {

    ptr_t offset = remote_dest - rvmctx->remote;
    if (pfn(offset + sz) >= rvmctx->page_cnt) {

    memcpy((void*)(rvmctx->local_mnt + offset), local_src, sz);
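
/*
 * Close the remote-copy window by unmapping the temporary local aliases.
 */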
procvm_exit_remote(struct remote_vmctx* rvmctx)
{
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
    vmm_unset_ptes(lptep, rvmctx->page_cnt);