#include <lunaix/mm/procvm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/region.h>
#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>
#include <lunaix/syslog.h>

#include <asm/mm_defs.h>

#include <klibc/string.h>

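/*
 * Small helpers wrapping allocation and release of a single page-table page
 * behind a given ptep; the otherwise unused `level` argument is presumably
 * kept around for tracing.
 */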
#define alloc_pagetable_trace(ptep, pte, ord, level) \
    ({ \
        alloc_kpage_at(ptep, pte, ord); \
    })

#define free_pagetable_trace(ptep, pte, level) \
    ({ \
        struct leaflet* leaflet = pte_leaflet_aligned(pte); \
        assert(leaflet_order(leaflet) == 0); \
        leaflet_return(leaflet); \
        set_pte(ptep, null_pte); \
    })

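/*
 * Allocate and minimally initialise the mm state for a new process.
 */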
struct proc_mm*
procvm_create(struct proc_info* proc) {
    struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));

    assert(mm);

    mm->proc = proc;
    llist_init_head(&mm->regions);

    return mm;
}

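/*
 * Given a leaflet mapped at `level`, work out how many extra ptep slots the
 * same leaflet covers beyond the current one, so a table walk can skip them.
 */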
static inline unsigned int
__ptep_advancement(struct leaflet* leaflet, int level)
{
    size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT;
    return (1 << (leaflet_order(leaflet) % shifts)) - 1;
}

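/*
 * Walk the src and dest tables down from the top level along `va`, stopping
 * at the last level, an unmapped entry, or a huge mapping. When `alloc` is
 * set, missing intermediate tables are allocated on the dest side.
 * Returns the level at which the walk stopped.
 */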
static int
__descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc)
{
    pte_t *dest, *src, pte;
    int i = 0;

    while (!pt_last_level(i))
    {
        dest = mklntep_va(i, dest_mnt, va);
        src = mklntep_va(i, src_mnt, va);

        pte = pte_at(src);
        if (!pte_isloaded(pte) || pte_huge(pte)) {
            break;
        }

        if (alloc && pte_isnull(pte_at(dest))) {
            alloc_pagetable_trace(dest, pte, 0, i);
        }

        i++;
    }

    return i;
}

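/*
 * Share a leaf (or huge) mapping with the destination table. The backing
 * leaflet gains a reference when the pte points at the leaflet head, so the
 * frame is not freed while both address spaces still map it.
 */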
static void
copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level)
{
    struct leaflet* leaflet;

    set_pte(dest, pte);

    if (!pte_isloaded(pte)) {
        return;
    }

    leaflet = pte_leaflet(pte);
    assert(leaflet_refcount(leaflet));

    // only the leaflet head carries the refcount; borrow it once
    if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
        leaflet_borrow(leaflet);
    }
}

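/*
 * A non-leaf entry in the source: give the destination its own fresh
 * page-table page at the same slot, ready to be descended into.
 */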
static void
copy_root(pte_t* dest, pte_t* src, pte_t pte, int level)
{
    alloc_pagetable_trace(dest, pte, 0, level);
}

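/*
 * Copy all mappings of a single region from the vms mounted at src_mnt into
 * the vms mounted at dest_mnt, walking both page tables in lockstep.
 */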
static void
vmrcpy(ptr_t dest_mnt, ptr_t src_mnt, struct mm_region* region)
{
    pte_t *dest, *src, pte;
    int level;
    ptr_t loc = region->start;
    struct leaflet* leaflet;

    src = mkptep_va(src_mnt, loc);
    dest = mkptep_va(dest_mnt, loc);

    level = __descend(dest_mnt, src_mnt, loc, true);

    while (loc < region->end)
    {
        pte = pte_at(src);

        if (pte_isnull(pte)) {
            goto next;
        }

        if (pt_last_level(level) || pte_huge(pte)) {
            copy_leaf(dest, src, pte, level);
            goto next;
        }

        if (!pt_last_level(level)) {
            copy_root(dest, src, pte, level);

            src = ptep_step_into(src);
            dest = ptep_step_into(dest);
            level++;

            continue;
        }

    next:
        loc += lnt_page_size(level);
        while (ptep_vfn(src) == MAX_PTEN - 1) {
            src = ptep_step_out(src);
            dest = ptep_step_out(dest);
            level--;
        }

        src++;
        dest++;
    }
}

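/*
 * Unmap a region from the vms mounted at vm_mnt, returning the backing
 * leaflets and any page-table pages that become fully vacated.
 */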
static void
vmrfree(ptr_t vm_mnt, struct mm_region* region)
{
    pte_t *src, *end, pte;
    int level;
    ptr_t loc = region->start;
    struct leaflet* leaflet;

    src = mkptep_va(vm_mnt, region->start);
    end = mkptep_va(vm_mnt, region->end);

    level = __descend(vm_mnt, vm_mnt, loc, false);

    while (src < end)
    {
        pte = pte_at(src);
        ptr_t pa = pte_paddr(pte);

        if (pte_isnull(pte)) {
            goto next;
        }

        if (!pt_last_level(level) && !pte_huge(pte)) {
            src = ptep_step_into(src);
            level++;
            continue;
        }

        set_pte(src, null_pte);

        if (pte_isloaded(pte)) {
            leaflet = pte_leaflet_aligned(pte);
            leaflet_return(leaflet);

            src += __ptep_advancement(leaflet, level);
        }

    next:
        while (ptep_vfn(src) == MAX_PTEN - 1) {
            src = ptep_step_out(src);
            free_pagetable_trace(src, pte_at(src), level);
            level--;
        }

        src++;
    }
}

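/*
 * Clone src_mm's virtual memory space into dest_mm: build the new root table
 * and its self-reference through the mount window, copy every user region,
 * then link in the shared kernel mappings.
 */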
static void
vmscpy(struct proc_mm* dest_mm, struct proc_mm* src_mm)
{
    // Build the self-reference on dest vms

    /*
     * -- What the heck are ptep_ssm and ptep_sms? --
     *
     * ptep_dest points to the page table itself, which is mounted
     * at dest_mnt (or simply mnt):
     *      mnt -> self -> self -> self -> L0TE@offset
     *
     * ptep_sms pushes mnt one step down the recursion chain:
     *      self -> mnt -> self -> self -> L0TE@self
     *
     * ptep_ssm pushes mnt another step down:
     *      self -> self -> mnt -> self -> L0TE@self
     *
     * Now, here is the problem: back on x86_32, the translation is
     * a depth-3 recursion:
     *      L0T -> LFT -> Page
     *
     * So ptep_ssm will terminate at mnt and give us a leaf
     * slot for allocating a fresh page table for mnt:
     *      self -> self -> L0TE@mnt
     *
     * But on x86_64 the translation has two extra steps:
     *      L0T -> L1T -> L2T -> LFT -> Page
     *
     * So we must keep pushing down...
     * ptep_sssms pushes mnt yet another step down:
     *      self -> self -> self -> mnt -> L0TE@self
     *
     * ptep_ssssm pushes mnt all the way down:
     *      self -> self -> self -> self -> L0TE@mnt
     *
     * Note: PML4: 2 extra steps
     *       PML5: 3 extra steps
     */

    ptr_t dest_mnt, src_mnt;

    dest_mnt = dest_mm->vm_mnt;

    pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
    pte_t* ptep_smx = mkl1tep_va(VMS_SELF, dest_mnt);
    pte_t pte_sms = mkpte_prot(KERNEL_PGTAB);

    pte_sms = alloc_pagetable_trace(ptep_ssm, pte_sms, 0, 0);
    set_pte(&ptep_smx[VMS_SELF_L0TI], pte_sms);

    tlb_flush_kernel((ptr_t)dest_mnt);

    // now copy over every region mapped in the src vms

    src_mnt = src_mm->vm_mnt;

    struct mm_region *pos, *n;
    llist_for_each(pos, n, &src_mm->regions, head)
    {
        vmrcpy(dest_mnt, src_mnt, pos);
    }

    procvm_link_kernel(dest_mnt);

    dest_mm->vmroot = pte_paddr(pte_sms);
}

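/*
 * Recursively scan the table at `level` and return any page-table pages
 * still allocated below it, skipping entries that implement the vms
 * self-reference and mount windows.
 */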
static void
__purge_vms_residual(struct proc_mm* mm, int level, ptr_t va)
{
    pte_t *ptep, pte;
    ptr_t _va;

    if (level >= MAX_LEVEL) {
        return;
    }

    ptep = mklntep_va(level, mm->vm_mnt, va);

    for (unsigned i = 0; i < LEVEL_SIZE; i++, ptep++)
    {
        pte = pte_at(ptep);

        if (pte_isnull(pte) || !pte_isloaded(pte)) {
            continue;
        }

        // never touch the self-reference or the vms mount windows
        if (lntep_implie_vmnts(ptep, lnt_page_size(level))) {
            continue;
        }

        _va = va + (i * lnt_page_size(level));
        __purge_vms_residual(mm, level + 1, _va);

        set_pte(ptep, null_pte);
        leaflet_return(pte_leaflet_aligned(pte));
    }
}

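/*
 * Tear down an entire vms: free every region's mappings, unlink the shared
 * kernel part, purge leftover intermediate tables, and finally return the
 * root table itself.
 */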
static void
vmsfree(struct proc_mm* mm)
{
    struct leaflet* leaflet;
    struct mm_region *pos, *n;
    pte_t* ptep_self;
    ptr_t vm_mnt = mm->vm_mnt;

    ptep_self = mkl0tep_va(vm_mnt, VMS_SELF);

    // first pass: free region mappings
    llist_for_each(pos, n, &mm->regions, head)
    {
        vmrfree(vm_mnt, pos);
    }

    procvm_unlink_kernel(vm_mnt);

    // free up all allocated tables on intermediate levels
    __purge_vms_residual(mm, 0, 0);

    // finally, return the root table itself
    free_pagetable_trace(ptep_self, pte_at(ptep_self), 0);
}

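/*
 * Bookkeeping for mounting a foreign ("guest") vms: the current process's mm
 * remembers which guest is mounted so it can be found and unmounted later.
 */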
static void
__attach_to_current_vms(struct proc_mm* guest_mm)
{
    struct proc_mm* mm_current = vmspace(__current);

    assert(!mm_current->guest_mm);
    mm_current->guest_mm = guest_mm;
}

static void
__detach_from_current_vms(struct proc_mm* guest_mm)
{
    struct proc_mm* mm_current = vmspace(__current);

    assert(mm_current->guest_mm == guest_mm);
    mm_current->guest_mm = NULL;
}

void
procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region)
{
    vmrfree(vm_mnt, region);
}

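/*
 * Duplicate the current process's vms into `mm` (the fork path): the copy is
 * built through VMS_MOUNT_1 and left mounted there, with region descriptors
 * duplicated alongside the page tables.
 */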
void
procvm_dupvms_mount(struct proc_mm* mm) {
    struct proc_mm* mm_current = vmspace(__current);

    __attach_to_current_vms(mm);

    mm->heap = mm_current->heap;
    mm->vm_mnt = VMS_MOUNT_1;

    vmscpy(mm, mm_current);
    region_copy_mm(mm_current, mm);
}

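/*
 * Mount another process's vms at VMS_MOUNT_1 so the kernel can manipulate it
 * while the current address space stays active.
 */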
void
procvm_mount(struct proc_mm* mm)
{
    // if current mm is already active
    if (active_vms(mm->vm_mnt)) {
        return;
    }

    // otherwise we would be double mounting
    assert(!mm->vm_mnt);

    vms_mount(VMS_MOUNT_1, mm->vmroot);

    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
}

void
procvm_unmount(struct proc_mm* mm)
{
    if (active_vms(mm->vm_mnt)) {
        return;
    }

    vms_unmount(VMS_MOUNT_1);

    struct proc_mm* mm_current = vmspace(__current);
    mm_current->guest_mm = NULL;

    mm->vm_mnt = 0;
}

void
procvm_initvms_mount(struct proc_mm* mm)
{
    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
}

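/*
 * Unmount a guest vms and release everything it owns: write back mapped
 * pages, free the page tables, and drop the region descriptors.
 */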
void
procvm_unmount_release(struct proc_mm* mm) {
    ptr_t vm_mnt = mm->vm_mnt;
    struct mm_region *pos, *n;

    llist_for_each(pos, n, &mm->regions, head)
    {
        mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);
    }

    // assumed ordering: tear down the page tables, then drop the regions
    vmsfree(mm);

    llist_for_each(pos, n, &mm->regions, head)
    {
        region_release(pos);
    }

    vms_unmount(VMS_MOUNT_1);
    __detach_from_current_vms(mm);

    mm->vm_mnt = 0;
}

void
procvm_mount_self(struct proc_mm* mm)
{
    assert(!mm->vm_mnt);

    mm->vm_mnt = VMS_SELF;
}

void
procvm_unmount_self(struct proc_mm* mm)
{
    assert(active_vms(mm->vm_mnt));

    mm->vm_mnt = 0;
}

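/*
 * Remote-access helpers: procvm_enter_remote() maps a window of another
 * process's address space at PG_MOUNT_VAR in the current vms, allocating
 * backing frames for pages that are not present yet, so the kernel can push
 * data into the remote process's memory.
 */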
void
procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
                    ptr_t remote_base, size_t size)
{
    ptr_t vm_mnt = mm->vm_mnt;

    pfn_t size_pn = pfn(size + PAGE_SIZE);
    assert(size_pn < REMOTEVM_MAX_PAGES);

    struct mm_region* region = region_get(&mm->regions, remote_base);
    assert(region && region_contains(region, remote_base + size));

    rvmctx->vms_mnt = vm_mnt;
    rvmctx->page_cnt = size_pn;

    remote_base = page_aligned(remote_base);
    rvmctx->remote = remote_base;
    rvmctx->local_mnt = PG_MOUNT_VAR;

    pte_t* rptep = mkptep_va(vm_mnt, remote_base);
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);

    pte_t pte, rpte = null_pte;
    rpte = region_tweakpte(region, rpte);

    for (size_t i = 0; i < size_pn; i++)
    {
        pte = vmm_tryptep(rptep, PAGE_SIZE);
        if (pte_isloaded(pte)) {
            // already backed remotely: just mirror the mapping locally
            set_pte(lptep, pte);
        }
        else {
            // not yet backed: allocate a frame and map it on both sides
            ptr_t pa = ppage_addr(pmm_alloc_normal(0));
            set_pte(lptep, mkpte(pa, KERNEL_DATA));
            set_pte(rptep, pte_setpaddr(rpte, pa));
        }

        rptep++;
        lptep++;
    }
}

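/*
 * Copy sz bytes from a kernel-local buffer into the remote window prepared
 * by procvm_enter_remote(). Writes falling outside the window are rejected;
 * the return convention is presumably bytes copied on success, -1 otherwise.
 */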
int
procvm_copy_remote_transaction(struct remote_vmctx* rvmctx,
                               ptr_t remote_dest, void* local_src, size_t sz)
{
    if (remote_dest < rvmctx->remote) {
        return -1;
    }

    ptr_t offset = remote_dest - rvmctx->remote;
    if (pfn(offset + sz) >= rvmctx->page_cnt) {
        return -1;
    }

    memcpy((void*)(rvmctx->local_mnt + offset), local_src, sz);

    return sz;
}

void
procvm_exit_remote(struct remote_vmctx* rvmctx)
{
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
    vmm_unset_ptes(lptep, rvmctx->page_cnt);
}