#include <lunaix/mm/procvm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/region.h>
#include <lunaix/mm/page.h>
#include <lunaix/mm/mmap.h>
#include <lunaix/process.h>

#include <asm/mm_defs.h>

#include <klibc/string.h>
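
/*
 * Create an empty process memory descriptor: zero-allocate the
 * struct proc_mm and initialise its region list.
 */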
procvm_create(struct proc_info* proc) {
    struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));

    llist_init_head(&mm->regions);
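
/*
 * Compute how many extra PTE slots a multi-page leaflet spans at the
 * given level, so callers can advance their pte pointer past entries
 * already covered by that leaflet.
 */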
static inline unsigned int
__ptep_advancement(struct leaflet* leaflet, int level)
    size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT;
    return (1 << (leaflet_order(leaflet) % shifts)) - 1;
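
/*
 * Walk down the paging hierarchy towards @va under src_mnt, stopping at
 * the last level or at a huge/unloaded entry. When @alloc is set, missing
 * page tables on the dest_mnt side are allocated on the way down. The
 * level reached is handed back to the caller.
 */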
__descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc)
    pte_t *dest, *src, pte;

    while (!pt_last_level(i))
        dest = mklntep_va(i, dest_mnt, va);
        src = mklntep_va(i, src_mnt, va);

        if (!pte_isloaded(pte) || pte_huge(pte)) {

        if (alloc && pte_isnull(pte_at(dest))) {
            alloc_kpage_at(dest, pte, 0);
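
/*
 * Duplicate a leaf mapping: instead of copying page contents, take an
 * extra reference on the backing leaflet so it is shared with the source.
 */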
copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level)
    struct leaflet* leaflet;

    if (!pte_isloaded(pte)) {

    leaflet = pte_leaflet(pte);
    assert(leaflet_refcount(leaflet));

    if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) {
        leaflet_borrow(leaflet);
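
/*
 * Duplicate a non-leaf entry: allocate a fresh kernel page at @dest to act
 * as the next-level page table.
 */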
copy_root(pte_t* dest, pte_t* src, pte_t pte, int level)
    alloc_kpage_at(dest, pte, 0);
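
/*
 * Copy the page-table mappings backing @region from the VM space mounted
 * at src_mnt into the one mounted at dest_mnt, descending into (and
 * allocating) intermediate tables as needed and stepping back out at
 * table boundaries.
 */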
vmrcpy(ptr_t dest_mnt, ptr_t src_mnt, struct mm_region* region)
    struct leaflet* leaflet;

    src = mkptep_va(src_mnt, loc);
    dest = mkptep_va(dest_mnt, loc);

    level = __descend(dest_mnt, src_mnt, loc, true);

    while (loc < region->end)
        if (pte_isnull(pte)) {

        if (pt_last_level(level) || pte_huge(pte)) {
            copy_leaf(dest, src, pte, level);

        if (!pt_last_level(level)) {
            copy_root(dest, src, pte, level);

            src = ptep_step_into(src);
            dest = ptep_step_into(dest);

        loc += lnt_page_size(level);

        while (ptep_vfn(src) == MAX_PTEN - 1) {
            src = ptep_step_out(src);
            dest = ptep_step_out(dest);
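
/*
 * Release the mappings backing @region in the VM space mounted at vm_mnt:
 * every mapped leaflet is returned, and page-table pages are returned on
 * the way back out of each fully-walked table.
 */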
vmrfree(ptr_t vm_mnt, struct mm_region* region)
    struct leaflet* leaflet;

    src = mkptep_va(vm_mnt, region->start);
    end = mkptep_va(vm_mnt, region->end);

    level = __descend(0, vm_mnt, loc, false);

    ptr_t pa = pte_paddr(pte);

    if (pte_isnull(pte)) {

    if (!pt_last_level(level) && !pte_huge(pte)) {
        src = ptep_step_into(src);

    if (pte_isloaded(pte)) {
        leaflet = pte_leaflet_aligned(pte);
        leaflet_return(leaflet);

    src += __ptep_advancement(leaflet, level);

    while (ptep_vfn(src) == MAX_PTEN - 1) {
        src = ptep_step_out(src);
        leaflet = pte_leaflet_aligned(pte_at(src));

        assert(leaflet_order(leaflet) == 0);
        leaflet_return(leaflet);
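
/*
 * Clone an entire VM space: build the self-referencing root for the
 * destination, copy every region of the source into it, then link the
 * kernel portion into the new root.
 */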
vmscpy(struct proc_mm* dest_mm, struct proc_mm* src_mm)
    // Build the self-reference on dest vms

    /*
     * -- What the heck are ptep_ssm and ptep_sms? --
     *
     * ptep_dest points to the page table itself that is mounted
     * at dest_mnt (or simply mnt):
     *      mnt -> self -> self -> self -> L0TE@offset
     *
     * ptep_sms shallows the recursion chain:
     *      self -> mnt -> self -> self -> L0TE@self
     *
     * ptep_ssm shallows the recursion chain:
     *      self -> self -> mnt -> self -> L0TE@self
     *
     * Now, here is the problem: back on x86_32, the translation is
     * a depth-3 recursion:
     *
     * So ptep_ssm will terminate at mnt and give us a leaf
     * slot for allocating a fresh page table for mnt:
     *      self -> self -> L0TE@mnt
     *
     * but on x86_64 the translation has two extra steps:
     *      L0T -> L1T -> L2T -> LFT -> Page
     *
     * So we must keep pushing down....
     * ptep_sssms shallows the recursion chain:
     *      self -> self -> self -> mnt -> L0TE@self
     *
     * ptep_ssssm shallows the recursion chain:
     *      self -> self -> self -> self -> L0TE@mnt
     *
     * Note: PML4: 2 extra steps
     *       PML5: 3 extra steps
     */

    ptr_t dest_mnt, src_mnt;

    dest_mnt = dest_mm->vm_mnt;

    pte_t* ptep_ssm = mkl0tep_va(VMS_SELF, dest_mnt);
    pte_t* ptep_smx = mkl1tep_va(VMS_SELF, dest_mnt);
    pte_t pte_sms = mkpte_prot(KERNEL_PGTAB);

    pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
    set_pte(&ptep_smx[VMS_SELF_L0TI], pte_sms);

    tlb_flush_kernel((ptr_t)dest_mnt);

    src_mnt = src_mm->vm_mnt;

    struct mm_region *pos, *n;
    llist_for_each(pos, n, &src_mm->regions, head)
        vmrcpy(dest_mnt, src_mnt, pos);

    procvm_link_kernel(dest_mnt);

    dest_mm->vmroot = pte_paddr(pte_sms);
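
/*
 * Tear down a VM space: free every region, unlink the kernel portion,
 * then return the root page-table leaflet itself.
 */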
vmsfree(struct proc_mm* mm)
    struct leaflet* leaflet;

    ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));

    struct mm_region *pos, *n;
    llist_for_each(pos, n, &mm->regions, head)
        vmrfree(vm_mnt, pos);

    procvm_unlink_kernel();

    leaflet = pte_leaflet_aligned(pte_at(ptep_self));
    leaflet_return(leaflet);
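
/*
 * Guest-VMS bookkeeping: record (or clear) which foreign VM space is
 * currently mounted into the running process' address space.
 */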
__attach_to_current_vms(struct proc_mm* guest_mm)
    struct proc_mm* mm_current = vmspace(__current);

    assert(!mm_current->guest_mm);
    mm_current->guest_mm = guest_mm;

__detach_from_current_vms(struct proc_mm* guest_mm)
    struct proc_mm* mm_current = vmspace(__current);

    assert(mm_current->guest_mm == guest_mm);
    mm_current->guest_mm = NULL;
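
/*
 * Duplicate the current process' VM space into @mm and leave the copy
 * mounted at VMS_MOUNT_1 (presumably the fork path).
 */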
procvm_dupvms_mount(struct proc_mm* mm) {
    struct proc_mm* mm_current = vmspace(__current);

    __attach_to_current_vms(mm);

    mm->heap = mm_current->heap;
    mm->vm_mnt = VMS_MOUNT_1;

    vmscpy(mm, mm_current);
    region_copy_mm(mm_current, mm);
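
/*
 * Mount the VM space rooted at mm->vmroot onto the VMS_MOUNT_1 window of
 * the current address space.
 */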
procvm_mount(struct proc_mm* mm)
    // if current mm is already active
    if (active_vms(mm->vm_mnt)) {

    // we are double mounting

    vms_mount(VMS_MOUNT_1, mm->vmroot);

    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
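
/*
 * Undo procvm_mount: unmount the VMS_MOUNT_1 window and clear the guest
 * record on the current VM space.
 */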
procvm_unmount(struct proc_mm* mm)
    if (active_vms(mm->vm_mnt)) {

    vms_unmount(VMS_MOUNT_1);

    struct proc_mm* mm_current = vmspace(__current);

    mm_current->guest_mm = NULL;
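
/*
 * Mount a newly initialised VM space at VMS_MOUNT_1 so it can be
 * populated before it is first switched to.
 */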
procvm_initvms_mount(struct proc_mm* mm)
    __attach_to_current_vms(mm);

    mm->vm_mnt = VMS_MOUNT_1;
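
/*
 * Unmount and release a VM space: sync each region's pages back to their
 * backing store, then detach it from the current VM space.
 */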
procvm_unmount_release(struct proc_mm* mm) {
    ptr_t vm_mnt = mm->vm_mnt;

    struct mm_region *pos, *n;
    llist_for_each(pos, n, &mm->regions, head)
        mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);

    __detach_from_current_vms(mm);
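
/*
 * Mount/unmount the process' own, currently active VM space through the
 * VMS_SELF window instead of a foreign mount point.
 */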
procvm_mount_self(struct proc_mm* mm)
    mm->vm_mnt = VMS_SELF;

procvm_unmount_self(struct proc_mm* mm)
    assert(active_vms(mm->vm_mnt));
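
/*
 * Begin a remote-copy transaction: locate the region covering
 * [remote_base, remote_base + size), then map each page of that range
 * into a local window at PG_MOUNT_VAR (allocating backing pages where
 * none are present), so the kernel can write the other address space
 * directly.
 */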
procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
                    ptr_t remote_base, size_t size)
    ptr_t vm_mnt = mm->vm_mnt;

    pfn_t size_pn = pfn(size + PAGE_SIZE);
    assert(size_pn < REMOTEVM_MAX_PAGES);

    struct mm_region* region = region_get(&mm->regions, remote_base);
    assert(region && region_contains(region, remote_base + size));

    rvmctx->vms_mnt = vm_mnt;
    rvmctx->page_cnt = size_pn;

    remote_base = page_aligned(remote_base);
    rvmctx->remote = remote_base;
    rvmctx->local_mnt = PG_MOUNT_VAR;

    pte_t* rptep = mkptep_va(vm_mnt, remote_base);
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);

    pte_t pte, rpte = null_pte;
    rpte = region_tweakpte(region, rpte);

    for (size_t i = 0; i < size_pn; i++)
        pte = vmm_tryptep(rptep, PAGE_SIZE);
        if (pte_isloaded(pte)) {

        ptr_t pa = ppage_addr(pmm_alloc_normal(0));
        set_pte(lptep, mkpte(pa, KERNEL_DATA));
        set_pte(rptep, pte_setpaddr(rpte, pa));
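
/*
 * Copy @sz bytes from @local_src to the remote address @remote_dest via
 * the local window prepared by procvm_enter_remote, refusing destinations
 * that fall outside the prepared range.
 */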
procvm_copy_remote_transaction(struct remote_vmctx* rvmctx,
                               ptr_t remote_dest, void* local_src, size_t sz)
    if (remote_dest < rvmctx->remote) {

    ptr_t offset = remote_dest - rvmctx->remote;
    if (pfn(offset + sz) >= rvmctx->page_cnt) {

    memcpy((void*)(rvmctx->local_mnt + offset), local_src, sz);
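
/*
 * End a remote-copy transaction by tearing down the local window mappings.
 */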
procvm_exit_remote(struct remote_vmctx* rvmctx)
    pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt);
    vmm_unset_ptes(lptep, rvmctx->page_cnt);