fix leakage on mid-level page table when freeing vms
authorLunaixsky <lunaixsky@qq.com>
Thu, 6 Feb 2025 06:05:22 +0000 (06:05 +0000)
committerLunaixsky <lunaixsky@qq.com>
Thu, 6 Feb 2025 06:05:22 +0000 (06:05 +0000)
* fix the mm stats in lunadbg to exclude reserved pages and account for
  composite pages
* reduce the qemu memory config to 32M for faster mm stats while making
  it a sufficient amount of available memory

lunaix-os/includes/lunaix/mm/procvm.h
lunaix-os/includes/lunaix/mm/region.h
lunaix-os/kernel/mm/mmap.c
lunaix-os/kernel/mm/pmalloc_simple.c
lunaix-os/kernel/mm/procvm.c
lunaix-os/kernel/mm/valloc.c
lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py
lunaix-os/scripts/gdb/lunadbg/structs/page.py
lunaix-os/scripts/qemus/qemu_x86_dev.json

index 82a2de35a2e252584b3c847041361ba5aeb7bdd4..8eb81ddc4260fd26759d3ad4e91c4cdc10024f97 100644 (file)
@@ -49,6 +49,9 @@ struct proc_mm
 struct proc_mm*
 procvm_create(struct proc_info* proc);
 
+void
+procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region);
+
 /**
  * @brief Initialize and mount the vm of `proc` to duplication of current process
  * 
index c15f1c3b62947820652154de38b0564fd2119bf1..34e517ea41fbeb747aeb0eabd5c1b13a00945289 100644 (file)
@@ -40,6 +40,16 @@ writable_region(struct mm_region* mm) {
     return !!(mm->attr & (REGION_RSHARED | REGION_WRITE));
 }
 
+static inline bool
+readable_region(struct mm_region* mm) {
+    return !!(mm->attr & (REGION_RSHARED | REGION_READ));
+}
+
+static inline bool
+executable_region(struct mm_region* mm) {
+    return !!(mm->attr & REGION_EXEC);
+}
+
 static inline bool
 shared_writable_region(struct mm_region* mm) {
     return !!(mm->attr & REGION_WSHARED);
index 493d26cf3fe0876df64b1d5594f64fcfda67bd01..ff9e538668cf2206a2563ffaa78bbcc8c979981b 100644 (file)
@@ -349,9 +349,7 @@ mem_unmap_region(ptr_t mnt, struct mm_region* region)
     pfn_t pglen = leaf_count(region->end - region->start);
     mem_sync_pages(mnt, region, region->start, pglen * PAGE_SIZE, 0);
 
-    pte_t* ptep = mkptep_va(mnt, region->start);
-    __remove_ranged_mappings(ptep, pglen);
-
+    procvm_prune_vmr(mnt, region);
     tlb_flush_vmr_all(region);
     
     llist_delete(&region->head);
index 868c4701db79365e8f27f1266239e286f1aa61a5..93fae04fb55945a401ddb20924bda961854757ea 100644 (file)
@@ -127,6 +127,7 @@ pmm_looknext(struct pmem_pool* pool, size_t order)
         page->order = order;
         page->companion = i;
         page->pool = pool->type;
+        page->refs = 0;
         llist_init_head(&page->sibs);
         __set_page_initialized(page);
     }
index 31d9f3f5503ead2155867c4c9c9f01c1294a852f..88f512cc1b32ed92d0058b6cd4cd5056164ac7ef 100644 (file)
@@ -4,11 +4,25 @@
 #include <lunaix/mm/page.h>
 #include <lunaix/mm/mmap.h>
 #include <lunaix/process.h>
+#include <lunaix/syslog.h>
 
 #include <asm/mm_defs.h>
 
 #include <klibc/string.h>
 
+#define alloc_pagetable_trace(ptep, pte, ord, level)                        \
+    ({                                                                      \
+        alloc_kpage_at(ptep, pte, ord);                                     \
+    })
+
+#define free_pagetable_trace(ptep, pte, level)                              \
+    ({                                                                      \
+        struct leaflet* leaflet = pte_leaflet_aligned(pte);                 \
+        assert(leaflet_order(leaflet) == 0);                                \
+        leaflet_return(leaflet);                                            \
+        set_pte(ptep, null_pte);                                            \
+    })
+
 struct proc_mm*
 procvm_create(struct proc_info* proc) {
     struct proc_mm* mm = vzalloc(sizeof(struct proc_mm));
@@ -46,7 +60,7 @@ __descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc)
         }
 
         if (alloc && pte_isnull(pte_at(dest))) {
-            alloc_kpage_at(dest, pte, 0);
+            alloc_pagetable_trace(dest, pte, 0, i);
         }
 
         i++;
@@ -55,6 +69,34 @@ __descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc)
     return i;
 }
 
+static void
+__free_hierarchy(ptr_t mnt, ptr_t va, int level)
+{
+    pte_t pte, *ptep, *ptep_next;
+
+    if (pt_last_level(level)) {
+        return;
+    }
+
+    __free_hierarchy(mnt, va, level + 1);
+
+    ptep = mklntep_va(level, mnt, va);
+    pte = pte_at(ptep);
+    if (pte_isnull(pte)) {
+        return;
+    }
+
+    ptep_next = ptep_step_into(ptep);
+    for (unsigned i = 0; i < LEVEL_SIZE; i++, ptep_next++)
+    {
+        if (!pte_isnull(pte_at(ptep_next))) {
+            return;
+        }
+    }
+    
+    free_pagetable_trace(ptep, pte, level);
+}
+
 static inline void
 copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level)
 {
@@ -77,7 +119,7 @@ copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level)
 static inline void
 copy_root(pte_t* dest, pte_t* src, pte_t pte, int level)
 {
-    alloc_kpage_at(dest, pte, 0);
+    alloc_pagetable_trace(dest, pte, 0, level);
 }
 
 static void
@@ -131,6 +173,12 @@ vmrcpy(ptr_t dest_mnt, ptr_t src_mnt, struct mm_region* region)
     }
 }
 
+static inline void
+vmrfree_hierachy(ptr_t vm_mnt, struct mm_region* region)
+{
+    __free_hierarchy(vm_mnt, region->start, 0);
+}
+
 static void
 vmrfree(ptr_t vm_mnt, struct mm_region* region)
 {
@@ -143,7 +191,7 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region)
     src  = mkptep_va(vm_mnt, region->start);
     end  = mkptep_va(vm_mnt, region->end);
 
-    level = __descend(0, vm_mnt, loc, false);
+    level = __descend(vm_mnt, vm_mnt, loc, false);
 
     while (src < end)
     {
@@ -161,6 +209,8 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region)
             continue;
         }
 
+        set_pte(src, null_pte);
+        
         if (pte_isloaded(pte)) {
             leaflet = pte_leaflet_aligned(pte);
             leaflet_return(leaflet);
@@ -171,10 +221,7 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region)
     cont:
         while (ptep_vfn(src) == MAX_PTEN - 1) {
             src = ptep_step_out(src);
-            leaflet = pte_leaflet_aligned(pte_at(src));
-            
-            assert(leaflet_order(leaflet) == 0);
-            leaflet_return(leaflet);
+            free_pagetable_trace(src, pte_at(src), level);
             
             level--;
         }
@@ -232,7 +279,7 @@ vmscpy(struct proc_mm* dest_mm, struct proc_mm* src_mm)
     pte_t* ptep_smx     = mkl1tep_va(VMS_SELF, dest_mnt);
     pte_t  pte_sms      = mkpte_prot(KERNEL_PGTAB);
 
-    pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0);
+    pte_sms = alloc_pagetable_trace(ptep_ssm, pte_sms, 0, 0);
     set_pte(&ptep_smx[VMS_SELF_L0TI], pte_sms);
     
     tlb_flush_kernel((ptr_t)dest_mnt);
@@ -259,22 +306,28 @@ static void
 vmsfree(struct proc_mm* mm)
 {
     struct leaflet* leaflet;
+    struct mm_region *pos, *n;
     ptr_t vm_mnt;
     pte_t* ptep_self;
     
     vm_mnt    = mm->vm_mnt;
     ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF));
 
-    struct mm_region *pos, *n;
+    // first pass: free region mappings
     llist_for_each(pos, n, &mm->regions, head)
     {
         vmrfree(vm_mnt, pos);
     }
 
+    // second pass: free the hierarchical page tables
+    llist_for_each(pos, n, &mm->regions, head)
+    {
+        vmrfree_hierachy(vm_mnt, pos);
+    }
+
     procvm_unlink_kernel();
 
-    leaflet = pte_leaflet_aligned(pte_at(ptep_self));
-    leaflet_return(leaflet);
+    free_pagetable_trace(ptep_self, pte_at(ptep_self), 0);
 }
 
 static inline void
@@ -297,6 +350,12 @@ __detach_from_current_vms(struct proc_mm* guest_mm)
     }
 }
 
+void
+procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region)
+{
+    vmrfree(vm_mnt, region);
+    vmrfree_hierachy(vm_mnt, region);
+}
 
 void
 procvm_dupvms_mount(struct proc_mm* mm) {
@@ -366,13 +425,19 @@ void
 procvm_unmount_release(struct proc_mm* mm) {
     ptr_t vm_mnt = mm->vm_mnt;
     struct mm_region *pos, *n;
+
     llist_for_each(pos, n, &mm->regions, head)
     {
         mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);
-        region_release(pos);
     }
 
     vmsfree(mm);
+
+    llist_for_each(pos, n, &mm->regions, head)
+    {
+        region_release(pos);
+    }
+
     vms_unmount(vm_mnt);
     vfree(mm);
 
index d2baa352b05ee81f231b0739c75caa022ec175b4..bab5b4fd1dd4d405eaaff845aebc61622edbacfc 100644 (file)
@@ -91,6 +91,8 @@ __vfree(void* ptr, struct cake_pile** segregate_list, size_t len)
             return;
         }
     }
+
+    fail("this is really not a piece of cake");
 }
 
 void*
index 308a9808f8191925d21895b45cc85bfb661ac4eb..209a4c50414badbcc55257fbb6cf39c54b5ca116 100644 (file)
@@ -4,6 +4,9 @@ from ..structs.pmem import PMem
 from ..pp import MyPrettyPrinter
 import math
 
+ENTER_CONTIG = 0
+LEAVE_CONTIG = 1
+
 class PhysicalMemProfile:
     def __init__(self) -> None:
         super().__init__()
@@ -22,18 +25,29 @@ class PhysicalMemProfile:
         remainder = self.max_mem_pg % self.__mem_distr_granule
         bucket = 0
         non_contig = 0
-        last_contig = False
+        contig_state = LEAVE_CONTIG
+        new_state = LEAVE_CONTIG
 
-        for i in range(self.max_mem_pg):
+        i = 0
+        while i < self.max_mem_pg:
             element = PageStruct(pplist[i].address)
-            bucket += int(element.busy())
-            if last_contig:
-                last_contig = element.busy()
-                non_contig += int(not last_contig)
-            else:
-                last_contig = element.busy()
-
-            if (i + 1) % page_per_granule == 0:
+
+            nr_pgs = 1
+            if element.lead_page():
+                nr_pgs = 1 << element.order
+                if element.busy():
+                    bucket += nr_pgs
+                    new_state = ENTER_CONTIG
+                else:
+                    new_state = LEAVE_CONTIG
+            
+            i += nr_pgs
+
+            if contig_state != new_state:
+                non_contig += int(new_state == LEAVE_CONTIG)
+                contig_state = new_state
+
+            if i % page_per_granule == 0:
                 self.mem_distr.append(bucket)
                 bucket = 0
         
index 6dc5bf98ff46236cd92f85ceae79895624a3a275..cbb17f5079f8d2143378a9072549df8d67e3ba3c 100644 (file)
@@ -9,6 +9,7 @@ class PageStruct(KernelStruct):
         self.flags = self._kstruct["flags"]
         self.order = self._kstruct["order"]
         self.pool = self._kstruct["pool"]
+        self.companion = self._kstruct["companion"]
 
     def uninitialized(self):
         return not (self.flags & 0b10)
@@ -20,8 +21,12 @@ class PageStruct(KernelStruct):
     
     def busy(self):
         return (not self.uninitialized()
+                and self.type != 0b1000
                 and self.ref > 0)
 
+    def lead_page(self):
+        return self.companion == 0 and not self.uninitialized()
+
     @staticmethod
     def get_type() -> Type:
         return lookup_type("struct ppage").pointer()
index f52ed94e69afaa7323241f581bdc32085bc1b55e..33f2aaa8a38fa0e289edb659907a65416cd0432b 100644 (file)
@@ -1,6 +1,6 @@
 {
     "arch": "$ARCH",
-    "memory": "1G",
+    "memory": "32M",
     "ncpu": 1,
     "machine": "q35",
     "cpu": {