From e2994a5332194a132c60db35c55a006bdd1f6566 Mon Sep 17 00:00:00 2001 From: Lunaixsky Date: Thu, 6 Feb 2025 06:05:22 +0000 Subject: [PATCH] fix leakage on mid-level page table when freeing vms * fix the mm stats in lunadbg to exclude reserved pages and account for composite pages * reduce the qemu memory config to 32M for faster mm stats while keeping a sufficient amount of available memory --- lunaix-os/includes/lunaix/mm/procvm.h | 3 + lunaix-os/includes/lunaix/mm/region.h | 10 +++ lunaix-os/kernel/mm/mmap.c | 4 +- lunaix-os/kernel/mm/pmalloc_simple.c | 1 + lunaix-os/kernel/mm/procvm.c | 89 ++++++++++++++++--- lunaix-os/kernel/mm/valloc.c | 2 + .../scripts/gdb/lunadbg/profiling/pmstat.py | 34 ++++--- lunaix-os/scripts/gdb/lunadbg/structs/page.py | 5 ++ lunaix-os/scripts/qemus/qemu_x86_dev.json | 2 +- 9 files changed, 124 insertions(+), 26 deletions(-) diff --git a/lunaix-os/includes/lunaix/mm/procvm.h b/lunaix-os/includes/lunaix/mm/procvm.h index 82a2de3..8eb81dd 100644 --- a/lunaix-os/includes/lunaix/mm/procvm.h +++ b/lunaix-os/includes/lunaix/mm/procvm.h @@ -49,6 +49,9 @@ struct proc_mm struct proc_mm* procvm_create(struct proc_info* proc); +void +procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region); + /** * @brief Initialize and mount the vm of `proc` to duplication of current process * diff --git a/lunaix-os/includes/lunaix/mm/region.h b/lunaix-os/includes/lunaix/mm/region.h index c15f1c3..34e517e 100644 --- a/lunaix-os/includes/lunaix/mm/region.h +++ b/lunaix-os/includes/lunaix/mm/region.h @@ -40,6 +40,16 @@ writable_region(struct mm_region* mm) { return !!(mm->attr & (REGION_RSHARED | REGION_WRITE)); } +static inline bool +readable_region(struct mm_region* mm) { + return !!(mm->attr & (REGION_RSHARED | REGION_READ)); +} + +static inline bool +executable_region(struct mm_region* mm) { + return !!(mm->attr & REGION_EXEC); +} + static inline bool shared_writable_region(struct mm_region* mm) { return !!(mm->attr & REGION_WSHARED); diff --git 
a/lunaix-os/kernel/mm/mmap.c b/lunaix-os/kernel/mm/mmap.c index 493d26c..ff9e538 100644 --- a/lunaix-os/kernel/mm/mmap.c +++ b/lunaix-os/kernel/mm/mmap.c @@ -349,9 +349,7 @@ mem_unmap_region(ptr_t mnt, struct mm_region* region) pfn_t pglen = leaf_count(region->end - region->start); mem_sync_pages(mnt, region, region->start, pglen * PAGE_SIZE, 0); - pte_t* ptep = mkptep_va(mnt, region->start); - __remove_ranged_mappings(ptep, pglen); - + procvm_prune_vmr(mnt, region); tlb_flush_vmr_all(region); llist_delete(&region->head); diff --git a/lunaix-os/kernel/mm/pmalloc_simple.c b/lunaix-os/kernel/mm/pmalloc_simple.c index 868c470..93fae04 100644 --- a/lunaix-os/kernel/mm/pmalloc_simple.c +++ b/lunaix-os/kernel/mm/pmalloc_simple.c @@ -127,6 +127,7 @@ pmm_looknext(struct pmem_pool* pool, size_t order) page->order = order; page->companion = i; page->pool = pool->type; + page->refs = 0; llist_init_head(&page->sibs); __set_page_initialized(page); } diff --git a/lunaix-os/kernel/mm/procvm.c b/lunaix-os/kernel/mm/procvm.c index 31d9f3f..88f512c 100644 --- a/lunaix-os/kernel/mm/procvm.c +++ b/lunaix-os/kernel/mm/procvm.c @@ -4,11 +4,25 @@ #include #include #include +#include #include #include +#define alloc_pagetable_trace(ptep, pte, ord, level) \ + ({ \ + alloc_kpage_at(ptep, pte, ord); \ + }) + +#define free_pagetable_trace(ptep, pte, level) \ + ({ \ + struct leaflet* leaflet = pte_leaflet_aligned(pte); \ + assert(leaflet_order(leaflet) == 0); \ + leaflet_return(leaflet); \ + set_pte(ptep, null_pte); \ + }) + struct proc_mm* procvm_create(struct proc_info* proc) { struct proc_mm* mm = vzalloc(sizeof(struct proc_mm)); @@ -46,7 +60,7 @@ __descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc) } if (alloc && pte_isnull(pte_at(dest))) { - alloc_kpage_at(dest, pte, 0); + alloc_pagetable_trace(dest, pte, 0, i); } i++; @@ -55,6 +69,34 @@ __descend(ptr_t dest_mnt, ptr_t src_mnt, ptr_t va, bool alloc) return i; } +static void +__free_hierarchy(ptr_t mnt, ptr_t va, int level) +{ + 
pte_t pte, *ptep, *ptep_next; + + if (pt_last_level(level)) { + return; + } + + __free_hierarchy(mnt, va, level + 1); + + ptep = mklntep_va(level, mnt, va); + pte = pte_at(ptep); + if (pte_isnull(pte)) { + return; + } + + ptep_next = ptep_step_into(ptep); + for (unsigned i = 0; i < LEVEL_SIZE; i++, ptep_next++) + { + if (!pte_isnull(pte_at(ptep_next))) { + return; + } + } + + free_pagetable_trace(ptep, pte, level); +} + static inline void copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level) { @@ -77,7 +119,7 @@ copy_leaf(pte_t* dest, pte_t* src, pte_t pte, int level) static inline void copy_root(pte_t* dest, pte_t* src, pte_t pte, int level) { - alloc_kpage_at(dest, pte, 0); + alloc_pagetable_trace(dest, pte, 0, level); } static void @@ -131,6 +173,12 @@ vmrcpy(ptr_t dest_mnt, ptr_t src_mnt, struct mm_region* region) } } +static inline void +vmrfree_hierachy(ptr_t vm_mnt, struct mm_region* region) +{ + __free_hierarchy(vm_mnt, region->start, 0); +} + static void vmrfree(ptr_t vm_mnt, struct mm_region* region) { @@ -143,7 +191,7 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region) src = mkptep_va(vm_mnt, region->start); end = mkptep_va(vm_mnt, region->end); - level = __descend(0, vm_mnt, loc, false); + level = __descend(vm_mnt, vm_mnt, loc, false); while (src < end) { @@ -161,6 +209,8 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region) continue; } + set_pte(src, null_pte); + if (pte_isloaded(pte)) { leaflet = pte_leaflet_aligned(pte); leaflet_return(leaflet); @@ -171,10 +221,7 @@ vmrfree(ptr_t vm_mnt, struct mm_region* region) cont: while (ptep_vfn(src) == MAX_PTEN - 1) { src = ptep_step_out(src); - leaflet = pte_leaflet_aligned(pte_at(src)); - - assert(leaflet_order(leaflet) == 0); - leaflet_return(leaflet); + free_pagetable_trace(src, pte_at(src), level); level--; } @@ -232,7 +279,7 @@ vmscpy(struct proc_mm* dest_mm, struct proc_mm* src_mm) pte_t* ptep_smx = mkl1tep_va(VMS_SELF, dest_mnt); pte_t pte_sms = mkpte_prot(KERNEL_PGTAB); - pte_sms = 
alloc_kpage_at(ptep_ssm, pte_sms, 0); + pte_sms = alloc_pagetable_trace(ptep_ssm, pte_sms, 0, 0); set_pte(&ptep_smx[VMS_SELF_L0TI], pte_sms); tlb_flush_kernel((ptr_t)dest_mnt); @@ -259,22 +306,28 @@ static void vmsfree(struct proc_mm* mm) { struct leaflet* leaflet; + struct mm_region *pos, *n; ptr_t vm_mnt; pte_t* ptep_self; vm_mnt = mm->vm_mnt; ptep_self = mkl0tep(mkptep_va(vm_mnt, VMS_SELF)); - struct mm_region *pos, *n; + // first pass: free region mappings llist_for_each(pos, n, &mm->regions, head) { vmrfree(vm_mnt, pos); } + // second pass: free the hierarchical + llist_for_each(pos, n, &mm->regions, head) + { + vmrfree_hierachy(vm_mnt, pos); + } + procvm_unlink_kernel(); - leaflet = pte_leaflet_aligned(pte_at(ptep_self)); - leaflet_return(leaflet); + free_pagetable_trace(ptep_self, pte_at(ptep_self), 0); } static inline void @@ -297,6 +350,12 @@ __detach_from_current_vms(struct proc_mm* guest_mm) } } +void +procvm_prune_vmr(ptr_t vm_mnt, struct mm_region* region) +{ + vmrfree(vm_mnt, region); + vmrfree_hierachy(vm_mnt, region); +} void procvm_dupvms_mount(struct proc_mm* mm) { @@ -366,13 +425,19 @@ void procvm_unmount_release(struct proc_mm* mm) { ptr_t vm_mnt = mm->vm_mnt; struct mm_region *pos, *n; + llist_for_each(pos, n, &mm->regions, head) { mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0); - region_release(pos); } vmsfree(mm); + + llist_for_each(pos, n, &mm->regions, head) + { + region_release(pos); + } + vms_unmount(vm_mnt); vfree(mm); diff --git a/lunaix-os/kernel/mm/valloc.c b/lunaix-os/kernel/mm/valloc.c index d2baa35..bab5b4f 100644 --- a/lunaix-os/kernel/mm/valloc.c +++ b/lunaix-os/kernel/mm/valloc.c @@ -91,6 +91,8 @@ __vfree(void* ptr, struct cake_pile** segregate_list, size_t len) return; } } + + fail("this is really not a piece of cake"); } void* diff --git a/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py b/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py index 308a980..209a4c5 100644 --- 
a/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py +++ b/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py @@ -4,6 +4,9 @@ from ..structs.pmem import PMem from ..pp import MyPrettyPrinter import math +ENTER_CONTIG = 0 +LEAVE_CONTIG = 1 + class PhysicalMemProfile: def __init__(self) -> None: super().__init__() @@ -22,18 +25,29 @@ class PhysicalMemProfile: remainder = self.max_mem_pg % self.__mem_distr_granule bucket = 0 non_contig = 0 - last_contig = False + contig_state = LEAVE_CONTIG + new_state = LEAVE_CONTIG - for i in range(self.max_mem_pg): + i = 0 + while i < self.max_mem_pg: element = PageStruct(pplist[i].address) - bucket += int(element.busy()) - if last_contig: - last_contig = element.busy() - non_contig += int(not last_contig) - else: - last_contig = element.busy() - - if (i + 1) % page_per_granule == 0: + + nr_pgs = 1 + if element.lead_page(): + nr_pgs = 1 << element.order + if element.busy(): + bucket += nr_pgs + new_state = ENTER_CONTIG + else: + new_state = LEAVE_CONTIG + + i += nr_pgs + + if contig_state != new_state: + non_contig += int(new_state == LEAVE_CONTIG) + contig_state = new_state + + if i % page_per_granule == 0: self.mem_distr.append(bucket) bucket = 0 diff --git a/lunaix-os/scripts/gdb/lunadbg/structs/page.py b/lunaix-os/scripts/gdb/lunadbg/structs/page.py index 6dc5bf9..cbb17f5 100644 --- a/lunaix-os/scripts/gdb/lunadbg/structs/page.py +++ b/lunaix-os/scripts/gdb/lunadbg/structs/page.py @@ -9,6 +9,7 @@ class PageStruct(KernelStruct): self.flags = self._kstruct["flags"] self.order = self._kstruct["order"] self.pool = self._kstruct["pool"] + self.companion = self._kstruct["companion"] def uninitialized(self): return not (self.flags & 0b10) @@ -20,8 +21,12 @@ class PageStruct(KernelStruct): def busy(self): return (not self.uninitialized() + and self.type != 0b1000 and self.ref > 0) + def lead_page(self): + return self.companion == 0 and not self.uninitialized() + @staticmethod def get_type() -> Type: return lookup_type("struct 
ppage").pointer() diff --git a/lunaix-os/scripts/qemus/qemu_x86_dev.json b/lunaix-os/scripts/qemus/qemu_x86_dev.json index f52ed94..33f2aaa 100644 --- a/lunaix-os/scripts/qemus/qemu_x86_dev.json +++ b/lunaix-os/scripts/qemus/qemu_x86_dev.json @@ -1,6 +1,6 @@ { "arch": "$ARCH", - "memory": "1G", + "memory": "32M", "ncpu": 1, "machine": "q35", "cpu": { -- 2.27.0