From c166bd62fbb907f95f79f621e2a2fb4fdde08e01 Mon Sep 17 00:00:00 2001 From: Lunaixsky Date: Sat, 2 Mar 2024 00:22:18 +0000 Subject: [PATCH] Unifying the Lunaix's Physical Memory Model (#28) * * Introduce a new physical page model, which swaps out the previous address-based one and also supports dynamically allocating the required page list based on the system mem map provided by the upstream bootloader * Rename page alignment utils from pagetable.h for better expressiveness * Rewrite the next-fit allocator with an order-based free-list caching feature. (PMALLOC_SIMPLE) * Intended to add more advanced pmem allocators. * Add a config.h file that provides finer control over the "hyperparameters" of the Lunaix kernel (we should switch to tools like kconfig later) * * Introduce struct leaflet, a wrapper around struct ppage used to tackle the ambiguity of head or tail when a struct ppage has order > 0 base pages compounded. * Refactoring done with regard to this new abstraction. * * Fix compile-time errors * Remove the pmm_init_freeze_range api and the init_begin/end things; this forces the allocator to explicitly initialize the entire pplist during a single pmm_init invocation * Address all issues found when smoke testing through the bootstrapping stage * * Fix issues discovered when performing smoke tests from bootstage to initd spawn. 1. pte skipped from copying for grouped pages. 2. symlink creation ignores the null terminator in ramfs 3. Add null-ptr termination when user-provided envp and argc are absent (thus only the default ones get injected) 4. physical page does not get marked as initialized when allocated from an uninitialized memory region 5. a typo causes vunmap to accidentally remove the L0T mapping 6. ahci: fis and cb regions should not be mapped through ioremap. * Needs investigation: it seems physical pages get smashed into each other when intensive allocation and freeing take place. Could be issues within the allocator * * Fix an issue where dup_leaflet copies incomplete data when dealing with a leaflet with order > 1. 
This was due to a missing flush of the ranged tlb records * Fix a memory leak of the leaflet when releasing a thread's kstack * * Rework the tlb flushing functions and introduce the ability to respect different address spaces; this allows seamless porting to architectures with TLB ASID-tagging support to eliminate unneeded tlb flushing * * Remove obsolete vmm api * * Move all kernel built-in stacks to a dedicated section * Add a failsafe stack to run the failsafe handler without needing to worry about the current stack's validity (as everything could be messed up when shit happens) * Add a failsafe handler to gather diagnostic info and centralise stack trace printing, which is robust as it uses a dedicated stack and all calls are inlined, avoiding any stack operation before entering it * Add a check for init stack smashing. * * (LunaDBG) update pmem profiling to reflect the latest changes --- lunaix-os/.vscode/c_cpp_properties.json | 3 +- lunaix-os/arch/i386/boot/kpt_setup.c | 5 + lunaix-os/arch/i386/boot/prologue.S | 20 +- .../arch/i386/exceptions/intr_routines.c | 15 +- lunaix-os/arch/i386/failsafe.S | 6 + lunaix-os/arch/i386/includes/sys/cpu.h | 36 --- lunaix-os/arch/i386/includes/sys/failsafe.h | 43 +++ lunaix-os/arch/i386/includes/sys/mm/mempart.h | 4 + lunaix-os/arch/i386/includes/sys/mm/mm_defs.h | 1 + .../arch/i386/includes/sys/mm/pagetable.h | 12 + .../arch/i386/includes/sys/mm/physical.h | 15 + lunaix-os/arch/i386/includes/sys/mm/tlb.h | 184 +++++++++++ lunaix-os/arch/i386/includes/sys/trace.h | 14 + lunaix-os/arch/i386/mm/pmm.c | 68 ++++ lunaix-os/arch/i386/mm/tlb.c | 34 ++ lunaix-os/arch/i386/mm/vmutils.c | 31 +- lunaix-os/config.h | 19 ++ lunaix-os/hal/ahci/ahci.c | 14 +- lunaix-os/includes/lunaix/boot_generic.h | 19 ++ lunaix-os/includes/lunaix/compiler.h | 3 + lunaix-os/includes/lunaix/failsafe.h | 9 + lunaix-os/includes/lunaix/mm/fault.h | 19 +- lunaix-os/includes/lunaix/mm/page.h | 299 ++++++++++++++++++ lunaix-os/includes/lunaix/mm/pagetable.h | 32 +- lunaix-os/includes/lunaix/mm/physical.h | 
54 ++++ lunaix-os/includes/lunaix/mm/pmm.h | 233 ++++++++------ lunaix-os/includes/lunaix/mm/vmm.h | 129 +------- lunaix-os/includes/lunaix/mm/vmtlb.h | 8 + lunaix-os/includes/lunaix/process.h | 7 + lunaix-os/kernel.mk | 1 + lunaix-os/kernel/boot_helper.c | 39 +-- lunaix-os/kernel/debug/failsafe.c | 24 ++ lunaix-os/kernel/debug/trace.c | 9 +- lunaix-os/kernel/exe/elf32/ldelf32.c | 8 +- lunaix-os/kernel/exe/exec.c | 8 +- lunaix-os/kernel/fs/pcache.c | 17 +- lunaix-os/kernel/fs/ramfs/ramfs.c | 2 +- lunaix-os/kernel/kinit.c | 18 +- lunaix-os/kernel/lunad.c | 2 +- lunaix-os/kernel/mm/cake.c | 13 +- lunaix-os/kernel/mm/fault.c | 98 ++++-- lunaix-os/kernel/mm/mmap.c | 61 ++-- lunaix-os/kernel/mm/mmio.c | 31 +- lunaix-os/kernel/mm/page.c | 23 ++ lunaix-os/kernel/mm/pmalloc_buddy.c | 9 + lunaix-os/kernel/mm/pmalloc_ncontig.c | 9 + lunaix-os/kernel/mm/pmalloc_simple.c | 204 ++++++++++++ lunaix-os/kernel/mm/pmm.c | 197 ++++-------- lunaix-os/kernel/mm/pmm_internal.h | 29 ++ lunaix-os/kernel/mm/procvm.c | 64 ++-- lunaix-os/kernel/mm/region.c | 2 +- lunaix-os/kernel/mm/vmap.c | 31 +- lunaix-os/kernel/mm/vmm.c | 64 +--- lunaix-os/kernel/process/fork.c | 22 +- lunaix-os/kernel/process/process.c | 2 +- lunaix-os/kernel/process/thread.c | 19 +- lunaix-os/kernel/spike.c | 6 +- lunaix-os/link/linker.ld | 9 + .../scripts/gdb/lunadbg/profiling/pmstat.py | 15 +- lunaix-os/scripts/gdb/lunadbg/structs/page.py | 21 +- lunaix-os/scripts/gdb/lunadbg/structs/pmem.py | 16 + lunaix-os/scripts/templates/i386/config.json | 3 + 62 files changed, 1744 insertions(+), 668 deletions(-) create mode 100644 lunaix-os/arch/i386/failsafe.S create mode 100644 lunaix-os/arch/i386/includes/sys/failsafe.h create mode 100644 lunaix-os/arch/i386/includes/sys/mm/physical.h create mode 100644 lunaix-os/arch/i386/includes/sys/mm/tlb.h create mode 100644 lunaix-os/arch/i386/includes/sys/trace.h create mode 100644 lunaix-os/arch/i386/mm/pmm.c create mode 100644 lunaix-os/arch/i386/mm/tlb.c create mode 100644 
lunaix-os/config.h create mode 100644 lunaix-os/includes/lunaix/failsafe.h create mode 100644 lunaix-os/includes/lunaix/mm/page.h create mode 100644 lunaix-os/includes/lunaix/mm/physical.h create mode 100644 lunaix-os/includes/lunaix/mm/vmtlb.h create mode 100644 lunaix-os/kernel/debug/failsafe.c create mode 100644 lunaix-os/kernel/mm/page.c create mode 100644 lunaix-os/kernel/mm/pmalloc_buddy.c create mode 100644 lunaix-os/kernel/mm/pmalloc_ncontig.c create mode 100644 lunaix-os/kernel/mm/pmalloc_simple.c create mode 100644 lunaix-os/kernel/mm/pmm_internal.h create mode 100644 lunaix-os/scripts/gdb/lunadbg/structs/pmem.py diff --git a/lunaix-os/.vscode/c_cpp_properties.json b/lunaix-os/.vscode/c_cpp_properties.json index 3718f2b..d3390d0 100644 --- a/lunaix-os/.vscode/c_cpp_properties.json +++ b/lunaix-os/.vscode/c_cpp_properties.json @@ -12,7 +12,8 @@ "-ffreestanding", "-D__ARCH__=i386", "-D__LUNAIXOS_DEBUG__", - "-include flags.h" + "-include flags.h", + "-include config.h" ], "defines": [], "compilerPath": "${HOME}/opt/i686-gcc-12/bin/i686-elf-gcc", diff --git a/lunaix-os/arch/i386/boot/kpt_setup.c b/lunaix-os/arch/i386/boot/kpt_setup.c index 26a348d..c353d77 100644 --- a/lunaix-os/arch/i386/boot/kpt_setup.c +++ b/lunaix-os/arch/i386/boot/kpt_setup.c @@ -17,6 +17,7 @@ extern u8_t __kboot_end[]; // define the initial page table layout struct kernel_map { pte_t l0t[_PAGE_LEVEL_SIZE]; + pte_t pg_mnt[_PAGE_LEVEL_SIZE]; struct { pte_t _lft[_PAGE_LEVEL_SIZE]; @@ -86,6 +87,10 @@ _init_page() // XXX: Mapping the kernel .rodata section? 
+ // set mount point + pte_t* kmntep = (pte_t*) &kpt_pa->l0t[pfn_at(PG_MOUNT_1, L0T_SIZE)]; + set_pte(kmntep, mkpte((ptr_t)kpt_pa->pg_mnt, KERNEL_DATA)); + // Build up self-reference pte = mkpte_root((ptr_t)kpt_pa, KERNEL_DATA); set_pte(boot_l0tep + _PAGE_LEVEL_MASK, pte); diff --git a/lunaix-os/arch/i386/boot/prologue.S b/lunaix-os/arch/i386/boot/prologue.S index f8d59ce..dc17242 100644 --- a/lunaix-os/arch/i386/boot/prologue.S +++ b/lunaix-os/arch/i386/boot/prologue.S @@ -2,10 +2,14 @@ #define __ASM__ #include +#include -.section .bss +.section .bss.kstack + .global __kinit_stack_end + .align 16 - .skip 2048, 0 + __kinit_stack_end: + .skip 2048, 0 __kinit_stack_top: # TODO # This stack was too small that corrupt the ambient kernel structures. @@ -22,11 +26,15 @@ */ movl $__kinit_stack_top, %esp andl $stack_alignment, %esp + + movl $__kinit_stack_end, %eax + movl $STACK_SANITY, (%eax) + movl $STACK_SANITY, 4(%eax) + movl $STACK_SANITY, 8(%eax) + movl $STACK_SANITY, 12(%eax) + subl $16, %esp - /* - 最终还是决定将IDT&GDT的初始化和安装放在这里 - 注意:由于已开启分页,GDTR与IDTR里面放的不是物理地址,是线性地址! - */ + /* 加载 GDT P.s. 
虽然GDT在分页后已变得不重要,甚至可以忽略不作。但为了保持完整性,还是选择加载他 diff --git a/lunaix-os/arch/i386/exceptions/intr_routines.c b/lunaix-os/arch/i386/exceptions/intr_routines.c index 91366bf..b93e679 100644 --- a/lunaix-os/arch/i386/exceptions/intr_routines.c +++ b/lunaix-os/arch/i386/exceptions/intr_routines.c @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -23,39 +24,31 @@ void __print_panic_msg(const char* msg, const isr_param* param) { ERROR("panic: %s", msg); - trace_printstack_isr(param); + failsafe_diagnostic(); } void intr_routine_divide_zero(const isr_param* param) { __print_panic_msg("div zero", param); - - spin(); } void intr_routine_general_protection(const isr_param* param) { __print_panic_msg("general protection", param); - - spin(); } void intr_routine_sys_panic(const isr_param* param) { __print_panic_msg((char*)(param->registers.edi), param); - - spin(); } void intr_routine_fallback(const isr_param* param) { __print_panic_msg("unknown interrupt", param); - - spin(); } /** @@ -76,9 +69,7 @@ intr_routine_apic_error(const isr_param* param) char buf[32]; ksprintf(buf, "APIC error, ESR=0x%x", error_reg); - __print_panic_msg(buf, param); - - spin(); + failsafe_diagnostic(); } void diff --git a/lunaix-os/arch/i386/failsafe.S b/lunaix-os/arch/i386/failsafe.S new file mode 100644 index 0000000..6f311e6 --- /dev/null +++ b/lunaix-os/arch/i386/failsafe.S @@ -0,0 +1,6 @@ + +.section .bss.kstack + .skip 1024 + .align 16 + .global failsafe_stack_top + failsafe_stack_top: diff --git a/lunaix-os/arch/i386/includes/sys/cpu.h b/lunaix-os/arch/i386/includes/sys/cpu.h index a4be798..d8b8a23 100644 --- a/lunaix-os/arch/i386/includes/sys/cpu.h +++ b/lunaix-os/arch/i386/includes/sys/cpu.h @@ -57,19 +57,6 @@ cpu_chconfig(u32_t val) asm("mov %0, %%cr0" ::"r"(val)); } -/** - * @brief Load current virtual memory space - * - * @return u32_t - */ -static inline u32_t -cpu_ldvmspace() -{ - ptr_t val; - asm volatile("movl %%cr3,%0" : "=r"(val)); - return val; -} - /** * @brief Change 
current virtual memory space * @@ -81,29 +68,6 @@ cpu_chvmspace(u32_t val) asm("mov %0, %%cr3" ::"r"(val)); } -/** - * @brief Flush a certain TLB record - * - * @return u32_t - */ -static inline void -cpu_flush_page(ptr_t va) -{ - asm volatile("invlpg (%0)" ::"r"(va) : "memory"); -} - -/** - * @brief Flush entire TLB - * - */ -static inline void -cpu_flush_vmspace() -{ - asm("movl %%cr3, %%eax\n" - "movl %%eax, %%cr3" :: - : "eax"); -} - static inline void cpu_enable_interrupt() { diff --git a/lunaix-os/arch/i386/includes/sys/failsafe.h b/lunaix-os/arch/i386/includes/sys/failsafe.h new file mode 100644 index 0000000..149efd2 --- /dev/null +++ b/lunaix-os/arch/i386/includes/sys/failsafe.h @@ -0,0 +1,43 @@ +#ifndef __LUNAIX_ARCH_FAILSAFE_H +#define __LUNAIX_ARCH_FAILSAFE_H + +#define STACK_SANITY 0xbeefc0de + +#ifndef __ASM__ + +#include + +static inline bool +check_bootstack_sanity() +{ + extern unsigned int __kinit_stack_end[]; + + return ( __kinit_stack_end[0] + | __kinit_stack_end[1] + | __kinit_stack_end[2] + | __kinit_stack_end[3]) == STACK_SANITY; +} + +static inline void must_inline noret +failsafe_diagnostic() { + // asm ("jmp __fatal_state"); + extern int failsafe_stack_top[]; + asm ( + "movl %%esp, %%eax\n" + "movl %%ebp, %%ebx\n" + + "movl %0, %%esp\n" + + "pushl %%eax\n" + "pushl %%ebx\n" + + "call do_failsafe_unrecoverable\n" + ::"r"(failsafe_stack_top) + :"memory" + ); + unreachable; +} + +#endif + +#endif /* __LUNAIX_FAILSAFE_H */ diff --git a/lunaix-os/arch/i386/includes/sys/mm/mempart.h b/lunaix-os/arch/i386/includes/sys/mm/mempart.h index d923d45..b604265 100644 --- a/lunaix-os/arch/i386/includes/sys/mm/mempart.h +++ b/lunaix-os/arch/i386/includes/sys/mm/mempart.h @@ -45,6 +45,10 @@ #define PG_MOUNT_4_SIZE 0x1000UL #define PG_MOUNT_4_END 0xc4003fffUL +#define PG_MOUNT_VAR 0xc4004000UL +#define PG_MOUNT_VAR_SIZE 0x3fc000UL +#define PG_MOUNT_VAR_END 0xc43fffffUL + #define VMAP 0xc4400000UL #define VMAP_SIZE 0x3b400000UL #define VMAP_END 0xff7fffffUL 
diff --git a/lunaix-os/arch/i386/includes/sys/mm/mm_defs.h b/lunaix-os/arch/i386/includes/sys/mm/mm_defs.h index 8dd7df6..5c098d3 100644 --- a/lunaix-os/arch/i386/includes/sys/mm/mm_defs.h +++ b/lunaix-os/arch/i386/includes/sys/mm/mm_defs.h @@ -1,6 +1,7 @@ #ifndef __LUNAIX_MM_DEFS_H #define __LUNAIX_MM_DEFS_H + #include "mempart.h" #include "pagetable.h" diff --git a/lunaix-os/arch/i386/includes/sys/mm/pagetable.h b/lunaix-os/arch/i386/includes/sys/mm/pagetable.h index 4fd9439..2b188cd 100644 --- a/lunaix-os/arch/i386/includes/sys/mm/pagetable.h +++ b/lunaix-os/arch/i386/includes/sys/mm/pagetable.h @@ -150,12 +150,24 @@ pte_setpaddr(pte_t pte, ptr_t paddr) return __mkpte_from((pte.val & _PAGE_BASE_MASK) | (paddr & ~_PAGE_BASE_MASK)); } +static inline pte_t +pte_setppfn(pte_t pte, pfn_t ppfn) +{ + return __mkpte_from((pte.val & _PAGE_BASE_MASK) | (ppfn * PAGE_SIZE)); +} + static inline ptr_t pte_paddr(pte_t pte) { return pte.val & ~_PAGE_BASE_MASK; } +static inline pfn_t +pte_ppfn(pte_t pte) +{ + return pte.val >> _PAGE_BASE_SHIFT; +} + static inline pte_t pte_setprot(pte_t pte, ptr_t prot) { diff --git a/lunaix-os/arch/i386/includes/sys/mm/physical.h b/lunaix-os/arch/i386/includes/sys/mm/physical.h new file mode 100644 index 0000000..88f59ad --- /dev/null +++ b/lunaix-os/arch/i386/includes/sys/mm/physical.h @@ -0,0 +1,15 @@ +#ifndef __LUNAIX_ARCH_PHYSICAL_H +#define __LUNAIX_ARCH_PHYSICAL_H + +#include +#include "mm_defs.h" + +#define MAX_GROUP_PAGE_SIZE ( 0x8000 ) +#define PPLIST_STARTVM VMAP + +struct ppage_arch +{ + +}; + +#endif /* __LUNAIX_ARCH_PHYSICAL_H */ diff --git a/lunaix-os/arch/i386/includes/sys/mm/tlb.h b/lunaix-os/arch/i386/includes/sys/mm/tlb.h new file mode 100644 index 0000000..7e3b65a --- /dev/null +++ b/lunaix-os/arch/i386/includes/sys/mm/tlb.h @@ -0,0 +1,184 @@ +#ifndef __LUNAIX_ARCH_TLB_H +#define __LUNAIX_ARCH_TLB_H + +#include +#include +#include + +/** + * @brief Invalidate an entry of all address space + * + * @param va + */ +static inline 
void must_inline +__tlb_invalidate(ptr_t va) +{ + asm volatile("invlpg (%0)" ::"r"(va) : "memory"); +} + +/** + * @brief Invalidate an entry of an address space indetified + * by ASID + * + * @param va + */ +static inline void must_inline +__tlb_flush_asid(unsigned int asid, ptr_t va) +{ + // not supported on x86_32 + asm volatile("invlpg (%0)" ::"r"(va) : "memory"); +} + +/** + * @brief Invalidate an entry of global address space + * + * @param va + */ +static inline void must_inline +__tlb_flush_global(ptr_t va) +{ + // not supported on x86_32 + asm volatile("invlpg (%0)" ::"r"(va) : "memory"); +} + +/** + * @brief Invalidate an entire TLB + * + * @param va + */ +static inline void must_inline +__tlb_flush_all() +{ + asm volatile( + "movl %%cr3, %%eax\n" + "movl %%eax, %%cr3" + :::"eax" + ); +} + +/** + * @brief Invalidate an entire address space + * + * @param va + */ +static inline void must_inline +__tlb_flush_asid_all(unsigned int asid) +{ + // not supported on x86_32 + __tlb_flush_all(); +} + + +/** + * @brief Invalidate entries of all address spaces + * + * @param asid + * @param addr + * @param npages + */ +static inline void +tlb_flush_range(ptr_t addr, unsigned int npages) +{ + for (unsigned int i = 0; i < npages; i++) + { + __tlb_invalidate(addr + i * PAGE_SIZE); + } +} + +/** + * @brief Invalidate entries of an address space identified + * by ASID + * + * @param asid + * @param addr + * @param npages + */ +static inline void +tlb_flush_asid_range(unsigned int asid, ptr_t addr, unsigned int npages) +{ + for (unsigned int i = 0; i < npages; i++) + { + __tlb_flush_asid(asid, addr + i * PAGE_SIZE); + } +} + +/** + * @brief Invalidate an entry of kernel address spaces + * + * @param asid + * @param addr + * @param npages + */ +static inline void +tlb_flush_kernel(ptr_t addr) +{ + __tlb_flush_global(addr); +} + +/** + * @brief Invalidate entries of kernel address spaces + * + * @param asid + * @param addr + * @param npages + */ +static inline void 
+tlb_flush_kernel_ranged(ptr_t addr, unsigned int npages) +{ + for (unsigned int i = 0; i < npages; i++) + { + tlb_flush_kernel(addr + i * PAGE_SIZE); + } +} + +/** + * @brief Invalidate an entry within a process memory space + * + * @param asid + * @param addr + * @param npages + */ +void +tlb_flush_mm(struct proc_mm* mm, ptr_t addr); + +/** + * @brief Invalidate entries within a process memory space + * + * @param asid + * @param addr + * @param npages + */ +void +tlb_flush_mm_range(struct proc_mm* mm, ptr_t addr, unsigned int npages); + +/** + * @brief Invalidate an entry within a vm region + * + * @param asid + * @param addr + * @param npages + */ +void +tlb_flush_vmr(struct mm_region* vmr, ptr_t va); + +/** + * @brief Invalidate all entries within a vm region + * + * @param asid + * @param addr + * @param npages + */ +void +tlb_flush_vmr_all(struct mm_region* vmr); + +/** + * @brief Invalidate entries within a vm region + * + * @param asid + * @param addr + * @param npages + */ +void +tlb_flush_vmr_range(struct mm_region* vmr, ptr_t addr, unsigned int npages); + +#endif /* __LUNAIX_VMTLB_H */ diff --git a/lunaix-os/arch/i386/includes/sys/trace.h b/lunaix-os/arch/i386/includes/sys/trace.h new file mode 100644 index 0000000..fb3443d --- /dev/null +++ b/lunaix-os/arch/i386/includes/sys/trace.h @@ -0,0 +1,14 @@ +#ifndef __LUNAIX_ARCH_TRACE_H +#define __LUNAIX_ARCH_TRACE_H + +#include + +static inline bool +arch_valid_fp(ptr_t ptr) { + extern int __bsskstack_end[]; + extern int __bsskstack_start[]; + return ((ptr_t)__bsskstack_start <= ptr && ptr <= (ptr_t)__bsskstack_end); +} + + +#endif /* __LUNAIX_TRACE_H */ diff --git a/lunaix-os/arch/i386/mm/pmm.c b/lunaix-os/arch/i386/mm/pmm.c new file mode 100644 index 0000000..7a6ef29 --- /dev/null +++ b/lunaix-os/arch/i386/mm/pmm.c @@ -0,0 +1,68 @@ +#include +#include + +extern unsigned int __kexec_end[]; + +void +pmm_arch_init_pool(struct pmem* memory) +{ + pmm_declare_pool(POOL_UNIFIED, 1, memory->list_len); +} + +ptr_t 
+pmm_arch_init_remap(struct pmem* memory, struct boot_handoff* bctx) +{ + size_t ppfn_total = pfn(bctx->mem.size) + 1; + size_t pool_size = ppfn_total * sizeof(struct ppage); + + size_t i = 0; + struct boot_mmapent* ent; + for (; i < bctx->mem.mmap_len; i++) { + ent = &bctx->mem.mmap[i]; + if (free_memregion(ent) && ent->size > pool_size) { + goto found; + } + } + + // fail to find a viable free region to host pplist + return 0; + +found:; + ptr_t kexec_end = to_kphysical(__kexec_end); + ptr_t aligned_pplist = MAX(ent->start, kexec_end); + + // FIXME this is a temporary hack, we need a better way to convey + // the mem-map for us to settle the pplist safely + + for (i = 0; i mods.mods_num; i++) { + aligned_pplist = MAX(aligned_pplist, bctx->mods.entries[i].end); + } + + aligned_pplist = napot_upaligned(aligned_pplist, L0T_SIZE); + + if (aligned_pplist + pool_size > ent->start + ent->size) { + return 0; + } + + // for x86_32, the upper bound of memory requirement for pplist + // is sizeof(struct ppage) * 1MiB. 
For simplicity (as well as + // efficiency), we limit the granule to 4M huge page, thus, + // it will take away at least 4M worth of vm address resource + // regardless the actual physical memory size + + // anchor the pplist at vmap location (right after kernel) + memory->pplist = (struct ppage*)VMAP; + memory->list_len = ppfn_total; + + pfn_t nhuge = page_count(pool_size, L0T_SIZE); + pte_t* ptep = mkl0tep_va(VMS_SELF, VMAP); + pte_t pte = mkpte(aligned_pplist, KERNEL_DATA); + + vmm_set_ptes_contig(ptep, pte_mkhuge(pte), L0T_SIZE, nhuge); + tlb_flush_kernel(VMAP); + + // shift the actual vmap start address + vmap_set_start(VMAP + nhuge * L0T_SIZE); + + return aligned_pplist; +} \ No newline at end of file diff --git a/lunaix-os/arch/i386/mm/tlb.c b/lunaix-os/arch/i386/mm/tlb.c new file mode 100644 index 0000000..2ee5d48 --- /dev/null +++ b/lunaix-os/arch/i386/mm/tlb.c @@ -0,0 +1,34 @@ +#include +#include + +void +tlb_flush_mm(struct proc_mm* mm, ptr_t addr) +{ + __tlb_flush_asid(procvm_asid(mm), addr); +} + +void +tlb_flush_mm_range(struct proc_mm* mm, ptr_t addr, unsigned int npages) +{ + tlb_flush_asid_range(procvm_asid(mm), addr, npages); +} + + +void +tlb_flush_vmr(struct mm_region* vmr, ptr_t va) +{ + __tlb_flush_asid(procvm_asid(vmr->proc_vms), va); +} + +void +tlb_flush_vmr_all(struct mm_region* vmr) +{ + tlb_flush_asid_range(procvm_asid(vmr->proc_vms), + vmr->start, leaf_count(vmr->end - vmr->start)); +} + +void +tlb_flush_vmr_range(struct mm_region* vmr, ptr_t addr, unsigned int npages) +{ + tlb_flush_asid_range(procvm_asid(vmr->proc_vms), addr, npages); +} \ No newline at end of file diff --git a/lunaix-os/arch/i386/mm/vmutils.c b/lunaix-os/arch/i386/mm/vmutils.c index d7a98ea..c787892 100644 --- a/lunaix-os/arch/i386/mm/vmutils.c +++ b/lunaix-os/arch/i386/mm/vmutils.c @@ -1,23 +1,28 @@ -#include -#include +#include #include -ptr_t -vmm_dup_page(ptr_t pa) +struct leaflet* +dup_leaflet(struct leaflet* leaflet) { - ptr_t new_ppg = pmm_alloc_page(0); - 
mount_page(PG_MOUNT_3, new_ppg); - mount_page(PG_MOUNT_4, pa); + ptr_t dest_va, src_va; + struct leaflet* new_leaflet; + + new_leaflet = alloc_leaflet(leaflet_order(leaflet)); + + src_va = leaflet_mount(leaflet); + dest_va = vmap(new_leaflet, KERNEL_DATA); + + size_t cnt_wordsz = leaflet_size(new_leaflet) / sizeof(ptr_t); asm volatile("movl %1, %%edi\n" "movl %2, %%esi\n" - "rep movsl\n" ::"c"(1024), - "r"(PG_MOUNT_3), - "r"(PG_MOUNT_4) + "rep movsl\n" ::"c"(cnt_wordsz), + "r"(dest_va), + "r"(src_va) : "memory", "%edi", "%esi"); - unmount_page(PG_MOUNT_3); - unmount_page(PG_MOUNT_4); + leaflet_unmount(leaflet); + vunmap(dest_va, new_leaflet); - return new_ppg; + return new_leaflet; } \ No newline at end of file diff --git a/lunaix-os/config.h b/lunaix-os/config.h new file mode 100644 index 0000000..1d5386d --- /dev/null +++ b/lunaix-os/config.h @@ -0,0 +1,19 @@ +#ifndef __LUNAIX_CONFIG_H +#define __LUNAIX_CONFIG_H + +// #define CONFIG_PMALLOC_BUDDY +// #define CONFIG_PMALLOC_NCONTIG +#define CONFIG_PMALLOC_SIMPLE + +#define CONFIG_PMALLOC_SIMPLE_PO0_THRES 4096 +#define CONFIG_PMALLOC_SIMPLE_PO1_THRES 2048 +#define CONFIG_PMALLOC_SIMPLE_PO2_THRES 2048 +#define CONFIG_PMALLOC_SIMPLE_PO3_THRES 2048 +#define CONFIG_PMALLOC_SIMPLE_PO4_THRES 512 +#define CONFIG_PMALLOC_SIMPLE_PO5_THRES 512 +#define CONFIG_PMALLOC_SIMPLE_PO6_THRES 128 +#define CONFIG_PMALLOC_SIMPLE_PO7_THRES 128 +#define CONFIG_PMALLOC_SIMPLE_PO8_THRES 64 +#define CONFIG_PMALLOC_SIMPLE_PO9_THRES 16 + +#endif /* __LUNAIX_CONFIG_H */ diff --git a/lunaix-os/hal/ahci/ahci.c b/lunaix-os/hal/ahci/ahci.c index 0f1ebf3..11e1621 100644 --- a/lunaix-os/hal/ahci/ahci.c +++ b/lunaix-os/hal/ahci/ahci.c @@ -21,9 +21,8 @@ #include #include #include -#include #include -#include +#include #include #include @@ -128,16 +127,19 @@ ahci_driver_init(struct ahci_driver_param* param) __hba_reset_port(port_regs); #endif + struct leaflet* leaflet; if (!clbp) { // 每页最多4个命令队列 - clb_pa = pmm_alloc_page(PP_FGLOCKED); - clb_pg_addr = 
(ptr_t)ioremap(clb_pa, 0x1000); + leaflet = alloc_leaflet(0); + clb_pa = leaflet_addr(leaflet); + clb_pg_addr = vmap(leaflet, KERNEL_DATA); memset((void*)clb_pg_addr, 0, 0x1000); } if (!fisp) { // 每页最多16个FIS - fis_pa = pmm_alloc_page(PP_FGLOCKED); - fis_pg_addr = (ptr_t)ioremap(fis_pa, 0x1000); + leaflet = alloc_leaflet(0); + fis_pa = leaflet_addr(leaflet); + fis_pg_addr = vmap(leaflet, KERNEL_DATA); memset((void*)fis_pg_addr, 0, 0x1000); } diff --git a/lunaix-os/includes/lunaix/boot_generic.h b/lunaix-os/includes/lunaix/boot_generic.h index a229c45..dca7b9a 100644 --- a/lunaix-os/includes/lunaix/boot_generic.h +++ b/lunaix-os/includes/lunaix/boot_generic.h @@ -69,6 +69,25 @@ boot_end(struct boot_handoff*); void boot_cleanup(); + +static inline bool +free_memregion(struct boot_mmapent* mmapent) +{ + return !mmapent->type; +} + +static inline bool +reserved_memregion(struct boot_mmapent* mmapent) +{ + return !!(mmapent->type & BOOT_MMAP_RSVD); +} + +static inline bool +reclaimable_memregion(struct boot_mmapent* mmapent) +{ + return !!(mmapent->type & BOOT_MMAP_RCLM); +} + #endif #endif /* __LUNAIX_BOOT_GENERIC_H */ diff --git a/lunaix-os/includes/lunaix/compiler.h b/lunaix-os/includes/lunaix/compiler.h index a0b8374..78387bd 100644 --- a/lunaix-os/includes/lunaix/compiler.h +++ b/lunaix-os/includes/lunaix/compiler.h @@ -28,6 +28,9 @@ #define compact __attribute__((packed)) #define align(v) __attribute__((aligned (v))) +#define cacheline_size 64 +#define cacheline_align align(cacheline_size) + #define export_symbol(domain, namespace, symbol)\ typeof(symbol)* must_emit __SYMEXPORT_Z##domain##_N##namespace##_S##symbol = &(symbol) diff --git a/lunaix-os/includes/lunaix/failsafe.h b/lunaix-os/includes/lunaix/failsafe.h new file mode 100644 index 0000000..7c72e2f --- /dev/null +++ b/lunaix-os/includes/lunaix/failsafe.h @@ -0,0 +1,9 @@ +#ifndef __LUNAIX_FAILSAFE_H +#define __LUNAIX_FAILSAFE_H + +#include + +void +do_failsafe_unrecoverable(ptr_t frame_link, ptr_t 
stack_link); + +#endif /* __LUNAIX_FAILSAFE_H */ diff --git a/lunaix-os/includes/lunaix/mm/fault.h b/lunaix-os/includes/lunaix/mm/fault.h index 77aab84..3367841 100644 --- a/lunaix-os/includes/lunaix/mm/fault.h +++ b/lunaix-os/includes/lunaix/mm/fault.h @@ -2,8 +2,9 @@ #define __LUNAIX_FAULT_H #include -#include +#include #include +#include #define RESOLVE_OK ( 0b000001 ) #define NO_PREALLOC ( 0b000010 ) @@ -24,12 +25,15 @@ struct fault_context ptr_t fault_refva; // referneced va, for ptep fault, equals to fault_va otherwise pte_t resolving; // the pte that will resolve the fault - ptr_t prealloc_pa; // preallocated physical page in-case of empty fault-pte + struct leaflet* prealloc; // preallocated physical page in-case of empty fault-pte - bool kernel_vmfault:1; // faulting address that is kernel - bool ptep_fault:1; // faulting address is a ptep - bool remote_fault:1; // referenced faulting address is remote vms - bool kernel_access:1; // kernel mem access causing the fault + struct + { + bool kernel_vmfault:1; // faulting address that is kernel + bool ptep_fault:1; // faulting address is a ptep + bool remote_fault:1; // referenced faulting address is remote vms + bool kernel_access:1; // kernel mem access causing the fault + }; struct proc_mm* mm; // process memory space associated with fault, might be remote struct mm_region* vmr; @@ -41,9 +45,8 @@ bool __arch_prepare_fault_context(struct fault_context* context); static inline void -fault_resolved(struct fault_context* fault, pte_t resolved, int flags) +fault_resolved(struct fault_context* fault, int flags) { - fault->resolving = resolved; fault->resolve_type |= (flags | RESOLVE_OK); } #endif /* __LUNAIX_FAULT_H */ diff --git a/lunaix-os/includes/lunaix/mm/page.h b/lunaix-os/includes/lunaix/mm/page.h new file mode 100644 index 0000000..23d5c64 --- /dev/null +++ b/lunaix-os/includes/lunaix/mm/page.h @@ -0,0 +1,299 @@ +#ifndef __LUNAIX_PAGE_H +#define __LUNAIX_PAGE_H + +#include +#include +#include + +#include + 
+/** + * @brief A leaflet represent a bunch 4k ppage + * as single multi-ordered page, as such + * big page can seen as an unfolded version + * of these small 4k ppages hence the name. + * It is introduced to solve the issue that + * is discovered during refactoring - It is + * jolly unclear whether the ppage is a head, + * tail, or even worse, the middle one, when + * passing around between functions. + * This concept is surprisingly similar to + * Linux's struct folio (I swear to the + * Almighty Princess of the Sun, Celestia, + * that I don't quite understand what folio + * is until I've wrote the conceptually same + * thing) + * + */ +struct leaflet +{ + struct ppage lead_page; +}; + +static inline struct leaflet* +get_leaflet(struct ppage* page) +{ + return (struct leaflet*)leading_page(page); +} + +static inline struct ppage* +get_ppage(struct leaflet* leaflet) +{ + return (struct ppage*)leaflet; +} + +static inline struct leaflet* +alloc_leaflet(int order) +{ + return (struct leaflet*)pmm_alloc_napot_type(POOL_UNIFIED, order, 0); +} + +static inline struct leaflet* +alloc_leaflet_pinned(int order) +{ + return (struct leaflet*)pmm_alloc_napot_type(POOL_UNIFIED, order, PP_FGLOCKED); +} + +static inline void +leaflet_borrow(struct leaflet* leaflet) +{ + struct ppage* const page = get_ppage(leaflet); + assert(page->refs); + if (reserved_page(page)) { + return; + } + + page->refs++; +} + +static inline void +leaflet_return(struct leaflet* leaflet) +{ + struct ppage* const page = get_ppage(leaflet); + assert(page->refs); + pmm_free_one(page, 0); +} + +static inline unsigned int +leaflet_refcount(struct leaflet* leaflet) +{ + return get_ppage(leaflet)->refs; +} + +static inline int +leaflet_order(struct leaflet* leaflet) +{ + return ppage_order(get_ppage(leaflet)); +} + +static inline int +leaflet_size(struct leaflet* leaflet) +{ + return PAGE_SIZE << leaflet_order(leaflet); +} + +static inline int +leaflet_nfold(struct leaflet* leaflet) +{ + return 1 << 
leaflet_order(leaflet); +} + +static inline struct leaflet* +ppfn_leaflet(pfn_t ppfn) +{ + return get_leaflet(ppage(ppfn)); +} + +static inline struct leaflet* +pte_leaflet(pte_t pte) +{ + struct ppage* ppfn = ppage(pfn(pte_paddr(pte))); + return get_leaflet(ppfn); +} + +static inline struct leaflet* +pte_leaflet_aligned(pte_t pte) +{ + struct ppage* ppfn = ppage(pfn(pte_paddr(pte))); + struct leaflet* _l = get_leaflet(ppfn); + + assert((ptr_t)_l == (ptr_t)ppfn); + return _l; +} + +static inline pfn_t +leaflet_ppfn(struct leaflet* leaflet) +{ + return ppfn(get_ppage(leaflet)); +} + +static inline ptr_t +leaflet_addr(struct leaflet* leaflet) +{ + return page_addr(ppfn(get_ppage(leaflet))); +} + +static inline void +unpin_leaflet(struct leaflet* leaflet) +{ + change_page_type(get_ppage(leaflet), 0); +} + +static inline void +pin_leaflet(struct leaflet* leaflet) +{ + change_page_type(get_ppage(leaflet), PP_FGLOCKED); +} + +/** + * @brief Map a leaflet + * + * @param ptep + * @param leaflet + * @return pages folded into that leaflet + */ +static inline size_t +ptep_map_leaflet(pte_t* ptep, pte_t pte, struct leaflet* leaflet) +{ + // We do not support huge leaflet yet + assert(leaflet_order(leaflet) < LEVEL_SHIFT); + + pte = pte_setppfn(pte, leaflet_ppfn(leaflet)); + pte = pte_mkloaded(pte); + + int n = leaflet_nfold(leaflet); + vmm_set_ptes_contig(ptep, pte, LFT_SIZE, n); + + return n; +} + +/** + * @brief Unmap a leaflet + * + * @param ptep + * @param leaflet + * @return pages folded into that leaflet + */ +static inline size_t +ptep_unmap_leaflet(pte_t* ptep, struct leaflet* leaflet) +{ + // We do not support huge leaflet yet + assert(leaflet_order(leaflet) < LEVEL_SHIFT); + + int n = leaflet_nfold(leaflet); + vmm_unset_ptes(ptep, n); + + return n; +} + +static inline ptr_t +leaflet_mount(struct leaflet* leaflet) +{ + pte_t* ptep = mkptep_va(VMS_SELF, PG_MOUNT_VAR); + ptep_map_leaflet(ptep, mkpte_prot(KERNEL_DATA), leaflet); + + tlb_flush_kernel_ranged(PG_MOUNT_VAR, 
leaflet_nfold(leaflet)); + + return PG_MOUNT_VAR; +} + +static inline void +leaflet_unmount(struct leaflet* leaflet) +{ + pte_t* ptep = mkptep_va(VMS_SELF, PG_MOUNT_VAR); + vmm_unset_ptes(ptep, leaflet_nfold(leaflet)); + + tlb_flush_kernel_ranged(PG_MOUNT_VAR, leaflet_nfold(leaflet)); +} + +static inline void +leaflet_fill(struct leaflet* leaflet, unsigned int val) +{ + ptr_t mnt; + + mnt = leaflet_mount(leaflet); + memset((void*)mnt, val, leaflet_size(leaflet)); + leaflet_unmount(leaflet); +} + +static inline void +leaflet_wipe(struct leaflet* leaflet) +{ + leaflet_fill(leaflet, 0); +} + +/** + * @brief Duplicate the leaflet + * + * @return Duplication of given leaflet + * + */ +struct leaflet* +dup_leaflet(struct leaflet* leaflet); + + +/** + * @brief Maps a number of contiguous ptes in kernel + * address space + * + * @param pte the pte to be mapped + * @param lvl_size size of the page pointed by the given pte + * @param n number of ptes + * @return ptr_t + */ +ptr_t +vmap_ptes_at(pte_t pte, size_t lvl_size, int n); + +/** + * @brief Maps a number of contiguous ptes in kernel + * address space (leaf page size) + * + * @param pte the pte to be mapped + * @param n number of ptes + * @return ptr_t + */ +static inline ptr_t +vmap_leaf_ptes(pte_t pte, int n) +{ + return vmap_ptes_at(pte, LFT_SIZE, n); +} + +/** + * @brief Maps a contiguous range of physical address + * into kernel address space (leaf page size) + * + * @param paddr start of the physical address range + * @param size size of the physical range + * @param prot default protection to be applied + * @return ptr_t + */ +static inline ptr_t +vmap(struct leaflet* leaflet, pte_attr_t prot) +{ + pte_t _pte = mkpte(page_addr(leaflet_ppfn(leaflet)), prot); + return vmap_ptes_at(_pte, LFT_SIZE, leaflet_nfold(leaflet)); +} + +void +vunmap(ptr_t ptr, struct leaflet* leaflet); + +static inline ptr_t +vmap_range(pfn_t start, size_t npages, pte_attr_t prot) +{ + pte_t _pte = mkpte(page_addr(start), prot); + return 
vmap_ptes_at(_pte, LFT_SIZE, npages); +} + + +/** + * @brief Allocate a page in kernel space. + * + * @param ptep + * @param pte + * @param order + * @return pte_t + */ +pte_t +alloc_kpage_at(pte_t* ptep, pte_t pte, int order); + +#endif /* __LUNAIX_PAGE_H */ diff --git a/lunaix-os/includes/lunaix/mm/pagetable.h b/lunaix-os/includes/lunaix/mm/pagetable.h index 4c2fce5..f289b7a 100644 --- a/lunaix-os/includes/lunaix/mm/pagetable.h +++ b/lunaix-os/includes/lunaix/mm/pagetable.h @@ -134,8 +134,8 @@ typedef struct __pte pte_t; ((ptr_t)(ptep) & __p) == __p; \ }) -pte_t -vmm_alloc_page(pte_t* ptep, pte_t pte); +extern pte_t +alloc_kpage_at(pte_t* ptep, pte_t pte, int order); /** * @brief Try page walk to the pte pointed by ptep and @@ -155,7 +155,7 @@ __alloc_level(pte_t* ptep, pte_t pte, pte_attr_t prot) } pte = pte_setprot(pte, prot); - return !pte_isnull(vmm_alloc_page(ptep, pte)); + return !pte_isnull(alloc_kpage_at(ptep, pte, 0)); } /** @@ -432,15 +432,25 @@ page_addr(ptr_t pfn) { } static inline ptr_t -va_align(ptr_t va) { +page_aligned(ptr_t va) { return va & ~PAGE_MASK; } static inline ptr_t -va_alignup(ptr_t va) { +page_upaligned(ptr_t va) { return (va + PAGE_MASK) & ~PAGE_MASK; } +static inline ptr_t +napot_aligned(ptr_t va, size_t napot_sz) { + return va & ~(napot_sz - 1); +} + +static inline ptr_t +napot_upaligned(ptr_t va, size_t napot_sz) { + return (va + napot_sz - 1) & ~(napot_sz - 1); +} + static inline pte_t* mkptep_va(ptr_t vm_mnt, ptr_t vaddr) { @@ -504,4 +514,16 @@ pt_last_level(int level) return level == _PTW_LEVEL - 1; } +static inline ptr_t +va_mntpoint(ptr_t va) +{ + return _VM_OF(va); +} + +static inline ptr_t +va_actual(ptr_t va) +{ + return page_addr(_VM_OF(va) ^ va); +} + #endif /* __LUNAIX_PAGETABLE_H */ diff --git a/lunaix-os/includes/lunaix/mm/physical.h b/lunaix-os/includes/lunaix/mm/physical.h new file mode 100644 index 0000000..3eac5a1 --- /dev/null +++ b/lunaix-os/includes/lunaix/mm/physical.h @@ -0,0 +1,54 @@ +#ifndef 
__LUNAIX_PHYSICAL_H +#define __LUNAIX_PHYSICAL_H + +#include +#include + +/** + * @brief 长久页:不会被缓存,但允许释放 + * + */ +#define PP_FGPERSIST 0b0001 + +/** + * @brief 锁定页:不会被缓存,默认不可释放 + * + */ +#define PP_FGLOCKED 0b0011 + +/** + * @brief 预留页:不会被缓存,永远不可释放 + * + */ +#define PP_RESERVED 0b1000 + +struct ppage_arch; + +struct ppage +{ + unsigned int refs; + union { + struct { + union { + struct { + unsigned char flags:2; + unsigned char order:6; + }; + unsigned char top_flags; + }; + struct { + unsigned char pool:4; + unsigned char type:4; + }; + }; + unsigned short attr; + }; + unsigned short companion; + + struct llist_header sibs; + + struct ppage_arch arch; +} align(16); + + +#endif /* __LUNAIX_PHYSICAL_H */ diff --git a/lunaix-os/includes/lunaix/mm/pmm.h b/lunaix-os/includes/lunaix/mm/pmm.h index fa22f7b..60a5c68 100644 --- a/lunaix-os/includes/lunaix/mm/pmm.h +++ b/lunaix-os/includes/lunaix/mm/pmm.h @@ -2,87 +2,108 @@ #define __LUNAIX_PMM_H // Physical memory manager +#include +#include #include -#include -#include +#include -#define PM_PAGE_SIZE 4096 -#define PM_BMP_MAX_SIZE (1024 * 1024) +enum { + POOL_UNIFIED, + POOL_COUNT +}; -/** - * @brief 长久页:不会被缓存,但允许释放 - * - */ -#define PP_FGPERSIST 0b00001 +typedef unsigned int ppage_type_t; -/** - * @brief 锁定页:不会被缓存,不能被释放 - * - */ -#define PP_FGLOCKED 0b00011 +// Maximum non-huge page order. 
+#define MAX_PAGE_ORDERS ( LEVEL_SHIFT - 1 ) -typedef u32_t pp_attr_t; +#define RESERVE_MARKER 0xf0f0f0f0 -struct pp_struct +struct pmem_pool { - u32_t ref_counts; - pp_attr_t attr; + int type; + struct ppage* pool_start; + struct ppage* pool_end; + +#if defined(CONFIG_PMALLOC_NCONTIG) + + struct llist_header idle_page; + struct llist_header busy_page; + +#elif defined(CONFIG_PMALLOC_BUDDY) + + struct llist_header idle_order[MAX_PAGE_ORDERS]; + +#elif defined(CONFIG_PMALLOC_SIMPLE) + + struct llist_header idle_order[MAX_PAGE_ORDERS]; + int count[MAX_PAGE_ORDERS]; + +#endif }; -/** - * @brief 标注物理页为可使用 - * - * @param ppn page number - */ -void -pmm_mark_page_free(ptr_t ppn); +struct pmem +{ + struct pmem_pool pool[POOL_COUNT]; -/** - * @brief 标注物理页为已占用 - * - * @param ppn - */ -void -pmm_mark_page_occupied(ptr_t ppn, pp_attr_t attr); + pfn_t list_len; + struct ppage* pplist; + struct llist_header reserved; +}; -/** - * @brief 标注多个连续的物理页为可用 - * - * @param start_ppn 起始PPN - * @param page_count 数量 - */ -void -pmm_mark_chunk_free(ptr_t start_ppn, size_t page_count); +static inline struct ppage* +ppage(pfn_t pfn) +{ + return (struct ppage*)(PPLIST_STARTVM) + pfn; +} -/** - * @brief 标注多个连续的物理页为已占用 - * - * @param start_ppn 起始PPN - * @param page_count 数量 - */ -void -pmm_mark_chunk_occupied(u32_t start_ppn, - size_t page_count, - pp_attr_t attr); +static inline struct ppage* +leading_page(struct ppage* page) { + return page - page->companion; +} -/** - * @brief 分配一个可用的物理页 - * - * @return void* 可用的页地址,否则为 NULL - */ -ptr_t -pmm_alloc_page(pp_attr_t attr); +static inline struct ppage* +ppage_of(struct pmem_pool* pool, pfn_t pfn) +{ + return pool->pool_start + pfn; +} + +static inline pfn_t +ppfn(struct ppage* page) +{ + return (pfn_t)((ptr_t)page - PPLIST_STARTVM) / sizeof(struct ppage); +} + +static inline pfn_t +ppfn_of(struct pmem_pool* pool, struct ppage* page) +{ + return (pfn_t)((ptr_t)page - (ptr_t)pool->pool_start) / sizeof(struct ppage); +} + +static inline ptr_t 
+ppage_addr(struct ppage* page) { + return ppfn(page) * PAGE_SIZE; +} + +static inline unsigned int +count_order(size_t page_count) { + unsigned int po = ILOG2(page_count); + assert(!(page_count % (1 << po))); + return po; +} + +static inline unsigned int +ppage_order(struct ppage* page) { + return page->order; +} + + +static inline bool +reserved_page(struct ppage* page) +{ + return page->refs == RESERVE_MARKER && page->type == PP_RESERVED; +} -/** - * @brief 分配一个连续的物理内存区域 - * - * @param owner - * @param num_pages 区域大小,单位为页 - * @param attr - * @return ptr_t - */ -ptr_t -pmm_alloc_cpage(size_t num_pages, pp_attr_t attr); /** * @brief 初始化物理内存管理器 @@ -90,48 +111,60 @@ pmm_alloc_cpage(size_t num_pages, pp_attr_t attr); * @param mem_upper_lim 最大可用内存地址 */ void -pmm_init(ptr_t mem_upper_lim); +pmm_init(struct boot_handoff* bctx); -struct pp_struct* -pmm_query(ptr_t pa); +ptr_t +pmm_arch_init_remap(struct pmem* memory, struct boot_handoff* bctx); -/** - * @brief Free physical page with given attributes - * - * @param page - * @return int - */ -int -pmm_free_one(ptr_t page, pp_attr_t attr_mask); +struct pmem_pool* +pmm_pool_get(int pool_index); -/** - * @brief Free a normal physical page - * - * @param page 页地址 - * @return 是否成功 - */ -static inline int -pmm_free_page(ptr_t page) +void +pmm_arch_init_pool(struct pmem* memory); + +bool +pmm_onhold_range(pfn_t start, size_t npages); + +bool +pmm_unhold_range(pfn_t start, size_t npages); + + +struct pmem_pool* +pmm_declare_pool(int pool, pfn_t start, pfn_t size); + +// ---- allocator specific ---- + +void +pmm_free_one(struct ppage* page, int type_mask); + +struct ppage* +pmm_alloc_napot_type(int pool, size_t order, ppage_type_t type); + +// ---- + +static inline struct ppage* +pmm_alloc_normal(size_t order) { - return pmm_free_one(page, 0); + return pmm_alloc_napot_type(POOL_UNIFIED, order, 0); } -/** - * @brief Free physical page regardless of it's attribute - * - * @param page - * @return int - */ -static inline int 
-pmm_free_any(ptr_t page) +static inline struct ppage* +pmm_alloc_locked(size_t order) +{ + return pmm_alloc_napot_type(POOL_UNIFIED, order, PP_FGLOCKED); +} + +static inline void +change_page_type(struct ppage* page, ppage_type_t type) { - return pmm_free_one(page, -1); + page->type = type; } -int -pmm_ref_page(ptr_t page); +static inline struct pmem_pool* +pmm_pool_lookup(struct ppage* page) +{ + return pmm_pool_get(page->pool); +} -void -pmm_set_attr(ptr_t page, pp_attr_t attr); #endif /* __LUNAIX_PMM_H */ diff --git a/lunaix-os/includes/lunaix/mm/vmm.h b/lunaix-os/includes/lunaix/mm/vmm.h index a3da8c0..92f7684 100644 --- a/lunaix-os/includes/lunaix/mm/vmm.h +++ b/lunaix-os/includes/lunaix/mm/vmm.h @@ -2,35 +2,9 @@ #define __LUNAIX_VMM_H #include +#include #include #include -// Virtual memory manager - -#define VMAP_NULL 0 - -/** - * @brief 映射模式:忽略已存在映射 - * - */ -#define VMAP_IGNORE 1 - -/** - * @brief 映射模式:不作实际映射。该功能用于预留出特定的地址空间 - * - */ -#define VMAP_NOMAP 2 - -/** - * @brief 映射页墙:将虚拟地址映射为页墙,忽略给定的物理地址和页属性 - * - */ -#define VMAP_GUARDPAGE 4 - -/** - * @brief 规定下一个可用页映射应当限定在指定的4MB地址空间内 - * - */ -#define VALLOC_PDE 1 /** * @brief 初始化虚拟内存管理器 @@ -39,18 +13,6 @@ void vmm_init(); -/** - * @brief 在指定地址空间中,添加一个映射 - * - * @param mnt 地址空间挂载点 - * @param va 虚拟地址 - * @param pa 物理地址 - * @param attr 映射属性 - * @return int - */ -int -vmm_set_mapping(ptr_t mnt, ptr_t va, ptr_t pa, pte_attr_t prot); - static inline void vmm_set_ptes_contig(pte_t* ptep, pte_t pte, size_t lvl_size, size_t n) { @@ -80,18 +42,6 @@ vmm_unset_ptes(pte_t* ptep, size_t n) } while (--n > 0); } - -/** - * @brief 删除一个映射 - * - * @param mnt - * @param pid - * @param va - * @return int - */ -ptr_t -vmm_del_mapping(ptr_t mnt, ptr_t va); - pte_t vmm_tryptep(pte_t* ptep, size_t lvl_size); @@ -112,16 +62,6 @@ vmm_lookupat(ptr_t mnt, ptr_t va, pte_t* pte_out) return !pte_isnull(pte); } - -/** - * @brief (COW) 为虚拟页创建副本。 - * - * @return void* 包含虚拟页副本的物理页地址。 - * - */ -ptr_t -vmm_dup_page(ptr_t pa); - /** * @brief 
挂载另一个虚拟地址空间至当前虚拟地址空间 * @@ -143,7 +83,8 @@ mount_page(ptr_t mnt, ptr_t pa) { assert(pa); pte_t* ptep = mkptep_va(VMS_SELF, mnt); set_pte(ptep, mkpte(pa, KERNEL_DATA)); - cpu_flush_page(mnt); + + tlb_flush_kernel(mnt); return mnt; } @@ -151,25 +92,11 @@ static inline ptr_t unmount_page(ptr_t mnt) { pte_t* ptep = mkptep_va(VMS_SELF, mnt); set_pte(ptep, null_pte); + + tlb_flush_kernel(mnt); return mnt; } -void* -vmm_ioremap(ptr_t paddr, size_t size); - -void* -vmm_next_free(ptr_t start, int options); - -/** - * @brief 将指定地址空间的虚拟地址转译为物理地址 - * - * @param mnt 地址空间锚定点 - * @param va 虚拟地址 - * @return void* - */ -ptr_t -vmm_v2pat(ptr_t mnt, ptr_t va); - /** * @brief 将当前地址空间的虚拟地址转译为物理地址。 * @@ -179,49 +106,11 @@ vmm_v2pat(ptr_t mnt, ptr_t va); static inline ptr_t vmm_v2p(ptr_t va) { - return vmm_v2pat(VMS_SELF, va); + pte_t* ptep = mkptep_va(VMS_SELF, va); + return pte_paddr(pte_at(ptep)) + va_offset(va); } -/** - * @brief Maps a number of contiguous ptes in kernel - * address space - * - * @param pte the pte to be mapped - * @param lvl_size size of the page pointed by the given pte - * @param n number of ptes - * @return ptr_t - */ -ptr_t -vmap_ptes_at(pte_t pte, size_t lvl_size, int n); - -/** - * @brief Maps a number of contiguous ptes in kernel - * address space (leaf page size) - * - * @param pte the pte to be mapped - * @param n number of ptes - * @return ptr_t - */ -static inline ptr_t -vmap_leaf_ptes(pte_t pte, int n) -{ - return vmap_ptes_at(pte, LFT_SIZE, n); -} - -/** - * @brief Maps a contiguous range of physical address - * into kernel address space (leaf page size) - * - * @param paddr start of the physical address range - * @param size size of the physical range - * @param prot default protection to be applied - * @return ptr_t - */ -static inline ptr_t -vmap(ptr_t paddr, size_t size, pte_attr_t prot) -{ - pte_t _pte = mkpte(paddr, prot); - return vmap_ptes_at(_pte, LFT_SIZE, leaf_count(size)); -} +void +vmap_set_start(ptr_t start_addr); #endif /* __LUNAIX_VMM_H 
*/ diff --git a/lunaix-os/includes/lunaix/mm/vmtlb.h b/lunaix-os/includes/lunaix/mm/vmtlb.h new file mode 100644 index 0000000..d4baf85 --- /dev/null +++ b/lunaix-os/includes/lunaix/mm/vmtlb.h @@ -0,0 +1,8 @@ +#ifndef __LUNAIX_TLB_H +#define __LUNAIX_TLB_H + +#include + +// TODO + +#endif /* __LUNAIX_TLB_H */ diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index f4a526f..ba73de0 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -182,6 +182,13 @@ vmregions(struct proc_info* proc) return proc ? &proc->mm->regions : NULL; } + +static inline unsigned int +procvm_asid(struct proc_mm* mm) +{ + return mm->proc->pid; +} + static inline void block_current_thread() { diff --git a/lunaix-os/kernel.mk b/lunaix-os/kernel.mk index fdc3203..9d6ca02 100644 --- a/lunaix-os/kernel.mk +++ b/lunaix-os/kernel.mk @@ -29,6 +29,7 @@ kinc_opts := $(addprefix -I,$(kinc_dirs)) CFLAGS += -include flags.h +CFLAGS += -include config.h %.S.o: %.S $(call status_,AS,$<) diff --git a/lunaix-os/kernel/boot_helper.c b/lunaix-os/kernel/boot_helper.c index f7ad158..7f324e4 100644 --- a/lunaix-os/kernel/boot_helper.c +++ b/lunaix-os/kernel/boot_helper.c @@ -6,6 +6,8 @@ #include #include +extern unsigned char __kexec_end[], __kexec_start[]; + /** * @brief Reserve memory for kernel bootstrapping initialization * @@ -23,24 +25,27 @@ boot_begin(struct boot_handoff* bhctx) vmm_set_ptes_contig(ptep, pte_mkhuge(pte), L0T_SIZE, count); - struct boot_mmapent *mmap = bhctx->mem.mmap, *mmapent; - for (size_t i = 0; i < bhctx->mem.mmap_len; i++) { - mmapent = &mmap[i]; - size_t size_pg = leaf_count(mmapent->size); - pfn_t start_pfn = pfn(mmapent->start); + // 将内核占据的页,包括前1MB,hhk_init 设为已占用 + size_t pg_count = leaf_count(to_kphysical(__kexec_end)); + pmm_onhold_range(0, pg_count); + + size_t i; + struct boot_mmapent* ent; + for (i = 0; i < bhctx->mem.mmap_len; i++) { + ent = &bhctx->mem.mmap[i]; - if (mmapent->type == BOOT_MMAP_FREE) { 
- pmm_mark_chunk_free(start_pfn, size_pg); - continue; + if (reserved_memregion(ent) || reclaimable_memregion(ent)) { + unsigned int counts = leaf_count(ent->size); + pmm_onhold_range(pfn(ent->start), counts); } } /* Reserve region for all loaded modules */ - for (size_t i = 0; i < bhctx->mods.mods_num; i++) { + for (i = 0; i < bhctx->mods.mods_num; i++) { struct boot_modent* mod = &bhctx->mods.entries[i]; unsigned int counts = leaf_count(mod->end - mod->start); - pmm_mark_chunk_occupied(pfn(mod->start), counts, PP_FGLOCKED); + pmm_onhold_range(pfn(mod->start), counts); } } @@ -54,17 +59,13 @@ extern u8_t __kboot_end; /* link/linker.ld */ void boot_end(struct boot_handoff* bhctx) { - struct boot_mmapent *mmap = bhctx->mem.mmap, *mmapent; + struct boot_mmapent* ent; for (size_t i = 0; i < bhctx->mem.mmap_len; i++) { - mmapent = &mmap[i]; - size_t size_pg = leaf_count(mmapent->size); - - if (mmapent->type == BOOT_MMAP_RCLM) { - pmm_mark_chunk_free(pfn(mmapent->start), size_pg); - } + ent = &bhctx->mem.mmap[i]; - if (mmapent->type == BOOT_MMAP_FREE) { - continue; + if (reclaimable_memregion(ent)) { + unsigned int counts = leaf_count(ent->size); + pmm_unhold_range(pfn(ent->start), counts); } } diff --git a/lunaix-os/kernel/debug/failsafe.c b/lunaix-os/kernel/debug/failsafe.c new file mode 100644 index 0000000..5fdbc5d --- /dev/null +++ b/lunaix-os/kernel/debug/failsafe.c @@ -0,0 +1,24 @@ +#include +#include +#include +#include +#include + +LOG_MODULE("NMM") + +void +do_failsafe_unrecoverable(ptr_t frame_link, ptr_t stack_link) +{ + ERROR("diagnositic mode"); + + ERROR("check: init stack: %s", + check_bootstack_sanity() ? 
"ok" : "smashing"); + + // TODO ...check other invariants + + ERROR("non recoverable: Nightmare Moon arrival."); + + trace_printstack(); + + spin(); +} \ No newline at end of file diff --git a/lunaix-os/kernel/debug/trace.c b/lunaix-os/kernel/debug/trace.c index 2e30b09..173b1cd 100644 --- a/lunaix-os/kernel/debug/trace.c +++ b/lunaix-os/kernel/debug/trace.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -6,6 +6,7 @@ #include #include +#include #include @@ -79,7 +80,9 @@ ksym_getstr(struct ksym_entry* sym) static inline bool valid_fp(ptr_t ptr) { ptr_t start = ROUNDUP(current_thread->kstack - KSTACK_SIZE, MEM_PAGE); - return start < ptr && ptr < current_thread->kstack; + + return (start < ptr && ptr < current_thread->kstack) + || arch_valid_fp(ptr); } int @@ -205,4 +208,6 @@ trace_printstack_isr(const isr_param* isrm) p = p->execp->saved_prev_ctx; } + + DEBUG("----- [trace end] -----\n"); } \ No newline at end of file diff --git a/lunaix-os/kernel/exe/elf32/ldelf32.c b/lunaix-os/kernel/exe/elf32/ldelf32.c index 64e0eba..69ae939 100644 --- a/lunaix-os/kernel/exe/elf32/ldelf32.c +++ b/lunaix-os/kernel/exe/elf32/ldelf32.c @@ -32,17 +32,17 @@ elf32_smap(struct load_context* ldctx, struct mmap_param param = { .vms_mnt = container->vms_mnt, .pvms = vmspace(container->proc), .proct = proct, - .offset = va_align(phdre->p_offset), - .mlen = va_alignup(phdre->p_memsz), + .offset = page_aligned(phdre->p_offset), + .mlen = page_upaligned(phdre->p_memsz), .flags = MAP_FIXED | MAP_PRIVATE, .type = REGION_TYPE_CODE }; struct mm_region* seg_reg; - int status = mmap_user(NULL, &seg_reg, va_align(va), elfile, &param); + int status = mmap_user(NULL, &seg_reg, page_aligned(va), elfile, &param); if (!status) { size_t next_addr = phdre->p_memsz + va; - ldctx->end = MAX(ldctx->end, va_alignup(next_addr)); + ldctx->end = MAX(ldctx->end, page_upaligned(next_addr)); ldctx->mem_sz += phdre->p_memsz; } else { // we probably fucked up our process diff --git 
a/lunaix-os/kernel/exe/exec.c b/lunaix-os/kernel/exe/exec.c index 6f2962e..6105c1c 100644 --- a/lunaix-os/kernel/exe/exec.c +++ b/lunaix-os/kernel/exe/exec.c @@ -120,7 +120,7 @@ exec_load(struct exec_container* container, struct v_file* executable) if (!argv_extra[1]) { // If loading a statically linked file, then heap remapping we can do, // otherwise delayed. - create_heap(vmspace(proc), va_align(container->exe.end)); + create_heap(vmspace(proc), page_aligned(container->exe.end)); } if (container->vms_mnt == VMS_SELF) { @@ -137,6 +137,9 @@ exec_load(struct exec_container* container, struct v_file* executable) memcpy((void*)ustack, (const void*)envp, envp_len); ustack = copy_to_ustack(ustack, (ptr_t*)ustack); + } else { + ustack -= sizeof(ptr_t); + *((ptr_t*)ustack) = 0; } if (argv) { @@ -144,6 +147,9 @@ exec_load(struct exec_container* container, struct v_file* executable) ustack -= argv_len; memcpy((void*)ustack, (const void**)argv, argv_len); + } else { + ustack -= sizeof(ptr_t); + *((ptr_t*)ustack) = 0; } for (size_t i = 0; i < 2 && argv_extra[i]; i++) { diff --git a/lunaix-os/kernel/fs/pcache.c b/lunaix-os/kernel/fs/pcache.c index 3e77cf5..26e7ac4 100644 --- a/lunaix-os/kernel/fs/pcache.c +++ b/lunaix-os/kernel/fs/pcache.c @@ -1,9 +1,8 @@ #include #include #include -#include +#include #include -#include #include #define PCACHE_DIRTY 0x1 @@ -21,22 +20,24 @@ __pcache_try_evict(struct lru_node* obj) static void pcache_free_page(void* va) { - ptr_t pa = vmm_del_mapping(VMS_SELF, (ptr_t)va); - pmm_free_page(pa); + pte_t* ptep = mkptep_va(VMS_SELF, (ptr_t)va); + pte_t pte = pte_at(ptep); + leaflet_return(pte_leaflet(pte)); } static void* pcache_alloc_page() { int i = 0; - ptr_t pp = pmm_alloc_page(0), va = 0; + ptr_t va = 0; + struct leaflet* leaflet = alloc_leaflet(0); - if (!pp) { + if (!leaflet) { return NULL; } - if (!(va = (ptr_t)vmap(pp, PAGE_SIZE, KERNEL_DATA))) { - pmm_free_page(pp); + if (!(va = (ptr_t)vmap(leaflet, KERNEL_DATA))) { + 
leaflet_return(leaflet); return NULL; } diff --git a/lunaix-os/kernel/fs/ramfs/ramfs.c b/lunaix-os/kernel/fs/ramfs/ramfs.c index 1a6826e..ca40efb 100644 --- a/lunaix-os/kernel/fs/ramfs/ramfs.c +++ b/lunaix-os/kernel/fs/ramfs/ramfs.c @@ -153,7 +153,7 @@ ramfs_mksymlink(struct v_inode* this, const char* target) assert(!(rinode->flags & RAMF_SYMLINK)); - size_t len = strlen(target); + size_t len = strlen(target) + 1; char* symlink = valloc(len); if (!symlink) { diff --git a/lunaix-os/kernel/kinit.c b/lunaix-os/kernel/kinit.c index b19dc30..528a738 100644 --- a/lunaix-os/kernel/kinit.c +++ b/lunaix-os/kernel/kinit.c @@ -36,15 +36,22 @@ kmem_init(struct boot_handoff* bhctx); void kernel_bootstrap(struct boot_handoff* bhctx) { - pmm_init(bhctx->mem.size); vmm_init(); + pmm_init(bhctx); + // now we can start reserving physical space + /* Begin kernel bootstrapping sequence */ boot_begin(bhctx); + tty_init(ioremap(0xB8000, PAGE_SIZE)); + /* Setup kernel memory layout and services */ kmem_init(bhctx); + // FIXME this goes to hal/gfxa + tty_set_theme(VGA_COLOR_WHITE, VGA_COLOR_BLACK); + boot_parse_cmdline(bhctx); /* Prepare stack trace environment */ @@ -54,10 +61,6 @@ kernel_bootstrap(struct boot_handoff* bhctx) invoke_init_function(on_earlyboot); - // FIXME this goes to hal/gfxa - tty_init(ioremap(0xB8000, PAGE_SIZE)); - tty_set_theme(VGA_COLOR_WHITE, VGA_COLOR_BLACK); - device_sysconf_load(); /* Get intc online, this is the cornerstone when initing devices */ @@ -122,11 +125,6 @@ spawn_lunad() void kmem_init(struct boot_handoff* bhctx) { - extern u8_t __kexec_end; - // 将内核占据的页,包括前1MB,hhk_init 设为已占用 - size_t pg_count = leaf_count((ptr_t)&__kexec_end - KERNEL_RESIDENT); - pmm_mark_chunk_occupied(0, pg_count, PP_FGLOCKED); - pte_t* ptep = mkptep_va(VMS_SELF, KERNEL_RESIDENT); ptep = mkl0tep(ptep); diff --git a/lunaix-os/kernel/lunad.c b/lunaix-os/kernel/lunad.c index 34368b6..1f2fb95 100644 --- a/lunaix-os/kernel/lunad.c +++ b/lunaix-os/kernel/lunad.c @@ -57,7 +57,7 @@ 
static void lunad_do_usr() { // No, these are not preemptive cpu_disable_interrupt(); - + if (!mount_bootmedium() || !exec_initd()) { fail("failed to initd"); } diff --git a/lunaix-os/kernel/mm/cake.c b/lunaix-os/kernel/mm/cake.c index 9707d52..d9ef381 100644 --- a/lunaix-os/kernel/mm/cake.c +++ b/lunaix-os/kernel/mm/cake.c @@ -12,8 +12,7 @@ #include #include -#include -#include +#include #include #include @@ -28,11 +27,12 @@ struct llist_header piles = { .next = &piles, .prev = &piles }; void* __alloc_cake(unsigned int cake_pg) { - ptr_t pa = (ptr_t)pmm_alloc_cpage(cake_pg, 0); - if (!pa) { + struct leaflet* leaflet = alloc_leaflet(count_order(cake_pg)); + if (!leaflet) { return NULL; } - return (void*)vmap(pa, cake_pg * PAGE_SIZE, KERNEL_DATA); + + return (void*)vmap(leaflet, KERNEL_DATA); } struct cake_s* @@ -117,6 +117,9 @@ cake_new_pile(char* name, { struct cake_pile* pile = (struct cake_pile*)cake_grab(&master_pile); + // must aligned to napot order! + assert(is_pot(pg_per_cake)); + __init_pile(pile, name, piece_size, pg_per_cake, options); return pile; diff --git a/lunaix-os/kernel/mm/fault.c b/lunaix-os/kernel/mm/fault.c index 6ba63a7..f55e489 100644 --- a/lunaix-os/kernel/mm/fault.c +++ b/lunaix-os/kernel/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -105,17 +106,28 @@ __prepare_fault_context(struct fault_context* fault) fault->resolving = pte_setprot(fault_pte, KERNEL_DATA); } + fault->resolving = pte_mkloaded(fault->resolving); fault->kernel_vmfault = kernel_vmfault; fault->kernel_access = kernel_context(fault->ictx); return true; } +static inline void +__flush_staled_tlb(struct fault_context* fault, struct leaflet* leaflet) +{ + tlb_flush_mm_range(fault->mm, fault->fault_va, leaflet_nfold(leaflet)); +} + static void __handle_conflict_pte(struct fault_context* fault) { - pte_t pte = fault->fault_pte; - ptr_t fault_pa = pte_paddr(pte); + pte_t pte; + struct leaflet *fault_leaflet, *duped_leaflet; + + pte = fault->fault_pte; + 
fault_leaflet = pte_leaflet(pte); + if (!pte_allow_user(pte)) { return; } @@ -124,14 +136,18 @@ __handle_conflict_pte(struct fault_context* fault) if (writable_region(fault->vmr)) { // normal page fault, do COW - // TODO makes `vmm_dup_page` arch-independent - ptr_t pa = (ptr_t)vmm_dup_page(fault_pa); + duped_leaflet = dup_leaflet(fault_leaflet); + + pte = pte_mkwritable(pte); + pte = pte_mkuntouch(pte); + pte = pte_mkclean(pte); - pmm_free_page(fault_pa); - pte_t new_pte = pte_setpaddr(pte, pa); - new_pte = pte_mkwritable(new_pte); + ptep_map_leaflet(fault->fault_ptep, pte, duped_leaflet); + __flush_staled_tlb(fault, duped_leaflet); - fault_resolved(fault, new_pte, NO_PREALLOC); + leaflet_return(fault_leaflet); + + fault_resolved(fault, NO_PREALLOC); } return; @@ -145,7 +161,13 @@ __handle_anon_region(struct fault_context* fault) pte_attr_t prot = region_pteprot(fault->vmr); pte = pte_setprot(pte, prot); - fault_resolved(fault, pte, 0); + // TODO Potentially we can get different order of leaflet here + struct leaflet* region_part = alloc_leaflet(0); + + ptep_map_leaflet(fault->fault_ptep, pte, region_part); + __flush_staled_tlb(fault, region_part); + + fault_resolved(fault, NO_PREALLOC); } @@ -156,21 +178,30 @@ __handle_named_region(struct fault_context* fault) struct v_file* file = vmr->mfile; pte_t pte = fault->resolving; - ptr_t fault_va = va_align(fault->fault_va); + ptr_t fault_va = page_aligned(fault->fault_va); u32_t mseg_off = (fault_va - vmr->start); u32_t mfile_off = mseg_off + vmr->foff; + // TODO Potentially we can get different order of leaflet here + struct leaflet* region_part = alloc_leaflet(0); + + pte = pte_setprot(pte, region_pteprot(vmr)); + ptep_map_leaflet(fault->fault_ptep, pte, region_part); + int errno = file->ops->read_page(file->inode, (void*)fault_va, mfile_off); if (errno < 0) { ERROR("fail to populate page (%d)", errno); + + ptep_unmap_leaflet(fault->fault_ptep, region_part); + leaflet_return(region_part); + return; } - pte_attr_t 
prot = region_pteprot(vmr); - pte = pte_setprot(pte, prot); + __flush_staled_tlb(fault, region_part); - fault_resolved(fault, pte, 0); + fault_resolved(fault, NO_PREALLOC); } static void @@ -181,8 +212,17 @@ __handle_kernel_page(struct fault_context* fault) return; } - fault_resolved(fault, fault->resolving, 0); - pmm_set_attr(fault->prealloc_pa, PP_FGPERSIST); + struct leaflet* leaflet = fault->prealloc; + + pin_leaflet(leaflet); + leaflet_wipe(leaflet); + + pte_t pte = fault->resolving; + ptep_map_leaflet(fault->fault_ptep, pte, leaflet); + + tlb_flush_kernel_ranged(fault->fault_va, leaflet_nfold(leaflet)); + + fault_resolved(fault, 0); } @@ -195,24 +235,20 @@ fault_prealloc_page(struct fault_context* fault) pte_t pte; - pte = vmm_alloc_page(fault->fault_ptep, fault->resolving); - if (pte_isnull(pte)) { + struct leaflet* leaflet = alloc_leaflet(0); + if (!leaflet) { return; } - fault->resolving = pte; - fault->prealloc_pa = pte_paddr(fault->resolving); - - pmm_set_attr(fault->prealloc_pa, 0); - cpu_flush_page(fault->fault_va); + fault->prealloc = leaflet; } static void noret __fail_to_resolve(struct fault_context* fault) { - if (fault->prealloc_pa) { - pmm_free_page(fault->prealloc_pa); + if (fault->prealloc) { + leaflet_return(fault->prealloc); } ERROR("(pid: %d) Segmentation fault on %p (%p,e=0x%x)", @@ -221,15 +257,16 @@ __fail_to_resolve(struct fault_context* fault) fault->fault_instn, fault->fault_data); - trace_printstack_isr(fault->ictx); if (fault->kernel_access) { // if a page fault from kernel is not resolvable, then // something must be went south FATAL("unresolvable page fault"); - unreachable; + failsafe_diagnostic(); } + trace_printstack_isr(fault->ictx); + thread_setsignal(current_thread, _SIGSEGV); schedule(); @@ -300,13 +337,8 @@ intr_routine_page_fault(const isr_param* param) } if ((fault.resolve_type & NO_PREALLOC)) { - if (fault.prealloc_pa) { - pmm_free_page(fault.prealloc_pa); + if (fault.prealloc) { + leaflet_return(fault.prealloc); } } - - 
set_pte(fault.fault_ptep, fault.resolving); - - cpu_flush_page(fault.fault_va); - cpu_flush_page((ptr_t)fault.fault_ptep); } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/mmap.c b/lunaix-os/kernel/mm/mmap.c index 3f273c0..10d4f2c 100644 --- a/lunaix-os/kernel/mm/mmap.c +++ b/lunaix-os/kernel/mm/mmap.c @@ -1,7 +1,6 @@ #include -#include +#include #include -#include #include #include #include @@ -71,6 +70,28 @@ mmap_user(void** addr_out, return mem_map(addr_out, created, addr, file, param); } +static void +__remove_ranged_mappings(pte_t* ptep, size_t npages) +{ + struct leaflet* leaflet; + pte_t pte; + for (size_t i = 0, n = 0; i < npages; i++, ptep++) { + pte = pte_at(ptep); + + set_pte(ptep, null_pte); + if (!pte_isloaded(pte)) { + continue; + } + + leaflet = pte_leaflet_aligned(pte); + leaflet_return(leaflet); + + n = ptep_unmap_leaflet(ptep, leaflet) - 1; + i += n; + ptep += n; + } +} + static ptr_t __mem_find_slot_backward(struct mm_region* lead, struct mmap_param* param, struct mm_region* anchor) { @@ -168,7 +189,7 @@ mem_map(void** addr_out, { assert_msg(addr, "addr can not be NULL"); - ptr_t last_end = USR_EXEC, found_loc = va_align(addr); + ptr_t last_end = USR_EXEC, found_loc = page_aligned(addr); struct mm_region *pos, *n; vm_regions_t* vm_regions = &param->pvms->regions; @@ -253,7 +274,7 @@ mem_sync_pages(ptr_t mnt, } pte_t* ptep = mkptep_va(mnt, start); - ptr_t va = va_align(start); + ptr_t va = page_aligned(start); for (; va < start + length; va += PAGE_SIZE, ptep++) { pte_t pte = vmm_tryptep(ptep, LFT_SIZE); @@ -268,7 +289,8 @@ mem_sync_pages(ptr_t mnt, region->mfile->ops->write_page(inode, (void*)va, offset); set_pte(ptep, pte_mkclean(pte)); - cpu_flush_page(va); + tlb_flush_vmr(region, va); + } else if ((options & MS_INVALIDATE)) { goto invalidate; } @@ -279,10 +301,12 @@ mem_sync_pages(ptr_t mnt, continue; + // FIXME what if mem_sync range does not aligned with + // a leaflet with order > 1 invalidate: set_pte(ptep, null_pte); - 
pmm_free_page(pte_paddr(pte)); - cpu_flush_page(va); + leaflet_return(pte_leaflet(pte)); + tlb_flush_vmr(region, va); } } @@ -325,15 +349,9 @@ mem_unmap_region(ptr_t mnt, struct mm_region* region) mem_sync_pages(mnt, region, region->start, pglen * PAGE_SIZE, 0); pte_t* ptep = mkptep_va(mnt, region->start); - for (size_t i = 0; i < pglen; i++, ptep++) { - pte_t pte = pte_at(ptep); - ptr_t pa = pte_paddr(pte); + __remove_ranged_mappings(ptep, pglen); - set_pte(ptep, null_pte); - if (pte_isloaded(pte)) { - pmm_free_page(pte_paddr(pte)); - } - } + tlb_flush_vmr_all(region); llist_delete(&region->head); region_release(region); @@ -404,12 +422,11 @@ __unmap_overlapped_cases(ptr_t mnt, } mem_sync_pages(mnt, vmr, vmr->start, umps_len, 0); - for (size_t i = 0; i < umps_len; i += PAGE_SIZE) { - ptr_t pa = vmm_del_mapping(mnt, vmr->start + i); - if (pa) { - pmm_free_page(pa); - } - } + + pte_t *ptep = mkptep_va(mnt, vmr->start); + __remove_ranged_mappings(ptep, leaf_count(umps_len)); + + tlb_flush_vmr_range(vmr, vmr->start, umps_len); vmr->start += displ; vmr->end -= shrink; @@ -431,7 +448,7 @@ int mem_unmap(ptr_t mnt, vm_regions_t* regions, ptr_t addr, size_t length) { length = ROUNDUP(length, PAGE_SIZE); - ptr_t cur_addr = va_align(addr); + ptr_t cur_addr = page_aligned(addr); struct mm_region *pos, *n; llist_for_each(pos, n, regions, head) diff --git a/lunaix-os/kernel/mm/mmio.c b/lunaix-os/kernel/mm/mmio.c index 1e262cc..b91ab0e 100644 --- a/lunaix-os/kernel/mm/mmio.c +++ b/lunaix-os/kernel/mm/mmio.c @@ -1,6 +1,5 @@ #include -#include -#include +#include #include void* @@ -8,24 +7,28 @@ ioremap(ptr_t paddr, u32_t size) { // FIXME implement a page policy interface allow to decouple the // arch-dependent caching behaviour - void* ptr = (void*)vmap(paddr, size, KERNEL_DATA); - if (ptr) { - pmm_mark_chunk_occupied(pfn(paddr), leaf_count(size), PP_FGLOCKED); - } + pfn_t start = pfn(paddr); + size_t npages = leaf_count(size); + + // Ensure the range is reservable (not already in 
use) + assert(pmm_onhold_range(start, npages)); - return ptr; + return (void*)vmap_range(start, npages, KERNEL_DATA); } void iounmap(ptr_t vaddr, u32_t size) { - pte_t* ptep = mkptep_va(VMS_SELF, vaddr); - for (size_t i = 0; i < size; i += PAGE_SIZE, ptep++) { - pte_t pte = pte_at(ptep); + // FIXME + fail("need fix"); - set_pte(ptep, null_pte); - if (pte_isloaded(pte)) - pmm_free_page(pte_paddr(pte)); - } + // pte_t* ptep = mkptep_va(VMS_SELF, vaddr); + // for (size_t i = 0; i < size; i += PAGE_SIZE, ptep++) { + // pte_t pte = pte_at(ptep); + + // set_pte(ptep, null_pte); + // if (pte_isloaded(pte)) + // return_page(ppage_pa(pte_paddr(pte))); + // } } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/page.c b/lunaix-os/kernel/mm/page.c new file mode 100644 index 0000000..7bccbb3 --- /dev/null +++ b/lunaix-os/kernel/mm/page.c @@ -0,0 +1,23 @@ +#include + +pte_t +alloc_kpage_at(pte_t* ptep, pte_t pte, int order) +{ + ptr_t va = ptep_va(ptep, LFT_SIZE); + + assert(kernel_addr(va)); + + struct leaflet* leaflet = alloc_leaflet_pinned(order); + + if (!leaflet) { + return null_pte; + } + + leaflet_wipe(leaflet); + + ptep_map_leaflet(ptep, pte, leaflet); + + tlb_flush_kernel_ranged(va, leaflet_nfold(leaflet)); + + return pte_at(ptep); +} \ No newline at end of file diff --git a/lunaix-os/kernel/mm/pmalloc_buddy.c b/lunaix-os/kernel/mm/pmalloc_buddy.c new file mode 100644 index 0000000..06ffc55 --- /dev/null +++ b/lunaix-os/kernel/mm/pmalloc_buddy.c @@ -0,0 +1,9 @@ +#include "pmm_internal.h" + +// Classic buddy allocator + +#ifdef CONFIG_PMALLOC_BUDDY + +// TODO + +#endif \ No newline at end of file diff --git a/lunaix-os/kernel/mm/pmalloc_ncontig.c b/lunaix-os/kernel/mm/pmalloc_ncontig.c new file mode 100644 index 0000000..08ad882 --- /dev/null +++ b/lunaix-os/kernel/mm/pmalloc_ncontig.c @@ -0,0 +1,9 @@ +#include "pmm_internal.h" + +// NContig Allocator (simplified boundary tag allocator) + +#ifdef CONFIG_PMALLOC_NCONTIG + +// TODO + +#endif \ No newline at end 
of file diff --git a/lunaix-os/kernel/mm/pmalloc_simple.c b/lunaix-os/kernel/mm/pmalloc_simple.c new file mode 100644 index 0000000..37a720b --- /dev/null +++ b/lunaix-os/kernel/mm/pmalloc_simple.c @@ -0,0 +1,204 @@ +#include +#include "pmm_internal.h" + +#ifdef CONFIG_PMALLOC_SIMPLE + +// Simple PM Allocator (segregated next fit) + +#define INIT_FLAG 0b10 + +static const int po_limit[] = { + CONFIG_PMALLOC_SIMPLE_PO0_THRES, + CONFIG_PMALLOC_SIMPLE_PO1_THRES, + CONFIG_PMALLOC_SIMPLE_PO2_THRES, + CONFIG_PMALLOC_SIMPLE_PO3_THRES, + CONFIG_PMALLOC_SIMPLE_PO4_THRES, + CONFIG_PMALLOC_SIMPLE_PO5_THRES, + CONFIG_PMALLOC_SIMPLE_PO6_THRES, + CONFIG_PMALLOC_SIMPLE_PO7_THRES, + CONFIG_PMALLOC_SIMPLE_PO8_THRES, + CONFIG_PMALLOC_SIMPLE_PO9_THRES, +}; + +static inline bool +__uninitialized_page(struct ppage* page) +{ + return !(page->flags & INIT_FLAG); +} + +static inline void +__set_page_initialized(struct ppage* page) +{ + page->flags |= INIT_FLAG; +} + +static inline void +__set_pages_uninitialized(struct ppage* lead) +{ + for (size_t i = 0; i < (1UL << lead->order); i++) + { + lead[i].flags &= ~INIT_FLAG; + } +} + +void +pmm_allocator_init(struct pmem* memory) +{ + // nothing todo +} + +void +pmm_allocator_init_pool(struct pmem_pool* pool) +{ + for (int i = 0; i < MAX_PAGE_ORDERS; i++) { + llist_init_head(&pool->idle_order[i]); + pool->count[i] = 0; + } + + struct ppage* pooled_page = pool->pool_start; + for (; pooled_page <= pool->pool_end; pooled_page++) { + *pooled_page = (struct ppage){ }; + } +} + +void +pmm_free_one(struct ppage* page, int type_mask) +{ + page = leading_page(page); + + assert(page->refs); + assert(!reserved_page(page)); + assert(!__uninitialized_page(page)); + + if (--page->refs) { + return; + } + + int order = page->order; + assert(order <= MAX_PAGE_ORDERS); + + struct pmem_pool* pool = pmm_pool_lookup(page); + struct llist_header* bucket = &pool->idle_order[order]; + + if (pool->count[order] < po_limit[order]) { + llist_append(bucket, &page->sibs); 
+ pool->count[order]++; + return; + } + + __set_pages_uninitialized(page); +} + +static pfn_t index = 0; + +struct ppage* +pmm_looknext(struct pmem_pool* pool, size_t order) +{ + struct ppage *lead, *tail = NULL; + pfn_t working = index; + size_t count, total; + size_t poolsz = ppfn_of(pool, pool->pool_end) + 1; + + total = 1 << order; + count = total; + do + { + tail = ppage_of(pool, working); + + if (__uninitialized_page(tail)) { + count--; + } + else { + count = total; + } + + working = (working + 1) % poolsz; + } while (count && working != index); + + index = working; + if (count) { + return NULL; + } + + lead = tail - total + 1; + for (size_t i = 0; i < total; i++) + { + struct ppage* page = &lead[i]; + page->order = order; + page->companion = i; + page->pool = pool->type; + llist_init_head(&page->sibs); + __set_page_initialized(page); + } + + return lead; +} + +struct ppage* +pmm_alloc_napot_type(int pool, size_t order, ppage_type_t type) +{ + assert(order <= MAX_PAGE_ORDERS); + + struct pmem_pool* _pool = pmm_pool_get(pool); + struct llist_header* bucket = &_pool->idle_order[order]; + + struct ppage* good_page = NULL; + if (!llist_empty(bucket)) { + (_pool->count[order])--; + good_page = list_entry(bucket->next, struct ppage, sibs); + llist_delete(&good_page->sibs); + } + else { + good_page = pmm_looknext(_pool, order); + } + + assert(good_page); + assert(!good_page->refs); + + good_page->refs = 1; + good_page->type = type; + + return good_page; +} + +bool +pmm_allocator_trymark_onhold(struct pmem_pool* pool, struct ppage* start, struct ppage* end) +{ + while (start <= end) { + if (__uninitialized_page(start)) { + set_reserved(start); + __set_page_initialized(start); + } + else if (!start->refs) { + struct ppage* lead = leading_page(start); + llist_delete(&lead->sibs); + + __set_pages_uninitialized(lead); + + continue; + } + else if (!reserved_page(start)) { + return false; + } + + start++; + } + + return true; +} + +bool +pmm_allocator_trymark_unhold(struct 
pmem_pool* pool, struct ppage* start, struct ppage* end) +{ + while (start <= end) { + if (!__uninitialized_page(start) && reserved_page(start)) { + __set_pages_uninitialized(start); + } + + start++; + } + + return true; +} + +#endif \ No newline at end of file diff --git a/lunaix-os/kernel/mm/pmm.c b/lunaix-os/kernel/mm/pmm.c index 72b57d7..f00f9c8 100644 --- a/lunaix-os/kernel/mm/pmm.c +++ b/lunaix-os/kernel/mm/pmm.c @@ -1,172 +1,111 @@ -#include #include #include #include -// This is a very large array... -static struct pp_struct pm_table[PM_BMP_MAX_SIZE]; -export_symbol(debug, pmm, pm_table); +#include "pmm_internal.h" -static ptr_t max_pg; -export_symbol(debug, pmm, max_pg); - -void -pmm_mark_page_free(ptr_t ppn) +static inline bool +__check_typemask(struct ppage* page, ppage_type_t typemask) { - if ((pm_table[ppn].attr & PP_FGLOCKED)) { - return; - } - pm_table[ppn].ref_counts = 0; + return !page->type || (page->type & typemask); } -void -pmm_mark_page_occupied(ptr_t ppn, pp_attr_t attr) -{ - pm_table[ppn] = - (struct pp_struct){ .ref_counts = 1, .attr = attr }; -} +static struct pmem memory; +export_symbol(debug, pmm, memory); void -pmm_mark_chunk_free(ptr_t start_ppn, size_t page_count) +pmm_init(struct boot_handoff* bctx) { - for (size_t i = start_ppn; i < start_ppn + page_count && i < max_pg; i++) { - if ((pm_table[i].attr & PP_FGLOCKED)) { - continue; - } - pm_table[i].ref_counts = 0; - } -} + ptr_t pplist_pa; -void -pmm_mark_chunk_occupied(u32_t start_ppn, - size_t page_count, - pp_attr_t attr) -{ - for (size_t i = start_ppn; i < start_ppn + page_count && i < max_pg; i++) { - pm_table[i] = - (struct pp_struct){ .ref_counts = 1, .attr = attr }; - } -} + llist_init_head(&memory.reserved); -// 我们跳过位于0x0的页。我们不希望空指针是指向一个有效的内存空间。 -#define LOOKUP_START 1 - -volatile size_t pg_lookup_ptr; + pplist_pa = pmm_arch_init_remap(&memory, bctx); + + if (!pplist_pa) { + spin(); + } -void -pmm_init(ptr_t mem_upper_lim) -{ - max_pg = pfn(mem_upper_lim); + 
pmm_arch_init_pool(&memory); - pg_lookup_ptr = LOOKUP_START; + pmm_allocator_init(&memory); - // mark all as occupied - for (size_t i = 0; i < PM_BMP_MAX_SIZE; i++) { - pm_table[i] = - (struct pp_struct){ .attr = 0, .ref_counts = 1 }; + for (size_t i = 0; i < POOL_COUNT; i++) + { + pmm_allocator_init_pool(&memory.pool[i]); } + + pfn_t pplist_size = memory.list_len * sizeof(struct ppage); + pmm_onhold_range(pfn(pplist_pa), leaf_count(pplist_size)); } -ptr_t -pmm_alloc_cpage(size_t num_pages, pp_attr_t attr) +static inline bool must_inline optimize("-fipa-cp-clone") +__pmm_mark_range(pfn_t start, size_t npages, const bool hold) { - size_t p1 = 0; - size_t p2 = 0; - - while (p2 < max_pg && p2 - p1 < num_pages) { - (!(&pm_table[p2])->ref_counts) ? (p2++) : (p1 = ++p2); + if (start >= memory.list_len) { + return true; } - if (p2 == max_pg && p2 - p1 < num_pages) { - return NULLPTR; - } + struct ppage *_start, *_end, + *_mark_start, *_mark_end; - pmm_mark_chunk_occupied(p1, num_pages, attr); + _start = ppage(start); + _end = ppage(start + npages - 1); + + struct pmem_pool* pool; + for (int i = 0; npages && i < POOL_COUNT; i++) { + pool = &memory.pool[i]; - return p1 << 12; -} + _mark_start = MAX(pool->pool_start, _start); + _mark_end = MIN(pool->pool_end, _end); + if (pool->pool_end < _mark_start || _mark_end < pool->pool_start) { + continue; + } -ptr_t -pmm_alloc_page(pp_attr_t attr) -{ - // Next fit approach. Maximize the throughput! 
- ptr_t good_page_found = (ptr_t)NULL; - size_t old_pg_ptr = pg_lookup_ptr; - size_t upper_lim = max_pg; - struct pp_struct* pm; - while (!good_page_found && pg_lookup_ptr < upper_lim) { - pm = &pm_table[pg_lookup_ptr]; - - if (!pm->ref_counts) { - *pm = (struct pp_struct){ .attr = attr, - .ref_counts = 1 }; - good_page_found = pg_lookup_ptr << 12; - break; + bool _r; + if (hold) { + _r = pmm_allocator_trymark_onhold(pool, _mark_start, _mark_end); } else { - pg_lookup_ptr++; - - // We've searched the interval [old_pg_ptr, max_pg) but failed - // may be chances in [1, old_pg_ptr) ? - // Let's find out! - if (pg_lookup_ptr >= upper_lim && old_pg_ptr != LOOKUP_START) { - upper_lim = old_pg_ptr; - pg_lookup_ptr = LOOKUP_START; - old_pg_ptr = LOOKUP_START; - } + _r = pmm_allocator_trymark_unhold(pool, _mark_start, _mark_end); } - } - return good_page_found; -} -int -pmm_free_one(ptr_t page, pp_attr_t attr_mask) -{ - pfn_t ppfn = pfn(page); - struct pp_struct* pm = &pm_table[ppfn]; - - assert(ppfn < max_pg && pm->ref_counts); - if (pm->attr && !(pm->attr & attr_mask)) { - return 0; + if (_r) + { + npages -= (ppfn(_mark_end) - ppfn(_mark_start)) + 1; + } } - pm->ref_counts--; - return 1; + return !npages; } -int -pmm_ref_page(ptr_t page) +bool +pmm_onhold_range(pfn_t start, size_t npages) { - u32_t ppn = pfn(page); - - if (ppn >= PM_BMP_MAX_SIZE) { - return 0; - } - - struct pp_struct* pm = &pm_table[ppn]; - assert(ppn < max_pg && pm->ref_counts); + return __pmm_mark_range(start, npages, true); +} - pm->ref_counts++; - return 1; +bool +pmm_unhold_range(pfn_t start, size_t npages) +{ + return __pmm_mark_range(start, npages, false); } -void -pmm_set_attr(ptr_t page, pp_attr_t attr) +struct pmem_pool* +pmm_pool_get(int pool_index) { - struct pp_struct* pp = &pm_table[pfn(page)]; - - if (pp->ref_counts) { - pp->attr = attr; - } + assert(pool_index < POOL_COUNT); + + return &memory.pool[pool_index]; } -struct pp_struct* -pmm_query(ptr_t pa) +struct pmem_pool* 
+pmm_declare_pool(int pool, pfn_t start, pfn_t size) { - u32_t ppn = pa >> 12; + struct pmem_pool* _pool = &memory.pool[pool]; - if (ppn >= PM_BMP_MAX_SIZE) { - return NULL; - } + _pool->type = POOL_UNIFIED; + _pool->pool_end = ppage(start + size - 1); + _pool->pool_start = ppage(start); - return &pm_table[ppn]; + return _pool; } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/pmm_internal.h b/lunaix-os/kernel/mm/pmm_internal.h new file mode 100644 index 0000000..cfdd265 --- /dev/null +++ b/lunaix-os/kernel/mm/pmm_internal.h @@ -0,0 +1,29 @@ +#ifndef __LUNAIX_PMM_ALLOC_H +#define __LUNAIX_PMM_ALLOC_H + +#include + +static inline void +set_reserved(struct ppage* page) +{ + page->refs = RESERVE_MARKER; + page->type = PP_RESERVED; + page->order = 0; +} + +void +pmm_allocator_init(struct pmem* memory); + +void +pmm_allocator_init_pool(struct pmem_pool* pool); + +void +pmm_allocator_add_freehole(struct pmem_pool* pool, struct ppage* start, struct ppage* end); + +bool +pmm_allocator_trymark_onhold(struct pmem_pool* pool, struct ppage* start, struct ppage* end); + +bool +pmm_allocator_trymark_unhold(struct pmem_pool* pool, struct ppage* start, struct ppage* end); + +#endif /* __LUNAIX_PMM_ALLOC_H */ diff --git a/lunaix-os/kernel/mm/procvm.c b/lunaix-os/kernel/mm/procvm.c index 7ba6f53..7b07560 100644 --- a/lunaix-os/kernel/mm/procvm.c +++ b/lunaix-os/kernel/mm/procvm.c @@ -1,8 +1,7 @@ #include #include #include -#include -#include +#include #include #include @@ -23,6 +22,13 @@ procvm_create(struct proc_info* proc) { return mm; } +static inline unsigned int +__ptep_advancement(struct leaflet* leaflet, int level) +{ + size_t shifts = MAX(MAX_LEVEL - level - 1, 1) * LEVEL_SHIFT; + return (1 << (leaflet_order(leaflet) % shifts)) - 1; +} + static ptr_t vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) { @@ -36,10 +42,10 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) pte_t* ptep_ssm = mkptep_va(VMS_SELF, (ptr_t)ptep_sms); pte_t pte_sms = 
mkpte_prot(KERNEL_DATA); - pte_sms = vmm_alloc_page(ptep_ssm, pte_sms); + pte_sms = alloc_kpage_at(ptep_ssm, pte_sms, 0); set_pte(ptep_sms, pte_sms); - cpu_flush_page((ptr_t)dest_mnt); + tlb_flush_kernel((ptr_t)dest_mnt); if (only_kernel) { ptep = ptep_kernel; @@ -50,10 +56,11 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) } int level = 0; + struct leaflet* leaflet; + while (ptep < ptep_kernel) { pte_t pte = *ptep; - ptr_t pa = pte_paddr(pte); if (pte_isnull(pte)) { goto cont; @@ -61,12 +68,18 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) if (pt_last_level(level) || pte_huge(pte)) { set_pte(ptep_dest, pte); - - if (pte_isloaded(pte)) - pmm_ref_page(pa); + + if (pte_isloaded(pte)) { + leaflet = pte_leaflet(pte); + assert(leaflet_refcount(leaflet)); + + if (leaflet_ppfn(leaflet) == pte_ppfn(pte)) { + leaflet_borrow(leaflet); + } + } } else if (!pt_last_level(level)) { - vmm_alloc_page(ptep_dest, pte); + alloc_kpage_at(ptep_dest, pte, 0); ptep = ptep_step_into(ptep); ptep_dest = ptep_step_into(ptep_dest); @@ -96,8 +109,14 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) pte_t pte = *ptep; assert(!pte_isnull(pte)); + // Ensure it is a next level pagetable, + // we MAY relax this later allow kernel + // to have huge leaflet mapped at L0T + leaflet = pte_leaflet_aligned(pte); + assert(leaflet_order(leaflet) == 0); + set_pte(ptep_dest, pte); - pmm_ref_page(pte_paddr(pte)); + leaflet_borrow(leaflet); ptep++; ptep_dest++; @@ -109,6 +128,7 @@ vmscpy(ptr_t dest_mnt, ptr_t src_mnt, bool only_kernel) static void vmsfree(ptr_t vm_mnt) { + struct leaflet* leaflet; pte_t* ptep_head = mkl0tep(mkptep_va(vm_mnt, 0)); pte_t* ptep_kernel = mkl0tep(mkptep_va(vm_mnt, KERNEL_RESIDENT)); @@ -130,21 +150,29 @@ vmsfree(ptr_t vm_mnt) continue; } - if (pte_isloaded(pte)) - pmm_free_any(pa); + if (pte_isloaded(pte)) { + leaflet = pte_leaflet_aligned(pte); + leaflet_return(leaflet); + + ptep += __ptep_advancement(leaflet, level); + } cont: if (ptep_vfn(ptep) == 
MAX_PTEN - 1) { ptep = ptep_step_out(ptep); - pmm_free_any(pte_paddr(pte_at(ptep))); + leaflet = pte_leaflet_aligned(pte_at(ptep)); + + assert(leaflet_order(leaflet) == 0); + leaflet_return(leaflet); + level--; } ptep++; } - ptr_t self_pa = pte_paddr(ptep_head[MAX_PTEN - 1]); - pmm_free_any(self_pa); + leaflet = pte_leaflet_aligned(ptep_head[MAX_PTEN - 1]); + leaflet_return(leaflet); } static inline void @@ -272,9 +300,9 @@ procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm, rvmctx->vms_mnt = vm_mnt; rvmctx->page_cnt = size_pn; - remote_base = va_align(remote_base); + remote_base = page_aligned(remote_base); rvmctx->remote = remote_base; - rvmctx->local_mnt = PG_MOUNT_4_END + 1; + rvmctx->local_mnt = PG_MOUNT_VAR; pte_t* rptep = mkptep_va(vm_mnt, remote_base); pte_t* lptep = mkptep_va(VMS_SELF, rvmctx->local_mnt); @@ -288,7 +316,7 @@ procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm, continue; } - ptr_t pa = pmm_alloc_page(0); + ptr_t pa = ppage_addr(pmm_alloc_normal(0)); set_pte(lptep, mkpte(pa, KERNEL_DATA)); set_pte(rptep, mkpte(pa, pattr)); } diff --git a/lunaix-os/kernel/mm/region.c b/lunaix-os/kernel/mm/region.c index 7dead6d..14cada1 100644 --- a/lunaix-os/kernel/mm/region.c +++ b/lunaix-os/kernel/mm/region.c @@ -130,7 +130,7 @@ region_get(vm_regions_t* lead, unsigned long vaddr) struct mm_region *pos, *n; - vaddr = va_align(vaddr); + vaddr = page_aligned(vaddr); llist_for_each(pos, n, lead, head) { diff --git a/lunaix-os/kernel/mm/vmap.c b/lunaix-os/kernel/mm/vmap.c index 02711fb..6ec714a 100644 --- a/lunaix-os/kernel/mm/vmap.c +++ b/lunaix-os/kernel/mm/vmap.c @@ -1,6 +1,5 @@ -#include +#include #include -#include #include #include @@ -9,6 +8,11 @@ static ptr_t start = VMAP; static volatile ptr_t prev_va = 0; +void +vmap_set_start(ptr_t start_addr) { + start = start_addr; +} + static pte_t* __alloc_contig_ptes(pte_t* ptep, size_t base_sz, int n) { @@ -49,7 +53,6 @@ __alloc_contig_ptes(pte_t* ptep, size_t base_sz, int n) } 
va -= base_sz * _n; - assert(prev_va < va); prev_va = va; return mkptep_va(ptep_vm_mnt(ptep), va); @@ -67,5 +70,25 @@ vmap_ptes_at(pte_t pte, size_t lvl_size, int n) vmm_set_ptes_contig(ptep, pte, lvl_size, n); - return page_addr(ptep_pfn(ptep)); + ptr_t va = page_addr(ptep_pfn(ptep)); + + tlb_flush_kernel_ranged(va, n); + + return va; +} + +void +vunmap(ptr_t ptr, struct leaflet* leaflet) +{ + pte_t* ptep; + unsigned int npages; + + assert(start <= ptr && ptr <= VMAP_END); + + npages = leaflet_nfold(leaflet); + ptep = mkptep_va(VMS_SELF, ptr); + + vmm_unset_ptes(ptep, npages); + + tlb_flush_kernel_ranged(ptr, npages); } \ No newline at end of file diff --git a/lunaix-os/kernel/mm/vmm.c b/lunaix-os/kernel/mm/vmm.c index 6b497b8..d7d504e 100644 --- a/lunaix-os/kernel/mm/vmm.c +++ b/lunaix-os/kernel/mm/vmm.c @@ -1,6 +1,5 @@ #include -#include -#include +#include #include #include @@ -15,54 +14,6 @@ vmm_init() // XXX: something here? } -pte_t -vmm_alloc_page(pte_t* ptep, pte_t pte) -{ - ptr_t pa = pmm_alloc_page(PP_FGPERSIST); - if (!pa) { - return null_pte; - } - - pte = pte_setpaddr(pte, pa); - pte = pte_mkloaded(pte); - set_pte(ptep, pte); - - mount_page(PG_MOUNT_1, pa); - memset((void*)PG_MOUNT_1, 0, LFT_SIZE); - unmount_page(PG_MOUNT_1); - - cpu_flush_page((ptr_t)ptep); - - return pte; -} - -int -vmm_set_mapping(ptr_t mnt, ptr_t va, ptr_t pa, pte_attr_t prot) -{ - assert(!va_offset(va)); - - pte_t* ptep = mkptep_va(mnt, va); - pte_t pte = mkpte(pa, prot); - - set_pte(ptep, pte); - - return 1; -} - -ptr_t -vmm_del_mapping(ptr_t mnt, ptr_t va) -{ - assert(!va_offset(va)); - - pte_t* ptep = mkptep_va(mnt, va); - - pte_t old = *ptep; - - set_pte(ptep, null_pte); - - return pte_paddr(old); -} - pte_t vmm_tryptep(pte_t* ptep, size_t lvl_size) { @@ -92,15 +43,6 @@ vmm_tryptep(pte_t* ptep, size_t lvl_size) return *_ptep; } -ptr_t -vmm_v2pat(ptr_t mnt, ptr_t va) -{ - ptr_t va_off = va_offset(va); - pte_t* ptep = mkptep_va(mnt, va); - - return pte_paddr(pte_at(ptep)) + 
va_off; -} - ptr_t vms_mount(ptr_t mnt, ptr_t vms_root) { @@ -108,7 +50,7 @@ vms_mount(ptr_t mnt, ptr_t vms_root) pte_t* ptep = mkl0tep_va(VMS_SELF, mnt); set_pte(ptep, mkpte(vms_root, KERNEL_DATA)); - cpu_flush_page(mnt); + tlb_flush_kernel(mnt); return mnt; } @@ -117,7 +59,7 @@ vms_unmount(ptr_t mnt) { pte_t* ptep = mkl0tep_va(VMS_SELF, mnt); set_pte(ptep, null_pte); - cpu_flush_page(mnt); + tlb_flush_kernel(mnt); return mnt; } diff --git a/lunaix-os/kernel/process/fork.c b/lunaix-os/kernel/process/fork.c index 9518c19..c106650 100644 --- a/lunaix-os/kernel/process/fork.c +++ b/lunaix-os/kernel/process/fork.c @@ -1,7 +1,6 @@ #include #include -#include -#include +#include #include #include #include @@ -31,8 +30,7 @@ region_maybe_cow(struct mm_region* region) for (size_t i = start_pn; i <= end_pn; i++) { pte_t* self = mkptep_pn(VMS_SELF, i); pte_t* guest = mkptep_pn(VMS_MOUNT_1, i); - - cpu_flush_page(page_addr(ptep_pfn(self))); + ptr_t va = page_addr(ptep_pfn(self)); if ((attr & REGION_MODE_MASK) == REGION_RSHARED) { set_pte(self, pte_mkwprotect(*self)); @@ -42,6 +40,8 @@ region_maybe_cow(struct mm_region* region) set_pte(guest, null_pte); } } + + tlb_flush_vmr_all(region); } static inline void @@ -59,6 +59,8 @@ __dup_fdtable(struct proc_info* pcb) static void __dup_kernel_stack(struct thread* thread, ptr_t vm_mnt) { + struct leaflet* leaflet; + ptr_t kstack_pn = pfn(current_thread->kstack); kstack_pn -= pfn(KSTACK_SIZE) - 1; @@ -71,13 +73,16 @@ __dup_kernel_stack(struct thread* thread, ptr_t vm_mnt) if (pte_isguardian(p)) { set_pte(dest_ptep, guard_pte); } else { - ptr_t ppa = vmm_dup_page(pte_paddr(p)); - set_pte(dest_ptep, pte_setpaddr(p, ppa)); + leaflet = dup_leaflet(pte_leaflet(p)); + i += ptep_map_leaflet(dest_ptep, p, leaflet); } src_ptep++; dest_ptep++; } + + struct proc_mm* mm = vmspace(thread->process); + tlb_flush_mm_range(mm, kstack_pn, leaf_count(KSTACK_SIZE)); } /* @@ -144,6 +149,9 @@ done: pid_t dup_proc() { + // FIXME need investigate: issue with 
fork, as well as pthread + // especially when involving frequent alloc and dealloc ops + // (could be issue in allocator's segregated free list) struct proc_info* pcb = alloc_process(); if (!pcb) { syscall_result(ENOMEM); @@ -169,7 +177,7 @@ dup_proc() struct proc_mm* mm = vmspace(pcb); procvm_dupvms_mount(mm); - struct thread* main_thread = dup_active_thread(VMS_MOUNT_1, pcb); + struct thread* main_thread = dup_active_thread(mm->vm_mnt, pcb); if (!main_thread) { syscall_result(ENOMEM); procvm_unmount(mm); diff --git a/lunaix-os/kernel/process/process.c b/lunaix-os/kernel/process/process.c index cdaa12a..818493a 100644 --- a/lunaix-os/kernel/process/process.c +++ b/lunaix-os/kernel/process/process.c @@ -64,7 +64,7 @@ spawn_process(struct thread** created, ptr_t entry, bool with_ustack) struct proc_mm* mm = vmspace(kproc); procvm_initvms_mount(mm); - + struct thread* kthread = create_thread(kproc, with_ustack); if (!kthread) { diff --git a/lunaix-os/kernel/process/thread.c b/lunaix-os/kernel/process/thread.c index e74718e..4ad4653 100644 --- a/lunaix-os/kernel/process/thread.c +++ b/lunaix-os/kernel/process/thread.c @@ -3,8 +3,7 @@ #include #include #include -#include -#include +#include #include #include @@ -64,17 +63,16 @@ __alloc_kernel_thread_stack(struct proc_info* proc, ptr_t vm_mnt) return 0; found:; - ptr_t pa = pmm_alloc_cpage(KSTACK_PAGES - 1, 0); + // KSTACK_PAGES = 3, removal one guardian pte, give order 1 page + struct leaflet* leaflet = alloc_leaflet(1); - if (!pa) { + if (!leaflet) { WARN("failed to create kernel stack: nomem\n"); return 0; } set_pte(ptep, guard_pte); - - pte_t pte = mkpte(pa, KERNEL_DATA); - vmm_set_ptes_contig(ptep + 1, pte, LFT_SIZE, KSTACK_PAGES - 1); + ptep_map_leaflet(ptep + 1, mkpte_prot(KERNEL_DATA), leaflet); ptep += KSTACK_PAGES; return align_stack(ptep_va(ptep, LFT_SIZE) - 1); @@ -83,6 +81,7 @@ found:; void thread_release_mem(struct thread* thread) { + struct leaflet* leaflet; struct proc_mm* mm = vmspace(thread->process); 
ptr_t vm_mnt = mm->vm_mnt; @@ -90,9 +89,13 @@ thread_release_mem(struct thread* thread) assert(vm_mnt); pte_t* ptep = mkptep_va(vm_mnt, thread->kstack); + leaflet = pte_leaflet(*ptep); ptep -= KSTACK_PAGES - 1; - vmm_unset_ptes(ptep, KSTACK_PAGES); + set_pte(ptep, null_pte); + ptep_unmap_leaflet(ptep + 1, leaflet); + + leaflet_return(leaflet); if (thread->ustack) { if ((thread->ustack->start & 0xfff)) { diff --git a/lunaix-os/kernel/spike.c b/lunaix-os/kernel/spike.c index 91fe9f6..4c1bb03 100644 --- a/lunaix-os/kernel/spike.c +++ b/lunaix-os/kernel/spike.c @@ -3,6 +3,7 @@ #include #include #include +#include LOG_MODULE("spike") @@ -13,9 +14,8 @@ __assert_fail(const char* expr, const char* file, unsigned int line) // the stack context being preserved cpu_disable_interrupt(); ERROR("assertion fail (%s:%u)\n\t%s", file, line, expr); - trace_printstack(); - - spin(); // never reach + + failsafe_diagnostic(); } void noret diff --git a/lunaix-os/link/linker.ld b/lunaix-os/link/linker.ld index 34833f0..93c9693 100644 --- a/lunaix-os/link/linker.ld +++ b/lunaix-os/link/linker.ld @@ -185,5 +185,14 @@ SECTIONS { *(.bss) } + .bss.kstack BLOCK(4K) : AT ( ADDR(.bss.kstack) - 0xC0000000) + { + PROVIDE(__bsskstack_start = .); + + *(.bss.kstack) + + PROVIDE(__bsskstack_end = .); + } + __kexec_end = ALIGN(4K); } \ No newline at end of file diff --git a/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py b/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py index 1e84d87..985e91b 100644 --- a/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py +++ b/lunaix-os/scripts/gdb/lunadbg/profiling/pmstat.py @@ -1,14 +1,15 @@ from ..symbols import LunaixSymbols from ..structs.page import PageStruct +from ..structs.pmem import PMem from ..pp import MyPrettyPrinter import math class PhysicalMemProfile: def __init__(self) -> None: super().__init__() - self._pm_list = LunaixSymbols.debug_sym("pmm", "pm_table") + self._pmem = PMem(LunaixSymbols.debug_sym("pmm", "memory").value().address) - 
self.max_mem_pg = int(LunaixSymbols.debug_sym("pmm", "max_pg").value()) + self.max_mem_pg = self._pmem.list_len() self.max_mem_sz = self.max_mem_pg * 4096 self.mem_distr = [] @@ -16,20 +17,22 @@ class PhysicalMemProfile: self.__mem_distr_granule = distr_granule self.mem_distr.clear() + pplist = self._pmem.pplist() page_per_granule = self.max_mem_pg / self.__mem_distr_granule page_per_granule = math.ceil(page_per_granule) remainder = self.max_mem_pg % self.__mem_distr_granule bucket = 0 non_contig = 0 last_contig = False + for i in range(self.max_mem_pg): - element = PageStruct(self._pm_list[i].address) - bucket += int(element.ref > 0) + element = PageStruct(pplist[i].address) + bucket += int(element.busy()) if last_contig: - last_contig = element.ref > 0 + last_contig = element.busy() non_contig += int(not last_contig) else: - last_contig = element.ref > 0 + last_contig = element.busy() if (i + 1) % page_per_granule == 0: self.mem_distr.append(bucket) diff --git a/lunaix-os/scripts/gdb/lunadbg/structs/page.py b/lunaix-os/scripts/gdb/lunadbg/structs/page.py index 01c08b2..6dc5bf9 100644 --- a/lunaix-os/scripts/gdb/lunadbg/structs/page.py +++ b/lunaix-os/scripts/gdb/lunadbg/structs/page.py @@ -4,10 +4,25 @@ from . 
import KernelStruct class PageStruct(KernelStruct): def __init__(self, gdb_inferior: Value) -> None: super().__init__(gdb_inferior, PageStruct) - self.ref = self._kstruct["ref_counts"] - self.attr = self._kstruct["attr"] + self.ref = self._kstruct["refs"] + self.type = self._kstruct["type"] + self.flags = self._kstruct["flags"] + self.order = self._kstruct["order"] + self.pool = self._kstruct["pool"] + + def uninitialized(self): + return not (self.flags & 0b10) + + def reserved(self): + return (not self.uninitialized() + and self.type == 0b1000 + and self.ref == 0xf0f0f0f0) + + def busy(self): + return (not self.uninitialized() + and self.ref > 0) @staticmethod def get_type() -> Type: - return lookup_type("struct pp_struct").pointer() + return lookup_type("struct ppage").pointer() diff --git a/lunaix-os/scripts/gdb/lunadbg/structs/pmem.py b/lunaix-os/scripts/gdb/lunadbg/structs/pmem.py new file mode 100644 index 0000000..ed50b0c --- /dev/null +++ b/lunaix-os/scripts/gdb/lunadbg/structs/pmem.py @@ -0,0 +1,16 @@ +from gdb import Type, Value, lookup_type +from . import KernelStruct + +class PMem(KernelStruct): + def __init__(self, gdb_inferior: Value) -> None: + super().__init__(gdb_inferior, PMem) + + @staticmethod + def get_type(): + return lookup_type("struct pmem").pointer() + + def pplist(self): + return self._kstruct["pplist"] + + def list_len(self): + return self._kstruct["list_len"] \ No newline at end of file diff --git a/lunaix-os/scripts/templates/i386/config.json b/lunaix-os/scripts/templates/i386/config.json index 4b2a186..b74d558 100644 --- a/lunaix-os/scripts/templates/i386/config.json +++ b/lunaix-os/scripts/templates/i386/config.json @@ -83,6 +83,9 @@ "size": "1@page", "name": "pg_mount_{index}" }, + { + "name": "pg_mount_var" + }, { "name": "vmap", "block": "1@huge" -- 2.27.0