+ Virtual memory
+ Memory management and demand paging
+ Keyboard input
-+ Multi-processing
++ Process model
+ 54 common Linux/POSIX system calls ([Appendix 1](#appendix1))
+ User mode
+ Signal mechanism
+ Generic graphics device abstraction layer
+ Standard VGA implementation
+ Virtual terminal device interface (POSIX.1-2008 compliant)
++ Thread model
Environments tested so far:
+ `vmrs [pid]` Lists the memory regions map of process `<pid>`; if `<pid>` is not given, it defaults to the currently running process (smp=1).
+ `proc [pid]` Prints the process control block state of process `<pid>`; if `<pid>` is not given, it defaults to the currently running process (smp=1).
-+ `proc_table` Lists all non-terminated processes and their states.
++ `sched <threads | procs> [-l]` Inspects scheduler information; accepts two arguments:
+ + `threads` prints all threads still registered with the scheduler
+ + `procs` prints all processes still registered with the scheduler
+ + the optional switch `-l` selects the long-listing format (more detailed information)
The plugin can be installed by running the following command:
+ Linux Manual - for looking up the specific behaviors of *nix APIs.
-## Appendix 1: Supported system calls<a id="appendix1"></a>
+## Appendix 1: Implemented POSIX system interfaces <a id="appendix1"></a>
-**Unix/Linux/POSIX**
+LunaixOS provides implementations of the following POSIX system interfaces. The system call numbers assigned by the kernel can be found in the [LunaixOS syscall table](docs/lunaix-syscall-table.md).
1. `sleep(3)`
1. `wait(2)`
3. `epoll_create(2)` (via `pollctl`)
3. `epoll_ctl(2)` (via `pollctl`)
3. `epoll_wait(2)` (via `pollctl`)
+4. `pthread_create`
+4. `pthread_self`
+4. `pthread_exit`
+4. `pthread_join`
+4. `pthread_kill`
+4. `pthread_detach`
+4. `pthread_sigmask`
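
A minimal user-space sketch exercising the threading interfaces above (plain POSIX usage; on LunaixOS these are expected to be backed by the `th_*` system calls listed in the appendix table):

```c
#include <pthread.h>

static void*
worker(void* arg)
{
    /* hand a result back to the joiner */
    pthread_exit((void*)42);
}

int
main()
{
    pthread_t th;
    void* result;

    if (pthread_create(&th, NULL, worker, NULL)) {
        return 1;
    }

    pthread_join(th, &result); /* result == (void*)42 */
    return 0;
}
```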
-**LunaixOS-specific**
-
-1. `yield`
-2. `geterrno`
-3. `realpathat`
-4. `syslog`
-5. `pollctl`
( **※**: this system call has not been tested yet )
--- /dev/null
+| Name | Id |
+| ----- | ---- |
+| __SYSCALL_fork | 1 |
+| __SYSCALL_yield | 2 |
+| __SYSCALL_sbrk | 3 |
+| __SYSCALL_brk | 4 |
+| __SYSCALL_getpid | 5 |
+| __SYSCALL_getppid | 6 |
+| __SYSCALL_sleep | 7 |
+| __SYSCALL__exit | 8 |
+| __SYSCALL_wait | 9 |
+| __SYSCALL_waitpid | 10 |
+| __SYSCALL_sigreturn | 11 |
+| __SYSCALL_sigprocmask | 12 |
+| __SYSCALL_sys_sigaction | 13 |
+| __SYSCALL_pause | 14 |
+| __SYSCALL_kill | 15 |
+| __SYSCALL_alarm | 16 |
+| __SYSCALL_sigpending | 17 |
+| __SYSCALL_sigsuspend | 18 |
+| __SYSCALL_open | 19 |
+| __SYSCALL_close | 20 |
+| __SYSCALL_read | 21 |
+| __SYSCALL_write | 22 |
+| __SYSCALL_sys_readdir | 23 |
+| __SYSCALL_mkdir | 24 |
+| __SYSCALL_lseek | 25 |
+| __SYSCALL_geterrno | 26 |
+| __SYSCALL_readlink | 27 |
+| __SYSCALL_readlinkat | 28 |
+| __SYSCALL_rmdir | 29 |
+| __SYSCALL_unlink | 30 |
+| __SYSCALL_unlinkat | 31 |
+| __SYSCALL_link | 32 |
+| __SYSCALL_fsync | 33 |
+| __SYSCALL_dup | 34 |
+| __SYSCALL_dup2 | 35 |
+| __SYSCALL_realpathat | 36 |
+| __SYSCALL_symlink | 37 |
+| __SYSCALL_chdir | 38 |
+| __SYSCALL_fchdir | 39 |
+| __SYSCALL_getcwd | 40 |
+| __SYSCALL_rename | 41 |
+| __SYSCALL_mount | 42 |
+| __SYSCALL_unmount | 43 |
+| __SYSCALL_getxattr | 44 |
+| __SYSCALL_setxattr | 45 |
+| __SYSCALL_fgetxattr | 46 |
+| __SYSCALL_fsetxattr | 47 |
+| __SYSCALL_ioctl | 48 |
+| __SYSCALL_getpgid | 49 |
+| __SYSCALL_setpgid | 50 |
+| __SYSCALL_syslog | 51 |
+| __SYSCALL_sys_mmap | 52 |
+| __SYSCALL_munmap | 53 |
+| __SYSCALL_execve | 54 |
+| __SYSCALL_fstat | 55 |
+| __SYSCALL_pollctl | 56 |
+| __SYSCALL_th_create | 57 |
+| __SYSCALL_th_self | 58 |
+| __SYSCALL_th_exit | 59 |
+| __SYSCALL_th_join | 60 |
+| __SYSCALL_th_kill | 61 |
+| __SYSCALL_th_detach | 62 |
+| __SYSCALL_th_sigmask | 63 |
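
For reference, a sketch of how a user-side stub could consume these numbers, following the register convention visible in `syscall_hndlr` further down (`eax` carries the call id and the return value; arguments 1-5 travel in `ebx`, `ecx`, `edx`, `edi`, `esi`). The trap vector `33` here is an assumption for illustration only:

```c
/* hypothetical stub; the interrupt vector is an assumption */
static inline int
lunaix_syscall1(int callid, unsigned long arg1)
{
    int ret;
    asm volatile("int $33"
                 : "=a"(ret)              /* eax: return value */
                 : "a"(callid), "b"(arg1) /* eax: call id, ebx: arg #1 */
                 : "memory");
    return ret;
}

/* e.g.: lunaix_syscall1(__SYSCALL_sleep, 5); */
```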
.gdb_history
**.o
-**.d
\ No newline at end of file
+**.d
+
+*.log
pushl %ebx
pushl %eax
- movl __current, %eax
- movl proc_intr_ctx(%eax), %eax
- incl %eax
- pushl %eax # nested intr: current depth
+ pushl $0 # placeholder for depth accounting
movl ics(%esp), %eax /* fetch %cs */
andl $0x3, %eax /* check the RPL */
movw %ax, %ds
movw %ax, %es
- movl __current, %ebx
+ movl current_thread, %ebx
+ movl iuesp(%esp), %eax
# Save x87 context to user stack, rather than kernel's memory.
# XXX what will happen if we triggered a page fault during fxsave?
- # FIXME can we remove this overhead?
- movl iuesp(%esp), %eax
- andl $stack_alignment, %eax
- subl $512, %eax
- fxsave (%eax)
+ # FIXME I think we should defer this to the scheduler and practice lazy save/load
+ # Doing this will also make it safe from nested interrupts due to a potential
+ # page fault when saving
+ # FIXME Also, generalise it to any FPU context, without constraining it to x87.
+
+ #andl $stack_alignment, %eax
+ #subl $512, %eax
+ #fxsave (%eax)
# Save the user stack top pointer. Since we allow same-level interrupts, this is needed to keep track of the user stack address.
- movl %eax, proc_ustack_top(%ebx) # store into __current->ustack_top
+ movl %eax, thread_ustack_top(%ebx) # store into current_thread->ustack_top
/* kernel space same-level switch */
1:
andl $3, %eax
jz 1f
- movl __current, %eax
- movl proc_ustack_top(%eax), %eax
+ ## FIXME x87 fpu context
+ movl current_thread, %eax
+ movl thread_ustack_top(%eax), %eax
test %eax, %eax
jz 1f
- fxrstor (%eax)
+ # fxrstor (%eax)
1:
popl %eax # discard isr_param::depth
movl 16(%esp), %esp
movl %eax, tmp_store
- movl __current, %eax
+ movl current_thread, %eax
# nested intr: restore saved context
- popl proc_intr_ctx(%eax)
+ popl thread_intr_ctx(%eax)
addl $8, %esp
iret
- .type switch_to, @function
- .global switch_to
- switch_to:
- # Convention
- # arg1: address of the target process PCB (next
+ .type do_switch, @function
+ .global do_switch
+ do_switch:
+ # Assumption: __current already holds the target process
- movl %eax, %ebx # next
- movl __current, %eax
- movl proc_page_table(%eax), %ecx # __current->pagetable
- movl proc_page_table(%ebx), %eax # next->pagetable
-
- cmpl %ecx, %eax # if(next->pagtable != __current->pagetable) {
+ call proc_vmroot
+
+ movl %eax, %ebx
+ movl %cr3, %eax
+ xorl %ebx, %eax # avoid setting cr3 if just local thread switch.
jz 1f
- movl %eax, %cr3 # cpu_lcr3(next->pagetable)
- # }
- 1:
- movl %ebx, __current # __current = next
- # 我们已经处在了新的地址空间,为了避免影响其先前的栈布局
- # 需要使用一个临时的栈空间
+ movl %ebx, %cr3
+
+ 1:
+ # the address space could have changed. A temporary stack
+ # is required to avoid corrupting the existing stack
movl $tmp_stack, %esp
call signal_dispatch # kernel/signal.c
+ movl current_thread, %ebx
test %eax, %eax # do we have signal to handle?
jz 1f
Since no address-space switch happens in between, the second transfer reuses the same kernel stack, while tss.esp0 previously defaulted to always point at the very top.
This makes it possible to overwrite earlier context (e.g. nested signal-handler frames)
*/
- movl proc_intr_ctx(%ebx), %ecx # __current->intr_ctx
+ movl thread_intr_ctx(%ebx), %ecx # __current->intr_ctx
movl %ecx, (tss_esp0_off + _tss)
jmp handle_signal
1:
- movl proc_intr_ctx(%ebx), %eax
+ movl thread_intr_ctx(%ebx), %eax
jmp soft_iret
.type handle_signal, @function
LOG_MODULE("INTR")
+static inline void
+update_thread_context(isr_param* param)
+{
+ if (!current_thread) {
+ return;
+ }
+
+ isr_param* ppctx = current_thread->intr_ctx;
+ param->execp->saved_prev_ctx = ppctx;
+ current_thread->intr_ctx = param;
+
+ if (ppctx) {
+ param->depth = ppctx->depth + 1;
+ }
+}
+
void
intr_handler(isr_param* param)
{
- param->execp->saved_prev_ctx = __current->intr_ctx;
- __current->intr_ctx = param;
-
- volatile struct exec_param* execp = __current->intr_ctx->execp;
+ update_thread_context(param);
+ volatile struct exec_param* execp = param->execp;
if (execp->vector <= 255) {
isr_cb subscriber = isrm_get(execp->vector);
subscriber(param);
#define stack_alignment 0xfffffff0
#ifndef __ASM__
-#define store_retval(retval) __current->intr_ctx->registers.eax = (retval)
+#define align_stack(ptr) ((ptr) & stack_alignment)
+#define store_retval(retval) current_thread->intr_ctx->registers.eax = (retval)
-#define store_retval_to(proc, retval) (proc)->intr_ctx->registers.eax = (retval)
+#define store_retval_to(th, retval) (th)->intr_ctx->registers.eax = (retval)
-#define eret_target(proc) (proc)->intr_ctx->execp->eip
-#define eret_stack(proc) (proc)->intr_ctx->execp->esp
-#define intr_ivec(proc) (proc)->intr_ctx->execp->vector
-#define intr_ierr(proc) (proc)->intr_ctx->execp->err_code
+#define eret_target(th) (th)->intr_ctx->execp->eip
+#define eret_stack(th) (th)->intr_ctx->execp->esp
+#define intr_ivec(th) (th)->intr_ctx->execp->vector
+#define intr_ierr(th) (th)->intr_ctx->execp->err_code
#define j_usr(sp, pc) \
asm volatile("movw %0, %%ax\n" \
"r"(pc) \
: "eax", "memory");
-#define switch_context(process) asm volatile("jmp switch_to\n" ::"a"(process));
+
+static inline void must_inline noret
+switch_context() {
+ asm volatile("jmp do_switch\n");
+ unreachable;
+}
#define push_arg1(stack_ptr, arg) *((typeof((arg))*)(stack_ptr)--) = arg
#define push_arg2(stack_ptr, arg1, arg2) \
*((typeof((arg3))*)(stack_ptr)--) = arg3; \
*((typeof((arg4))*)(stack_ptr)--) = arg4; \
}
+
+
+static inline ptr_t must_inline
+abi_get_callframe()
+{
+ ptr_t val;
+ asm("movl %%ebp, %0" : "=r"(val)::);
+ return val;
+}
+
+static inline ptr_t
+abi_get_retaddr()
+{
+ return *((ptr_t*)abi_get_callframe() + 1);
+}
+
+static inline ptr_t
+abi_get_retaddrat(ptr_t fp)
+{
+ return *((ptr_t*)fp + 1);
+}
#endif
#endif /* __LUNAIX_ABI_H */
void
cpu_trap_panic(char* message);
-static inline ptr_t
-cpu_get_fp()
-{
- ptr_t val;
- asm("movl %%ebp, %0" : "=r"(val)::);
- return val;
-}
/**
* @brief Load current processor state
/* struct layout: critical section of struct proc_info */
.struct 0
-proc_pid:
- .struct proc_pid + regsize
-proc_parent:
- .struct proc_parent + regsize
-proc_intr_ctx:
- .struct proc_intr_ctx + regsize
-proc_ustack_top:
- .struct proc_ustack_top + regsize
-proc_page_table:
- .struct proc_page_table + regsize
-proc_fxstate:
+thread_intr_ctx:
+ .struct thread_intr_ctx + regsize
+thread_ustack_top:
/* struct layout: proc_sig */
.struct 0
#include "vectors.h"
#ifndef __ASM__
+#include <lunaix/compiler.h>
#include <sys/cpu.h>
#define saved_fp(isrm) ((isrm)->registers.ebp)
-#define uspace_context(isrm) (((isrm)->execp->cs) == 0x8)
+#define kernel_context(isrm) (!(((isrm)->execp->cs) & 0b11))
struct exec_param;
u32_t es;
u32_t fs;
u32_t gs;
-} __attribute__((packed));
+} compact;
-typedef struct
+struct pcontext
{
unsigned int depth;
struct regcontext registers;
u32_t esp;
volatile struct exec_param* execp;
};
-} __attribute__((packed)) isr_param;
+} compact;
struct exec_param
{
- isr_param* saved_prev_ctx;
+ struct pcontext* saved_prev_ctx;
u32_t vector;
u32_t err_code;
u32_t eip;
u32_t eflags;
u32_t esp;
u32_t ss;
-} __attribute__((packed));
-
-#define ISR_PARAM_SIZE sizeof(isr_param)
-
-void
-exception_init();
+} compact;
#endif
#define MEM_HUGE 0x400000UL
#define MEM_1G 0x40000000UL
-#define KERNEL_STACK 0x300000UL
-#define KERNEL_STACK_SIZE 0x100000UL
-#define KERNEL_STACK_END 0x3ffff0UL
+#define KSTACK_AREA 0x100000UL
+#define KSTACK_AREA_SIZE 0x300000UL
+#define KSTACK_AREA_END 0x3ffff0UL
#define USR_EXEC 0x400000UL
#define USR_EXEC_SIZE 0x20000000UL
#define USR_EXEC_END 0x203fffffUL
#define USR_MMAP 0x20400000UL
-#define USR_MMAP_SIZE 0x9f800000UL
-#define USR_MMAP_END 0xbfbfffffUL
+#define USR_MMAP_SIZE 0x9fbc0000UL
+#define USR_MMAP_END 0xbffbffffUL
-#define USR_STACK 0xbfc00000UL
-#define USR_STACK_SIZE 0x400000UL
+#define USR_STACK 0xbffc0000UL
+#define USR_STACK_SIZE 0x40000UL
#define USR_STACK_END 0xbffffff0UL
#define KERNEL_EXEC 0xc0000000UL
--- /dev/null
+#ifndef __LUNAIX_MM_DEFS_H
+#define __LUNAIX_MM_DEFS_H
+
+#include "mempart.h"
+
+#define KSTACK_SIZE (3 * MEM_PAGE)
+
+#define MEMGUARD 0xdeadc0deUL
+
+#define kernel_addr(addr) ((addr) >= KERNEL_EXEC)
+#define guardian_page(pte) ((pte) == MEMGUARD)
+
+#endif /* __LUNAIX_MM_DEFS_H */
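
A sketch of how the guard value might be planted below a freshly carved kernel stack; the `PTE_MOUNTED` usage mirrors the page-fault handler below, and the helper itself is hypothetical:

```c
/* hypothetical helper: poison the PTE one page below the usable stack */
static void
kstack_plant_guard(ptr_t kstack_lowest)
{
    /* the fault handler recognises this PTE value via guardian_page() */
    PTE_MOUNTED(VMS_SELF, (kstack_lowest - PG_SIZE) >> 12) = MEMGUARD;
}
```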
#include <lunaix/trace.h>
#include <sys/interrupts.h>
+#include <sys/mm/mm_defs.h>
#include <klibc/string.h>
LOG_MODULE("pf")
-static u32_t
-get_ptattr(struct mm_region* vmr)
-{
- u32_t vmr_attr = vmr->attr;
- u32_t ptattr = PG_PRESENT | PG_ALLOW_USER;
-
- if ((vmr_attr & PROT_WRITE)) {
- ptattr |= PG_WRITE;
- }
- return ptattr & 0xfff;
-}
#define COW_MASK (REGION_RSHARED | REGION_READ | REGION_WRITE)
void
intr_routine_page_fault(const isr_param* param)
{
+ if (param->depth > 10) {
+ // Too many nested faults! We must have messed something up
+ // XXX should we fail silently?
+ spin();
+ }
+
uint32_t errcode = param->execp->err_code;
ptr_t ptr = cpu_ldeaddr();
if (!ptr) {
// XXX do kernel trigger pfault?
- vm_regions_t* vmr = (vm_regions_t*)&__current->mm.regions;
+ volatile x86_pte_t* pte = &PTE_MOUNTED(VMS_SELF, ptr >> 12);
+
+ if (guardian_page(*pte)) {
+ ERROR("memory region over-running");
+ goto segv_term;
+ }
+
+ vm_regions_t* vmr = vmregions(__current);
struct mm_region* hit_region = region_get(vmr, ptr);
if (!hit_region) {
goto segv_term;
}
- volatile x86_pte_t* pte = &PTE_MOUNTED(VMS_SELF, ptr >> 12);
if (PG_IS_PRESENT(*pte)) {
if (((errcode ^ mapping.flags) & PG_ALLOW_USER)) {
// invalid access
// normal page fault, do COW
cpu_flush_page((ptr_t)pte);
- ptr_t pa = (ptr_t)vmm_dup_page(__current->pid, PG_ENTRY_ADDR(*pte));
+ ptr_t pa = (ptr_t)vmm_dup_page(PG_ENTRY_ADDR(*pte));
- pmm_free_page(__current->pid, *pte & ~0xFFF);
+ pmm_free_page(*pte & ~0xFFF);
*pte = (*pte & 0xFFF & ~PG_DIRTY) | pa | PG_WRITE;
goto resolved;
if (!PG_IS_PRESENT(*pte)) {
cpu_flush_page((ptr_t)pte);
- ptr_t pa = pmm_alloc_page(__current->pid, 0);
+ ptr_t pa = pmm_alloc_page(0);
if (!pa) {
goto oom;
}
- *pte = pa | get_ptattr(hit_region);
+ *pte = pa | region_ptattr(hit_region);
memset((void*)PG_ALIGN(ptr), 0, PG_SIZE);
goto resolved;
}
u32_t mseg_off = (ptr - hit_region->start);
u32_t mfile_off = mseg_off + hit_region->foff;
- ptr_t pa = pmm_alloc_page(__current->pid, 0);
+ ptr_t pa = pmm_alloc_page(0);
if (!pa) {
goto oom;
}
cpu_flush_page((ptr_t)pte);
- *pte = pa | get_ptattr(hit_region);
+ *pte = pa | region_ptattr(hit_region);
memset((void*)ptr, 0, PG_SIZE);
param->execp->eip,
param->execp->err_code);
- sigset_add(__current->sigctx.sig_pending, _SIGSEGV);
-
trace_printstack_isr(param);
+ if (kernel_context(param)) {
+ ERROR("[page fault on kernel]");
+ // halt kernel if segv comes from kernel space
+ spin();
+ }
+
+ thread_setsignal(current_thread, _SIGSEGV);
+
schedule();
// should not reach
while (1)
--- /dev/null
+#include <lunaix/process.h>
+#include <lunaix/pcontext.h>
+#include <lunaix/mm/vmm.h>
+#include <klibc/string.h>
+
+#include <sys/mm/mempart.h>
+#include <sys/abi.h>
+
+volatile struct x86_tss _tss = { .link = 0,
+ .esp0 = 0,
+ .ss0 = KDATA_SEG };
+
+bool
+inject_transfer_context(ptr_t vm_mnt, struct transfer_context* tctx)
+{
+ v_mapping mapping;
+ if (!vmm_lookupat(vm_mnt, tctx->inject, &mapping)) {
+ return false;
+ }
+
+ vmm_mount_pg(PG_MOUNT_4, mapping.pa);
+
+ ptr_t mount_inject = PG_MOUNT_4 + PG_OFFSET(tctx->inject);
+ memcpy((void*)mount_inject, &tctx->transfer, sizeof(tctx->transfer));
+
+ vmm_unmount_pg(PG_MOUNT_4);
+ return true;
+}
+
+void
+thread_setup_trasnfer(struct transfer_context* tctx,
+ ptr_t kstack_tp, ptr_t ustack_pt,
+ ptr_t entry, bool to_user)
+{
+ ptr_t offset = (ptr_t)&tctx->transfer.eret - (ptr_t)&tctx->transfer;
+ tctx->inject = align_stack(kstack_tp - sizeof(tctx->transfer));
+
+ tctx->transfer.isr = (isr_param){
+ .registers = {
+ .ds = KDATA_SEG,
+ .es = KDATA_SEG,
+ .fs = KDATA_SEG,
+ .gs = KDATA_SEG
+ },
+ .execp = (struct exec_param*)(tctx->inject + offset)
+ };
+
+ int code_seg = KCODE_SEG, data_seg = KDATA_SEG;
+ int mstate = cpu_ldstate();
+ if (to_user) {
+ code_seg = UCODE_SEG, data_seg = UDATA_SEG;
+ mstate |= 0x200; // enable interrupt
+ }
+
+ tctx->transfer.eret = (struct exec_param) {
+ .cs = code_seg, .eip = entry,
+ .ss = data_seg, .esp = align_stack(ustack_pt),
+ .eflags = mstate
+ };
+}
\ No newline at end of file
+++ /dev/null
-#include <lunaix/process.h>
-
-#include <sys/mm/mempart.h>
-#include <sys/x86_isa.h>
-
-volatile struct x86_tss _tss = { .link = 0,
- .esp0 = KERNEL_STACK_END,
- .ss0 = KDATA_SEG };
-
-void
-proc_init_transfer(struct proc_info* proc,
- ptr_t stack_top,
- ptr_t target,
- int flags)
-{
- struct exec_param* execp =
- (struct exec_param*)(stack_top - sizeof(struct exec_param));
- isr_param* isrp = (isr_param*)((ptr_t)execp - sizeof(isr_param));
-
- *execp = (struct exec_param){
- .cs = KCODE_SEG, .ss = KDATA_SEG, .eip = target, .eflags = cpu_ldstate()
- };
-
- *isrp = (isr_param){ .registers = { .ds = KDATA_SEG,
- .es = KDATA_SEG,
- .fs = KDATA_SEG,
- .gs = KDATA_SEG },
- .execp = execp };
-
- if ((flags & TRANSFER_IE)) {
- execp->eflags |= 0x200;
- }
-
- proc->intr_ctx = isrp;
-}
\ No newline at end of file
.long __lxsys_execve
.long __lxsys_fstat /* 55 */
.long __lxsys_pollctl
+ .long __lxsys_th_create
+ .long __lxsys_th_self
+ .long __lxsys_th_exit
+ .long __lxsys_th_join /* 60 */
+ .long __lxsys_th_kill
+ .long __lxsys_th_detach
+ .long __lxsys_th_sigmask
2:
.rept __SYSCALL_MAX - (2b - 1b)/4
.long 0
.endr
-.global syscall_hndlr
.section .text
+ .type syscall_hndlr, @function
+ .global syscall_hndlr
syscall_hndlr:
pushl %ebp
- movl 8(%esp), %ebp // isr_param*
+ movl %esp, %ebp
+ movl 8(%esp), %ebx // isr_param*
- addl $4, %ebp
- movl (%ebp), %eax /* eax: call code as well as the return value from syscall */
+ addl $4, %ebx
+ movl (%ebx), %eax /* eax: call code as well as the return value from syscall */
cmpl $__SYSCALL_MAX, %eax
jae 2f
jne 1f
2:
neg %eax
+ movl %ebp, %esp
popl %ebp
ret
+
1:
- pushl 24(%ebp) /* esi - #5 arg */
- pushl 16(%ebp) /* edi - #4 arg */
- pushl 12(%ebp) /* edx - #3 arg */
- pushl 8(%ebp) /* ecx - #2 arg */
- pushl 4(%ebp) /* ebx - #1 arg */
+ pushl %ebx
+ pushl 24(%ebx) /* esi - #5 arg */
+ pushl 16(%ebx) /* edi - #4 arg */
+ pushl 12(%ebx) /* edx - #3 arg */
+ pushl 8(%ebx) /* ecx - #2 arg */
+ pushl 4(%ebx) /* ebx - #1 arg */
call *(%eax)
- movl %eax, (%ebp) /* save the return value */
-
addl $20, %esp /* remove the parameters from stack */
-
+
+ popl %ebx
+ movl %eax, (%ebx) /* save the return value */
+
+ movl %ebp, %esp
popl %ebp
ret
\ No newline at end of file
if (!clbp) {
// at most 4 command lists per page
- clb_pa = pmm_alloc_page(KERNEL_PID, PP_FGLOCKED);
+ clb_pa = pmm_alloc_page(PP_FGLOCKED);
clb_pg_addr = (ptr_t)ioremap(clb_pa, 0x1000);
memset((void*)clb_pg_addr, 0, 0x1000);
}
if (!fisp) {
// at most 16 FISes per page
- fis_pa = pmm_alloc_page(KERNEL_PID, PP_FGLOCKED);
+ fis_pa = pmm_alloc_page(PP_FGLOCKED);
fis_pg_addr = (ptr_t)ioremap(fis_pa, 0x1000);
memset((void*)fis_pg_addr, 0, 0x1000);
}
#include <lunaix/keyboard.h>
#include <lunaix/syslog.h>
#include <lunaix/timer.h>
+#include <lunaix/pcontext.h>
#include <hal/intc.h>
#include <klibc/string.h>
#include <sys/cpu.h>
-#include <sys/interrupts.h>
#include <sys/port_io.h>
#define PS2_PORT_ENC_DATA 0x60
char result;
- cpu_disable_interrupt();
-
// 1. Disable all PS/2 devices
ps2_post_cmd(PS2_PORT_CTRL_CMDREG, PS2_CMD_PORT1_DISABLE, PS2_NO_ARG);
ps2_post_cmd(PS2_PORT_CTRL_CMDREG, PS2_CMD_PORT2_DISABLE, PS2_NO_ARG);
*/
isrm_bindirq(PC_AT_IRQ_KBD, intr_ps2_kbd_handler);
- cpu_enable_interrupt();
return 0;
done:
#include <lunaix/device.h>
#include <lunaix/mm/page.h>
+#include <klibc/string.h>
+
static inline void
rng_fill(void* data, size_t len)
{
static int
__rand_rd(struct device* dev, void* buf, size_t offset, size_t len)
{
- rng_fill(buf, len);
+ if (unlikely(len < 4)) {
+ int tmp_buf = 0;
+ rng_fill(&tmp_buf, 4);
+ memcpy(buf, &tmp_buf, len);
+ } else {
+ rng_fill(buf, len);
+ }
return len;
}
int
pdev_randdev_init(struct device_def* devdef)
{
+ // FIXME add check on cpuid for presence of rdrand
struct device* devrand = device_allocseq(NULL, NULL);
devrand->ops.read = __rand_rd;
devrand->ops.read_page = __rand_rd_pg;
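
For the FIXME above, a sketch of the probe: RDRAND support is advertised by CPUID leaf 1, ECX bit 30 (the helper name is illustrative):

```c
static bool
cpu_has_rdrand()
{
    unsigned int eax = 1, ebx, ecx, edx;
    asm volatile("cpuid"
                 : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    return !!(ecx & (1u << 30)); /* CPUID.01H:ECX.RDRAND[bit 30] */
}
```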
#include <lunaix/isrm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/status.h>
+#include <lunaix/pcontext.h>
#include <hal/rtc/mc146818a.h>
#include <klibc/string.h>
-#include <sys/interrupts.h>
#include <sys/port_io.h>
#define RTC_INDEX_PORT 0x70
while (sz <= min && dt <= expr) {
// XXX should we hold the device lock while we are waiting?
- sched_yieldk();
+ sched_pass();
dt = clock_systime() - t;
t += dt;
#define compact __attribute__((packed))
#define align(v) __attribute__((aligned (v)))
-#define export_symbol(domain, symbol)\
- typeof(symbol)* must_emit __SYMEXPORT_Z##domain##_##symbol = &(symbol)
+#define export_symbol(domain, namespace, symbol)\
+ typeof(symbol)* must_emit __SYMEXPORT_Z##domain##_N##namespace##_S##symbol = &(symbol)
inline static void noret
spin()
*/
#define list_entry(ptr, type, member) container_of(ptr, type, member)
+/**
+ * list_next - get the struct for the next entry
+ * @current: pointer to the current entry of the given @type.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_next(current, type, member) container_of(current->member.next, type, member)
+
+/**
+ * list_prev - get the struct for the previous entry
+ * @current: pointer to the current entry of the given @type.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_prev(current, type, member) container_of(current->member.prev, type, member)
+
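
A small usage sketch for the two accessors (the `job` type is illustrative):

```c
struct job {
    int ticket;
    struct llist_header node;
};

static void
peek_neighbours(struct job* j)
{
    /* hop to the adjacent entries; both wrap through the list sentinel */
    struct job* nxt = list_next(j, struct job, node);
    struct job* prv = list_prev(j, struct job, node);

    nxt->ticket = prv->ticket;
}
```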
/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop counter.
#define __LUNAIX_CODVAR_H
#include <lunaix/ds/llist.h>
-#include <lunaix/sched.h>
typedef struct waitq
{
return llist_empty(&waitq->waiters);
}
+static inline void
+waitq_cancel_wait(waitq_t* waitq)
+{
+ llist_delete(&waitq->waiters);
+}
+
void
pwait(waitq_t* queue);
void
pwake_all(waitq_t* queue);
-#define wait_if(cond) \
- while ((cond)) { \
- sched_yieldk(); \
- }
-
#endif /* __LUNAIX_CODVAR_H */
elf32_open(struct elf32* elf, const char* path);
int
-elf32_openat(struct elf32* elf, const void* elf_vfile);
+elf32_openat(struct elf32* elf, void* elf_vfile);
int
elf32_static_linked(const struct elf32* elf);
char** argv;
int envc;
char** envp;
-} PACKED;
+} compact;
#ifndef __USR_WRAPPER__
int
exec_kexecve(const char* filename, const char* argv[], const char* envp[]);
+void
+exec_init_container(struct exec_container* param,
+ struct thread* thread,
+ ptr_t vms,
+ const char** argv,
+ const char** envp);
+
#endif
#endif /* __LUNAIX_LOADER_H */
#include <usr/lunaix/poll.h>
-struct v_fd; // <lunaix/fs.h>
+struct thread; // <lunaix/process.h>
+struct proc_info; // <lunaix/process.h>
+struct v_fd; // <lunaix/fs.h>
typedef struct llist_header poll_evt_q;
{
poll_evt_q evt_listener;
struct v_file* file_ref;
- pid_t pid;
+ struct thread* thread;
};
struct iopoll
iopoll_init(struct iopoll*);
void
-iopoll_free(pid_t, struct iopoll*);
+iopoll_free(struct proc_info*);
int
-iopoll_install(pid_t, struct iopoll*, struct v_fd*);
+iopoll_install(struct thread* thread, struct v_fd* fd);
int
-iopoll_remove(pid_t, struct iopoll*, int);
+iopoll_remove(struct thread*, int);
static inline void
poll_setrevt(struct poll_info* pinfo, int evt)
#define __LUNAIX_ISRM_H
#include <lunaix/types.h>
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
typedef void (*isr_cb)(const isr_param*);
--- /dev/null
+#ifndef __LUNAIX_KPREEMPT_H
+#define __LUNAIX_KPREEMPT_H
+
+#include <sys/abi.h>
+
+#define _preemptible __attribute__((section(".kf.preempt")))
+
+#define ensure_preempt_caller() \
+ do { \
+ extern int __kf_preempt_start[]; \
+ extern int __kf_preempt_end[]; \
+ ptr_t _retaddr = abi_get_retaddr(); \
+ assert_msg((ptr_t)__kf_preempt_start <= _retaddr \
+ && _retaddr < (ptr_t)__kf_preempt_end, \
+ "caller must be kernel preemptible"); \
+ } while(0)
+
+#endif /* __LUNAIX_KPREEMPT_H */
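
A sketch of the intended pairing (both function names are illustrative): a routine that may reschedule asserts on its caller, and only functions placed in `.kf.preempt` pass the check:

```c
#include <lunaix/kpreempt.h>

static void
might_yield()
{
    /* panics unless the return address falls inside .kf.preempt */
    ensure_preempt_caller();
    /* ... safe to reschedule here ... */
}

static void _preemptible
idle_worker()
{
    for (;;) {
        might_yield();
    }
}
```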
#define __LUNAIX_CAKE_H
#include <lunaix/ds/llist.h>
+#include <lunaix/spike.h>
#define PILE_NAME_MAXLEN 20
void
cake_ctor_zeroing(struct cake_pile* pile, void* piece);
+#define DEADCAKE_MARK 0xdeadcafeUL
+
+static inline void
+cake_ensure_valid(void* area) {
+ if (unlikely(*(unsigned int*)area == DEADCAKE_MARK)) {
+ fail("access to freed cake piece");
+ }
+}
+
#endif /* __LUNAIX_CAKE_H */
#ifndef __LUNAIX_MM_H
#define __LUNAIX_MM_H
-#include <lunaix/ds/llist.h>
-#include <lunaix/ds/mutex.h>
-#include <lunaix/fs.h>
#include <lunaix/types.h>
#include <usr/lunaix/mann_flags.h>
#define REGION_EXEC PROT_EXEC
#define REGION_ANON MAP_ANON
#define REGION_RW REGION_READ | REGION_WRITE
+#define REGION_KERNEL (1 << 31)
#define REGION_TYPE_CODE (1 << 16)
#define REGION_TYPE_GENERAL (2 << 16)
#define REGION_TYPE_STACK (4 << 16)
#define REGION_TYPE_VARS (5 << 16)
-struct proc_mm;
-
-struct mm_region
-{
- struct llist_header head; // must be first field!
- struct proc_mm* proc_vms;
-
- // file mapped to this region
- struct v_file* mfile;
- // mapped file offset
- off_t foff;
- // mapped file length
- u32_t flen; // XXX it seems that we don't need this actually..
-
- ptr_t start;
- ptr_t end;
- u32_t attr;
-
- void** index; // fast reference, to accelerate access to this very region.
-
- void* data;
- // when a region is copied
- void (*region_copied)(struct mm_region*);
- // when a region is unmapped
- void (*destruct_region)(struct mm_region*);
-};
-
-static inline void
-mm_index(void** index, struct mm_region* target)
-{
- *index = (void*)target;
- target->index = index;
-}
-
-typedef struct llist_header vm_regions_t;
-
-struct proc_mm
-{
- vm_regions_t regions;
- struct mm_region* heap;
- struct mm_region* stack;
- pid_t pid;
-};
-
#endif /* __LUNAIX_MM_H */
struct proc_mm* pvms; // process vm
off_t offset; // mapped file offset
size_t mlen; // mapped memory length
- size_t flen; // mapped file length
u32_t proct; // protections
u32_t flags; // other options
u32_t type; // region type
+ ptr_t range_start;
+ ptr_t range_end;
};
int
struct mm_region* region,
ptr_t newend);
+int
+mmap_user(void** addr_out,
+ struct mm_region** created,
+ ptr_t addr,
+ struct v_file* file,
+ struct mmap_param* param);
+
int
mem_map(void** addr_out,
struct mm_region** created,
struct pp_struct
{
- pid_t owner;
u32_t ref_counts;
pp_attr_t attr;
};
* @param ppn
*/
void
-pmm_mark_page_occupied(pid_t owner, ptr_t ppn, pp_attr_t attr);
+pmm_mark_page_occupied(ptr_t ppn, pp_attr_t attr);
/**
* @brief Mark multiple contiguous physical pages as occupied
* @param page_count number of pages
*/
void
-pmm_mark_chunk_occupied(pid_t owner,
- u32_t start_ppn,
+pmm_mark_chunk_occupied(u32_t start_ppn,
size_t page_count,
pp_attr_t attr);
* @return void* address of a usable page, NULL otherwise
*/
ptr_t
-pmm_alloc_page(pid_t owner, pp_attr_t attr);
+pmm_alloc_page(pp_attr_t attr);
/**
* @brief Allocate a contiguous physical memory region
* @return ptr_t
*/
ptr_t
-pmm_alloc_cpage(pid_t owner, size_t num_pages, pp_attr_t attr);
+pmm_alloc_cpage(size_t num_pages, pp_attr_t attr);
/**
* @brief Initialize the physical memory manager
* @return whether it succeeded
*/
int
-pmm_free_page(pid_t owner, ptr_t page);
+pmm_free_page(ptr_t page);
int
-pmm_ref_page(pid_t owner, ptr_t page);
+pmm_ref_page(ptr_t page);
#endif /* __LUNAIX_PMM_H */
--- /dev/null
+#ifndef __LUNAIX_PROCVM_H
+#define __LUNAIX_PROCVM_H
+
+#include <lunaix/ds/llist.h>
+#include <lunaix/ds/mutex.h>
+#include <lunaix/fs.h>
+#include <lunaix/types.h>
+
+struct proc_mm;
+struct proc_info;
+
+struct mm_region
+{
+ struct llist_header head; // must be first field!
+ struct proc_mm* proc_vms;
+
+ // file mapped to this region
+ struct v_file* mfile;
+ // mapped file offset
+ off_t foff;
+ // mapped file length
+ u32_t flen; // XXX it seems that we don't need this actually..
+
+ ptr_t start;
+ ptr_t end;
+ u32_t attr;
+
+ void** index; // fast reference, to accelerate access to this very region.
+
+ void* data;
+ // when a region is copied
+ void (*region_copied)(struct mm_region*);
+ // when a region is unmapped
+ void (*destruct_region)(struct mm_region*);
+};
+
+struct remote_vmctx
+{
+ ptr_t vms_mnt;
+ ptr_t local_mnt;
+ ptr_t remote;
+ size_t page_cnt;
+};
+
+
+static inline void
+mm_index(void** index, struct mm_region* target)
+{
+ *index = (void*)target;
+ target->index = index;
+}
+
+typedef struct llist_header vm_regions_t;
+
+struct proc_mm
+{
+ // virtual memory root (i.e. root page table)
+ ptr_t vmroot;
+ vm_regions_t regions;
+ struct mm_region* heap;
+ struct proc_info* proc;
+};
+
+/**
+ * @brief Create a process virtual memory space descriptor
+ *
+ * @param proc
+ * @return struct proc_mm*
+ */
+struct proc_mm*
+procvm_create(struct proc_info* proc);
+
+/**
+ * @brief Initialize the vm of `proc` to a duplicate of the current process's
+ *
+ * @param proc
+ * @return struct proc_mm*
+ */
+void
+procvm_dup(struct proc_info* proc);
+
+void
+procvm_cleanup(ptr_t vm_mnt, struct proc_info* proc);
+
+
+/**
+ * @brief Initialize the vm of `proc` as a clean slate which contains
+ * nothing but the shared global mapping of the kernel image.
+ *
+ * @param proc
+ */
+void
+procvm_init_clean(struct proc_info* proc);
+
+
+/*
+ remote virtual memory manipulation
+*/
+
+#define REMOTEVM_MAX_PAGES 128
+
+ptr_t
+procvm_enter_remote_transaction(struct remote_vmctx* rvmctx, struct proc_mm* mm,
+ ptr_t vm_mnt, ptr_t remote_base, size_t size);
+
+int
+procvm_copy_remote(struct remote_vmctx* rvmctx,
+ ptr_t remote_dest, void* local_src, size_t sz);
+
+void
+procvm_exit_remote_transaction(struct remote_vmctx* rvmctx);
+
+#endif /* __LUNAIX_PROCVM_H */
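
A sketch of the intended call pattern for the remote manipulation API (treating a zero return from the enter call as failure is an assumption):

```c
static int
write_remote(struct proc_mm* mm, ptr_t vm_mnt,
             ptr_t remote_va, void* buf, size_t len)
{
    struct remote_vmctx rvm;

    /* pin the remote range (at most REMOTEVM_MAX_PAGES) for local access */
    if (!procvm_enter_remote_transaction(&rvm, mm, vm_mnt, remote_va, len)) {
        return -1;
    }

    int err = procvm_copy_remote(&rvm, remote_va, buf, len);

    procvm_exit_remote_transaction(&rvm);
    return err;
}
```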
#define __LUNAIX_REGION_H
#include <lunaix/mm/mm.h>
+#include <lunaix/mm/page.h>
+#include <lunaix/mm/procvm.h>
+
+#define prev_region(vmr) list_prev(vmr, struct mm_region, head)
+#define next_region(vmr) list_next(vmr, struct mm_region, head)
+#define get_region(vmr_el) list_entry(vmr_el, struct mm_region, head)
+
+static inline int
+stack_region(struct mm_region* region) {
+ return region->attr & REGION_TYPE_STACK;
+}
+
+static inline int
+same_region(struct mm_region* a, struct mm_region* b) {
+ return a->start == b->start \
+ && a->end == b->end \
+ && a->attr == b->attr;
+}
+
+static inline bool
+region_contains(struct mm_region* mm, ptr_t va) {
+ return mm->start <= va && va < mm->end;
+}
+
+static inline size_t
+region_size(struct mm_region* mm) {
+ return mm->end - mm->start;
+}
+
struct mm_region*
region_create(ptr_t start, ptr_t end, u32_t attr);
struct mm_region*
region_dup(struct mm_region* origin);
+static inline u32_t
+region_ptattr(struct mm_region* vmr)
+{
+ u32_t vmr_attr = vmr->attr;
+ u32_t ptattr = PG_PRESENT | PG_ALLOW_USER;
+
+ if ((vmr_attr & PROT_WRITE)) {
+ ptattr |= PG_WRITE;
+ }
+
+ return ptattr & 0xfff;
+}
+
#endif /* __LUNAIX_REGION_H */
#ifndef __LUNAIX_VALLOC_H
#define __LUNAIX_VALLOC_H
+#include <lunaix/compiler.h>
+
void*
valloc(unsigned int size);
void
valloc_init();
+extern void
+valloc_ensure_valid(void* ptr);
+
#endif /* __LUNAIX_VALLOC_H */
*/
#define VMAP_NOMAP 2
+/**
+ * @brief Guard-page mapping: map the virtual address as a page guard, ignoring the given physical address and page attributes
+ *
+ */
+#define VMAP_GUARDPAGE 4
+
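For example, a guard mapping below a stack region could look like the following sketch (argument order per the `vmm_set_mapping` call sites elsewhere in this changeset):

```c
/* hypothetical: fence off the page right below a region's start */
static void
guard_below(ptr_t region_start)
{
    /* pa and attr are ignored when VMAP_GUARDPAGE is given */
    vmm_set_mapping(VMS_SELF, region_start - PG_SIZE, 0, 0, VMAP_GUARDPAGE);
}
```
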
/**
* @brief Require the next available page mapping to fall within the given 4MiB address space
*
*
*/
ptr_t
-vmm_dup_page(pid_t pid, ptr_t pa);
-
-ptr_t
-vmm_dup_vmspace(pid_t pid);
+vmm_dup_page(ptr_t pa);
/**
* @brief Mount another virtual address space into the current virtual address space
ptr_t
vmm_unmount_pd(ptr_t mnt);
+static inline ptr_t
+vmm_mount_pg(ptr_t mnt, ptr_t pa) {
+ assert(pa);
+ vmm_set_mapping(VMS_SELF, mnt, pa, PG_PREM_RW, 0);
+ return mnt;
+}
+
+static inline ptr_t
+vmm_unmount_pg(ptr_t mnt) {
+ vmm_del_mapping(VMS_SELF, mnt);
+ return mnt;
+}
+
void*
vmm_ioremap(ptr_t paddr, size_t size);
--- /dev/null
+#ifndef __LUNAIX_CONTEXT_H
+#define __LUNAIX_CONTEXT_H
+
+struct exec_param;
+struct regcontext;
+struct pcontext;
+typedef struct pcontext isr_param;
+
+#include <lunaix/compiler.h>
+#include <sys/interrupts.h>
+
+struct transfer_context
+{
+ ptr_t inject;
+ struct {
+ struct pcontext isr;
+ struct exec_param eret;
+ } compact transfer;
+};
+
+bool
+inject_transfer_context(ptr_t vm_mnt, struct transfer_context* tctx);
+
+void
+thread_setup_trasnfer(struct transfer_context* tctx,
+ ptr_t kstack_tp, ptr_t ustack_pt,
+ ptr_t entry, bool to_user);
+
+static inline void
+thread_create_user_transfer(struct transfer_context* tctx,
+ ptr_t kstack_tp, ptr_t ustack_pt,
+ ptr_t entry)
+{
+ thread_setup_trasnfer(tctx, kstack_tp, ustack_pt, entry, true);
+}
+
+static inline void
+thread_create_kernel_transfer(struct transfer_context* tctx,
+ ptr_t kstack_tp, ptr_t entry)
+{
+ thread_setup_trasnfer(tctx, kstack_tp, 0, entry, false);
+}
+
+#endif /* __LUNAIX_CONTEXT_H */
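
A sketch of how these helpers chain together when preparing a thread's very first switch-in (the values fed in are assumptions):

```c
static void
prepare_kernel_thread(ptr_t kstack_top, ptr_t entry)
{
    struct transfer_context tctx;

    /* lay out the initial pcontext + exec_param against the stack top... */
    thread_create_kernel_transfer(&tctx, kstack_top, entry);

    /* ...then write it through the (here: current) vm mount */
    inject_transfer_context(VMS_SELF, &tctx);
}
```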
#include <lunaix/fs.h>
#include <lunaix/iopoll.h>
#include <lunaix/mm/mm.h>
+#include <lunaix/mm/page.h>
#include <lunaix/mm/region.h>
#include <lunaix/signal.h>
#include <lunaix/timer.h>
#include <lunaix/types.h>
+#include <lunaix/spike.h>
+#include <lunaix/pcontext.h>
#include <stdint.h>
-#include <sys/interrupts.h>
-// Although the kernel is not a process, Pid=-1 is used to refer to it for distinction. This mainly eases physical page ownership checks.
-#define KERNEL_PID -1
/*
- |C|Bk|De|Tn|Pu|Rn|
- \----/
- Dt
+ |C|Sp|Bk|De|Tn|Pu|Rn|
+ \----/
+ Dt
Group Dt: whether this process is terminated.
De: Destroyed
Pu: Paused
Bk: Blocked
+ Sp: Stopped
C : Created
*/
#define PS_DESTROY 4
#define PS_PAUSED 8
#define PS_BLOCKED 16
-#define PS_CREATED 32
+#define PS_STOPPED 32
+#define PS_CREATED 64
-#define PS_GrBP (PS_PAUSED | PS_BLOCKED)
+#define PS_GrBP (PS_PAUSED | PS_BLOCKED | PS_STOPPED)
#define PS_GrDT (PS_TERMNAT | PS_DESTROY)
+#define PS_Rn (PS_RUNNING | PS_CREATED)
#define proc_terminated(proc) (((proc)->state) & PS_GrDT)
#define proc_hanged(proc) (((proc)->state) & PS_BLOCKED)
-#define proc_runnable(proc) (((proc)->state) & PS_PAUSED)
+#define proc_runnable(proc) (!(proc)->state || !(((proc)->state) & ~PS_Rn))
-struct sigact
-{
- struct sigact* prev;
- sigset_t sa_mask;
- void* sa_actor;
- void* sa_handler;
- pid_t sender;
-};
-struct sighail
-{
- sigset_t sig_pending;
- sigset_t sig_mask;
- struct sigact* inprogress;
- struct sigact signals[_SIG_NUM];
-};
+#define TH_DETACHED 0b0001
+
+#define thread_detached(th) ((th)->flags & TH_DETACHED)
+#define detach_thread(th) ((th)->flags |= TH_DETACHED)
struct proc_sig
{
isr_param* saved_ictx;
} __attribute__((packed));
-struct proc_info
+
+struct proc_info;
+
+struct haybed {
+ struct llist_header sleepers;
+ time_t wakeup_time;
+ time_t alarm_time;
+};
+
+struct thread
{
/*
Any change to *critical section*, including layout, size
must be reflected in arch/i386/interrupt.S.inc to avoid
disaster!
*/
+ struct
+ {
+ isr_param* intr_ctx;
+ ptr_t ustack_top;
+ }; // *critical section
+
+ struct {
+ tid_t tid;
+ time_t created;
+ int state;
+ int syscall_ret;
+ ptr_t exit_val;
+ int flags;
+ };
+
+ struct {
+ ptr_t kstack; // process local kernel stack
+ struct mm_region* ustack; // process local user stack (NULL for kernel thread)
+ };
+
+ struct haybed sleep;
+
+ struct proc_info* process;
+ struct llist_header proc_sibs; // sibling to process-local threads
+ struct llist_header sched_sibs; // sibling to scheduler (global) threads
+ struct sigctx sigctx;
+ waitq_t waitqueue;
+};
- /* ---- critical section start ---- */
-
- pid_t pid;
- struct proc_info* parent;
- isr_param* intr_ctx;
- ptr_t ustack_top;
- ptr_t page_table;
+struct proc_info
+{
+ // active thread, must be at the very beginning
+ struct thread* th_active;
- /* ---- critical section end ---- */
+ struct llist_header threads;
+ int thread_count;
struct llist_header tasks;
+
struct llist_header siblings;
struct llist_header children;
struct llist_header grp_member;
- waitq_t waitqueue;
- struct
- {
- struct llist_header sleepers;
- time_t wakeup_time;
- time_t alarm_time;
- } sleep;
-
- struct proc_mm mm;
- time_t created;
- u8_t state;
- int32_t exit_code;
- int32_t k_status;
- struct sighail sigctx;
+ struct {
+ struct proc_info* parent;
+ pid_t pid;
+ pid_t pgid;
+ time_t created;
+
+ int state;
+ int exit_code;
+ };
+
+ struct proc_mm* mm;
+ struct sigregister* sigreg;
struct v_fdtable* fdtable;
struct v_dnode* cwd;
- pid_t pgid;
+ struct {
+ char* cmd;
+ size_t cmd_len;
+ };
struct iopoll pollctx;
};
extern volatile struct proc_info* __current;
+extern volatile struct thread* current_thread;
+
+/**
+ * @brief Check if current process belong to kernel itself
+ * (pid=0)
+ */
+#define kernel_process(proc) (!(proc)->pid)
+
+#define resume_thread(th) (th)->state = PS_READY
+#define pause_thread(th) (th)->state = PS_PAUSED
+#define block_thread(th) (th)->state = PS_BLOCKED
-#define resume_process(proc) (proc)->state = PS_READY
-#define pause_process(proc) (proc)->state = PS_PAUSED
-#define block_process(proc) (proc)->state = PS_BLOCKED
+static inline void must_inline
+set_current_executing(struct thread* thread)
+{
+ current_thread = thread;
+ __current = thread->process;
+}
+
+static inline struct proc_mm*
+vmspace(struct proc_info* proc)
+{
+ return proc->mm;
+}
+
+static inline ptr_t
+vmroot(struct proc_info* proc)
+{
+ return proc->mm->vmroot;
+}
+
+static inline vm_regions_t*
+vmregions(struct proc_info* proc)
+{
+ return &proc->mm->regions;
+}
static inline void
-block_current()
+block_current_thread()
{
- block_process(__current);
+ block_thread(current_thread);
}
static inline void
-pause_current()
+pause_current_thread()
{
- pause_process(__current);
+ pause_thread(current_thread);
}
static inline void
-resume_current()
+resume_current_thread()
{
- resume_process(__current);
+ resume_thread(current_thread);
+}
+
+static inline int syscall_result(int retval) {
+ return (current_thread->syscall_ret = retval);
}
+/**
+ * @brief Spawn a process with arbitary entry point.
+ * The inherit priviledge level is deduced automatically
+ * from the given entry point
+ *
+ * @param created returned created main thread
+ * @param entry entry point
+ * @param with_ustack whether to pre-allocate a user stack with it
+ * @return int
+ */
+int
+spawn_process(struct thread** created, ptr_t entry, bool with_ustack);
+
+/**
+ * @brief Spawn a process housing a given executable image, along with
+ * program arguments and environment settings
+ *
+ * @param created returned created main thread
+ * @param path file system path to executable
+ * @param argv arguments passed to executable
+ * @param envp environment variables passed to executable
+ * @return int
+ */
+int
+spawn_process_usr(struct thread** created, char* path,
+ const char** argv, const char** envp);
+
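A sketch of how the spawn pair might be driven when bringing up the first user program (path and arguments are illustrative):

```c
static void
bringup_init()
{
    struct thread* main_th;
    const char* argv[] = { "/bin/init", NULL };
    const char* envp[] = { NULL };

    if (spawn_process_usr(&main_th, (char*)"/bin/init", argv, envp) < 0) {
        /* no userland to fall back to */
        spin();
    }
}
```
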
/**
* @brief Allocate and initialize a process control block
*
pid_t
destroy_process(pid_t pid);
-void
-copy_kernel_stack(struct proc_info* proc, ptr_t kstack_from);
+void
+delete_process(struct proc_info* proc);
/**
* @brief Duplicate the current process (LunaixOS's unix-fork-like implementation)
*
*/
void
-terminate_proc(int exit_code);
+terminate_current(int exit_code);
+
+void
+terminate_proccess(struct proc_info* proc, int exit_code);
int
orphaned_proc(pid_t pid);
struct proc_info*
get_process(pid_t pid);
+/*
+ ========= Thread =========
+*/
+
void
-proc_setsignal(struct proc_info* proc, int signum);
+commit_thread(struct thread* thread);
+
+struct thread*
+alloc_thread(struct proc_info* process);
void
-proc_clear_signal(struct proc_info* proc);
+destory_thread(ptr_t vm_mnt, struct thread* thread);
-// enable interrupt upon transfer
-#define TRANSFER_IE 1
+void
+terminate_thread(struct thread* thread, ptr_t val);
+
+void
+terminate_current_thread(ptr_t val);
+
+struct thread*
+create_thread(struct proc_info* proc, ptr_t vm_mnt, bool with_ustack);
-/**
- * @brief Setup process initial context, used to initiate first switch
- *
- * @param proc
- * @param stop
- * @param target
- * @param flags
- */
void
-proc_init_transfer(struct proc_info* proc, ptr_t stop, ptr_t target, int flags);
+start_thread(struct thread* th, ptr_t vm_mnt, ptr_t entry);
+
+static inline void
+spawn_kthread(ptr_t entry) {
+ assert(kernel_process(__current));
+
+ struct thread* th = create_thread(__current, VMS_SELF, false);
+
+ assert(th);
+ start_thread(th, VMS_SELF, entry);
+}
+
+void
+exit_thread(void* val);
+
+void
+thread_release_mem(struct thread* thread, ptr_t vm_mnt);
+
+/*
+ ========= Signal =========
+*/
+
+#define pending_sigs(thread) ((thread)->sigctx.sig_pending)
+#define raise_signal(thread, sig) sigset_add(pending_sigs(thread), sig)
+#define sigact_of(proc, sig) ((proc)->sigreg->signals[(sig)])
+#define set_sigact(proc, sig, sigact) ((proc)->sigreg->signals[(sig)] = (sigact))
+
+static inline struct sigact*
+active_signal(struct thread* thread) {
+ struct sigctx* sigctx = &thread->sigctx;
+ struct sigregister* sigreg = thread->process->sigreg;
+ return sigreg->signals[sigctx->sig_active];
+}
+
+static inline void
+sigactive_push(struct thread* thread, int active_sig) {
+ struct sigctx* sigctx = &thread->sigctx;
+ int prev_active = sigctx->sig_active;
+
+ assert(sigact_of(thread->process, active_sig));
+
+ sigctx->sig_order[active_sig] = prev_active;
+ sigctx->sig_active = active_sig;
+}
+
+static inline void
+sigactive_pop(struct thread* thread) {
+ struct sigctx* sigctx = &thread->sigctx;
+ int active_sig = sigctx->sig_active;
+
+ sigctx->sig_active = sigctx->sig_order[active_sig];
+ sigctx->sig_order[active_sig] = active_sig;
+}
+
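The pair above threads a stack of in-delivery signals through `sig_order`; a bookkeeping sketch (signal numbers illustrative, their actions assumed registered):

```c
static void
nested_delivery_example(struct thread* th)
{
    sigactive_push(th, _SIGINT);  /* sig_active = _SIGINT */
    sigactive_push(th, _SIGSEGV); /* nested: sig_order[_SIGSEGV] = _SIGINT */
    sigactive_pop(th);            /* unwound: sig_active = _SIGINT again */
}
```
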
+void
+proc_setsignal(struct proc_info* proc, signum_t signum);
+
+void
+thread_setsignal(struct thread* thread, signum_t signum);
+
#endif /* __LUNAIX_PROCESS_H */
#ifndef __LUNAIX_SCHEDULER_H
#define __LUNAIX_SCHEDULER_H
+#include <lunaix/compiler.h>
+#include <lunaix/process.h>
+
#define SCHED_TIME_SLICE 300
+#define MAX_THREAD_PP 1024
#define PROC_TABLE_SIZE 8192
#define MAX_PROCESS (PROC_TABLE_SIZE / sizeof(ptr_t))
struct scheduler
{
- struct proc_info** _procs;
+ struct proc_info** procs;
+ struct llist_header* threads;
+ struct llist_header* proc_list;
+ struct llist_header sleepers;
+
int procs_index;
int ptable_len;
+ int ttable_len;
};
void
sched_init();
-void
+void noret
schedule();
void
-sched_yieldk();
+sched_pass();
+
+void noret
+run(struct thread* thread);
+
+void
+cleanup_detached_threads();
#endif /* __LUNAIX_SCHEDULER_H */
#define sigset_union(set, set2) ((set) = (set) | (set2))
#define sigset_intersect(set, set2) ((set) = (set) & (set2))
+struct sigact
+{
+ sigset_t sa_mask;
+ void* sa_actor;
+ void* sa_handler;
+ pid_t sender;
+};
+
+struct sigregister {
+ struct sigact* signals[_SIG_NUM];
+};
+
+struct sigctx
+{
+ sigset_t sig_pending;
+ sigset_t sig_mask;
+ signum_t sig_active;
+ signum_t sig_order[_SIG_NUM];
+};
+
int
-signal_send(pid_t pid, int signum);
+signal_send(pid_t pid, signum_t signum);
+
+void
+signal_dup_context(struct sigctx* dest_ctx);
+
+void
+signal_dup_registers(struct sigregister* dest_reg);
+
+void
+signal_reset_context(struct sigctx* sigctx);
+
+void
+signal_reset_register(struct sigregister* sigreg);
+
+void
+signal_free_registers(struct sigregister* sigreg);
#endif /* __LUNAIX_SIGNAL_H */
#ifndef __LUNAIXOS_NASSERT__
#define assert(cond) \
do { \
- if (!(cond)) { \
+ if (unlikely(!(cond))) { \
__assert_fail(#cond, __FILE__, __LINE__); \
} \
} while(0)
#define assert_msg(cond, msg) \
do { \
- if (!(cond)) { \
+ if (unlikely(!(cond))) { \
__assert_fail(msg, __FILE__, __LINE__); \
} \
} while(0)
#include <lunaix/process.h>
#include <lunaix/syscall.h>
-#define DO_STATUS(errno) SYSCALL_ESTATUS(__current->k_status = errno)
+#define DO_STATUS(errno) SYSCALL_ESTATUS(syscall_result(errno))
#define DO_STATUS_OR_RETURN(errno) ({ errno < 0 ? DO_STATUS(errno) : errno; })
#endif /* __LUNAIX_SYSCALL_UTILS_H */
#include <lunaix/ds/llist.h>
#include <lunaix/time.h>
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
#define SYS_TIMER_FREQUENCY_HZ 1000
#define __LUNAIX_TRACE_H
#include <lunaix/boot_generic.h>
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
struct ksym_entry
{
#ifndef __LUNAIX_GDBSTUB_H
#define __LUNAIX_GDBSTUB_H
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
void
gdbstub_loop(isr_param* param);
#ifndef __LUNAIX_LSDBG_H
#define __LUNAIX_LSDBG_H
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
#define SDBG_CLNT_HI 0x10
#define SDBG_CLNT_QUIT 0xff
#define PROT_READ (1 << 2)
#define PROT_WRITE (1 << 3)
#define PROT_EXEC (1 << 4)
+#define PROT_NONE 0
// identity mapped to region attributes
#define MAP_PRIVATE MAP_RSHARED
#define MAP_EXCLUSIVE 0x0
#define MAP_ANON (1 << 5)
+#define MAP_ANONYMOUS MAP_ANON
#define MAP_STACK 0 // no effect in Lunaix
// other MAP_* goes should beyond 0x20
#define SIGCHLD 2
#define SIGCLD SIGCHLD
-#define SIGSTOP 3
-#define SIGCONT 4
+#define SIGINT 3
+#define SIGSTOP 4
+#define SIGCONT 5
-#define SIGINT 5
#define SIGSEGV 6
#define SIGKILL 7
#define SIGTERM 8
+#define SIGILL 9
+#define SIGSYS 10
#define SIG_BLOCK 1
#define SIG_UNBLOCK 2
#define SIG_SETMASK 3
+typedef unsigned char signum_t;
typedef unsigned int sigset_t;
typedef void (*sighandler_t)(int);
#define ENOEXEC -27
#define E2BIG -28
#define ELIBBAD -29
+#define EAGAIN -30
+#define EDEADLK -31
#endif /* __LUNAIX_STATUS_H */
#define __SYSCALL_fstat 55
#define __SYSCALL_pollctl 56
+#define __SYSCALL_th_create 57
+#define __SYSCALL_th_self 58
+#define __SYSCALL_th_exit 59
+#define __SYSCALL_th_join 60
+#define __SYSCALL_th_kill 61
+#define __SYSCALL_th_detach 62
+#define __SYSCALL_th_sigmask 63
+
#define __SYSCALL_MAX 0x100
#endif /* __LUNAIX_SYSCALLID_H */
--- /dev/null
+#ifndef __LUNAIX_USR_THREADS_H
+#define __LUNAIX_USR_THREADS_H
+
+#include "types.h"
+
+struct uthread_info {
+ void* th_stack_top;
+ size_t th_stack_sz;
+};
+
+#endif /* __LUNAIX_USR_THREADS_H */
typedef signed long ssize_t;
typedef int pid_t;
+typedef int tid_t;
typedef __SIZE_TYPE__ size_t;
/* Reserve region for all loaded modules */
for (size_t i = 0; i < bhctx->mods.mods_num; i++) {
struct boot_modent* mod = &bhctx->mods.entries[i];
- pmm_mark_chunk_occupied(KERNEL_PID,
- PN(mod->start),
+ pmm_mark_chunk_occupied(PN(mod->start),
CEIL(mod->end - mod->start, PG_SIZE_BITS),
PP_FGLOCKED);
}
// clean up
for (size_t i = 0; i < (ptr_t)(&__kexec_boot_end); i += PG_SIZE) {
vmm_del_mapping(VMS_SELF, (ptr_t)i);
- pmm_free_page(KERNEL_PID, (ptr_t)i);
+ pmm_free_page((ptr_t)i);
}
}
#include <lunaix/syslog.h>
#include <lunaix/trace.h>
-#include <sys/cpu.h>
-#include <sys/mm/mempart.h>
+#include <sys/abi.h>
+#include <sys/mm/mm_defs.h>
#include <klibc/string.h>
}
static inline bool valid_fp(ptr_t ptr) {
- return KERNEL_STACK < ptr && ptr < KERNEL_EXEC_END;
+ ptr_t start = ROUNDUP(current_thread->kstack - KSTACK_SIZE, MEM_PAGE);
+ return start < ptr && ptr < current_thread->kstack;
}
int
int i = 0;
while (valid_fp((ptr_t)frame) && i < limit) {
- ptr_t pc = *(frame + 1);
+ ptr_t pc = abi_get_retaddrat((ptr_t)frame);
current = trace_sym_lookup(pc);
tb_buffer[i] =
static inline void
trace_print_code_entry(ptr_t sym_pc, ptr_t inst_pc, char* sym)
{
- DEBUG("%p+%p: %s", sym_pc, inst_pc - sym_pc, sym);
+ if (sym_pc) {
+ DEBUG("%p+%p: %s", sym_pc, inst_pc - sym_pc, sym);
+ } else {
+ DEBUG("%p+%p: %s", inst_pc, sym_pc, sym);
+ }
}
void
void
trace_printstack()
{
- trace_printstack_of(cpu_get_fp());
+ trace_printstack_of(abi_get_callframe());
}
static void
-trace_printswctx(const isr_param* p, char* direction)
+trace_printswctx(const isr_param* p, bool from_usr, bool to_usr)
{
struct ksym_entry* sym = trace_sym_lookup(p->execp->eip);
- DEBUG(">> (sw:%s) iv:%d, errno:%p <<",
- direction,
+ DEBUG("^^^^^ --- %s", to_usr ? "user" : "kernel");
+ DEBUG(" interrupted on #%d, ecode=%p",
p->execp->vector,
p->execp->err_code);
+ DEBUG("vvvvv --- %s", from_usr ? "user" : "kernel");
ptr_t sym_pc = sym ? sym->pc : p->execp->eip;
trace_print_code_entry(sym_pc, p->execp->eip, ksym_getstr(sym));
trace_printstack_isr(const isr_param* isrm)
{
isr_param* p = isrm;
- ptr_t fp = cpu_get_fp();
- int prev_fromusr = 0;
+ ptr_t fp = abi_get_callframe();
+ int prev_usrctx = 0;
DEBUG("stack trace (pid=%d)\n", __current->pid);
trace_printstack_of(fp);
while (p) {
- if (!prev_fromusr) {
- if (uspace_context(p)) {
- trace_printswctx(p, "s/u");
+ if (!prev_usrctx) {
+ if (!kernel_context(p)) {
+ trace_printswctx(p, true, false);
} else {
- trace_printswctx(p, "s/s");
+ trace_printswctx(p, false, false);
}
} else {
- trace_printswctx(p, "u/s");
+ trace_printswctx(p, false, true);
}
fp = saved_fp(p);
+ if (!valid_fp(fp)) {
+ DEBUG("??? invalid frame: %p", fp);
+ break;
+ }
+
trace_printstack_of(fp);
- prev_fromusr = uspace_context(p);
+ prev_usrctx = !kernel_context(p);
p = p->execp->saved_prev_ctx;
}
static inline void
current_rmiopoll(int pld)
{
- iopoll_remove(__current->pid, &__current->pollctx, pld);
+ iopoll_remove(current_thread, pld);
}
static struct iopoller*
continue;
}
- int pld = iopoll_install(__current->pid, &__current->pollctx, fd_s);
+ int pld = iopoll_install(current_thread, fd_s);
if (pld < 0) {
nc++;
}
static void
__wait_until_event()
{
- block_current();
- sched_yieldk();
+ block_current_thread();
+ sched_pass();
}
void
}
void
-iopoll_free(pid_t pid, struct iopoll* ctx)
+iopoll_free(struct proc_info* proc)
{
+ pid_t pid = proc->pid;
+ struct iopoll* ctx = &proc->pollctx;
for (int i = 0; i < MAX_POLLER_COUNT; i++) {
struct iopoller* poller = ctx->pollers[i];
if (poller) {
struct iopoller *pos, *n;
llist_for_each(pos, n, pollers_q, evt_listener)
{
- struct proc_info* proc = get_process(pos->pid);
- if (proc_hanged(proc)) {
- resume_process(proc);
+ struct thread* thread = pos->thread;
+ assert(!proc_terminated(thread));
+
+ if (proc_hanged(thread)) {
+ resume_thread(thread);
}
-
- assert(!proc_terminated(proc));
}
}
int
-iopoll_remove(pid_t pid, struct iopoll* ctx, int pld)
+iopoll_remove(struct thread* thread, int pld)
{
+ struct proc_info* proc = thread->process;
+ struct iopoll* ctx = &proc->pollctx;
struct iopoller* poller = ctx->pollers[pld];
if (!poller) {
return ENOENT;
}
- vfs_pclose(poller->file_ref, pid);
+ // FIXME vfs locking model need to rethink in the presence of threads
+ vfs_pclose(poller->file_ref, proc->pid);
vfree(poller);
ctx->pollers[pld] = NULL;
ctx->n_poller--;
}
int
-iopoll_install(pid_t pid, struct iopoll* pollctx, struct v_fd* fd)
+iopoll_install(struct thread* thread, struct v_fd* fd)
{
int pld = __alloc_pld();
if (pld < 0) {
struct iopoller* iop = valloc(sizeof(struct iopoller));
*iop = (struct iopoller){
.file_ref = fd->file,
- .pid = pid,
+ .thread = thread,
};
vfs_ref_file(fd->file);
- __current->pollctx.pollers[pld] = iop;
- __current->pollctx.n_poller++;
+
+ struct proc_info* proc = thread->process;
+ proc->pollctx.pollers[pld] = iop;
+ proc->pollctx.n_poller++;
struct device* dev;
if ((dev = fd2dev(fd))) {
} break;
case _SPOLL_RM: {
int pld = va_arg(va, int);
- retcode = iopoll_remove(__current->pid, &__current->pollctx, pld);
+ retcode = iopoll_remove(current_thread, pld);
} break;
case _SPOLL_WAIT: {
struct poll_info* pinfos = va_arg(va, struct poll_info*);
#include <lunaix/ds/mutex.h>
#include <lunaix/process.h>
+#include <lunaix/sched.h>
void
mutex_lock(mutex_t* mutex)
}
while (atomic_load(&mutex->lk)) {
- sched_yieldk();
+ sched_pass();
}
atomic_fetch_add(&mutex->lk, 1);
{
while (!atomic_load(&sem->counter)) {
// FIXME: better thing like wait queue
- sched_yieldk();
+ sched_pass();
}
atomic_fetch_sub(&sem->counter, 1);
}
#include <lunaix/ds/waitq.h>
#include <lunaix/process.h>
+#include <lunaix/sched.h>
#include <lunaix/spike.h>
void
pwait(waitq_t* queue)
{
- assert(__current);
+ assert(current_thread);
// prevent race condition.
cpu_disable_interrupt();
- waitq_t* current_wq = &__current->waitqueue;
+ waitq_t* current_wq = &current_thread->waitqueue;
assert(llist_empty(&current_wq->waiters));
llist_append(&queue->waiters, &current_wq->waiters);
- block_current();
- sched_yieldk();
+ block_current_thread();
+ sched_pass();
cpu_enable_interrupt();
}
}
waitq_t* wq = list_entry(queue->waiters.next, waitq_t, waiters);
- struct proc_info* proc = container_of(wq, struct proc_info, waitqueue);
+ struct thread* thread = container_of(wq, struct thread, waitqueue);
- assert(proc->state == PS_BLOCKED);
- proc->state = PS_READY;
+ assert(thread->state == PS_BLOCKED);
+ thread->state = PS_READY;
llist_delete(&wq->waiters);
}
return;
}
- struct proc_info* proc;
+ struct thread* thread;
waitq_t *pos, *n;
llist_for_each(pos, n, &queue->waiters, waiters)
{
- proc = container_of(pos, struct proc_info, waitqueue);
+ thread = container_of(pos, struct thread, waitqueue);
- assert(proc->state == PS_BLOCKED);
- proc->state = PS_READY;
+ assert(thread->state == PS_BLOCKED);
+ thread->state = PS_READY;
llist_delete(&pos->waiters);
}
}
\ No newline at end of file
return pcache_read(elf->inode, data, len, off);
}
+static int
+elf32_do_open(struct elf32* elf, struct v_file* elf_file)
+{
+ int status = 0;
+ elf->pheaders = NULL;
+ elf->elf_file = elf_file;
+
+ if ((status = elf32_read_ehdr(elf)) < 0) {
+ elf32_close(elf);
+ return status;
+ }
+
+ if ((status = elf32_read_phdr(elf)) < 0) {
+ elf32_close(elf);
+ return status;
+ }
+
+ return 0;
+}
+
int
elf32_open(struct elf32* elf, const char* path)
{
return error;
}
- return elf32_openat(elf, elffile);
+ return elf32_do_open(elf, elffile);
}
int
-elf32_openat(struct elf32* elf, const void* elf_vfile)
+elf32_openat(struct elf32* elf, void* elf_vfile)
{
- int status = 0;
- elf->pheaders = NULL;
- elf->elf_file = elf_vfile;
-
- if ((status = elf32_read_ehdr(elf)) < 0) {
- elf32_close(elf);
- return status;
- }
-
- if ((status = elf32_read_phdr(elf)) < 0) {
- elf32_close(elf);
- return status;
- }
-
- return 0;
+ // so the ref count is kept in sync
+ vfs_ref_file(elf_vfile);
+ return elf32_do_open(elf, elf_vfile);
}
int
uintptr_t va = phdre->p_va + base_va;
struct exec_container* container = ldctx->container;
struct mmap_param param = { .vms_mnt = container->vms_mnt,
- .pvms = &container->proc->mm,
+ .pvms = vmspace(container->proc),
.proct = proct,
.offset = PG_ALIGN(phdre->p_offset),
.mlen = ROUNDUP(phdre->p_memsz, PG_SIZE),
- .flen = phdre->p_filesz,
.flags = MAP_FIXED | MAP_PRIVATE,
.type = REGION_TYPE_CODE };
struct mm_region* seg_reg;
- int status = mem_map(NULL, &seg_reg, PG_ALIGN(va), elfile, &param);
+ int status = mmap_user(NULL, &seg_reg, PG_ALIGN(va), elfile, &param);
if (!status) {
size_t next_addr = phdre->p_memsz + va;
ldctx->mem_sz += phdre->p_memsz;
} else {
// we probably fucked up our process
- terminate_proc(-1);
+ terminate_current(-1);
}
return status;
goto done;
}
- ldpath = valloc(512);
- errno = elf32_find_loader(&elf, ldpath, 512);
+ ldpath = valloc(256);
+ errno = elf32_find_loader(&elf, ldpath, 256);
uintptr_t load_base = 0;
if (errno < 0) {
elf32_close(&elf);
done:
- vfree_safe(ldpath);
+ if (!container->argv_pp[1]) {
+ vfree_safe(ldpath);
+ }
return errno;
}
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/vmm.h>
#include <lunaix/process.h>
+#include <lunaix/sched.h>
#include <lunaix/spike.h>
#include <lunaix/status.h>
#include <lunaix/syscall.h>
#include <lunaix/syscall_utils.h>
#include <sys/abi.h>
-#include <sys/mm/mempart.h>
+#include <sys/mm/mm_defs.h>
#include <klibc/string.h>
void
-exec_container(struct exec_container* param,
- struct proc_info* proc,
+exec_init_container(struct exec_container* param,
+ struct thread* thread,
ptr_t vms,
const char** argv,
const char** envp)
{
- *param = (struct exec_container){ .proc = proc,
+ assert(thread->ustack);
+ ptr_t ustack_top = align_stack(thread->ustack->end - 1);
+ *param = (struct exec_container){ .proc = thread->process,
.vms_mnt = vms,
.exe = { .container = param },
.argv_pp = { 0, 0 },
.argv = argv,
- .envp = envp };
+ .envp = envp,
+ .stack_top = ustack_top };
}
size_t
return (sz + 1) * sizeof(ptr_t);
}
-ptr_t
+static ptr_t
copy_to_ustack(ptr_t stack_top, ptr_t* paramv)
{
ptr_t ptr;
return stack_top;
}
+static void
+save_process_cmd(struct proc_info* proc, ptr_t* argv)
+{
+ ptr_t ptr, *_argv = argv;
+ size_t total_sz = 0;
+ while ((ptr = *_argv)) {
+ total_sz += strlen((const char*)ptr) + 1;
+ _argv++;
+ }
+
+ if (proc->cmd) {
+ vfree(proc->cmd);
+ }
+
+ char* cmd_ = (char*)valloc(total_sz);
+ proc->cmd = cmd_;
+ proc->cmd_len = total_sz;
+
+ while ((ptr = *argv)) {
+ cmd_ = strcpy(cmd_, (const char*)ptr);
+ cmd_[-1] = ' ';
+ argv++;
+ }
+ cmd_[-1] = '\0';
+}
+
// externed from mm/dmm.c
extern int
create_heap(struct proc_mm* pvms, ptr_t addr);
goto done;
}
- struct proc_mm* pvms = &container->proc->mm;
+ struct proc_info* proc = container->proc;
+ struct proc_mm* pvms = vmspace(proc);
if (pvms->heap) {
mem_unmap_region(container->vms_mnt, pvms->heap);
if (!argv_extra[1]) {
// If loading a statically linked file, then heap remapping we can do,
// otherwise delayed.
- create_heap(&container->proc->mm, PG_ALIGN(container->exe.end));
+ create_heap(vmspace(proc), PG_ALIGN(container->exe.end));
}
if (container->vms_mnt == VMS_SELF) {
// we are loading executable into current addr space
- ptr_t ustack = USR_STACK_END;
+ ptr_t ustack = container->stack_top;
size_t argv_len = 0, envp_len = 0;
ptr_t argv_ptr = 0, envp_ptr = 0;
ustack = copy_to_ustack(ustack, (ptr_t*)ustack);
}
- if (argv) {
+ if (argv) {
argv_len = args_ptr_size(argv);
ustack -= argv_len;
memcpy((void*)ustack, (const void**)argv, argv_len);
- for (size_t i = 0; i < 2 && argv_extra[i]; i++) {
- ustack -= sizeof(ptr_t);
- *((ptr_t*)ustack) = (ptr_t)argv_extra[i];
- argv_len += sizeof(ptr_t);
- }
+ }
- argv_ptr = ustack;
- ustack = copy_to_ustack(ustack, (ptr_t*)ustack);
+ for (size_t i = 0; i < 2 && argv_extra[i]; i++) {
+ ustack -= sizeof(ptr_t);
+ *((ptr_t*)ustack) = (ptr_t)argv_extra[i];
+ argv_len += sizeof(ptr_t);
}
+ argv_ptr = ustack;
+ ustack = copy_to_ustack(ustack, (ptr_t*)ustack);
+
+ save_process_cmd(proc, (ptr_t*)argv_ptr);
+
// four args (arg{c|v}, env{c|p}) for main
struct uexec_param* exec_param = &((struct uexec_param*)ustack)[-1];
.envp = (char**)envp_ptr };
} else {
/*
- TODO need to find a way to inject argv and envp remotely
- this is for the support of kernel level implementation of
- posix_spawn
-
- IDEA
- 1. Allocate a orphaned physical page (i.e., do not belong to any
- VMA)
- 2. Mounted to a temporary mount point in current VMA, (i.e.,
- PG_MOUNT_*)
- 3. Do setup there.
- 4. Unmount then mounted to the foreign VMA as the first stack
- page.
+ TODO Inject into the remote user stack with our procvm_remote toolset
+ Need a better way to factorise the argv/envp length calculation
*/
fail("not implemented");
+
}
done:
errno = exec_load(container, file);
+ // It shouldn't matter which pid we pass. The only reader is in the
+ // current context and must have finished reading at this point,
+ // so the dead-lock condition cannot arise and the pid used
+ // for arbitration has no effect.
+ vfs_pclose(file, container->proc->pid);
+
done:
return errno;
}
int errno = 0;
struct exec_container container;
- exec_container(
- &container, (struct proc_info*)__current, VMS_SELF, argv, envp);
+ exec_init_container(&container, current_thread, VMS_SELF, argv, envp);
errno = exec_load_byname(&container, filename);
int errno = 0;
struct exec_container container;
- exec_container(
- &container, (struct proc_info*)__current, VMS_SELF, argv, envp);
+ exec_init_container(
+ &container, current_thread, VMS_SELF, argv, envp);
if ((errno = exec_load_byname(&container, filename))) {
goto done;
// we will jump to new entry point (_u_start) upon syscall's
// return, so execve 'will not return' from the perspective of its invoker
- eret_target(__current) = container.exe.entry;
- eret_stack(__current) = container.stack_top;
+ eret_target(current_thread) = container.exe.entry;
+ eret_stack(current_thread) = container.stack_top;
// these become meaningless once execved!
- __current->ustack_top = 0;
- proc_clear_signal(__current);
+ current_thread->ustack_top = 0;
+ signal_reset_context(¤t_thread->sigctx);
+ signal_reset_register(__current->sigreg);
done:
// set return value
pcache_free_page(void* va)
{
ptr_t pa = vmm_del_mapping(VMS_SELF, (ptr_t)va);
- pmm_free_page(KERNEL_PID, pa);
+ pmm_free_page(pa);
}
static void*
pcache_alloc_page()
{
int i = 0;
- ptr_t pp = pmm_alloc_page(KERNEL_PID, 0), va = 0;
+ ptr_t pp = pmm_alloc_page(0), va = 0;
if (!pp) {
return NULL;
}
if (!(va = (ptr_t)vmap(pp, PG_SIZE, PG_PREM_RW, 0))) {
- pmm_free_page(KERNEL_PID, pp);
+ pmm_free_page(pp);
return NULL;
}
ret_ptr = buf;
done:
- __current->k_status = errno;
+ syscall_result(errno);
return ret_ptr;
}
#include <lunaix/syscall.h>
#include <lunaix/syscall_utils.h>
-#define DO_STATUS(errno) SYSCALL_ESTATUS(__current->k_status = errno)
-
struct v_xattr_entry*
xattr_new(struct hstr* name)
{
#include <lunaix/trace.h>
#include <lunaix/tty/tty.h>
#include <lunaix/owloysius.h>
+#include <lunaix/pcontext.h>
#include <hal/acpi/acpi.h>
#include <hal/intc.h>
#include <sys/abi.h>
-#include <sys/interrupts.h>
#include <sys/mm/mempart.h>
#include <klibc/strfmt.h>
#include <klibc/string.h>
-extern void
-__proc0(); /* proc0.c */
-
void
-spawn_proc0();
+spawn_lunad();
void
kmem_init(struct boot_handoff* bhctx);
*/
boot_end(bhctx);
- spawn_proc0();
+ spawn_lunad();
}
+extern void
+lunad_main();
+
/**
- * @brief 创建并运行proc0进程
+ * @brief Create and run the Lunaix daemon process
*
*/
void
-spawn_proc0()
+spawn_lunad()
{
- struct proc_info* proc0 = alloc_process();
-
- /**
- * @brief
- * 注意:这里和视频中说的不一样,属于我之后的一点微调。
- * 在视频中,spawn_proc0是在_kernel_post_init的末尾才调用的。并且是直接跳转到_proc0
- *
- * 但是我后来发现,上述的方法会产生竞态条件。这是因为spawn_proc0被调用的时候,时钟中断已经开启,
- * 而中断的产生会打乱栈的布局,从而使得下面的上下文设置代码产生未定义行为(Undefined
- * Behaviour)。 为了保险起见,有两种办法:
- * 1. 在创建proc0进程前关闭中断
- * 2. 将_kernel_post_init搬进proc0进程
- * (_kernel_post_init已经更名为init_platform)
- *
- * 目前的解决方案是2
- */
-
- proc0->parent = proc0;
-
- // 方案1:必须在读取eflags之后禁用。否则当进程被调度时,中断依然是关闭的!
- // cpu_disable_interrupt();
-
- /* Ok... 首先fork进我们的零号进程,而后由那里,我们fork进init进程。 */
-
- // 把当前虚拟地址空间(内核)复制一份。
- proc0->page_table = vmm_dup_vmspace(proc0->pid);
-
- // 直接切换到新的拷贝,进行配置。
- cpu_chvmspace(proc0->page_table);
-
- // 为内核创建一个专属栈空间。
- for (size_t i = 0; i < KERNEL_STACK_SIZE; i += PG_SIZE) {
- ptr_t pa = pmm_alloc_page(KERNEL_PID, 0);
- vmm_set_mapping(VMS_SELF, KERNEL_STACK + i, pa, PG_PREM_RW, VMAP_NULL);
- }
-
- proc_init_transfer(proc0, KERNEL_STACK_END, (ptr_t)__proc0, 0);
-
- // 向调度器注册进程。
- commit_process(proc0);
-
- // 由于时钟中断与APIC未就绪,我们需要手动进行第一次调度。这里也会同时隐式地恢复我们的eflags.IF位
- proc0->state = PS_RUNNING;
- switch_context(proc0);
+ int has_error;
+ struct thread* kthread;
+
+ has_error = spawn_process(&kthread, (ptr_t)lunad_main, false);
+ assert_msg(!has_error, "failed to spawn lunad");
- /* Should not return */
- assert_msg(0, "Unexpected Return");
+ run(kthread);
+
+ fail("Unexpected Return");
}
void
extern u8_t __kexec_end;
// mark the pages occupied by the kernel, including the first 1MB and hhk_init, as used
size_t pg_count = ((ptr_t)&__kexec_end - KERNEL_EXEC) >> PG_SIZE_BITS;
- pmm_mark_chunk_occupied(KERNEL_PID, 0, pg_count, PP_FGLOCKED);
+ pmm_mark_chunk_occupied(0, pg_count, PP_FGLOCKED);
// reserve higher half
for (size_t i = L1_INDEX(KERNEL_EXEC); i < 1023; i++) {
.max_recs = MAX_KPENT_NUM,
.kp_ent_wp = &kprecs.kp_ents.ents
};
-export_symbol(debug, kprecs);
+export_symbol(debug, kprintf, kprecs);
static char*
shift_level(const char* str, int* level)
#include <lunaix/syslog.h>
#include <lunaix/types.h>
#include <lunaix/owloysius.h>
+#include <lunaix/sched.h>
+#include <lunaix/kpreempt.h>
#include <klibc/string.h>
return 0;
}
+static void
+lunad_do_usr() {
+ // No, these are not preemptive
+ cpu_disable_interrupt();
+
+ if (!mount_bootmedium() || !exec_initd()) {
+ fail("failed to initd");
+ }
+}
+
/**
- * @brief LunaixOS的零号进程,该进程永远为可执行。
+ * @brief The LunaixOS kernel process, which is always runnable.
 *
 * This mainly ensures the scheduler still has something to do when
 * no other process is schedulable.
 *
 * It is also responsible for forking our init process.
*
*/
-void
-__proc0()
+void _preemptible
+lunad_main()
{
/*
* We must defer boot code/data cleaning to here, after we successfully
*/
boot_cleanup();
- init_platform();
+ spawn_kthread((ptr_t)init_platform);
- init_proc_user_space(__current);
-
- if (!mount_bootmedium() || !exec_initd()) {
- FATAL("failed to initd");
- // should not reach
+ /*
+ NOTE Kernel preemption is enabled beyond this point.
+
+ More precisely, this is not full kernel preemption (where preemption
+ may happen at any point in the kernel, except where explicitly
+ disabled). Lunaix is designed in a non-preemptive fashion, so we
+ implement kernel preemption the other way around: only selected
+ kernel functions, written with great care about the preemptive
+ assumption, go into the kernel thread (which is preemptive!)
+ */
+
+ cpu_enable_interrupt();
+ while (1)
+ {
+ cleanup_detached_threads();
+ sched_pass();
}
}
// FIXME Re-design needed!!
// sdbg_init();
+
+ assert(!spawn_process(NULL, (ptr_t)lunad_do_usr, true));
+
+ exit_thread(NULL);
}
\ No newline at end of file
void*
__alloc_cake(unsigned int cake_pg)
{
- ptr_t pa = (ptr_t)pmm_alloc_cpage(KERNEL_PID, cake_pg, 0);
+ ptr_t pa = (ptr_t)pmm_alloc_cpage(cake_pg, 0);
if (!pa) {
return NULL;
}
llist_append(&pile->partial, &pos->cakes);
}
+ *((unsigned int*)area) = DEADCAKE_MARK;
+
return 1;
}
.mlen = PG_SIZE };
int status = 0;
struct mm_region* heap;
- if ((status = mem_map(NULL, &heap, addr, NULL, &map_param))) {
+ if ((status = mmap_user(NULL, &heap, addr, NULL, &map_param))) {
return status;
}
__DEFINE_LXSYSCALL1(void*, sbrk, ssize_t, incr)
{
- struct proc_mm* pvms = (struct proc_mm*)&__current->mm;
+ struct proc_mm* pvms = vmspace(__current);
struct mm_region* heap = pvms->heap;
assert(heap);
__DEFINE_LXSYSCALL1(int, brk, void*, addr)
{
- struct proc_mm* pvms = (struct proc_mm*)&__current->mm;
+ struct proc_mm* pvms = vmspace(__current);
struct mm_region* heap = pvms->heap;
if (!heap) {
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/vmm.h>
#include <lunaix/spike.h>
-
#include <lunaix/syscall.h>
#include <lunaix/syscall_utils.h>
#include <sys/mm/mempart.h>
+#include <usr/lunaix/mann_flags.h>
+
// any size beyond this is bullshit
#define BS_SIZE (KERNEL_EXEC - USR_MMAP)
return 0;
}
+int
+mmap_user(void** addr_out,
+ struct mm_region** created,
+ ptr_t addr,
+ struct v_file* file,
+ struct mmap_param* param)
+{
+ param->range_end = KERNEL_EXEC;
+ param->range_start = USR_EXEC;
+
+ return mem_map(addr_out, created, addr, file, param);
+}
+
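+/*
+ Scan upward from the anchor: look for a gap between consecutive
+ regions (or between the last region and range_end) large enough to
+ hold param->mlen bytes. Returns the start of the gap, or 0 if none.
+*/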
+static ptr_t
+__mem_find_slot_backward(struct mm_region* lead, struct mmap_param* param, struct mm_region* anchor)
+{
+ ptr_t size = param->mlen;
+ struct mm_region *pos = anchor,
+ *n = next_region(pos);
+ while (pos != lead)
+ {
+ ptr_t end = n->start;
+ if (n == lead) {
+ end = param->range_end;
+ }
+
+ if (end - pos->end >= size) {
+ return pos->end;
+ }
+
+ pos = n;
+ n = next_region(pos);
+ }
+
+ return 0;
+}
+
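+/*
+ Scan downward from the anchor: look for a gap below each region (or
+ above range_start) large enough for param->mlen bytes, returning the
+ highest address that fits, or 0 if none.
+*/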
+static ptr_t
+__mem_find_slot_forward(struct mm_region* lead, struct mmap_param* param, struct mm_region* anchor)
+{
+ ptr_t size = param->mlen;
+ struct mm_region *pos = anchor,
+ *prev = prev_region(pos);
+ while (lead != pos)
+ {
+ ptr_t end = prev->end;
+ if (prev == lead) {
+ end = param->range_start;
+ }
+
+ if (pos->start - end >= size) {
+ return pos->start - size;
+ }
+
+ pos = prev;
+ prev = prev_region(pos);
+ }
+
+ return 0;
+}
+
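+/*
+ Try to place the mapping above the anchor first, falling back to
+ searching below it.
+*/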
+static ptr_t
+__mem_find_slot(vm_regions_t* lead, struct mmap_param* param, struct mm_region* anchor)
+{
+ ptr_t result = 0;
+ struct mm_region* _lead = get_region(lead);
+ if ((result = __mem_find_slot_backward(_lead, param, anchor))) {
+ return result;
+ }
+
+ return __mem_find_slot_forward(_lead, param, anchor);
+}
+
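+/*
+ Find the region containing addr or, failing that, the region whose
+ boundary is closest to it; used as the anchor of the slot search.
+*/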
+static struct mm_region*
+__mem_find_nearest(vm_regions_t* lead, ptr_t addr)
+{
+ ptr_t min_dist = (ptr_t)-1;
+ struct mm_region *pos, *n, *min = NULL;
+ llist_for_each(pos, n, lead, head) {
+ if (region_contains(pos, addr)) {
+ return pos;
+ }
+
+ ptr_t dist = addr - pos->end;
+ if (addr < pos->start) {
+ dist = pos->start - addr;
+ }
+
+ if (dist < min_dist) {
+ min_dist = dist;
+ min = pos;
+ }
+ }
+
+ return min;
+}
+
int
mem_map(void** addr_out,
struct mm_region** created,
{
assert_msg(addr, "addr can not be NULL");
- ptr_t last_end = USR_EXEC, found_loc = addr;
+ ptr_t last_end = USR_EXEC, found_loc = PG_ALIGN(addr);
struct mm_region *pos, *n;
vm_regions_t* vm_regions = ¶m->pvms->regions;
goto found;
}
- llist_for_each(pos, n, vm_regions, head)
- {
- if (last_end < found_loc) {
- size_t avail_space = pos->start - found_loc;
- if (pos->start > found_loc && avail_space > param->mlen) {
- goto found;
- }
- found_loc = pos->end + MEM_PAGE;
- }
+ if (llist_empty(vm_regions)) {
+ goto found;
+ }
- last_end = pos->end;
+ struct mm_region* anchor = __mem_find_nearest(vm_regions, found_loc);
+ if ((found_loc = __mem_find_slot(vm_regions, param, anchor))) {
+ goto found;
}
return ENOMEM;
found:
- if (found_loc >= KERNEL_EXEC || found_loc < USR_EXEC) {
+ if (found_loc >= param->range_end || found_loc < param->range_start) {
return ENOMEM;
}
region_add(vm_regions, region);
- u32_t attr = PG_ALLOW_USER;
- if ((param->proct & REGION_WRITE)) {
+ int proct = param->proct;
+ int attr = PG_ALLOW_USER;
+ if ((proct & REGION_WRITE)) {
attr |= PG_WRITE;
}
+ if ((proct & REGION_KERNEL)) {
+ attr &= ~PG_ALLOW_USER;
+ }
- for (u32_t i = 0; i < param->mlen; i += PG_SIZE) {
+ for (size_t i = 0; i < param->mlen; i += PG_SIZE) {
vmm_set_mapping(param->vms_mnt, found_loc + i, 0, attr, 0);
}
invalidate:
*mapping.pte &= ~PG_PRESENT;
- pmm_free_page(KERNEL_PID, mapping.pa);
+ pmm_free_page(mapping.pa);
cpu_flush_page((ptr_t)mapping.pte);
}
}
void
mem_unmap_region(ptr_t mnt, struct mm_region* region)
{
+ if (!region) {
+ return;
+ }
+
+ valloc_ensure_valid(region);
+
size_t len = ROUNDUP(region->end - region->start, PG_SIZE);
mem_sync_pages(mnt, region, region->start, len, 0);
for (size_t i = region->start; i <= region->end; i += PG_SIZE) {
ptr_t pa = vmm_del_mapping(mnt, i);
if (pa) {
- pmm_free_page(__current->pid, pa);
+ pmm_free_page(pa);
}
}
+
llist_delete(®ion->head);
region_release(region);
}
for (size_t i = 0; i < umps_len; i += PG_SIZE) {
ptr_t pa = vmm_del_mapping(mnt, vmr->start + i);
if (pa) {
- pmm_free_page(vmr->proc_vms->pid, pa);
+ pmm_free_page(pa);
}
}
struct mmap_param param = { .flags = options,
.mlen = ROUNDUP(length, PG_SIZE),
- .flen = length,
.offset = offset,
.type = REGION_TYPE_GENERAL,
.proct = proct,
- .pvms = (struct proc_mm*)&__current->mm,
+ .pvms = vmspace(__current),
.vms_mnt = VMS_SELF };
- errno = mem_map(&result, NULL, addr_ptr, file, ¶m);
+ errno = mmap_user(&result, NULL, addr_ptr, file, ¶m);
done:
- __current->k_status = errno;
+ syscall_result(errno);
return result;
}
__DEFINE_LXSYSCALL2(int, munmap, void*, addr, size_t, length)
{
return mem_unmap(
- VMS_SELF, (vm_regions_t*)&__current->mm.regions, (ptr_t)addr, length);
+ VMS_SELF, vmregions(__current), (ptr_t)addr, length);
}
__DEFINE_LXSYSCALL3(int, msync, void*, addr, size_t, length, int, flags)
}
int status = mem_msync(VMS_SELF,
- (vm_regions_t*)&__current->mm.regions,
+ vmregions(__current),
(ptr_t)addr,
length,
flags);
void* ptr = vmap(paddr, size, PG_PREM_RW | PG_DISABLE_CACHE, 0);
if (ptr) {
- pmm_mark_chunk_occupied(KERNEL_PID,
- paddr >> PG_SIZE_BITS,
+ pmm_mark_chunk_occupied(paddr >> PG_SIZE_BITS,
CEIL(size, PG_SIZE_BITS),
PP_FGLOCKED);
}
{
for (size_t i = 0; i < size; i += PG_SIZE) {
ptr_t paddr = vmm_del_mapping(VMS_SELF, vaddr + i);
- pmm_free_page(KERNEL_PID, paddr);
+ pmm_free_page(paddr);
}
}
\ No newline at end of file
// This is a very large array...
static struct pp_struct pm_table[PM_BMP_MAX_SIZE];
+export_symbol(debug, pmm, pm_table);
static ptr_t max_pg;
+export_symbol(debug, pmm, max_pg);
void
pmm_mark_page_free(ptr_t ppn)
}
void
-pmm_mark_page_occupied(pid_t owner, ptr_t ppn, pp_attr_t attr)
+pmm_mark_page_occupied(ptr_t ppn, pp_attr_t attr)
{
pm_table[ppn] =
- (struct pp_struct){ .owner = owner, .ref_counts = 1, .attr = attr };
+ (struct pp_struct){ .ref_counts = 1, .attr = attr };
}
void
}
void
-pmm_mark_chunk_occupied(pid_t owner,
- u32_t start_ppn,
+pmm_mark_chunk_occupied(u32_t start_ppn,
size_t page_count,
pp_attr_t attr)
{
for (size_t i = start_ppn; i < start_ppn + page_count && i < max_pg; i++) {
pm_table[i] =
- (struct pp_struct){ .owner = owner, .ref_counts = 1, .attr = attr };
+ (struct pp_struct){ .ref_counts = 1, .attr = attr };
}
}
// mark all as occupied
for (size_t i = 0; i < PM_BMP_MAX_SIZE; i++) {
pm_table[i] =
- (struct pp_struct){ .owner = 0, .attr = 0, .ref_counts = 1 };
+ (struct pp_struct){ .attr = 0, .ref_counts = 1 };
}
}
ptr_t
-pmm_alloc_cpage(pid_t owner, size_t num_pages, pp_attr_t attr)
+pmm_alloc_cpage(size_t num_pages, pp_attr_t attr)
{
size_t p1 = 0;
size_t p2 = 0;
return NULLPTR;
}
- pmm_mark_chunk_occupied(owner, p1, num_pages, attr);
+ pmm_mark_chunk_occupied(p1, num_pages, attr);
return p1 << 12;
}
ptr_t
-pmm_alloc_page(pid_t owner, pp_attr_t attr)
+pmm_alloc_page(pp_attr_t attr)
{
// Next fit approach. Maximize the throughput!
ptr_t good_page_found = (ptr_t)NULL;
if (!pm->ref_counts) {
*pm = (struct pp_struct){ .attr = attr,
- .owner = owner,
.ref_counts = 1 };
good_page_found = pg_lookup_ptr << 12;
break;
}
int
-pmm_free_page(pid_t owner, ptr_t page)
+pmm_free_page(ptr_t page)
{
struct pp_struct* pm = &pm_table[page >> 12];
}
int
-pmm_ref_page(pid_t owner, ptr_t page)
+pmm_ref_page(ptr_t page)
{
- (void)owner; // TODO: do smth with owner
-
u32_t ppn = page >> 12;
if (ppn >= PM_BMP_MAX_SIZE) {
--- /dev/null
+#include <lunaix/mm/procvm.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/mm/region.h>
+#include <lunaix/mm/pmm.h>
+#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/mmap.h>
+#include <lunaix/process.h>
+
+#include <sys/mm/mempart.h>
+
+#include <klibc/string.h>
+
+struct proc_mm*
+procvm_create(struct proc_info* proc) {
+ struct proc_mm* mm = valloc(sizeof(struct proc_mm));
+
+ assert(mm);
+
+ mm->heap = 0;
+ mm->proc = proc;
+
+ llist_init_head(&mm->regions);
+ return mm;
+}
+
+
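+/*
+ Duplicate the current page directory. Kernel-space L1 entries are
+ shared by reference; user-space L2 tables are copied entry by entry
+ with each physical page reference-counted. With only_kernel set,
+ the user half is left empty, yielding a clean address space.
+*/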
+static ptr_t
+__dup_vmspace(ptr_t mount_point, bool only_kernel)
+{
+ ptr_t ptd_pp = pmm_alloc_page(PP_FGPERSIST);
+ vmm_set_mapping(VMS_SELF, PG_MOUNT_1, ptd_pp, PG_PREM_RW, VMAP_NULL);
+
+ x86_page_table* ptd = (x86_page_table*)PG_MOUNT_1;
+ x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
+
+ size_t kspace_l1inx = L1_INDEX(KERNEL_EXEC);
+ size_t i = 1; // skip the first 4MiB, to avoid bringing in other threads' stacks
+
+ ptd->entry[0] = 0;
+ if (only_kernel) {
+ i = kspace_l1inx;
+ memset(ptd, 0, PG_SIZE);
+ }
+
+ for (; i < PG_MAX_ENTRIES - 1; i++) {
+
+ x86_pte_t ptde = pptd->entry[i];
+ // copy empty or not-present L1 entries over verbatim;
+ // the kernel address space is shared directly.
+ if (!ptde || i >= kspace_l1inx || !(ptde & PG_PRESENT)) {
+ ptd->entry[i] = ptde;
+ continue;
+ }
+
+ // duplicate the L2 page table
+ ptr_t pt_pp = pmm_alloc_page(PP_FGPERSIST);
+ vmm_set_mapping(VMS_SELF, PG_MOUNT_2, pt_pp, PG_PREM_RW, VMAP_NULL);
+
+ x86_page_table* ppt = (x86_page_table*)(mount_point | (i << 12));
+ x86_page_table* pt = (x86_page_table*)PG_MOUNT_2;
+
+ for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
+ x86_pte_t pte = ppt->entry[j];
+ pmm_ref_page(PG_ENTRY_ADDR(pte));
+ pt->entry[j] = pte;
+ }
+
+ ptd->entry[i] = (ptr_t)pt_pp | PG_ENTRY_FLAGS(ptde);
+ }
+
+ ptd->entry[PG_MAX_ENTRIES - 1] = NEW_L1_ENTRY(T_SELF_REF_PERM, ptd_pp);
+
+ return ptd_pp;
+}
+
+void
+procvm_dup(struct proc_info* proc) {
+ struct proc_mm* mm = vmspace(proc);
+ struct proc_mm* mm_current = vmspace(__current);
+
+ mm->heap = mm_current->heap;
+ mm->vmroot = __dup_vmspace(VMS_SELF, false);
+
+ region_copy_mm(mm_current, mm);
+}
+
+void
+procvm_init_clean(struct proc_info* proc)
+{
+ struct proc_mm* mm = vmspace(proc);
+ mm->vmroot = __dup_vmspace(VMS_SELF, true);
+}
+
+
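+/*
+ Tear down a mounted address space: free every present user page,
+ then each L2 table, and finally the L1 directory itself. Kernel
+ mappings are shared and therefore left untouched.
+*/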
+static void
+__delete_vmspace(ptr_t vm_mnt)
+{
+ x86_page_table* pptd = (x86_page_table*)(vm_mnt | (0x3FF << 12));
+
+ // only remove user address space
+ for (size_t i = 0; i < L1_INDEX(KERNEL_EXEC); i++) {
+ x86_pte_t ptde = pptd->entry[i];
+ if (!ptde || !(ptde & PG_PRESENT)) {
+ continue;
+ }
+
+ x86_page_table* ppt = (x86_page_table*)(vm_mnt | (i << 12));
+
+ for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
+ x86_pte_t pte = ppt->entry[j];
+ // free the 4KB data page
+ if ((pte & PG_PRESENT)) {
+ pmm_free_page(PG_ENTRY_ADDR(pte));
+ }
+ }
+ // free the L2 page table
+ pmm_free_page(PG_ENTRY_ADDR(ptde));
+ }
+ // free the L1 directory
+ pmm_free_page(PG_ENTRY_ADDR(pptd->entry[PG_MAX_ENTRIES - 1]));
+}
+
+void
+procvm_cleanup(ptr_t vm_mnt, struct proc_info* proc) {
+ struct mm_region *pos, *n;
+ llist_for_each(pos, n, vmregions(proc), head)
+ {
+ mem_sync_pages(vm_mnt, pos, pos->start, pos->end - pos->start, 0);
+ region_release(pos);
+ }
+
+ vfree(proc->mm);
+
+ __delete_vmspace(vm_mnt);
+}
+
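+/*
+ Open a window into another process's address space: map up to
+ REMOTEVM_MAX_PAGES starting at remote_base into a local mount so
+ the kernel can access them directly. Pages not yet present remotely
+ are allocated and mapped on both sides.
+*/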
+ptr_t
+procvm_enter_remote(struct remote_vmctx* rvmctx, struct proc_mm* mm,
+ ptr_t vm_mnt, ptr_t remote_base, size_t size)
+{
+ ptr_t size_pn = PN(size + MEM_PAGE);
+ assert(size_pn < REMOTEVM_MAX_PAGES);
+
+ struct mm_region* region = region_get(&mm->regions, remote_base);
+ assert(region && region_contains(region, remote_base + size));
+
+ rvmctx->vms_mnt = vm_mnt;
+ rvmctx->page_cnt = size_pn;
+
+ remote_base = PG_ALIGN(remote_base);
+ rvmctx->remote = remote_base;
+ rvmctx->local_mnt = PG_MOUNT_4_END + 1;
+
+ v_mapping m;
+ unsigned int pattr = region_ptattr(region);
+ ptr_t raddr = remote_base, lmnt = rvmctx->local_mnt;
+ for (size_t i = 0; i < size_pn; i++, lmnt += MEM_PAGE, raddr += MEM_PAGE)
+ {
+ if (vmm_lookupat(vm_mnt, raddr, &m) && PG_IS_PRESENT(m.flags)) {
+ vmm_set_mapping(VMS_SELF, lmnt, m.pa, PG_PREM_RW, 0);
+ continue;
+ }
+
+ ptr_t pa = pmm_alloc_page(0);
+ vmm_set_mapping(VMS_SELF, lmnt, pa, PG_PREM_RW, 0);
+ vmm_set_mapping(vm_mnt, raddr, pa, pattr, 0);
+ }
+
+ return vm_mnt;
+}
+
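+/*
+ Copy sz bytes into the window opened by procvm_enter_remote,
+ bounds-checked against it; returns sz on success, -1 if the
+ destination falls outside the window.
+*/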
+int
+procvm_copy_remote_transaction(struct remote_vmctx* rvmctx,
+ ptr_t remote_dest, void* local_src, size_t sz)
+{
+ if (remote_dest < rvmctx->remote) {
+ return -1;
+ }
+
+ ptr_t offset = remote_dest - rvmctx->remote;
+ if (PN(offset + sz) >= rvmctx->page_cnt) {
+ return -1;
+ }
+
+ memcpy((void*)(rvmctx->local_mnt + offset), local_src, sz);
+
+ return sz;
+}
+
+void
+procvm_exit_remote_transaction(struct remote_vmctx* rvmctx)
+{
+ ptr_t lmnt = rvmctx->local_mnt;
+ for (size_t i = 0; i < rvmctx->page_cnt; i++, lmnt += MEM_PAGE)
+ {
+ vmm_del_mapping(VMS_SELF, lmnt);
+ }
+}
\ No newline at end of file
#include <lunaix/mm/region.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
+#include <lunaix/process.h>
#include <sys/mm/mempart.h>
}
if (region->mfile) {
- vfs_pclose(region->mfile, region->proc_vms->pid);
+ struct proc_mm* mm = region->proc_vms;
+ vfs_pclose(region->mfile, mm->proc->pid);
}
if (region->index) {
i -= boffset;
if (i >= len)
- return NULL;
+ i = 0;
return cake_grab(segregate_list[i]);
}
vfree_dma(void* ptr)
{
__vfree(ptr, piles_dma, CLASS_LEN(piles_names_dma));
+}
+
+inline void must_inline
+valloc_ensure_valid(void* ptr) {
+ cake_ensure_valid(ptr);
}
\ No newline at end of file
for (size_t i = 0; i < size; i += PG_SIZE) {
vmm_set_mapping(VMS_SELF, alloc_begin + i, paddr + i, attr, 0);
- pmm_ref_page(KERNEL_PID, paddr + i);
+ pmm_ref_page(paddr + i);
}
return (void*)alloc_begin;
#include <lunaix/syslog.h>
#include <sys/cpu.h>
-#include <sys/mm/mempart.h>
+#include <sys/mm/mm_defs.h>
LOG_MODULE("VMM")
vmm_init_pd()
{
x86_page_table* dir =
- (x86_page_table*)pmm_alloc_page(KERNEL_PID, PP_FGPERSIST);
+ (x86_page_table*)pmm_alloc_page(PP_FGPERSIST);
for (size_t i = 0; i < PG_MAX_ENTRIES; i++) {
dir->entry[i] = PTE_NULL;
}
x86_pte_t* l1pte = &l1pt->entry[l1_inx];
if (!*l1pte) {
x86_page_table* new_l1pt_pa =
- (x86_page_table*)pmm_alloc_page(KERNEL_PID, PP_FGPERSIST);
+ (x86_page_table*)pmm_alloc_page(PP_FGPERSIST);
// out of physical memory!
if (!new_l1pt_pa) {
return 1;
}
- l2pt->entry[l2_inx] = NEW_L2_ENTRY(attr, pa);
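+ // a guardian page is recorded as a special marker entry instead of a real mapping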
+ if (!(options & VMAP_GUARDPAGE)) {
+ l2pt->entry[l2_inx] = NEW_L2_ENTRY(attr, pa);
+ } else {
+ l2pt->entry[l2_inx] = MEMGUARD;
+ }
+
return 1;
}
ptr_t
vmm_mount_pd(ptr_t mnt, ptr_t pde)
{
+ assert(pde);
+
x86_page_table* l1pt = (x86_page_table*)L1_BASE_VADDR;
l1pt->entry[(mnt >> 22)] = NEW_L1_ENTRY(T_SELF_REF_PERM, pde);
cpu_flush_page(mnt);
}
ptr_t
-vmm_dup_page(pid_t pid, ptr_t pa)
+vmm_dup_page(ptr_t pa)
{
- ptr_t new_ppg = pmm_alloc_page(pid, 0);
+ ptr_t new_ppg = pmm_alloc_page(0);
vmm_set_mapping(VMS_SELF, PG_MOUNT_3, new_ppg, PG_PREM_RW, VMAP_NULL);
vmm_set_mapping(VMS_SELF, PG_MOUNT_4, pa, PG_PREM_RW, VMAP_NULL);
+++ /dev/null
-void
-my_dummy()
-{
- while (1) {
- asm("hlt");
- }
-}
--- /dev/null
+#include <lunaix/mm/region.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/pmm.h>
+#include <lunaix/mm/mmap.h>
+#include <lunaix/process.h>
+#include <lunaix/spike.h>
+#include <lunaix/status.h>
+#include <lunaix/syscall.h>
+#include <lunaix/syslog.h>
+#include <lunaix/signal.h>
+
+#include <sys/abi.h>
+#include <sys/mm/mm_defs.h>
+
+#include <klibc/string.h>
+
+LOG_MODULE("FORK")
+
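+/*
+ Apply copy-on-write bookkeeping to one region after fork:
+ write-shared regions are left as-is; read-shared pages are made
+ read-only in both spaces so the first write faults and copies;
+ private pages are simply dropped from the child's page table.
+*/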
+static void
+region_maybe_cow(struct mm_region* region)
+{
+ int attr = region->attr;
+ if ((attr & REGION_WSHARED)) {
+ return;
+ }
+
+ ptr_t start_vpn = PN(region->start);
+ ptr_t end_vpn = PN(region->end);
+ for (size_t i = start_vpn; i <= end_vpn; i++) {
+ x86_pte_t* curproc = &PTE_MOUNTED(VMS_SELF, i);
+ x86_pte_t* newproc = &PTE_MOUNTED(VMS_MOUNT_1, i);
+
+ cpu_flush_page((ptr_t)newproc);
+
+ if ((attr & REGION_MODE_MASK) == REGION_RSHARED) {
+ // for read-shared pages, mark both mappings read-only so that any
+ // write triggers the COW policy.
+ cpu_flush_page((ptr_t)curproc);
+ cpu_flush_page((ptr_t)(i << 12));
+
+ *curproc = *curproc & ~PG_WRITE;
+ *newproc = *newproc & ~PG_WRITE;
+ } else {
+ // for private pages, remove the page from the new process.
+ *newproc = 0;
+ }
+ }
+}
+
+static inline void
+__dup_fdtable(struct proc_info* pcb)
+{
+ for (size_t i = 0; i < VFS_MAX_FD; i++) {
+ struct v_fd* fd = __current->fdtable->fds[i];
+ if (!fd)
+ continue;
+ vfs_dup_fd(fd, &pcb->fdtable->fds[i]);
+ }
+}
+
+
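+/*
+ Clone the calling thread's kernel stack into the child's vm space:
+ the guardian page is re-installed and every mapped page is
+ duplicated so the child resumes on a private copy of the stack.
+*/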
+static void
+__dup_kernel_stack(struct thread* thread, ptr_t vm_mnt)
+{
+ ptr_t kstack_pn = PN(current_thread->kstack);
+
+ // copy the kernel stack
+ for (size_t i = 0; i < PN(KSTACK_SIZE); i++) {
+ volatile x86_pte_t* orig_ppte = &PTE_MOUNTED(VMS_SELF, kstack_pn);
+ x86_pte_t p = *orig_ppte;
+ ptr_t kstack = kstack_pn * PG_SIZE;
+
+ if (guardian_page(p)) {
+ vmm_set_mapping(vm_mnt, kstack, 0, 0, VMAP_GUARDPAGE);
+ } else {
+ ptr_t ppa = vmm_dup_page(PG_ENTRY_ADDR(p));
+ vmm_set_mapping(vm_mnt, kstack, ppa, p & 0xfff, 0);
+ }
+
+ kstack_pn--;
+ }
+}
+
+/*
+ Duplicate the currently active thread into the forked process's
+ main thread.
+
+ This is not the same as "forking a thread within the same
+ process". In fact, such "thread forking" is impossible: the forked
+ thread's kernel and user stacks must not coincide with those of the
+ original thread (they live in the same vm space), so every
+ reference into the stack space would become stale, which could
+ lead to undefined behaviour.
+*/
+
+static struct thread*
+dup_active_thread(ptr_t vm_mnt, struct proc_info* duped_pcb)
+{
+ struct thread* th = alloc_thread(duped_pcb);
+ if (!th) {
+ return NULL;
+ }
+
+ th->intr_ctx = current_thread->intr_ctx;
+ th->kstack = current_thread->kstack;
+
+ signal_dup_context(&th->sigctx);
+
+ /*
+ * store the return value for forked process.
+ * this will be implicit carried over after kernel stack is copied.
+ */
+ store_retval_to(th, 0);
+
+ __dup_kernel_stack(th, vm_mnt);
+
+ if (!current_thread->ustack) {
+ goto done;
+ }
+
+ struct mm_region* old_stack = current_thread->ustack;
+ struct mm_region *pos, *n;
+ llist_for_each(pos, n, vmregions(duped_pcb), head)
+ {
+ // remove stack of other threads.
+ if (!stack_region(pos)) {
+ continue;
+ }
+
+ if (!same_region(pos, old_stack)) {
+ mem_unmap_region(vm_mnt, pos);
+ }
+ else {
+ th->ustack = pos;
+ }
+ }
+
+ assert(th->ustack);
+
+done:
+ return th;
+}
+
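+/*
+ fork(2) backend: duplicate the PCB, fd table, command line and cwd,
+ clone the whole vm space, duplicate the calling thread as the
+ child's main thread, then mark regions for copy-on-write.
+*/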
+pid_t
+dup_proc()
+{
+ struct proc_info* pcb = alloc_process();
+ if (!pcb) {
+ syscall_result(ENOMEM);
+ return -1;
+ }
+
+ pcb->parent = __current;
+
+ // FIXME needs a more elegant refactoring
+ if (__current->cmd) {
+ pcb->cmd_len = __current->cmd_len;
+ pcb->cmd = valloc(pcb->cmd_len);
+ memcpy(pcb->cmd, __current->cmd, pcb->cmd_len);
+ }
+
+ if (__current->cwd) {
+ pcb->cwd = __current->cwd;
+ vfs_ref_dnode(pcb->cwd);
+ }
+
+ __dup_fdtable(pcb);
+ procvm_dup(pcb);
+
+ vmm_mount_pd(VMS_MOUNT_1, vmroot(pcb));
+
+ struct thread* main_thread = dup_active_thread(VMS_MOUNT_1, pcb);
+ if (!main_thread) {
+ syscall_result(ENOMEM);
+ vmm_unmount_pd(VMS_MOUNT_1);
+ delete_process(pcb);
+ return -1;
+ }
+
+ // further configure the page table according to each mm_region
+ struct mm_region *pos, *n;
+ llist_for_each(pos, n, &pcb->mm->regions, head)
+ {
+ region_maybe_cow(pos);
+ }
+
+ vmm_unmount_pd(VMS_MOUNT_1);
+
+ commit_process(pcb);
+ commit_thread(main_thread);
+
+ return pcb->pid;
+}
+
+__DEFINE_LXSYSCALL(pid_t, fork)
+{
+ return dup_proc();
+}
#include <klibc/string.h>
#include <lunaix/clock.h>
#include <lunaix/mm/mmap.h>
-#include <lunaix/mm/pmm.h>
+#include <lunaix/mm/vmm.h>
#include <lunaix/mm/region.h>
#include <lunaix/mm/valloc.h>
-#include <lunaix/mm/vmm.h>
#include <lunaix/process.h>
#include <lunaix/spike.h>
#include <lunaix/status.h>
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
+#include <lunaix/exec.h>
+#include <lunaix/fs.h>
#include <sys/abi.h>
-#include <sys/mm/mempart.h>
+#include <sys/mm/mm_defs.h>
LOG_MODULE("PROC")
-ptr_t
-__dup_pagetable(pid_t pid, ptr_t mount_point)
-{
- ptr_t ptd_pp = pmm_alloc_page(pid, PP_FGPERSIST);
- vmm_set_mapping(VMS_SELF, PG_MOUNT_1, ptd_pp, PG_PREM_RW, VMAP_NULL);
-
- x86_page_table* ptd = (x86_page_table*)PG_MOUNT_1;
- x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
-
- size_t kspace_l1inx = L1_INDEX(KERNEL_EXEC);
-
- for (size_t i = 0; i < PG_MAX_ENTRIES - 1; i++) {
-
- x86_pte_t ptde = pptd->entry[i];
- // 空或者是未在内存中的L1页表项直接照搬过去。
- // 内核地址空间直接共享过去。
- if (!ptde || i >= kspace_l1inx || !(ptde & PG_PRESENT)) {
- ptd->entry[i] = ptde;
- continue;
- }
-
- // 复制L2页表
- ptr_t pt_pp = pmm_alloc_page(pid, PP_FGPERSIST);
- vmm_set_mapping(VMS_SELF, PG_MOUNT_2, pt_pp, PG_PREM_RW, VMAP_NULL);
-
- x86_page_table* ppt = (x86_page_table*)(mount_point | (i << 12));
- x86_page_table* pt = (x86_page_table*)PG_MOUNT_2;
-
- for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
- x86_pte_t pte = ppt->entry[j];
- pmm_ref_page(pid, PG_ENTRY_ADDR(pte));
- pt->entry[j] = pte;
- }
-
- ptd->entry[i] = (ptr_t)pt_pp | PG_ENTRY_FLAGS(ptde);
- }
-
- ptd->entry[PG_MAX_ENTRIES - 1] = NEW_L1_ENTRY(T_SELF_REF_PERM, ptd_pp);
-
- return ptd_pp;
-}
-
-void
-__del_pagetable(pid_t pid, ptr_t mount_point)
-{
- x86_page_table* pptd = (x86_page_table*)(mount_point | (0x3FF << 12));
-
- // only remove user address space
- for (size_t i = 0; i < L1_INDEX(KERNEL_EXEC); i++) {
- x86_pte_t ptde = pptd->entry[i];
- if (!ptde || !(ptde & PG_PRESENT)) {
- continue;
- }
-
- x86_page_table* ppt = (x86_page_table*)(mount_point | (i << 12));
-
- for (size_t j = 0; j < PG_MAX_ENTRIES; j++) {
- x86_pte_t pte = ppt->entry[j];
- // free the 4KB data page
- if ((pte & PG_PRESENT)) {
- pmm_free_page(pid, PG_ENTRY_ADDR(pte));
- }
- }
- // free the L2 page table
- pmm_free_page(pid, PG_ENTRY_ADDR(ptde));
- }
- // free the L1 directory
- pmm_free_page(pid, PG_ENTRY_ADDR(pptd->entry[PG_MAX_ENTRIES - 1]));
-}
-
-ptr_t
-vmm_dup_vmspace(pid_t pid)
-{
- return __dup_pagetable(pid, VMS_SELF);
-}
-
-__DEFINE_LXSYSCALL(pid_t, fork)
-{
- return dup_proc();
-}
-
__DEFINE_LXSYSCALL(pid_t, getpid)
{
return __current->pid;
struct proc_info* proc = pid ? get_process(pid) : __current;
if (!proc) {
- __current->k_status = EINVAL;
+ syscall_result(EINVAL);
return -1;
}
struct proc_info* gruppenfuhrer = get_process(pgid);
if (!gruppenfuhrer || proc->pgid == gruppenfuhrer->pid) {
- __current->k_status = EINVAL;
+ syscall_result(EINVAL);
return -1;
}
return 0;
}
-void
-__stack_copied(struct mm_region* region)
-{
- mm_index((void**)®ion->proc_vms->stack, region);
-}
-
-void
-init_proc_user_space(struct proc_info* pcb)
+int
+spawn_process(struct thread** created, ptr_t entry, bool with_ustack)
{
- vmm_mount_pd(VMS_MOUNT_1, pcb->page_table);
+ struct proc_info* kproc = alloc_process();
- /*--- 分配用户栈 ---*/
+ procvm_init_clean(kproc);
- struct mm_region* mapped;
- struct mmap_param param = { .vms_mnt = VMS_MOUNT_1,
- .pvms = &pcb->mm,
- .mlen = USR_STACK_SIZE,
- .proct = PROT_READ | PROT_WRITE,
- .flags = MAP_ANON | MAP_PRIVATE | MAP_FIXED,
- .type = REGION_TYPE_STACK };
+ vmm_mount_pd(VMS_MOUNT_1, vmroot(kproc));
+
+ struct thread* kthread = create_thread(kproc, VMS_MOUNT_1, with_ustack);
- int status = 0;
- if ((status = mem_map(NULL, &mapped, USR_STACK, NULL, ¶m))) {
- kprintf(KFATAL "fail to alloc user stack: %d", status);
+ if (!kthread) {
+ vmm_unmount_pd(VMS_MOUNT_1);
+ delete_process(kproc);
+ return -1;
}
- mapped->region_copied = __stack_copied;
- mm_index((void**)&pcb->mm.stack, mapped);
-
- // TODO other uspace initialization stuff
+ commit_process(kproc);
+ start_thread(kthread, VMS_MOUNT_1, entry);
vmm_unmount_pd(VMS_MOUNT_1);
-}
-void
-__mark_region(ptr_t start_vpn, ptr_t end_vpn, int attr)
-{
- for (size_t i = start_vpn; i <= end_vpn; i++) {
- x86_pte_t* curproc = &PTE_MOUNTED(VMS_SELF, i);
- x86_pte_t* newproc = &PTE_MOUNTED(VMS_MOUNT_1, i);
-
- cpu_flush_page((ptr_t)newproc);
-
- if ((attr & REGION_MODE_MASK) == REGION_RSHARED) {
- // 如果读共享,则将两者的都标注为只读,那么任何写入都将会应用COW策略。
- cpu_flush_page((ptr_t)curproc);
- cpu_flush_page((ptr_t)(i << 12));
-
- *curproc = *curproc & ~PG_WRITE;
- *newproc = *newproc & ~PG_WRITE;
- } else {
- // 如果是私有页,则将该页从新进程中移除。
- *newproc = 0;
- }
+ if (created) {
+ *created = kthread;
}
-}
-void
-__copy_fdtable(struct proc_info* pcb)
-{
- for (size_t i = 0; i < VFS_MAX_FD; i++) {
- struct v_fd* fd = __current->fdtable->fds[i];
- if (!fd)
- continue;
- vfs_dup_fd(fd, &pcb->fdtable->fds[i]);
- }
+ return 0;
}
-pid_t
-dup_proc()
+int
+spawn_process_usr(struct thread** created, char* path,
+ const char** argv, const char** envp)
{
- struct proc_info* pcb = alloc_process();
- pcb->intr_ctx = __current->intr_ctx;
- pcb->parent = __current;
+ // FIXME remote injection of user stack not yet implemented
- if (__current->cwd) {
- pcb->cwd = __current->cwd;
- vfs_ref_dnode(pcb->cwd);
- }
+ struct proc_info* proc = alloc_process();
+
+ assert(!kernel_process(proc));
- __copy_fdtable(pcb);
- region_copy_mm(&__current->mm, &pcb->mm);
+ procvm_init_clean(proc);
- /*
- * store the return value for forked process.
- * this will be implicit carried over after kernel stack is copied.
- */
- store_retval(0);
+ vmm_mount_pd(VMS_MOUNT_1, vmroot(proc));
- copy_kernel_stack(pcb, VMS_SELF);
+ int errno = 0;
+ struct thread* main_thread;
+ if (!(main_thread = create_thread(proc, VMS_MOUNT_1, true))) {
+ errno = ENOMEM;
+ goto fail;
+ }
- // 根据 mm_region 进一步配置页表
+ struct exec_container container;
+ exec_init_container(&container, main_thread, VMS_MOUNT_1, argv, envp);
+ if ((errno = exec_load_byname(&container, path))) {
+ goto fail;
+ }
- struct mm_region *pos, *n;
- llist_for_each(pos, n, &pcb->mm.regions, head)
- {
- // 如果写共享,则不作处理。
- if ((pos->attr & REGION_WSHARED)) {
- continue;
- }
+ commit_process(proc);
+ start_thread(main_thread, VMS_MOUNT_1, container.exe.entry);
- ptr_t start_vpn = pos->start >> 12;
- ptr_t end_vpn = pos->end >> 12;
- __mark_region(start_vpn, end_vpn, pos->attr);
+ if (created) {
+ *created = main_thread;
}
vmm_unmount_pd(VMS_MOUNT_1);
+ return 0;
- commit_process(pcb);
-
- return pcb->pid;
+fail:
+ vmm_unmount_pd(VMS_MOUNT_1);
+ delete_process(proc);
+ return errno;
}
-extern void __kexec_end;
-
-void
-copy_kernel_stack(struct proc_info* proc, ptr_t usedMnt)
-{
- // copy the entire kernel page table
- pid_t pid = proc->pid;
- ptr_t pt_copy = __dup_pagetable(pid, usedMnt);
-
- vmm_mount_pd(VMS_MOUNT_1, pt_copy); // 将新进程的页表挂载到挂载点#2
-
- // copy the kernel stack
- for (size_t i = KERNEL_STACK >> 12; i <= KERNEL_STACK_END >> 12; i++) {
- volatile x86_pte_t* ppte = &PTE_MOUNTED(VMS_MOUNT_1, i);
-
- /*
- This is a fucking nightmare, the TLB caching keep the rewrite to PTE
- from updating. Even the Nightmare Moon the Evil is far less nasty
- than this. It took me hours of debugging to figure this out.
-
- In the name of Celestia our glorious goddess, I will fucking HATE
- the TLB for the rest of my LIFE!
- */
- cpu_flush_page((ptr_t)ppte);
-
- x86_pte_t p = *ppte;
- ptr_t ppa = vmm_dup_page(pid, PG_ENTRY_ADDR(p));
- pmm_free_page(pid, PG_ENTRY_ADDR(p));
- *ppte = (p & 0xfff) | ppa;
- }
- proc->page_table = pt_copy;
+ptr_t proc_vmroot() {
+ return __current->mm->vmroot;
}
\ No newline at end of file
#include <sys/abi.h>
-#include <sys/interrupts.h>
#include <sys/mm/mempart.h>
#include <hal/intc.h>
#include <lunaix/mm/pmm.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/procvm.h>
#include <lunaix/process.h>
#include <lunaix/sched.h>
#include <lunaix/signal.h>
#include <lunaix/status.h>
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
+#include <lunaix/pcontext.h>
+#include <lunaix/kpreempt.h>
#include <klibc/string.h>
volatile struct proc_info* __current;
-
-static struct proc_info dummy_proc;
-
-struct proc_info dummy;
+volatile struct thread* current_thread;
struct scheduler sched_ctx;
-struct cake_pile* proc_pile;
+struct cake_pile *proc_pile ,*thread_pile;
LOG_MODULE("SCHED")
-void
-sched_init_dummy();
-
void
sched_init()
{
proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0);
+ thread_pile = cake_new_pile("thread", sizeof(struct thread), 1, 0);
cake_set_constructor(proc_pile, cake_ctor_zeroing);
+ cake_set_constructor(thread_pile, cake_ctor_zeroing);
sched_ctx = (struct scheduler){
- ._procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0};
-
- // TODO initialize dummy_proc
- sched_init_dummy();
+ .procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0};
+
+ llist_init_head(&sched_ctx.sleepers);
}
-#define DUMMY_STACK_SIZE 2048
-
void
-sched_init_dummy()
+run(struct thread* thread)
{
- // This surely need to be simplified or encapsulated!
- // It is a living nightmare!
+ thread->state = PS_RUNNING;
+ thread->process->state = PS_RUNNING;
+ thread->process->th_active = thread;
- extern void my_dummy();
- static char dummy_stack[DUMMY_STACK_SIZE] __attribute__((aligned(16)));
+ set_current_executing(thread);
- ptr_t stktop = (ptr_t)dummy_stack + DUMMY_STACK_SIZE;
+ switch_context();
+ fail("unexpected return from switching");
+}
- dummy_proc = (struct proc_info){};
+/*
+ Currently, we do not allow a thread to destroy itself, as doing
+ so would free the kernel stack it is running on, which is a
+ disaster. As a compromise, we perform a regular scan and
+ clean-up of such threads in the preemptible kernel thread.
+*/
- proc_init_transfer(&dummy_proc, stktop, (ptr_t)my_dummy, TRANSFER_IE);
+void _preemptible
+cleanup_detached_threads() {
+ ensure_preempt_caller();
- dummy_proc.page_table = cpu_ldvmspace();
- dummy_proc.state = PS_READY;
- dummy_proc.parent = &dummy_proc;
- dummy_proc.pid = KERNEL_PID;
+ // XXX maybe a lock on the sched context would be most appropriate?
+ cpu_disable_interrupt();
- __current = &dummy_proc;
-}
+ int i = 0;
+ struct thread *pos, *n;
+ llist_for_each(pos, n, sched_ctx.threads, sched_sibs) {
+ if (likely(!proc_terminated(pos) || !thread_detached(pos))) {
+ continue;
+ }
-void
-run(struct proc_info* proc)
-{
- proc->state = PS_RUNNING;
+ vmm_mount_pd(VMS_MOUNT_1, vmroot(pos->process));
+ destory_thread(VMS_MOUNT_1, pos);
+ vmm_unmount_pd(VMS_MOUNT_1);
+
+ i++;
+ }
- intc_notify_eos(0);
- switch_context(proc);
+ if (i) {
+ INFO("cleaned %d terminated detached thread(s)", i);
+ }
+
+ cpu_enable_interrupt();
}
int
-can_schedule(struct proc_info* proc)
+can_schedule(struct thread* thread)
{
- if (!proc) {
+ if (!thread) {
return 0;
}
- struct sighail* sh = &proc->sigctx;
+ if (unlikely(kernel_process(thread->process))) {
+ // a kernel process is always runnable
+ return thread->state == PS_READY;
+ }
+
+ struct sigctx* sh = &thread->sigctx;
- if ((proc->state & PS_PAUSED)) {
+ if ((thread->state & PS_PAUSED)) {
return !!(sh->sig_pending & ~1);
}
- if ((proc->state & PS_BLOCKED)) {
+ if ((thread->state & PS_BLOCKED)) {
return sigset_test(sh->sig_pending, _SIGINT);
}
- if (sigset_test(sh->sig_pending, _SIGCONT)) {
- sigset_clear(sh->sig_pending, _SIGSTOP);
- } else if (sigset_test(sh->sig_pending, _SIGSTOP)) {
- // 如果进程受到SIGSTOP,则该进程不给予调度。
+ if (sigset_test(sh->sig_pending, _SIGSTOP)) {
+ // If one thread has a pending SIGSTOP, then so do all other
+ // threads (as per POSIX.1-2008), in which case the entire
+ // process is stopped.
+ thread->state = PS_STOPPED;
return 0;
}
+ if (sigset_test(sh->sig_pending, _SIGCONT)) {
+ thread->state = PS_READY;
+ }
- return (proc->state == PS_READY);
+ return (thread->state == PS_READY) \
+ && proc_runnable(thread->process);
}
void
check_sleepers()
{
- struct proc_info* leader = sched_ctx._procs[0];
- struct proc_info *pos, *n;
+ struct thread *pos, *n;
time_t now = clock_systime() / 1000;
- llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers)
+
+ llist_for_each(pos, n, &sched_ctx.sleepers, sleep.sleepers)
{
if (proc_terminated(pos)) {
goto del;
if (atime && now >= atime) {
pos->sleep.alarm_time = 0;
- proc_setsignal(pos, _SIGALRM);
+ thread_setsignal(pos, _SIGALRM);
}
if (!wtime && !atime) {
void
schedule()
{
- if (!sched_ctx.ptable_len) {
- return;
- }
+ assert(sched_ctx.ptable_len && sched_ctx.ttable_len);
// Context switching is extremely sensitive! We don't want any interrupt messing up the stack layout...
cpu_disable_interrupt();
- struct proc_info* next;
- int prev_ptr = sched_ctx.procs_index;
- int ptr = prev_ptr;
- int found = 0;
- if (!(__current->state & ~PS_RUNNING)) {
+ if (!(current_thread->state & ~PS_RUNNING)) {
+ current_thread->state = PS_READY;
__current->state = PS_READY;
}
check_sleepers();
// round-robin scheduler
+
+ struct thread* current = current_thread;
+ struct thread* to_check = current;
+
do {
- ptr = (ptr + 1) % sched_ctx.ptable_len;
- next = sched_ctx._procs[ptr];
+ to_check = list_next(to_check, struct thread, sched_sibs);
- if (!(found = can_schedule(next))) {
- if (ptr == prev_ptr) {
- next = &dummy_proc;
- goto done;
- }
+ if (can_schedule(to_check)) {
+ break;
+ }
+
+ if (to_check == current) {
+ // FIXME do something less lethal here
+ fail("Ran out of threads!");
+ goto done;
}
- } while (!found);
- sched_ctx.procs_index = ptr;
+ } while (1);
+
+ sched_ctx.procs_index = to_check->process->pid;
done:
- run(next);
+ intc_notify_eos(0);
+ run(to_check);
+
+ fail("unexpected return from scheduler");
}
void
-sched_yieldk()
+sched_pass()
{
cpu_enable_interrupt();
cpu_trap_sched();
}
time_t systime = clock_systime() / 1000;
+ struct haybed* bed = ¤t_thread->sleep;
- if (__current->sleep.wakeup_time) {
- return (__current->sleep.wakeup_time - systime);
+ if (bed->wakeup_time) {
+ return (bed->wakeup_time - systime);
}
- struct proc_info* root_proc = sched_ctx._procs[0];
- __current->sleep.wakeup_time = systime + seconds;
+ bed->wakeup_time = systime + seconds;
- if (llist_empty(&__current->sleep.sleepers)) {
- llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+ if (llist_empty(&bed->sleepers)) {
+ llist_append(&sched_ctx.sleepers, &bed->sleepers);
}
store_retval(seconds);
- block_current();
+ block_current_thread();
schedule();
return 0;
__DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
{
- time_t prev_ddl = __current->sleep.alarm_time;
+ struct haybed* bed = ¤t_thread->sleep;
+ time_t prev_ddl = bed->alarm_time;
time_t now = clock_systime() / 1000;
- __current->sleep.alarm_time = seconds ? now + seconds : 0;
+ bed->alarm_time = seconds ? now + seconds : 0;
- struct proc_info* root_proc = sched_ctx._procs[0];
- if (llist_empty(&__current->sleep.sleepers)) {
- llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
+ struct proc_info* root_proc = sched_ctx.procs[0];
+ if (llist_empty(&bed->sleepers)) {
+ llist_append(&sched_ctx.sleepers, &bed->sleepers);
}
return prev_ddl ? (prev_ddl - now) : 0;
__DEFINE_LXSYSCALL1(void, exit, int, status)
{
- terminate_proc(status);
+ terminate_current(status);
schedule();
}
__DEFINE_LXSYSCALL(int, geterrno)
{
- return __current->k_status;
+ return current_thread->syscall_ret;
}
pid_t
return 0;
}
// give up the current chance to run
- sched_yieldk();
+ sched_pass();
goto repeat;
done:
return destroy_process(proc->pid);
}
-struct proc_info*
-alloc_process()
-{
+static inline pid_t
+get_free_pid() {
pid_t i = 0;
- for (; i < sched_ctx.ptable_len && sched_ctx._procs[i]; i++)
+
+ for (; i < sched_ctx.ptable_len && sched_ctx.procs[i]; i++)
;
-
- if (i == MAX_PROCESS) {
+
+ if (unlikely(i == MAX_PROCESS)) {
panick("Panic in Ponyville shimmer!");
}
+ return i;
+}
+
+struct thread*
+alloc_thread(struct proc_info* process) {
+ if (process->thread_count >= MAX_THREAD_PP) {
+ return NULL;
+ }
+
+ struct thread* th = cake_grab(thread_pile);
+
+ th->process = process;
+ th->created = clock_systime();
+
+ // FIXME we need a better tid allocation method!
+ th->tid = (th->created ^ ((ptr_t)th)) % MAX_THREAD_PP;
+
+ th->state = PS_CREATED;
+
+ llist_init_head(&th->sleep.sleepers);
+ llist_init_head(&th->sched_sibs);
+ llist_init_head(&th->proc_sibs);
+ waitq_init(&th->waitqueue);
+
+ return th;
+}
+
+struct proc_info*
+alloc_process()
+{
+ pid_t i = get_free_pid();
+
if (i == sched_ctx.ptable_len) {
sched_ctx.ptable_len++;
}
struct proc_info* proc = cake_grab(proc_pile);
+ if (!proc) {
+ return NULL;
+ }
proc->state = PS_CREATED;
proc->pid = i;
- proc->mm.pid = i;
proc->created = clock_systime();
proc->pgid = proc->pid;
+
+ proc->sigreg = vzalloc(sizeof(struct sigregister));
proc->fdtable = vzalloc(sizeof(struct v_fdtable));
- llist_init_head(&proc->mm.regions);
+ proc->mm = procvm_create(proc);
+
llist_init_head(&proc->tasks);
llist_init_head(&proc->children);
llist_init_head(&proc->grp_member);
- llist_init_head(&proc->sleep.sleepers);
+ llist_init_head(&proc->threads);
iopoll_init(&proc->pollctx);
- waitq_init(&proc->waitqueue);
- sched_ctx._procs[i] = proc;
+ sched_ctx.procs[i] = proc;
return proc;
}
void
-commit_process(struct proc_info* process)
-{
- assert(process == sched_ctx._procs[process->pid]);
+commit_thread(struct thread* thread) {
+ struct proc_info* process = thread->process;
- if (process->state != PS_CREATED) {
- __current->k_status = EINVAL;
- return;
+ assert(process && !proc_terminated(process));
+
+ llist_append(&process->threads, &thread->proc_sibs);
+
+ if (sched_ctx.threads) {
+ llist_append(sched_ctx.threads, &thread->sched_sibs);
+ } else {
+ sched_ctx.threads = &thread->sched_sibs;
}
+ sched_ctx.ttable_len++;
+ process->thread_count++;
+ thread->state = PS_READY;
+}
+
+void
+commit_process(struct proc_info* process)
+{
+ assert(process == sched_ctx.procs[process->pid]);
+ assert(process->state == PS_CREATED);
+
// every process is the child of first process (pid=1)
if (!process->parent) {
- process->parent = sched_ctx._procs[1];
+ if (likely(!kernel_process(process))) {
+ process->parent = sched_ctx.procs[1];
+ } else {
+ process->parent = process;
+ }
+ } else {
+ assert(!proc_terminated(process->parent));
+ }
+
+ if (sched_ctx.proc_list) {
+ llist_append(sched_ctx.proc_list, &process->tasks);
+ } else {
+ sched_ctx.proc_list = &process->tasks;
}
llist_append(&process->parent->children, &process->siblings);
- llist_append(&sched_ctx._procs[0]->tasks, &process->tasks);
process->state = PS_READY;
}
-// from <kernel/process.c>
-extern void
-__del_pagetable(pid_t pid, ptr_t mount_point);
+void
+destory_thread(ptr_t vm_mnt, struct thread* thread)
+{
+ cake_ensure_valid(thread);
+
+ struct proc_info* proc = thread->process;
-pid_t
-destroy_process(pid_t pid)
+ llist_delete(&thread->sched_sibs);
+ llist_delete(&thread->proc_sibs);
+ llist_delete(&thread->sleep.sleepers);
+ waitq_cancel_wait(&thread->waitqueue);
+
+ thread_release_mem(thread, vm_mnt);
+
+ proc->thread_count--;
+ sched_ctx.ttable_len--;
+
+ cake_release(thread_pile, thread);
+}
+
+void
+delete_process(struct proc_info* proc)
{
- int index = pid;
- if (index <= 0 || index > sched_ctx.ptable_len) {
- __current->k_status = EINVAL;
- return -1;
- }
+ pid_t pid = proc->pid;
- struct proc_info* proc = sched_ctx._procs[index];
- sched_ctx._procs[index] = 0;
+ assert(pid); // long live the pid0 !!
+
+ sched_ctx.procs[pid] = NULL;
llist_delete(&proc->siblings);
llist_delete(&proc->grp_member);
llist_delete(&proc->tasks);
- llist_delete(&proc->sleep.sleepers);
- iopoll_free(pid, &proc->pollctx);
+ iopoll_free(proc);
taskfs_invalidate(pid);
vfs_unref_dnode(proc->cwd);
}
+ if (proc->cmd) {
+ vfree(proc->cmd);
+ }
+
for (size_t i = 0; i < VFS_MAX_FD; i++) {
struct v_fd* fd = proc->fdtable->fds[i];
if (fd) {
vfree(proc->fdtable);
- vmm_mount_pd(VMS_MOUNT_1, proc->page_table);
+ signal_free_registers(proc->sigreg);
- struct mm_region *pos, *n;
- llist_for_each(pos, n, &proc->mm.regions, head)
- {
- mem_sync_pages(VMS_MOUNT_1, pos, pos->start, pos->end - pos->start, 0);
- region_release(pos);
+ vmm_mount_pd(VMS_MOUNT_1, vmroot(proc));
+
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &proc->threads, proc_sibs) {
+ // terminate and destroy all threads unconditionally
+ destory_thread(VMS_MOUNT_1, pos);
}
- __del_pagetable(pid, VMS_MOUNT_1);
+ procvm_cleanup(VMS_MOUNT_1, proc);
vmm_unmount_pd(VMS_MOUNT_1);
cake_release(proc_pile, proc);
+}
+
+pid_t
+destroy_process(pid_t pid)
+{
+ int index = pid;
+ if (index <= 0 || index > sched_ctx.ptable_len) {
+ syscall_result(EINVAL);
+ return -1;
+ }
+
+ struct proc_info* proc = sched_ctx.procs[index];
+ delete_process(proc);
return pid;
}
+static void
+terminate_proc_only(struct proc_info* proc, int exit_code) {
+ proc->state = PS_TERMNAT;
+ proc->exit_code = exit_code;
+
+ proc_setsignal(proc->parent, _SIGCHLD);
+}
+
void
-terminate_proc(int exit_code)
-{
- __current->state = PS_TERMNAT;
- __current->exit_code = exit_code;
+terminate_thread(struct thread* thread, ptr_t val) {
+ thread->exit_val = val;
+ thread->state = PS_TERMNAT;
+
+ struct proc_info* proc = thread->process;
+ if (proc->thread_count == 1) {
+ terminate_proc_only(thread->process, 0);
+ }
+}
+
+void
+terminate_current_thread(ptr_t val) {
+ terminate_thread(current_thread, val);
+}
+
+void
+terminate_proccess(struct proc_info* proc, int exit_code) {
+ assert(!kernel_process(proc));
+
+ if (proc->pid == 1) {
+ panick("Attempt to kill init");
+ }
+
+ terminate_proc_only(proc, exit_code);
- proc_setsignal(__current->parent, _SIGCHLD);
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &__current->threads, proc_sibs) {
+ pos->state = PS_TERMNAT;
+ }
+}
+
+void
+terminate_current(int exit_code)
+{
+ terminate_proccess(__current, exit_code);
}
struct proc_info*
if (index < 0 || index > sched_ctx.ptable_len) {
return NULL;
}
- return sched_ctx._procs[index];
+ return sched_ctx.procs[index];
}
int
return 0;
if (pid >= sched_ctx.ptable_len)
return 0;
- struct proc_info* proc = sched_ctx._procs[pid];
+ struct proc_info* proc = sched_ctx.procs[pid];
struct proc_info* parent = proc->parent;
// if the parent's state is either terminated or being destroyed
#include <lunaix/status.h>
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
+#include <lunaix/mm/valloc.h>
#include <klibc/string.h>
#include <sys/mm/mempart.h>
+// FIXME issues with signal
+
LOG_MODULE("SIG")
extern struct scheduler sched_ctx; /* kernel/sched.c */
-#define UNMASKABLE (sigset(SIGKILL) | sigset(SIGTERM))
+#define UNMASKABLE (sigset(SIGKILL) | sigset(SIGTERM) | sigset(SIGILL))
#define TERMSIG (sigset(SIGSEGV) | sigset(SIGINT) | UNMASKABLE)
#define CORE (sigset(SIGSEGV))
#define within_kstack(addr) \
- (KERNEL_STACK <= (addr) && (addr) <= KERNEL_STACK_END)
+ (KSTACK_AREA <= (addr) && (addr) <= KSTACK_AREA_END)
static inline void
-signal_terminate(int errcode)
+signal_terminate(int caused_by)
{
- terminate_proc(errcode | PEXITSIG);
+ terminate_current(caused_by | PEXITSIG);
}
// Referenced in kernel/asm/x86/interrupt.S
void*
signal_dispatch()
{
- if (!__current->sigctx.sig_pending) {
+ if (kernel_process(__current)) {
+ // signal is undefined under 'kernel process'
+ return 0;
+ }
+
+ if (!pending_sigs(current_thread)) {
// no pending signals
return 0;
}
- struct sighail* psig = &__current->sigctx;
- struct sigact* prev_working = psig->inprogress;
+ struct sigregister* sigreg = __current->sigreg;
+ struct sigctx* psig = ¤t_thread->sigctx;
+ struct sigact* prev_working = active_signal(current_thread);
sigset_t mask = psig->sig_mask | (prev_working ? prev_working->sa_mask : 0);
int sig_selected = 31 - clz(psig->sig_pending & ~mask);
-
sigset_clear(psig->sig_pending, sig_selected);
- struct sigact* action = &psig->signals[sig_selected];
-
- if (sig_selected == 0) {
+ if (!sig_selected) {
// SIG0 is reserved
return 0;
}
- if (!action->sa_actor) {
+ struct sigact* action = sigreg->signals[sig_selected];
+ if (!action || !action->sa_actor) {
if (sigset_test(TERMSIG, sig_selected)) {
signal_terminate(sig_selected);
schedule();
return 0;
}
- ptr_t ustack = __current->ustack_top;
-
- if ((int)(ustack - USR_STACK) < (int)sizeof(struct proc_sig)) {
+ ptr_t ustack = current_thread->ustack_top;
+ ptr_t ustack_start = current_thread->ustack->start;
+ if ((int)(ustack - ustack_start) < (int)sizeof(struct proc_sig)) {
// the user stack has no space left for the signal context
return 0;
}
sigframe->sigact = action->sa_actor;
sigframe->sighand = action->sa_handler;
- sigframe->saved_ictx = __current->intr_ctx;
+ sigframe->saved_ictx = current_thread->intr_ctx;
- action->prev = prev_working;
- psig->inprogress = action;
+ sigactive_push(current_thread, sig_selected);
return sigframe;
}
+static inline void must_inline
+__set_signal(struct thread* thread, signum_t signum)
+{
+ raise_signal(thread, signum);
+
+ // SIGCONT and SIGSTOP are mutually exclusive: raising one clears
+ // the other (their numbers are assumed to differ only in the
+ // lowest bit, hence the ^ 1)
+ if (signum == SIGCONT || signum == SIGSTOP) {
+ sigset_clear(thread->sigctx.sig_pending, signum ^ 1);
+ }
+
+ struct sigact* sig = sigact_of(thread->process, signum);
+ if (sig) {
+ sig->sender = __current->pid;
+ }
+}
+
+static inline void must_inline
+__set_signal_all_threads(struct proc_info* proc, signum_t signum)
+{
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &proc->threads, proc_sibs) {
+ __set_signal(pos, signum);
+ }
+}
+
void
-proc_clear_signal(struct proc_info* proc)
+thread_setsignal(struct thread* thread, signum_t signum)
{
- memset(&proc->sigctx, 0, sizeof(proc->sigctx));
+ if (unlikely(kernel_process(thread->process))) {
+ return;
+ }
+
+ __set_signal(thread, signum);
}
void
-proc_setsignal(struct proc_info* proc, int signum)
+proc_setsignal(struct proc_info* proc, signum_t signum)
{
- sigset_add(proc->sigctx.sig_pending, signum);
- proc->sigctx.signals[signum].sender = __current->pid;
+ if (unlikely(kernel_process(proc))) {
+ return;
+ }
+
+ // FIXME handle signal delivery at process level.
+ switch (signum)
+ {
+ case SIGKILL:
+ signal_terminate(signum);
+ break;
+ case SIGCONT:
+ case SIGSTOP:
+ __set_signal_all_threads(proc, signum);
+ default:
+ break;
+ }
+
+ __set_signal(proc->th_active, signum);
}
int
-signal_send(pid_t pid, int signum)
+signal_send(pid_t pid, signum_t signum)
{
- if (signum < 0 || signum >= _SIG_NUM) {
- __current->k_status = EINVAL;
+ if (signum >= _SIG_NUM) {
+ syscall_result(EINVAL);
return -1;
}
} else {
// TODO: send to all process.
// But I don't want to support it yet.
- __current->k_status = EINVAL;
+ syscall_result(EINVAL);
return -1;
}
struct proc_info *pos, *n;
llist_for_each(pos, n, &proc->grp_member, grp_member)
{
- struct sighail* sh = &pos->sigctx;
- sigset_add(sh->sig_pending, signum);
- sh->signals[signum].sender = sender_pid;
+ proc_setsignal(pos, signum);
}
send_single:
if (proc_terminated(proc)) {
- __current->k_status = EINVAL;
+ syscall_result(EINVAL);
return -1;
}
- sigset_add(proc->sigctx.sig_pending, signum);
- proc->sigctx.signals[signum].sender = sender_pid;
+ proc_setsignal(proc, signum);
return 0;
}
-__DEFINE_LXSYSCALL1(int, sigreturn, struct proc_sig, *sig_ctx)
+void
+signal_dup_context(struct sigctx* dest_ctx)
{
- __current->intr_ctx = sig_ctx->saved_ictx;
+ struct sigctx* old_ctx = ¤t_thread->sigctx;
+ memcpy(dest_ctx, old_ctx, sizeof(struct sigctx));
+}
+
+void
+signal_dup_registers(struct sigregister* dest_reg)
+{
+ struct sigregister* oldreg = __current->sigreg;
+ for (int i = 0; i < _SIG_NUM; i++) {
+ struct sigact* oldact = oldreg->signals[i];
+ if (!oldact) {
+ continue;
+ }
+
+ struct sigact* newact = valloc(sizeof(struct sigact));
+ memcpy(newact, oldact, sizeof(struct sigact));
+
+ dest_reg->signals[i] = newact;
+ }
+}
- struct sigact* current = __current->sigctx.inprogress;
- if (current) {
- __current->sigctx.inprogress = current->prev;
- current->prev = NULL;
+void
+signal_reset_context(struct sigctx* sigctx) {
+ memset(sigctx, 0, sizeof(struct sigctx));
+}
+
+void
+signal_reset_register(struct sigregister* sigreg) {
+ for (int i = 0; i < _SIG_NUM; i++) {
+ struct sigact* act = sigreg->signals[i];
+ if (act) {
+ vfree(act);
+ sigreg->signals[i] = NULL;
+ }
+ }
+}
+
+void
+signal_free_registers(struct sigregister* sigreg) {
+ signal_reset_register(sigreg);
+ vfree(sigreg);
+}
+
+static bool
+signal_set_sigmask(struct thread* thread, int how, sigset_t* oldset, sigset_t* set)
+{
+ struct sigctx* sh = ¤t_thread->sigctx;
+ *oldset = sh->sig_mask;
+
+ if (how == _SIG_BLOCK) {
+ sigset_union(sh->sig_mask, *set);
+ } else if (how == _SIG_UNBLOCK) {
+ sigset_intersect(sh->sig_mask, ~(*set));
+ } else if (how == _SIG_SETMASK) {
+ sh->sig_mask = *set;
} else {
- __current->sigctx.inprogress = NULL;
+ return false;
}
+ sigset_intersect(sh->sig_mask, ~UNMASKABLE);
+ return true;
+}
+
+__DEFINE_LXSYSCALL1(int, sigreturn, struct proc_sig, *sig_ctx)
+{
+ struct sigctx* sigctx = ¤t_thread->sigctx;
+ struct sigact* active = active_signal(current_thread);
+
+ /* We use signal #0 as our base case, i.e. sig#0 means "no signal".
+ Returning from such a sigctx is therefore an ill-formed situation.
+ */
+ if (!active) {
+ signal_terminate(SIGSEGV);
+ schedule();
+ }
+
+ current_thread->intr_ctx = sig_ctx->saved_ictx;
if (proc_terminated(__current)) {
__current->exit_code |= PEXITSIG;
} else if (sigset_test(CORE, sig_ctx->sig_num)) {
signal_terminate(sig_ctx->sig_num);
}
- ptr_t ictx = (ptr_t)__current->intr_ctx;
+ ptr_t ictx = (ptr_t)current_thread->intr_ctx;
/*
Ensure our restored context is within kernel stack
signal_terminate(SIGSEGV);
}
+ sigactive_pop(current_thread);
+
schedule();
// never reach!
__DEFINE_LXSYSCALL3(
int, sigprocmask, int, how, const sigset_t, *set, sigset_t, *oldset)
{
- struct sighail* sh = &__current->sigctx;
- *oldset = sh->sig_mask;
+ // TODO maybe it is a good opportunity to introduce a process-wide
+ // signal mask?
+
+ if (signal_set_sigmask(current_thread, how, oldset, set)) {
+ return 0;
+ }
- if (how == _SIG_BLOCK) {
- sigset_union(sh->sig_mask, *set);
- } else if (how == _SIG_UNBLOCK) {
- sigset_intersect(sh->sig_mask, ~(*set));
- } else if (how == _SIG_SETMASK) {
- sh->sig_mask = *set;
- } else {
+ syscall_result(EINVAL);
+ return -1;
+}
+
+__DEFINE_LXSYSCALL3(
+ int, th_sigmask, int, how, const sigset_t, *set, sigset_t, *oldset)
+{
+ if (signal_set_sigmask(current_thread, how, oldset, set)) {
return 0;
}
- sigset_intersect(sh->sig_mask, ~UNMASKABLE);
- return 1;
+ return EINVAL;
}
__DEFINE_LXSYSCALL2(int, sys_sigaction, int, signum, struct sigaction*, action)
return -1;
}
- struct sigact* sa = &__current->sigctx.signals[signum];
+ struct sigctx* sigctx = ¤t_thread->sigctx;
+ if (signum == sigctx->sig_active) {
+ return -1;
+ }
+
+ struct sigact* sa = sigact_of(__current, signum);
+
+ if (!sa) {
+ sa = vzalloc(sizeof(struct sigact));
+ set_sigact(__current, signum, sa);
+ }
sa->sa_actor = (void*)action->sa_sigaction;
sa->sa_handler = (void*)action->sa_handler;
__DEFINE_LXSYSCALL(int, pause)
{
- pause_current();
- sched_yieldk();
+ pause_current_thread();
+ sched_pass();
- __current->k_status = EINTR;
+ syscall_result(EINTR);
return -1;
}
__DEFINE_LXSYSCALL1(int, sigpending, sigset_t, *sigset)
{
- *sigset = __current->sigctx.sig_pending;
+ *sigset = pending_sigs(current_thread);
return 0;
}
__DEFINE_LXSYSCALL1(int, sigsuspend, sigset_t, *mask)
{
- sigset_t tmp = __current->sigctx.sig_mask;
- __current->sigctx.sig_mask = (*mask) & ~UNMASKABLE;
+    struct sigctx* sigctx = &current_thread->sigctx;
+ sigset_t tmp = current_thread->sigctx.sig_mask;
+ sigctx->sig_mask = (*mask) & ~UNMASKABLE;
- pause_current();
- sched_yieldk();
+ pause_current_thread();
+ sched_pass();
- __current->sigctx.sig_mask = tmp;
+ sigctx->sig_mask = tmp;
return -1;
}
\ No newline at end of file
#include <lunaix/fs/taskfs.h>
#include <lunaix/process.h>
-void
-__read_pending_sig(struct twimap* map)
-{
- struct proc_info* proc = twimap_data(map, struct proc_info*);
- twimap_printf(map, "%bb", proc->sigctx.sig_pending);
-}
-
-void
-__read_masked_sig(struct twimap* map)
-{
- struct proc_info* proc = twimap_data(map, struct proc_info*);
- twimap_printf(map, "%bb", proc->sigctx.sig_mask);
-}
-
void
__read_parent(struct twimap* map)
{
export_task_attr()
{
struct twimap* map;
- map = twimap_create(NULL);
- map->read = __read_pending_sig;
- taskfs_export_attr("sig_pending", map);
+
+ // FIXME goes to thread specific location
+ // map = twimap_create(NULL);
+ // map->read = __read_pending_sig;
+ // taskfs_export_attr("sig_pending", map);
- map = twimap_create(NULL);
- map->read = __read_masked_sig;
- taskfs_export_attr("sig_masked", map);
+ // map = twimap_create(NULL);
+ // map->read = __read_masked_sig;
+ // taskfs_export_attr("sig_masked", map);
map = twimap_create(NULL);
map->read = __read_parent;
--- /dev/null
+#include <lunaix/process.h>
+#include <lunaix/sched.h>
+#include <lunaix/syscall.h>
+#include <lunaix/syscall_utils.h>
+#include <lunaix/mm/mmap.h>
+#include <lunaix/mm/page.h>
+#include <lunaix/mm/vmm.h>
+#include <lunaix/mm/pmm.h>
+#include <lunaix/syslog.h>
+
+#include <usr/lunaix/threads.h>
+
+#include <sys/abi.h>
+#include <sys/mm/mm_defs.h>
+
+LOG_MODULE("THREAD")
+
+static inline void
+inject_guardian_page(ptr_t vm_mnt, ptr_t va)
+{
+ vmm_set_mapping(vm_mnt, PG_ALIGN(va), 0, 0, VMAP_GUARDPAGE);
+}
+
+static ptr_t
+__alloc_user_thread_stack(struct proc_info* proc, struct mm_region** stack_region, ptr_t vm_mnt)
+{
+ ptr_t th_stack_top = (proc->thread_count + 1) * USR_STACK_SIZE;
+ th_stack_top = ROUNDUP(USR_STACK_END - th_stack_top, MEM_PAGE);
+
+ struct mm_region* vmr;
+ struct proc_mm* mm = vmspace(proc);
+ struct mmap_param param = { .vms_mnt = vm_mnt,
+ .pvms = mm,
+ .mlen = USR_STACK_SIZE,
+ .proct = PROT_READ | PROT_WRITE,
+ .flags = MAP_ANON | MAP_PRIVATE,
+ .type = REGION_TYPE_STACK };
+
+    int errno = mmap_user((void**)&th_stack_top, &vmr, th_stack_top, NULL, &param);
+
+ if (errno) {
+ WARN("failed to create user thread stack: %d", errno);
+ return 0;
+ }
+
+    // Pre-allocate the page containing the stack top, so the thread does
+    // not trap into the kernel immediately upon its first execution.
+ ptr_t pa = pmm_alloc_page(0);
+ ptr_t stack_top = align_stack(th_stack_top + USR_STACK_SIZE - 1);
+ if (likely(pa)) {
+ vmm_set_mapping(vm_mnt, PG_ALIGN(stack_top),
+ pa, region_ptattr(vmr), 0);
+ }
+
+ inject_guardian_page(vm_mnt, vmr->start);
+
+ *stack_region = vmr;
+
+ return stack_top;
+}
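+
+/*
+ * Resulting user stack layout (illustrative; exact addresses depend on
+ * USR_STACK_SIZE and the current thread count):
+ *
+ *   vmr->start                        guardian page (VMAP_GUARDPAGE, no frame)
+ *   ...                               demand-paged on first touch
+ *   PG_ALIGN(stack_top)..stack_top    pre-mapped page, no fault on entry
+ */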
+
+static ptr_t
+__alloc_kernel_thread_stack(struct proc_info* proc, ptr_t vm_mnt)
+{
+ v_mapping mapping;
+ ptr_t kstack = PG_ALIGN(KSTACK_AREA_END - KSTACK_SIZE);
+ while (kstack >= KSTACK_AREA) {
+        // the first page of the kernel stack is the guardian page
+ if (!vmm_lookupat(vm_mnt, kstack + MEM_PAGE, &mapping)
+ || !PG_IS_PRESENT(mapping.flags))
+ {
+ break;
+ }
+
+ kstack -= KSTACK_SIZE;
+ }
+
+ if (kstack < KSTACK_AREA) {
+ WARN("failed to create kernel stack: max stack num reach\n");
+ return 0;
+ }
+
+ ptr_t pa = pmm_alloc_cpage(PN(KSTACK_SIZE) - 1, 0);
+
+ if (!pa) {
+ WARN("failed to create kernel stack: nomem\n");
+ return 0;
+ }
+
+ inject_guardian_page(vm_mnt, kstack);
+ for (size_t i = MEM_PAGE, j = 0; i < KSTACK_SIZE; i+=MEM_PAGE, j+=MEM_PAGE) {
+ vmm_set_mapping(vm_mnt, kstack + i, pa + j, PG_PREM_RW, 0);
+ }
+
+ return align_stack(kstack + KSTACK_SIZE - 1);
+}
+
+void
+thread_release_mem(struct thread* thread, ptr_t vm_mnt)
+{
+ for (size_t i = 0; i < KSTACK_SIZE; i+=MEM_PAGE) {
+ ptr_t stack_page = PG_ALIGN(thread->kstack - i);
+ vmm_del_mapping(vm_mnt, stack_page);
+ }
+
+ if (thread->ustack) {
+ if ((thread->ustack->start & 0xfff)) {
+ fail("invalid ustack struct");
+ }
+ mem_unmap_region(vm_mnt, thread->ustack);
+ }
+}
+
+struct thread*
+create_thread(struct proc_info* proc, ptr_t vm_mnt, bool with_ustack)
+{
+ struct mm_region* ustack_region = NULL;
+ if (with_ustack &&
+ !(__alloc_user_thread_stack(proc, &ustack_region, vm_mnt)))
+ {
+ return NULL;
+ }
+
+ ptr_t kstack = __alloc_kernel_thread_stack(proc, vm_mnt);
+ if (!kstack) {
+ mem_unmap_region(vm_mnt, ustack_region);
+ return NULL;
+ }
+
+ struct thread* th = alloc_thread(proc);
+ if (!th) {
+ return NULL;
+ }
+
+ th->kstack = kstack;
+ th->ustack = ustack_region;
+
+ return th;
+}
+
+void
+start_thread(struct thread* th, ptr_t vm_mnt, ptr_t entry)
+{
+ assert(th && entry);
+
+ struct transfer_context transfer;
+ if (!kernel_addr(entry)) {
+ assert(th->ustack);
+
+ ptr_t ustack_top = align_stack(th->ustack->end - 1);
+        ustack_top -= 16; // reserve 16 bytes for injected entry parameters
+ thread_create_user_transfer(&transfer, th->kstack, ustack_top, entry);
+
+ th->ustack_top = ustack_top;
+ }
+ else {
+ thread_create_kernel_transfer(&transfer, th->kstack, entry);
+ }
+
+ inject_transfer_context(vm_mnt, &transfer);
+ th->intr_ctx = (isr_param*)transfer.inject;
+
+ commit_thread(th);
+}
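+
+/*
+ * Minimal in-kernel usage sketch (illustrative; `worker_entry` is a
+ * hypothetical routine and error handling is elided):
+ *
+ *   static void worker_entry(void) { for (;;) sched_pass(); }
+ *
+ *   struct thread* th = create_thread(__current, VMS_SELF, false);
+ *   if (th) {
+ *       start_thread(th, VMS_SELF, (ptr_t)worker_entry);
+ *   }
+ */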
+
+void
+exit_thread(void* val) {
+ terminate_current_thread((ptr_t)val);
+ schedule();
+}
+
+struct thread*
+thread_find(struct proc_info* proc, tid_t tid)
+{
+ struct thread *pos, *n;
+ llist_for_each(pos, n, &proc->threads, proc_sibs) {
+ if (pos->tid == tid) {
+ return pos;
+ }
+ }
+
+ return NULL;
+}
+
+__DEFINE_LXSYSCALL4(int, th_create, tid_t*, tid, struct uthread_info*, thinfo,
+ void*, entry, void*, param)
+{
+ struct thread* th = create_thread(__current, VMS_SELF, true);
+ if (!th) {
+ return EAGAIN;
+ }
+
+ start_thread(th, VMS_SELF, (ptr_t)entry);
+
+ ptr_t ustack_top = th->ustack_top;
+ *((void**)ustack_top) = param;
+
+ thinfo->th_stack_sz = region_size(th->ustack);
+ thinfo->th_stack_top = (void*)ustack_top;
+
+ if (tid) {
+ *tid = th->tid;
+ }
+
+ return 0;
+}
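+
+/*
+ * Note: thinfo->th_stack_top points at the 16-byte slot reserved by
+ * start_thread(); slot [0] carries the kernel-injected `param`, and the
+ * libc side fills slots [1] and [2] as the entry routine's arguments
+ * (see pthread_create() for the consumer side).
+ */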
+
+__DEFINE_LXSYSCALL(tid_t, th_self)
+{
+ return current_thread->tid;
+}
+
+__DEFINE_LXSYSCALL1(void, th_exit, void*, val)
+{
+ exit_thread(val);
+}
+
+__DEFINE_LXSYSCALL2(int, th_join, tid_t, tid, void**, val_ptr)
+{
+ struct thread* th = thread_find(__current, tid);
+ if (!th) {
+ return EINVAL;
+ }
+
+ if (th == current_thread) {
+ return EDEADLK;
+ }
+
+ while (!proc_terminated(th)) {
+ sched_pass();
+ }
+
+ if (val_ptr) {
+ *val_ptr = (void*)th->exit_val;
+ }
+
+ destory_thread(VMS_SELF, th);
+
+ return 0;
+}
+
+__DEFINE_LXSYSCALL1(int, th_detach, tid_t, tid)
+{
+    // cannot detach the only remaining thread
+ if (__current->thread_count == 1) {
+ return EINVAL;
+ }
+
+ struct thread* th = thread_find(__current, tid);
+ if (!th) {
+ return EINVAL;
+ }
+
+ detach_thread(th);
+ return 0;
+}
+
+__DEFINE_LXSYSCALL2(int, th_kill, tid_t, tid, int, signum)
+{
+ struct thread* target = thread_find(__current, tid);
+ if (!target) {
+ return EINVAL;
+ }
+
+    if (signum < 0 || signum >= _SIG_NUM) {
+ return EINVAL;
+ }
+
+ if (signum) {
+ thread_setsignal(target, signum);
+ }
+
+ return 0;
+}
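+
+/*
+ * As with kill(2), a signum of 0 performs no delivery; the call then only
+ * validates that `tid` names a thread of the calling process.
+ */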
#include <klibc/strfmt.h>
#include <lunaix/spike.h>
-#include <sys/interrupts.h>
+#include <lunaix/pcontext.h>
static char buffer[1024];
* @copyright Copyright (c) 2022
*
*/
-#include <sys/interrupts.h>
#include <lunaix/mm/cake.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
#include <lunaix/syslog.h>
#include <lunaix/timer.h>
+#include <lunaix/pcontext.h>
#include <hal/hwtimer.h>
dest[i] = c;
i++;
}
- dest[i] = '\0';
- return dest;
+    /* note: unlike ISO strcpy, this returns a pointer one past the '\0' */
+    dest[i++] = '\0';
+    return &dest[i];
}
char*
l++;
}
- if (!l)
- return str;
- return strcpy(str, str + l);
+ if (l)
+ strcpy(str, str + l);
+ return str;
}
\ No newline at end of file
. = 0x100000;
/* 这里是我们的高半核初始化代码段和数据段 */
- .boot.text BLOCK(4K) : {
+ .boot.text BLOCK(4K) :
+ {
*(.multiboot)
*(.boot.text)
}
- .boot.bss BLOCK(4K) : {
+ .boot.bss BLOCK(4K) :
+ {
*(.boot.bss)
}
- .boot.data BLOCK(4K) : {
+ .boot.data BLOCK(4K) :
+ {
*(.boot.data)
}
- .boot.rodata BLOCK(4K) : {
+ .boot.rodata BLOCK(4K) :
+ {
*(.boot.rodata)
}
- .boot.bss BLOCK(4K) : {
+ .boot.bss BLOCK(4K) :
+ {
*(.boot.rodata)
}
__kexec_boot_end = ALIGN(4K);
. += 0xC0000000;
/* 好了,我们的内核…… */
- .text BLOCK(4K) : AT ( ADDR(.text) - 0xC0000000 ) {
+
+ PROVIDE(__kexec_text_start = ALIGN(4K));
+
+ .text BLOCK(4K) : AT ( ADDR(.text) - 0xC0000000 )
+ {
__kexec_start = .;
- PROVIDE(__kexec_text_start = .);
*(.text)
+ }
+
+ .kf.preempt BLOCK(4K) : AT ( ADDR(.kf.preempt) - 0xC0000000 )
+ {
+ PROVIDE(__kf_preempt_start = .);
+
+ KEEP(*(.kf.preempt));
- PROVIDE(__kexec_text_end = .);
+ PROVIDE(__kf_preempt_end = .);
}
- .data BLOCK(4K) : AT ( ADDR(.data) - 0xC0000000 ) {
+ PROVIDE(__kexec_text_end = .);
+
+ .data BLOCK(4K) : AT ( ADDR(.data) - 0xC0000000 )
+ {
*(.data)
}
- .rodata BLOCK(4K) : AT ( ADDR(.rodata) - 0xC0000000 ) {
+ .rodata BLOCK(4K) : AT ( ADDR(.rodata) - 0xC0000000 )
+ {
*(.rodata)
}
- .kpg BLOCK(4K) : AT ( ADDR(.kpg) - 0xC0000000 ) {
+ .kpg BLOCK(4K) : AT ( ADDR(.kpg) - 0xC0000000 )
+ {
*(.kpg)
}
/* for generated array, we align to address line size */
- .lga BLOCK(4K) : AT ( ADDR(.lga) - 0xC0000000 ) {
+ .lga BLOCK(4K) : AT ( ADDR(.lga) - 0xC0000000 )
+ {
PROVIDE(__lga_twiplugin_inits_start = .);
KEEP(*(.lga.twiplugin_inits));
}
- .bss BLOCK(4K) : AT ( ADDR(.bss) - 0xC0000000 ) {
+ .bss BLOCK(4K) : AT ( ADDR(.bss) - 0xC0000000 )
+ {
*(.bss)
}
-rtc base=utc \
-no-reboot \
-machine q35 \
+ -cpu pentium3,rdrand \
-no-shutdown \
-d cpu_reset \
-d trace:ide_dma_cb \
-vga std,retrace=precise \
- -serial telnet::12345,server,nowait\
+ -serial telnet::12345,server,nowait,logfile=lunaix_ttyS0.log\
-drive id=cdrom,file="$(1)",readonly=on,if=none,format=raw \
-device ahci,id=ahci \
-device ide-cd,drive=cdrom,bus=ahci.0 \
self.mapping[src] = dest.strip()
- def render(self):
+    def render(self, selected=()):
for k, v in self.mapping.items():
src: Path = self.tbase_path.joinpath(k)
dest: Path = self.pbase_path.joinpath(v)
+ if (k not in selected):
+ continue
+
if not src.is_file():
continue
def main():
parser = argparse.ArgumentParser()
+ parser.add_argument("selects", nargs="*")
parser.add_argument("--arch", default='i386')
parser.add_argument("-twd", "--template_dir", default=str(Path.cwd()))
parser.add_argument("-pwd", "--project_dir", default=str(Path.cwd()))
expander = TemplateExpander(Path(args.template_dir), Path(args.project_dir), args.arch)
- expander.render()
+ expander.render(args.selects)
# pprint.pprint(expander.data)
if __name__ == "__main__":
-from .commands import load_commands
+import os
-load_commands()
\ No newline at end of file
+if "LUNADBG_ARCH" not in os.environ:
+ os.environ["LUNADBG_ARCH"] = "x86_32"
+
+from .region_dump import MemoryRegionDump
+from .sched_dump import ProcessDump, SchedulerDump
+from .mem import MMStats
+from .syslog import SysLogDump
+
+MemoryRegionDump()
+SchedulerDump()
+ProcessDump()
+SysLogDump()
+MMStats()
--- /dev/null
+import os
+
+if os.environ["LUNADBG_ARCH"].startswith("x86_"):
+ from .x86 import *
\ No newline at end of file
--- /dev/null
+import os
+
+if os.environ["LUNADBG_ARCH"] == 'x86_32':
+ from .pte import PageTableHelper32 as PageTableHelper
+else:
+ from .pte import PageTableHelper64 as PageTableHelper
--- /dev/null
+class PageTableHelperBase:
+ @staticmethod
+ def null_mapping(pte):
+ return pte == 0
+
+ @staticmethod
+ def translation_level(level=-1):
+ raise NotImplementedError()
+
+ @staticmethod
+ def translation_shift_bits(level):
+ raise NotImplementedError()
+
+ @staticmethod
+ def mapping_present(pte):
+ raise NotImplementedError()
+
+ @staticmethod
+ def huge_page(pte):
+ raise NotImplementedError()
+
+ @staticmethod
+ def protections(pte):
+ raise NotImplementedError()
+
+ @staticmethod
+ def other_attributes(level, pte):
+ raise NotImplementedError()
+
+ @staticmethod
+ def same_kind(pte1, pte2):
+ raise NotImplementedError()
+
+ @staticmethod
+ def physical_pfn(pte):
+ raise NotImplementedError()
+
+ @staticmethod
+ def vaddr_width():
+ raise NotImplementedError()
+
+class PageTableHelper32(PageTableHelperBase):
+ @staticmethod
+ def translation_level(level = -1):
+ return [0, 1][level]
+
+ @staticmethod
+ def translation_shift_bits(level):
+        # non-PAE i386: 1024-entry page directory, so a PDE covers 4MiB
+        return [10, 0][level] + 12
+
+ @staticmethod
+ def mapping_present(pte):
+ return bool(pte & 1)
+
+ @staticmethod
+ def huge_page(pte):
+ return bool(pte & (1 << 7))
+
+ @staticmethod
+ def protections(pte):
+ prot = ['R'] # RWXUP
+ if (pte & (1 << 1)):
+ prot.append('W')
+        if (pte & -1):
+            # non-PAE i386 has no NX bit, so any mapping is executable
+            prot.append('X')
+ if (pte & (1 << 2)):
+ prot.append('U')
+ if (pte & (1)):
+ prot.append('P')
+ return prot
+
+ @staticmethod
+ def other_attributes(level, pte):
+ attrs = []
+ if pte & (1 << 5):
+ attrs.append("A")
+ if pte & (1 << 6):
+ attrs.append("D")
+ if pte & (1 << 3):
+ attrs.append("PWT")
+ if pte & (1 << 4):
+ attrs.append("PCD")
+ if PageTableHelper32.translation_level(level) == 1 and pte & (1 << 8):
+ attrs.append("G")
+ return attrs
+
+ @staticmethod
+ def same_kind(pte1, pte2):
+ attr_mask = 0x19f # P, R/W, U/S, PWT, PCD, PS, G
+ return (pte1 & attr_mask) == (pte2 & attr_mask)
+
+ @staticmethod
+ def physical_pfn(pte):
+ return pte >> 12
+
+ @staticmethod
+ def vaddr_width():
+ return 32
+
+class PageTableHelper64(PageTableHelperBase):
+ pass
\ No newline at end of file
-from .region_dump import MemoryRegionDump
-from .proc_table_dump import ProcessDump, ProcessTableDump
-from .syslog import SysLogDump
+from gdb import Command, COMMAND_USER
+import argparse
-def load_commands():
- MemoryRegionDump()
- ProcessTableDump()
- ProcessDump()
- SysLogDump()
\ No newline at end of file
+class LunadbgCommand(Command):
+ def __init__(self, name: str) -> None:
+ super().__init__(name, COMMAND_USER)
+ self._parser = argparse.ArgumentParser()
+
+ def _parse_args(self, gdb_argstr: str):
+ args, argv = self._parser.parse_known_args(gdb_argstr.strip().split(' '), None)
+ if argv:
+ print('unrecognized arguments: %s'%(' '.join(argv)))
+ print(self._parser.format_usage())
+ print(self._parser.format_help())
+ return None
+ return args
\ No newline at end of file
--- /dev/null
+from .commands import LunadbgCommand
+from .pp import MyPrettyPrinter
+from .profiling.pmstat import PhysicalMemProfile
+from .structs.pagetable import PageTable, PageTableEntry
+
+class MMStats(LunadbgCommand):
+ def __init__(self) -> None:
+ super().__init__("mm")
+ subparsers = self._parser.add_subparsers(dest="cmd")
+
+ stats = subparsers.add_parser("stats")
+ stats.add_argument("state_type")
+ stats.add_argument("-g", "--granule", type=int, default=512)
+ stats.add_argument("--cols", type=int, default=32)
+
+ lookup = subparsers.add_parser("lookup")
+ lookup.add_argument("mem_type")
+ lookup.add_argument("address")
+ lookup.add_argument("-l", "--level", type=int, default=-1)
+ lookup.add_argument("-n", type=int, default=0)
+ lookup.add_argument("-t", "--to", dest="to_addr", default='0')
+
+ self.__ptw = PageTable()
+
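+    # Example invocations (illustrative):
+    #   (gdb) mm stats pmem -g 256 --cols 64
+    #   (gdb) mm lookup vm 0xc0000000 -n 16
+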
+ def print_pmem_stats(self, pp: MyPrettyPrinter, optn):
+ pmem = PhysicalMemProfile()
+ pmem.rescan_pmem(optn.granule)
+
+ pp.printf("Total: %dKiB (%d@4K)",
+ pmem.max_mem_sz, pmem.max_mem_pg)
+
+ pp.printf("Used: %dKiB (%d@4K) ~%.2f%%",
+ pmem.consumed_pg * 4096,
+ pmem.consumed_pg, pmem.utilisation * 100)
+
+ pp.printf("Fragmentations: %d ~%.2f%%", pmem.discontig, pmem.fragmentation * 100)
+ pp.print()
+
+ pp.print("Distribution")
+ pp2 = pp.next_level(2)
+ row = []
+ for i in range(0, len(pmem.mem_distr)):
+ ratio = pmem.mem_distr[i] / pmem.page_per_granule
+ cat = int(ratio * 9)
+ if ratio == 0:
+ row.append('.')
+ elif ratio == 1:
+ row.append('F')
+ else:
+ row.append(str(cat))
+
+ if (i + 1) % optn.cols == 0:
+ pp2.print(''.join(row))
+ row.clear()
+ if (i + 1) % optn.cols != 0:
+ pp2.print(''.join(row))
+
+ pp.printf("(granule: %d, density: %d@4K)", optn.granule, pmem.page_per_granule)
+
+ def vm_lookup(self, pp, va, optn):
+ to_addr = int(optn.to_addr, 0)
+ if not optn.n and not to_addr:
+ pp.print(self.__ptw.get_pte(va, level=optn.level))
+ else:
+ if to_addr:
+ self.__ptw.print_ptes_between(pp, va, to_addr, optn.level)
+ else:
+ self.__ptw.print_ptes(pp, va, optn.n, optn.level)
+
+ def __do_stats(self, pp, optn):
+ if optn.state_type == "pmem":
+ self.print_pmem_stats(pp, optn)
+ else:
+ print("unknow stats type:", optn.state_type)
+
+ def __do_lookup(self, pp, address, optn):
+ if optn.mem_type == "vm":
+ self.vm_lookup(pp, int(address, base=0), optn)
+ else:
+ print("unknow mem type:", optn.state_type)
+
+ def invoke(self, argument: str, from_tty: bool) -> None:
+ optn = self._parse_args(argument)
+ pp = MyPrettyPrinter()
+
+ if optn.cmd == 'stats':
+ self.__do_stats(pp, optn)
+ elif optn.cmd == 'lookup':
+ self.__do_lookup(pp, optn.address, optn)
+ else:
+ print("unknown command:", optn.cmd)
\ No newline at end of file
--- /dev/null
+from enum import Enum
+
+class PrettyPrintable:
+ def __init__(self) -> None:
+ pass
+
+ def print_simple(self, pp, *args):
+ raise NotImplementedError()
+
+ def print_abstract(self, pp, *args):
+ raise NotImplementedError()
+
+ def print_detailed(self, pp, *args):
+ raise NotImplementedError()
+
+class PrintMode(Enum):
+ Simple = 0
+ Detailed = 1
+ Abstract = 2
+
+class TypeConverter(Enum):
+ Identity = lambda v: v
+ CString = lambda v: v.string()
+
+class MyPrettyPrinter:
+ INDENT = 3
+ def __init__(self, level = 0, prefix='') -> None:
+ self.__level = level
+ self.__padding = " " * MyPrettyPrinter.INDENT * level
+ self.__prefix = prefix
+
+ def set_prefix(self, prefix):
+ self.__prefix = prefix
+ return self
+
+ def clear_prefix(self):
+ self.__prefix = ''
+ return self
+
+ def next_level(self, indent_inc = 1):
+ return MyPrettyPrinter(indent_inc + self.__level, self.__prefix)
+
+ def print(self, *vals, indent=0, mode=PrintMode.Simple):
+ val = '' if len(vals) == 0 else vals[0]
+ if isinstance(val, PrettyPrintable):
+ pp = self
+ if indent > 0:
+ pp = self.next_level(indent)
+ [ val.print_simple,
+ val.print_detailed,
+ val.print_abstract ][mode.value](pp)
+ else:
+ new_id = " " * indent
+ print(f"{self.__padding}{new_id}", f"{self.__prefix}{val}", *vals[1:])
+
+ return self
+
+ def printf(self, fmt, *args, indent=0):
+ assert isinstance(fmt, str)
+ self.print(fmt%args, indent=indent)
+ return self
+
+ def printfa(self, fmt, *args, indent=0):
+ assert isinstance(fmt, str)
+ self.print(fmt.format(*args), indent=indent)
+ return self
+
+
+ def print_field(self, obj, field, fmt=None, val=None, cast=TypeConverter.Identity):
+ val = obj[field] if val is None else val
+ val = cast(val)
+
+ if fmt is None:
+ self.printf("%s: %s", field, val)
+ else:
+ self.printf("%s: %s", field, fmt%(val))
+ return self
\ No newline at end of file
+++ /dev/null
-import gdb
-from .utils import pid_argument
-
-class ProcessHelper:
- PS_READY = 0
- PS_RUNNING = 1
- PS_TERMNAT = 2
- PS_DESTROY = 4
- PS_PAUSED = 8
- PS_BLOCKED = 16
- PS_CREATED = 32
-
- def get_state(proc: gdb.Value):
- state_t = proc["state"]
- if (state_t == ProcessHelper.PS_READY):
- return "ready"
- if (state_t == ProcessHelper.PS_RUNNING):
- return "running"
- if (state_t & (ProcessHelper.PS_TERMNAT | ProcessHelper.PS_DESTROY)):
- return "terminated"
- if (state_t & ProcessHelper.PS_BLOCKED):
- return "blocked"
- if (state_t & ProcessHelper.PS_PAUSED):
- return "paused"
-
- def process_at(pid):
- return gdb.parse_and_eval(pid_argument(pid))
-
- @staticmethod
- def pp_process(proc: gdb.Value):
- print(" pid:", proc["pid"])
- print(" pgid:", proc["pgid"])
- if proc["parent"] == 0:
- print(" root process")
- else:
- print(" ppid:", proc["parent"]["pid"])
- print(" page table:", proc["page_table"])
-
- print(" state:", ProcessHelper.get_state(proc))
- print(" created: +%dms"%(proc["created"]))
- print(" saved context:")
- print(" %s"%(proc["intr_ctx"].dereference()
- .format_string(max_depth=3, format='x')
- .replace('\n', '\n ')))
-
-
-class ProcessDump(gdb.Command):
- """Dump the state of Lunaix PCB"""
- def __init__(self) -> None:
- super().__init__("proc", gdb.COMMAND_USER)
-
- def invoke(self, argument: str, from_tty: bool) -> None:
- argument = pid_argument(argument)
-
- proc = gdb.parse_and_eval(argument)
-
- ProcessHelper.pp_process(proc)
-
-class ProcessTableDump(gdb.Command):
- """Dump the state of Lunaix process table"""
- def __init__(self) -> None:
- super().__init__("proc_table", gdb.COMMAND_USER)
-
- def invoke(self, argument: str, from_tty: bool) -> None:
- sched_context = gdb.parse_and_eval("sched_ctx")
- total_entries = sched_context["ptable_len"]
- print("inited entries: %d"%(total_entries))
- print("scheduled pid: %d"%(sched_context["procs_index"]))
- print("Process Table:")
-
- for i in range(0, total_entries):
- p = ProcessHelper.process_at(i)
- if (p == 0):
- continue
- state = ProcessHelper.get_state(p)
- print(" pid:%02d [%s]"%(i, state))
\ No newline at end of file
--- /dev/null
+from ..symbols import LunaixSymbols
+from ..structs.page import PageStruct
+from ..pp import MyPrettyPrinter
+import math
+
+class PhysicalMemProfile:
+ def __init__(self) -> None:
+ super().__init__()
+ self._pm_list = LunaixSymbols.debug_sym("pmm", "pm_table")
+
+ self.max_mem_pg = int(LunaixSymbols.debug_sym("pmm", "max_pg").value())
+ self.max_mem_sz = self.max_mem_pg * 4096
+ self.mem_distr = []
+
+ def rescan_pmem(self, distr_granule = 256):
+ self.__mem_distr_granule = distr_granule
+ self.mem_distr.clear()
+
+ page_per_granule = self.max_mem_pg / self.__mem_distr_granule
+ page_per_granule = math.ceil(page_per_granule)
+ remainder = self.max_mem_pg % self.__mem_distr_granule
+ bucket = 0
+ non_contig = 0
+ last_contig = False
+ for i in range(self.max_mem_pg):
+ element = PageStruct(self._pm_list[i].address)
+ bucket += int(element.ref > 0)
+ if last_contig:
+ last_contig = element.ref > 0
+ non_contig += int(not last_contig)
+ else:
+ last_contig = element.ref > 0
+
+ if (i + 1) % page_per_granule == 0:
+ self.mem_distr.append(bucket)
+ bucket = 0
+
+ if remainder > 0:
+ if bucket > 0:
+ bucket += page_per_granule - remainder
+ self.mem_distr.append(bucket)
+
+ self.consumed_pg = sum(self.mem_distr)
+ self.utilisation = self.consumed_pg / self.max_mem_pg
+ self.fragmentation = 2 * non_contig / self.max_mem_pg
+ self.discontig = non_contig
+ self.page_per_granule = page_per_granule
+
\ No newline at end of file
import gdb
from .utils import pid_argument, llist_foreach
+from .structs.region import MemRegion
+from .pp import MyPrettyPrinter
class MemoryRegionDump(gdb.Command):
"""Dump virtual memory regions associated with a process"""
def __init__(self) -> None:
super().__init__("vmrs", gdb.COMMAND_USER)
- def region_callback(self, idx, region):
- print(f"VMR #{idx}:")
- print( " 0x%x...0x%x [0x%x]"%(
- region['start'], region['end'],
- region['end'] - region['start']))
-
- attr = region["attr"]
- attr_str = []
- if (attr & (1 << 2)):
- attr_str.append("R")
- if (attr & (1 << 3)):
- attr_str.append("W")
- if (attr & (1 << 4)):
- attr_str.append("X")
- print( " attr: 0x%x (%s)"%(attr, "".join(attr_str)))
-
- file = region["mfile"]
- if file == 0:
- print( " anonymous region")
- else:
- print( " file mapped:")
- print( " dnode: %s @0x%x"%(file["dnode"]["name"]["value"].string(), file))
- print( " frange: 0x%x+0x%x"%(region["foff"], region["flen"]))
+ def region_callback(self, pp, idx, region):
+ pp.print(f"VMR #{idx}:")
+ ppp = pp.next_level()
+ vmr = MemRegion(region)
+ vmr.print_detailed(ppp)
def invoke(self, argument: str, from_tty: bool) -> None:
argument = pid_argument(argument)
pid = gdb.parse_and_eval(f"{argument}->pid")
- argument = f"&{argument}->mm.regions"
+ argument = f"&{argument}->mm->regions"
val = gdb.parse_and_eval(argument)
- head = val
-
region_t = gdb.lookup_type("struct mm_region").pointer()
- print("VMRS (pid: %d)"%(pid))
-
- llist_foreach(val, region_t, lambda a,b: self.region_callback(a,b))
+ pp = MyPrettyPrinter()
+ pp.print("VMRS (pid: %d)"%(pid))
+
+ num = llist_foreach(val, region_t, "head", lambda a,b: self.region_callback(pp, a,b), inclusive=False)
+ if not num:
+ pp.print("no regions")
--- /dev/null
+import gdb
+from .utils import pid_argument, llist_foreach
+from .pp import MyPrettyPrinter
+from .structs.pcb import ProcInfo
+from .structs.scheduler import Scheduler
+from .commands import LunadbgCommand
+
+class ProcessDump(LunadbgCommand):
+ """Dump the state of Lunaix PCB"""
+ def __init__(self) -> None:
+ super().__init__("proc")
+
+ def invoke(self, argument: str, from_tty: bool) -> None:
+ pp = MyPrettyPrinter()
+ ProcInfo.process_at(argument).print_detailed(pp)
+
+
+class SchedulerDump(LunadbgCommand):
+ """Dump the state of Lunaix process table"""
+ def __init__(self) -> None:
+ super().__init__("sched")
+ self._parser.add_argument("print_type")
+ self._parser.add_argument("-l", "--long-list",
+ required=False, default=False, action='store_true')
+
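+    # Example invocations (illustrative):
+    #   (gdb) sched threads -l
+    #   (gdb) sched procs
+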
+ def invoke(self, argument: str, from_tty: bool) -> None:
+ args = self._parse_args(argument)
+ if args is None:
+ return
+
+ sched_context = gdb.parse_and_eval("&sched_ctx")
+ sched = Scheduler(sched_context)
+
+ sched.print_detailed(MyPrettyPrinter(), args.print_type, args.long_list)
\ No newline at end of file
--- /dev/null
+import gdb
+from ..pp import PrettyPrintable
+
+
+class KernelStruct(PrettyPrintable):
+ def __init__(self, gdb_inferior: gdb.Value, impl) -> None:
+ super().__init__()
+ self._kstruct = gdb_inferior.cast(impl.get_type())
+
+ def get_struct_instance(self):
+ return self._kstruct
+
+ @staticmethod
+ def get_type() -> gdb.Type :
+ return gdb.lookup_type("void").pointer()
\ No newline at end of file
--- /dev/null
+from gdb import Type, Value, lookup_type
+from . import KernelStruct
+
+class PageStruct(KernelStruct):
+ def __init__(self, gdb_inferior: Value) -> None:
+ super().__init__(gdb_inferior, PageStruct)
+ self.ref = self._kstruct["ref_counts"]
+ self.attr = self._kstruct["attr"]
+
+ @staticmethod
+ def get_type() -> Type:
+ return lookup_type("struct pp_struct").pointer()
+
--- /dev/null
+from gdb import Type, Value, lookup_type
+from . import KernelStruct
+from ..arch import PageTableHelper as TLB
+
+class PageTableEntry(KernelStruct):
+ def __init__(self, gdb_inferior: Value, level, va) -> None:
+ self.level = level
+ self.pg_mask = self.get_page_mask()
+ self.va = va & ~self.pg_mask
+ self.base_page_order = TLB.translation_shift_bits(-1)
+
+ ptep = gdb_inferior[va // (self.pg_mask + 1)].address
+ super().__init__(ptep, PageTableEntry)
+
+        try:
+            self.pte = int(self._kstruct.dereference())
+        except Exception:
+            # the backing page table may itself be unmapped here
+            self.pte = 0
+
+ self.pa = TLB.physical_pfn(self.pte) << self.base_page_order
+
+ def print_abstract(self, pp, *args):
+ self.print_detailed(pp, *args)
+
+ def print_simple(self, pp, *args):
+ self.print_detailed(pp, *args)
+
+ def print_detailed(self, pp, *args):
+ if self.null():
+ pp.print("<Mapping not exists>")
+ return
+
+ page_order = TLB.translation_shift_bits(self.level)
+ page_order -= self.base_page_order
+
+ pp.printf("Level %d Translation", TLB.translation_level(self.level))
+
+ pp2 = pp.next_level()
+ pp2.printf("Entry value: 0x%x", self.pte)
+ pp2.printf("Virtual address: 0x%x (ptep=0x%x)", self.va, int(self._kstruct))
+ pp2.printf("Mapped physical: 0x%x (order %d page)", self.pa, page_order)
+ pp2.printf("Page Protection: %s", self.get_page_prot())
+ pp2.printf("Present: %s", self.present())
+ pp2.printf("Huge: %s", TLB.huge_page(self.pte))
+ pp2.print("Attributes:")
+ pp2.next_level().print(self.get_attributes())
+
+ @staticmethod
+ def get_type() -> Type:
+ return lookup_type("unsigned int").pointer()
+
+ def get_page_mask(self):
+ return PageTableEntry.get_level_shift(self.level) - 1
+
+ def present(self):
+ return TLB.mapping_present(self.pte)
+
+ def get_page_prot(self):
+ return ''.join(TLB.protections(self.pte))
+
+ def get_attributes(self):
+ attrs = [ self.get_page_prot(),
+ *TLB.other_attributes(self.level, self.pte) ]
+ return ', '.join(attrs)
+
+ def null(self):
+ return TLB.null_mapping(self.pte)
+
+ def same_kind_to(self, pte2):
+ return TLB.same_kind(self.pte, pte2.pte)
+
+ @staticmethod
+ def get_level_shift(level):
+ return 1 << TLB.translation_shift_bits(level)
+
+ @staticmethod
+ def max_page_count():
+ return 1 << (TLB.vaddr_width() - TLB.translation_shift_bits(-1))
+
+class PageTable():
+ def __init__(self) -> None:
+ self.levels = [
+ Value(0xFFFFF000).cast(PageTableEntry.get_type()),
+ Value(0xFFC00000).cast(PageTableEntry.get_type())
+ ]
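+        # These bases assume the classic i386 recursive mapping: 0xFFFFF000
+        # is the page directory mapped onto itself, and 0xFFC00000 is the
+        # 4MiB window through which every page table is visible.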
+
+ def get_pte(self, va, level=-1) -> PageTableEntry:
+ return PageTableEntry(self.levels[level], level, va)
+
+ def __print_pte_ranged(self, pp, pte_head, pte_tail):
+ start_va = pte_head.va
+ end_va = pte_tail.va
+ sz = end_va - start_va
+ if not (pte_head.null() and pte_tail.null()):
+ pp.printf("0x%016x...0x%016x, 0x%016x [0x%08x] %s",
+ start_va, end_va - 1, pte_head.pa, sz,
+ pte_head.get_attributes())
+ else:
+ pp.printfa("0x{:016x}...0x{:016x}, {:^18s} [0x{:08x}] <no mapping>",
+ start_va, end_va - 1, "n/a", sz)
+
+ def print_ptes_between(self, pp, va, va_end, level=-1):
+ shift = PageTableEntry.get_level_shift(level)
+ n = (va_end - va) // shift
+ self.print_ptes(pp, va, n, level)
+
+ def print_ptes(self, pp, va, pte_num, level=-1):
+ head_pte = PageTableEntry(self.levels[level], level, va)
+ curr_pte = head_pte
+ va = head_pte.va
+
+ pp.printfa("{:^18s} {:^18s} {:^18s} {:^10s} {:^20s}",
+ "va-start", "va-end", "physical", "size", "attributes")
+ for i in range(1, pte_num):
+ va_ = va + i * PageTableEntry.get_level_shift(level)
+ curr_pte = PageTableEntry(self.levels[level], level, va_)
+
+ if not curr_pte.same_kind_to(head_pte):
+ self.__print_pte_ranged(pp, head_pte, curr_pte)
+ head_pte = curr_pte
+
+ if curr_pte != head_pte:
+ self.__print_pte_ranged(pp, head_pte, curr_pte)
\ No newline at end of file
--- /dev/null
+import gdb
+from ..pp import MyPrettyPrinter
+from . import KernelStruct
+from ..utils import pid_argument, llist_foreach, get_dnode_path
+
+
+class ProcInfo(KernelStruct):
+ def __init__(self, gdb_inferior: gdb.Value) -> None:
+ super().__init__(gdb_inferior, ProcInfo)
+
+ @staticmethod
+ def get_type() -> gdb.Type:
+ return gdb.lookup_type("struct proc_info").pointer()
+
+ def print_detailed(self, pp : MyPrettyPrinter, *args):
+ self.print_abstract(pp)
+
+ pp2 = (pp.next_level()
+ .clear_prefix())
+ self.print_simple(pp2)
+
+ def print_simple(self, pp : MyPrettyPrinter, *args):
+ pp.print_field(self._kstruct, 'pid')
+ pp.print_field(self._kstruct, 'pgid')
+ pp.print_field(self._kstruct, 'parent', fmt="(pid=%d)", cast=lambda v: v["pid"])
+ pp.print_field(self._kstruct, 'cwd', cast=lambda v: get_dnode_path(v))
+ pp.print_field(self._kstruct, 'created', '+%dms')
+ pp.print_field(self._kstruct, 'exit_code')
+ pp.print_field(self._kstruct, 'thread_count')
+
+ pp.printf("active thread: (tid=%d)", self._kstruct['th_active']['tid'])
+
+ def print_abstract(self, pp : MyPrettyPrinter, *args):
+ pid = self._kstruct['pid']
+ ppid = self._kstruct['parent']['pid']
+ cmd = self._kstruct['cmd']
+ cmd = cmd.string() if cmd != 0 else ''
+ state = ProcInfo.get_state(self._kstruct["state"])
+
+ pp.print(f"pid={pid}, ppid={ppid}, cmd='{cmd}', {state}")
+
+
+ PS_READY = 0
+ PS_RUNNING = 1
+ PS_TERMNAT = 2
+ PS_DESTROY = 4
+ PS_PAUSED = 8
+ PS_BLOCKED = 16
+ PS_CREATED = 32
+
+ @staticmethod
+ def get_state(state_t):
+ if (state_t == ProcInfo.PS_READY):
+ return "ready"
+ if (state_t == ProcInfo.PS_RUNNING):
+ return "running"
+ if (state_t & (ProcInfo.PS_TERMNAT | ProcInfo.PS_DESTROY)):
+ return "terminated"
+ if (state_t & ProcInfo.PS_BLOCKED):
+ return "blocked"
+ if (state_t & ProcInfo.PS_PAUSED):
+ return "paused"
+ return "<unknown> (0x%x)"%(state_t)
+
+ @staticmethod
+ def process_at(pid):
+ return ProcInfo(gdb.parse_and_eval(pid_argument(pid)))
+
+
\ No newline at end of file
--- /dev/null
+from gdb import Type, Value, lookup_type
+from . import KernelStruct
+from ..utils import get_dnode_path
+
+class MemRegion(KernelStruct):
+ def __init__(self, gdb_inferior: Value) -> None:
+ super().__init__(gdb_inferior, MemRegion)
+ self.__attr = self._kstruct["attr"]
+
+ @staticmethod
+ def get_type() -> Type:
+ return lookup_type("struct mm_region").pointer()
+
+ def print_abstract(self, pp, *args):
+ self.print_detailed(pp, *args)
+
+ def print_simple(self, pp, *args):
+ self.print_detailed(pp, *args)
+
+ def print_detailed(self, pp, *args):
+ pp.print( "0x%x...0x%x [0x%x]"%(
+ self._kstruct['start'], self._kstruct['end'],
+ self._kstruct['end'] - self._kstruct['start']))
+
+ pp.printf("attributes: %s (0x%x)", ", ".join([self.get_vmr_kind(), self.get_protection()]), self.__attr)
+
+ file = self._kstruct["mfile"]
+ if file == 0:
+ pp.print("anonymous region")
+ else:
+ pp.print("file mapped:")
+ ppp = pp.next_level()
+ ppp.print("dnode: %s @0x%x"%(get_dnode_path(file["dnode"]), file))
+ ppp.print("range: 0x%x+0x%x"%(self._kstruct["foff"], self._kstruct["flen"]))
+
+ def get_protection(self):
+ attr_str = []
+ if (self.__attr & (1 << 2)):
+ attr_str.append("R")
+ if (self.__attr & (1 << 3)):
+ attr_str.append("W")
+ if (self.__attr & (1 << 4)):
+ attr_str.append("X")
+ return ''.join(attr_str)
+
+ def get_vmr_kind(self):
+ """
+ #define REGION_TYPE_CODE (1 << 16)
+ #define REGION_TYPE_GENERAL (2 << 16)
+ #define REGION_TYPE_HEAP (3 << 16)
+ #define REGION_TYPE_STACK (4 << 16)
+ """
+ types = ["exec", "data", "heap", "stack"]
+ attr = ((self.__attr >> 16) & 0xf) - 1
+ if attr >= len(types):
+ return "unknown kind %d"%attr
+ return types[attr]
+
+
+
\ No newline at end of file
--- /dev/null
+import gdb
+from . import KernelStruct
+from .pcb import ProcInfo
+from .thread import ThreadStruct
+from ..utils import llist_foreach
+
+class Scheduler(KernelStruct):
+ def __init__(self, gdb_inferior: gdb.Value) -> None:
+ super().__init__(gdb_inferior, Scheduler)
+
+ self._current_t = gdb.parse_and_eval("current_thread")
+ self._current_p = gdb.parse_and_eval("__current")
+
+ @staticmethod
+ def get_type() -> gdb.Type:
+ return gdb.lookup_type("struct scheduler").pointer()
+
+ def __print_thread_cb(self, v, pp, long_list):
+ pi = ThreadStruct(v)
+ pi.print_abstract(pp)
+ if long_list:
+ pi.print_simple(pp.next_level())
+ pp.print()
+
+
+ def __print_threads(self, pp, long_list):
+ pp.print("# of threads:", self._kstruct["ttable_len"])
+ pp.printf("scheduled: pid=%d, tid=%d", self._current_p['pid'], self._current_t['tid'] )
+ pp.print()
+
+ pp2 = pp.next_level()
+ plist = self._kstruct["threads"]
+ llist_foreach(plist, ThreadStruct.get_type(), "sched_sibs",
+ lambda i,v: self.__print_thread_cb(v, pp2, long_list))
+ pp.print()
+
+
+ def __print_proc_cb(self, v, pp, long_list):
+ pi = ProcInfo(v)
+ pi.print_abstract(pp)
+ if long_list:
+ pi.print_simple(pp.next_level())
+
+
+ def __print_processes(self, pp, long_list = False):
+ pp.print("# of process:", self._kstruct["ptable_len"])
+ pp.printf("scheduled: pid=%d", self._current_p['pid'])
+ pp.print()
+
+ pp2 = pp.next_level()
+ plist = self._kstruct["proc_list"]
+ llist_foreach(plist, ProcInfo.get_type(), "tasks",
+ lambda i,v: self.__print_proc_cb(v, pp2, long_list))
+
+
+ def print_detailed(self, pp, *args):
+ print_type = args[0]
+ print_longlist = args[1]
+ if print_type == 'procs':
+ self.__print_processes(pp, print_longlist)
+ elif print_type == 'threads':
+ self.__print_threads(pp, print_longlist)
+ else:
+ pp.print("Unknown print type:", print_type)
+
+
+ def print_abstract(self, pp, *args):
+ self.print_detailed(pp, *args)
+
+
+ def print_simple(self, pp, *args):
+ self.print_detailed(pp, *args)
\ No newline at end of file
--- /dev/null
+from gdb import Type, Value, lookup_type
+from . import KernelStruct
+from ..pp import MyPrettyPrinter
+
+class SignalContext(KernelStruct):
+ __SIGNUM = 16
+ def __init__(self, gdb_inferior: Value) -> None:
+ super().__init__(gdb_inferior, SignalContext)
+
+ @staticmethod
+ def get_type() -> Type:
+ return lookup_type("struct sigctx").pointer()
+
+ def print_abstract(self, pp : MyPrettyPrinter, *args):
+ sigactive = self._kstruct["sig_active"]
+ sigpending = SignalHelper.get_sig_bitmap(self._kstruct["sig_pending"])
+
+ pp.print(f"sig: handling={sigactive}, pending=[{sigpending}]")
+
+ def print_simple(self, pp : MyPrettyPrinter, *args):
+ pp.print_field(self._kstruct, "sig_active")
+ pp.print_field(self._kstruct, "sig_pending", cast=SignalHelper.get_sig_bitmap)
+ pp.print_field(self._kstruct, "sig_mask", cast=SignalHelper.get_sig_bitmap)
+
+ order = []
+ active = int(self._kstruct['sig_active'])
+ sig_order = self._kstruct["sig_order"]
+ while active != 0:
+ order.append(str(active))
+ active = int(sig_order[active])
+
+ pp.print("nestings:", ' -> '.join(reversed(order)))
+
+ def print_detailed(self, pp, *args):
+ self.print_simple(pp)
+
+
+
+class SignalHelper:
+
+ @staticmethod
+ def get_sig_bitmap(sigbmp):
+ if sigbmp == 0:
+ return '<None>'
+ v = []
+ i = 0
+ while sigbmp != 0:
+ if sigbmp & 1 != 0:
+ v.append(str(i))
+ sigbmp = sigbmp >> 1
+ i+=1
+ return ",".join(v)
\ No newline at end of file
--- /dev/null
+import gdb
+from . import KernelStruct
+from .pcb import ProcInfo
+from .signal import SignalContext
+
+class ThreadStruct(KernelStruct):
+ def __init__(self, gdb_inferior: gdb.Value) -> None:
+ super().__init__(gdb_inferior, ThreadStruct)
+
+ self.__sigctx = SignalContext(self._kstruct["sigctx"].address)
+
+ @staticmethod
+ def get_type() -> gdb.Type:
+ return gdb.lookup_type("struct thread").pointer()
+
+ def print_abstract(self, pp, *args):
+ tid = self._kstruct['tid']
+ pid = self._kstruct['process']['pid']
+ thactive = self._kstruct['process']['th_active']
+ state = ProcInfo.get_state(self._kstruct['state'])
+
+ notes = f"(acting, {state})" if thactive == self._kstruct else f"({state})"
+ pp.print(f"Thread: tid={int(tid)}, pid={int(pid)}", notes)
+
+ self.__sigctx.print_abstract(pp.next_level())
+
+ def print_detailed(self, pp, *args):
+ self.print_abstract(pp)
+
+ pp2 = pp.next_level()
+ self.print_simple(pp2)
+
+ pp3 = pp2.next_level()
+ pp2.print("Containing Process")
+ pp3.print(ProcInfo(self._kstruct['process']))
+
+ self.__sigctx.print_detailed(pp2)
+
+
+ def print_simple(self, pp, *args):
+ pp2 = pp.next_level()
+ pp2.print_field(self._kstruct, 'created', '+%dms')
+ pp2.print_field(self._kstruct, 'syscall_ret')
+ pp2.print_field(self._kstruct, 'exit_val')
+ pp2.print_field(self._kstruct, 'kstack')
+ self.__sigctx.print_simple(pp2)
class SymbolAccesser:
def __init__(self, sym) -> None:
self.sym = f"({sym})"
+ self.__sym = gdb.parse_and_eval(self.sym)
def deref_and_access(self, members):
return gdb.parse_and_eval(f"{self.sym}->{members}")
def access(self, members):
return gdb.parse_and_eval(f"{self.sym}.{members}")
+
+ def __getitem__(self, index):
+ return self.__sym[index]
+
+ def value(self):
+ return self.__sym
@staticmethod
- def exported(domain, sym_name):
- name = f"*__SYMEXPORT_Z{domain.value}_{sym_name}"
+ def exported(domain, namespace, sym_name):
+ name = f"*__SYMEXPORT_Z{domain.value}_N{namespace}_S{sym_name}"
+ return LunaixSymbols.SymbolAccesser(name)
+
+ @staticmethod
+ def debug_sym(namespace, sym_name):
+ name = f"*__SYMEXPORT_Z{SymbolDomain.DEBUG}_N{namespace}_S{sym_name}"
return LunaixSymbols.SymbolAccesser(name)
\ No newline at end of file
print(f"[{time_str}] <L{self.log_level[lvl]}> {log.string()}")
def invoke(self, argument: str, from_tty: bool) -> None:
- log_recs = LunaixSymbols.exported(SymbolDomain.DEBUG, "kprecs")
+ log_recs = LunaixSymbols.exported(SymbolDomain.DEBUG, "kprintf", "kprecs")
head = log_recs.deref_and_access("kp_ents.ents").address
ent_type = gdb.lookup_type("struct kp_entry").pointer()
- llist_foreach(head, ent_type, lambda a,b: self.syslog_entry_callback(a, b))
\ No newline at end of file
+ llist_foreach(head, ent_type, "ents", lambda a,b: self.syslog_entry_callback(a, b))
\ No newline at end of file
if not argument:
return "__current"
else:
- return f"sched_ctx._procs[({argument})]"
+ return f"sched_ctx.procs[({argument})]"
-def llist_foreach(head, container_type, cb):
+def llist_foreach(head: gdb.Value, container_type: gdb.Type, field, cb, inclusive=True):
c = head
i = 0
- while (c["next"] != head):
- el = c["next"].cast(container_type)
+ offset = gdb.Value(0).cast(container_type)[field].address
+ offset_p = int(offset)
+
+ if not inclusive:
+ c = c["next"]
+ if c == head:
+ return 0
+
+ while (True):
+ current = gdb.Value(int(c) - offset_p)
+ el = current.cast(container_type)
cb(i, el)
c = c["next"]
- i+=1
\ No newline at end of file
+ i+=1
+ if c == head:
+ break
+ return i
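+
+# Illustrative use of the container_of-style walk above: iterate every
+# thread registered with the scheduler via its `sched_sibs` link (the same
+# pattern scheduler.py uses):
+#
+#   th_t = gdb.lookup_type("struct thread").pointer()
+#   llist_foreach(threads_head, th_t, "sched_sibs",
+#                 lambda i, th: print(th["tid"]))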
+
+def get_dnode_name(dnode):
+ return dnode['name']['value'].string()
+
+def get_dnode_path(dnode):
+ components = []
+ current = dnode
+ while (current != 0 and current != current['parent']):
+ components.append(get_dnode_name(current))
+ current = current['parent']
+ if len(components) == 0:
+ components.append('')
+ components.append('')
+ return '/'.join(reversed(components))
\ No newline at end of file
},
"regions": [
{
- "name": "kernel_stack",
- "start": "3@1M",
- "size": "1@1M",
+ "name": "kstack_area",
+ "start": "1@1M",
+ "size": "3@1M",
"stk_align": 16
},
{
},
{
"name": "usr_stack",
- "size": "1@4M",
+ "size": "64@page",
"stk_align": 16
},
{
--- /dev/null
+*.o
+build/
+.vscode/
\ No newline at end of file
ls
signal_demo
cat
-stat
\ No newline at end of file
+stat
+test_pthread
\ No newline at end of file
int
main(int argc, const char** argv)
{
-
mkdir("/dev");
mkdir("/sys");
mkdir("/task");
waitpid(pid, &err, 0);
if (WEXITSTATUS(err)) {
- printf("shell exit abnormally (%d)", err);
+ printf("shell exit abnormally (%d)\n", err);
}
+ printf("init exiting\n");
+
return err;
}
\ No newline at end of file
--- /dev/null
+#include <lunaix/syscallid.h>
+
+#define LUNAIX_SYSCALL 33
+#define regsize 4
+
+ .struct 8
+saved_registers:
+ .struct saved_registers + 5 * regsize
+id:
+ .struct id + regsize
+a1:
+ .struct a1 + regsize
+a2:
+ .struct a2 + regsize
+a3:
+ .struct a3 + regsize
+a4:
+ .struct a4 + regsize
+a5:
+
+.section .text
+ .type do_lunaix_syscall, @function
+ .global do_lunaix_syscall
+ do_lunaix_syscall:
+ push %ebp
+ movl %esp, %ebp
+
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %edi
+ pushl %esi
+
+ movl id(%esp), %eax
+ movl a1(%esp), %ebx
+ movl a2(%esp), %ecx
+ movl a3(%esp), %edx
+ movl a4(%esp), %edi
+ movl a5(%esp), %esi
+
+ int $LUNAIX_SYSCALL
+
+ popl %esi
+ popl %edi
+ popl %edx
+ popl %ecx
+ popl %ebx
+
+ leave
+ ret
\ No newline at end of file
--- /dev/null
+#ifndef __LUNAIX_OSDEPS_SYSCALL_H
+#define __LUNAIX_OSDEPS_SYSCALL_H
+
+#include <lunaix/syscallid.h>
+
+extern unsigned long
+do_lunaix_syscall(unsigned long call_id, ...);
+
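+/*
+ * Illustrative call sites (sketch): up to five arguments are forwarded to
+ * the registers loaded by the assembly stub, e.g.
+ *
+ *   tid_t tid = (tid_t)do_lunaix_syscall(__SYSCALL_th_self);
+ *   do_lunaix_syscall(__SYSCALL_th_exit, (void*)0);
+ */
+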
+#endif /* __LUNAIX_OSDEPS_SYSCALL_H */
--- /dev/null
+#ifndef __LUNAIX_PTHREAD_H
+#define __LUNAIX_PTHREAD_H
+
+#include <lunaix/threads.h>
+
+typedef unsigned int pthread_t;
+
+typedef struct {
+ // TODO
+} pthread_attr_t;
+
+int
+pthread_create(pthread_t* thread,
+ const pthread_attr_t* attr,
+ void *(*start_routine)(void*), void* arg);
+
+int
+pthread_detach(pthread_t thread);
+
+void
+pthread_exit(void *value_ptr);
+
+int
+pthread_join(pthread_t thread, void **value_ptr);
+
+int
+pthread_kill(pthread_t thread, int sig);
+
+pthread_t pthread_self(void);
+
+
+
+#endif /* __LUNAIX_PTHREAD_H */
extern int
sigprocmask(int how, const sigset_t* set, sigset_t* oldset);
+int
+pthread_sigmask(int how, const sigset_t *restrict set,
+ sigset_t *restrict oset);
+
#endif /* __LUNAIX_SIGNAL_H */
int
printf(const char* fmt, ...)
{
- char buf[1024];
+ char buf[512];
va_list args;
va_start(args, fmt);
- int n = __vprintf_internal(buf, fmt, 1024, args);
+ int n = __vprintf_internal(buf, fmt, 512, args);
va_end(args);
return write(stdout, buf, n);
--- /dev/null
+#include <lunaix/syscall.h>
+#include <pthread.h>
+
+static void*
+__pthread_routine_wrapper(void *(*start_routine)(void*), void* arg)
+{
+ void* ret = start_routine(arg);
+
+ do_lunaix_syscall(__SYSCALL_th_exit, ret);
+
+ return ret; // should not reach
+}
+
+int
+pthread_create(pthread_t* thread,
+ const pthread_attr_t* attr,
+ void *(*start_routine)(void*), void* arg)
+{
+    // FIXME attr is currently unused
+
+ struct uthread_info th_info;
+ int ret = do_lunaix_syscall(__SYSCALL_th_create, thread, &th_info, __pthread_routine_wrapper, NULL);
+
+ if (ret) {
+ return ret;
+ }
+
+    // FIXME we should encapsulate these parameters into a struct
+    // and pass it as a single thread param.
+
+    // th_stack_top points at the 16-byte slot reserved by the kernel:
+    // slot [0] holds the kernel-injected param (NULL here), and slots
+    // [1] and [2] land as the wrapper's two cdecl arguments.
+    void** th_stack = (void**) th_info.th_stack_top;
+    th_stack[1] = (void*)start_routine;
+    th_stack[2] = arg;
+
+ return ret;
+}
+
+int
+pthread_detach(pthread_t thread)
+{
+ return do_lunaix_syscall(__SYSCALL_th_detach, thread);
+}
+
+void
+pthread_exit(void *value_ptr)
+{
+ do_lunaix_syscall(__SYSCALL_th_exit, value_ptr);
+}
+
+int
+pthread_join(pthread_t thread, void **value_ptr)
+{
+ return do_lunaix_syscall(__SYSCALL_th_join, thread, value_ptr);
+}
+
+int
+pthread_kill(pthread_t thread, int sig)
+{
+ return do_lunaix_syscall(__SYSCALL_th_kill, thread, sig);
+}
+
+pthread_t
+pthread_self(void)
+{
+ return do_lunaix_syscall(__SYSCALL_th_self);
+}
--- /dev/null
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/*
+ Test payloads
+*/
+
+static void*
+__print_and_sleep_randsec(void* value)
+{
+ pthread_t tid = pthread_self();
+ printf("thread %d: gets number %d\n", tid, (int)value);
+
+ int fd = open("/dev/rand", O_RDONLY | O_DIRECT);
+ if (fd < 0) {
+ printf("thread %d: failed to get random source: %d\n", tid, errno);
+ return NULL;
+ }
+
+ unsigned char rand = 0;
+ if (read(fd, &rand, sizeof(rand)) != 1) {
+ printf("thread %d: failed to get random number: %d\n", tid, errno);
+ close(fd);
+ return NULL;
+ }
+
+ rand = rand % 30;
+
+ printf("thread %d: going to sleep %ds\n", tid, rand);
+ sleep(rand);
+ close(fd);
+ printf("thread %d: exit\n", tid);
+ return NULL;
+}
+
+static void*
+__print_and_sleep_seq(void* value)
+{
+ pthread_t tid = pthread_self();
+ printf("thread %d: gets number %d\n", tid, (int)value);
+
+ int second = (int)value % 30;
+
+ printf("thread %d: going to sleep %ds\n", tid, second);
+ sleep(second);
+
+ printf("thread %d: exit\n", tid);
+ return NULL;
+}
+
+static void*
+__print_and_sleep(void* value)
+{
+ pthread_t tid = pthread_self();
+ printf("thread %d: gets number %d\n", tid, (int)value);
+
+ sleep(1);
+ printf("thread %d: exit\n", tid);
+ return NULL;
+}
+
+long long __counter_shared = 0;
+
+static void*
+__inc_number(void* value)
+{
+ for (int i = 0; i < 10000000; i++)
+ {
+ __counter_shared++;
+ }
+
+ printf("thread %d: exit\n", pthread_self());
+ return NULL;
+}
+
+static void*
+__spawn_and_quit(void* value)
+{
+    // quit right away
+ printf("thread %d: exit\n", pthread_self());
+ return NULL;
+}
+
+/*
+ Test cases
+*/
+
+static void
+spawn_detached_thread(void* (*fn)(void *), int amount)
+{
+ do {
+ int err;
+ pthread_t created;
+ for (int i = 0; i < amount; i++) {
+ err = pthread_create(&created, NULL, fn, (void*)i);
+ if (err) {
+ printf("unable to create thread: %d\n", err);
+ continue;
+ }
+ if((err = pthread_detach(created))) {
+ printf("failed to detach: %d\n", err);
+ }
+ printf("created %d-th\n", i);
+ }
+ } while(0);
+}
+
+static void
+pthread_test_rand_sleep(int param)
+{
+ printf("spawning %d threads\n", param);
+ spawn_detached_thread(__print_and_sleep_randsec, param);
+ // wait for max 30 seconds
+ printf("wait for completion\n");
+ sleep(30);
+}
+
+static void
+pthread_test_seq_sleep(int param)
+{
+ printf("spawning %d threads\n", param);
+ spawn_detached_thread(__print_and_sleep_seq, param);
+ // wait for max 30 seconds
+ printf("wait for completion\n");
+ sleep(30);
+}
+
+static void
+pthread_test_join(int param)
+{
+ int err;
+ pthread_t created;
+ void* v;
+ for (int i = 0; i < param; i++)
+ {
+ err = pthread_create(&created, NULL, __print_and_sleep, (void*)i);
+ if (err) {
+ printf("unable to create thread: %d\n", err);
+ }
+
+ pthread_join(created, &v);
+ }
+}
+
+
+static void
+pthread_test_shared_race(int param)
+{
+ __counter_shared = 0;
+
+ spawn_detached_thread(__inc_number, param);
+
+ sleep(10);
+ printf("counter val: %ld\n", __counter_shared);
+}
+
+static void
+pthread_test_quit(int param)
+{
+ spawn_detached_thread(__spawn_and_quit, param);
+ sleep(5);
+}
+
+
+#define run_test(testn, note, ...) \
+ do { \
+ printf("** [%s] test start\n", note); \
+ pthread_test_##testn(__VA_ARGS__); \
+ printf("** [%s] test passed\n"); \
+ } while (0)
+
+int main()
+{
+ run_test(rand_sleep, "rand_sleep5", 5);
+ run_test(rand_sleep, "rand_sleep10", 10);
+ run_test(rand_sleep, "rand_sleep50", 50);
+
+ run_test(seq_sleep, "seq_sleep50", 50);
+ run_test(seq_sleep, "seq_sleep100", 100);
+ run_test(seq_sleep, "seq_sleep200", 200);
+
+ run_test(join, "join5", 5);
+ run_test(join, "join20", 20);
+
+ run_test(quit, "quit10", 10);
+ run_test(quit, "quit50", 50);
+ run_test(quit, "quit100", 100);
+
+    // FIXME not good: this panics the kernel upon exit, needs investigation
+ run_test(shared_race, "shared_race10", 10);
+ run_test(shared_race, "shared_race40", 40);
+
+ // TODO test pthread + signal
+ printf("All test passed.\n");
+}
\ No newline at end of file