From: Minep Date: Sat, 18 Nov 2023 18:47:07 +0000 (+0000) Subject: feat: IO polling for file descriptor X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/commitdiff_plain/78cd005fac540973751b5a108c37a715bc64b5a2?ds=sidebyside;hp=9eed27f6f2f002145667fb4abfc5e476b53630e5 feat: IO polling for file descriptor refactor: remove the need of a separate property to track mmaped file length we just bring in an entire page-length of content that contains what we care and ignore all that garbage comes with it. fix: sync the dirty pages in file-maped mem region upon unmapping. chore: update readme chore: remove objdump -S of entire kernel. --- diff --git a/README.md b/README.md index 4acc346..2be89a9 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ LunaixOS - 一个简单的,详细的,POSIX兼容的(但愿!),带有 + 内存管理与按需分页(Demand Paging) + 键盘输入 + 多进程 -+ 50个常见的Linux/POSIX系统调用([附录1](#appendix1)) ++ 54个常见的Linux/POSIX系统调用([附录1](#appendix1)) + 用户模式 + 信号机制 + PCI 3.0 @@ -255,6 +255,10 @@ qemu-img create -f vdi machine/disk0.vdi 128M 2. `mmap(2)` 2. `munmap(2)` 2. `execve(2)` +3. `poll(2)` (via `pollctl`) +3. `epoll_create(2)` (via `pollctl`) +3. `epoll_ctl(2)` (via `pollctl`) +3. `epoll_wait(2)` (via `pollctl`) **LunaixOS自有** @@ -262,6 +266,7 @@ qemu-img create -f vdi machine/disk0.vdi 128M 2. `geterrno` 3. `realpathat` 4. `syslog` +5. 
`pollctl` ( **※**:该系统调用暂未经过测试 ) diff --git a/lunaix-os/arch/i386/mm/pfault.c b/lunaix-os/arch/i386/mm/pfault.c index 165062c..848f215 100644 --- a/lunaix-os/arch/i386/mm/pfault.c +++ b/lunaix-os/arch/i386/mm/pfault.c @@ -119,10 +119,7 @@ intr_routine_page_fault(const isr_param* param) memset((void*)ptr, 0, PG_SIZE); - int errno = 0; - if (mseg_off < hit_region->flen) { - errno = file->ops->read_page(file->inode, (void*)ptr, mfile_off); - } + int errno = file->ops->read_page(file->inode, (void*)ptr, mfile_off); if (errno < 0) { ERROR("fail to populate page (%d)", errno); diff --git a/lunaix-os/arch/i386/syscall.S b/lunaix-os/arch/i386/syscall.S index 5a6f6a0..bbfc43c 100644 --- a/lunaix-os/arch/i386/syscall.S +++ b/lunaix-os/arch/i386/syscall.S @@ -63,6 +63,7 @@ .long __lxsys_munmap .long __lxsys_execve .long __lxsys_fstat /* 55 */ + .long __lxsys_pollctl 2: .rept __SYSCALL_MAX - (2b - 1b)/4 .long 0 diff --git a/lunaix-os/includes/lunaix/device.h b/lunaix-os/includes/lunaix/device.h index f650821..affdb10 100644 --- a/lunaix-os/includes/lunaix/device.h +++ b/lunaix-os/includes/lunaix/device.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -95,13 +96,16 @@ struct device { + /* -- device structing -- */ + u32_t magic; struct llist_header siblings; struct llist_header children; struct device* parent; - mutex_t lock; - // TODO investigate event polling + /* -- device state -- */ + + mutex_t lock; struct hstr name; struct devident ident; @@ -111,6 +115,10 @@ struct device char name_val[DEVICE_NAME_SIZE]; void* underlay; + /* -- polling -- */ + int poll_evflags; + poll_evt_q pollers; + struct { // TODO Think about where will they fit. 
@@ -122,6 +130,7 @@ struct device int (*read_page)(struct device* dev, void* buf, size_t offset); int (*write_page)(struct device* dev, void* buf, size_t offset); int (*exec_cmd)(struct device* dev, u32_t req, va_list args); + int (*poll)(struct device* dev); } ops; };
diff --git a/lunaix-os/includes/lunaix/iopoll.h b/lunaix-os/includes/lunaix/iopoll.h
new file mode 100644
index 0000000..027f6fb
--- /dev/null
+++ b/lunaix-os/includes/lunaix/iopoll.h
@@ -0,0 +1,72 @@
+#ifndef __LUNAIX_IOPOLL_H
+#define __LUNAIX_IOPOLL_H
+
+#include
+#include
+#include
+
+#include
+
+/* Queue of pollers listening on one event source (e.g. a device). */
+typedef struct llist_header poll_evt_q;
+
+struct poll_opts
+{
+    struct pollfd** upoll;
+    int upoll_num;
+    int timeout;
+};
+
+/* One registered poller: ties a file to the process polling it. */
+struct iopoller
+{
+    poll_evt_q evt_listener; // link into the event source's queue
+    struct v_file* file_ref; // file being polled
+    pid_t pid;               // process that installed the poller
+};
+
+/* Per-process polling context. */
+struct iopoll
+{
+    struct iopoller** pollers; // slot table, indexed by poll descriptor
+    int n_poller;              // number of occupied slots
+};
+
+/* Subscribe `listener` to the event queue `source`. */
+static inline void
+iopoll_listen_on(struct iopoller* listener, poll_evt_q* source)
+{
+    llist_append(source, &listener->evt_listener);
+}
+
+void
+iopoll_wake_pollers(poll_evt_q* pollers_q);
+
+void
+iopoll_init(struct iopoll* ctx);
+
+void
+iopoll_free(pid_t pid, struct iopoll* ctx);
+
+int
+iopoll_install(pid_t pid, struct iopoll* pollctx, struct v_fd* fd);
+
+int
+iopoll_remove(pid_t pid, struct iopoll* ctx, int pld);
+
+/* Mark the bits of `evt` as raised in pinfo->revents. */
+static inline void
+poll_setrevt(struct poll_info* pinfo, int evt)
+{
+    // clearing the bits right before setting them again was a no-op
+    pinfo->revents |= evt;
+}
+
+/* Return the bits of `evt` that the caller asked for in pinfo->events. */
+static inline int
+poll_checkevt(struct poll_info* pinfo, int evt)
+{
+    return pinfo->events & evt;
+}
+
+#endif /* __LUNAIX_IOPOLL_H */
diff --git a/lunaix-os/includes/lunaix/mm/mm.h b/lunaix-os/includes/lunaix/mm/mm.h index 433ecfc..2af2a9f 100644 --- a/lunaix-os/includes/lunaix/mm/mm.h +++ b/lunaix-os/includes/lunaix/mm/mm.h @@ -56,7 +56,7 @@ struct mm_region // mapped file offset off_t foff; // mapped file length - u32_t flen; + u32_t flen; // XXX it seems that we don't need this actually.. 
ptr_t start; ptr_t end; diff --git a/lunaix-os/includes/lunaix/process.h b/lunaix-os/includes/lunaix/process.h index 10dc5ec..552ac4f 100644 --- a/lunaix-os/includes/lunaix/process.h +++ b/lunaix-os/includes/lunaix/process.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -110,26 +111,32 @@ struct proc_info struct v_fdtable* fdtable; struct v_dnode* cwd; pid_t pgid; + + struct iopoll pollctx; }; extern volatile struct proc_info* __current; +#define resume_process(proc) (proc)->state = PS_READY +#define pause_process(proc) (proc)->state = PS_PAUSED +#define block_process(proc) (proc)->state = PS_BLOCKED + static inline void block_current() { - __current->state = PS_BLOCKED; + block_process(__current); } static inline void pause_current() { - __current->state = PS_PAUSED; + pause_process(__current); } static inline void resume_current() { - __current->state = PS_RUNNING; + resume_process(__current); } /** diff --git a/lunaix-os/includes/usr/lunaix/poll.h b/lunaix-os/includes/usr/lunaix/poll.h new file mode 100644 index 0000000..8fed581 --- /dev/null +++ b/lunaix-os/includes/usr/lunaix/poll.h @@ -0,0 +1,28 @@ +#ifndef __LUNAIX_UPOLL_H +#define __LUNAIX_UPOLL_H + +struct poll_info +{ + int pld; + short events; + short revents; + int flags; +}; + +#define _POLLIN (1) +#define _POLLPRI (1 << 1) +#define _POLLOUT (1 << 2) +#define _POLLRDHUP (1 << 3) +#define _POLLERR (1 << 4) +#define _POLLHUP (1 << 5) +#define _POLLNVAL (1 << 6) + +#define _SPOLL_ADD 0 +#define _SPOLL_RM 1 +#define _SPOLL_WAIT 2 +#define _SPOLL_WAIT_ANY 3 + +#define _POLLEE_ALWAYS 1 +#define _POLLEE_RM_ON_ERR (1 << 1) + +#endif /* __LUNAIX_UPOLL_H */ diff --git a/lunaix-os/includes/usr/lunaix/syscallid.h b/lunaix-os/includes/usr/lunaix/syscallid.h index aa1180d..ddf396b 100644 --- a/lunaix-os/includes/usr/lunaix/syscallid.h +++ b/lunaix-os/includes/usr/lunaix/syscallid.h @@ -64,6 +64,7 @@ #define __SYSCALL_execve 54 #define __SYSCALL_fstat 55 +#define __SYSCALL_pollctl 
56 #define __SYSCALL_MAX 0x100
diff --git a/lunaix-os/kernel/device/poll.c b/lunaix-os/kernel/device/poll.c
new file mode 100644
index 0000000..e7f0f87
--- /dev/null
+++ b/lunaix-os/kernel/device/poll.c
@@ -0,0 +1,391 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Number of poller slots (poll descriptors) available to each process. */
+#define MAX_POLLER_COUNT 16
+
+/* Drop poll descriptor `pld` from the current process' polling context. */
+static inline void
+current_rmiopoll(int pld)
+{
+    iopoll_remove(__current->pid, &__current->pollctx, pld);
+}
+
+/*
+ * Look up the poller registered under `pld`.
+ * Returns NULL when the slot table is missing, `pld` is out of range, or
+ * the slot is empty.
+ */
+static struct iopoller*
+iopoll_getpoller(struct iopoll* ctx, int pld)
+{
+    if (!ctx->pollers || pld < 0 || pld >= MAX_POLLER_COUNT) {
+        return NULL;
+    }
+
+    return ctx->pollers[pld];
+}
+
+/*
+ * Poll a single descriptor of the current process.
+ *
+ * The device's poll() result is matched against pinfo->events and the
+ * matching bits are recorded in pinfo->revents; a negative poll() result
+ * is recorded as _POLLERR.
+ *
+ * Returns 1 if an event or error was recorded, 0 otherwise (including when
+ * `pld` does not name a poller).
+ */
+static inline int
+__do_poll(struct poll_info* pinfo, int pld)
+{
+    struct iopoller* poller = iopoll_getpoller(&__current->pollctx, pld);
+    if (!poller) {
+        return 0;
+    }
+
+    struct device* dev;
+    int evt = 0;
+
+    if ((dev = device_cast(poller->file_ref->inode->data))) {
+        // Keep the handler's result: it used to be discarded, leaving
+        // `evt` at 0 so that no event could ever be reported. A device
+        // without a poll handler simply has nothing to report.
+        if (dev->ops.poll) {
+            evt = dev->ops.poll(dev);
+        }
+    } else {
+        // TODO handle generic file
+        /*
+            N.B. In Linux, polling on any non-device mapped file causes the
+            poller to return immediately; in other words, the I/O signal on
+            such a file is always active, which makes it useless for
+            monitoring file modifications. Polling for such modifications
+            must go through the inotify_* API, which is not that elegant as
+            it breaks the nice consistency that *poll(2) should have. Let's
+            see what we can do in Lunaix.
+        */
+    }
+
+    if (evt < 0) {
+        poll_setrevt(pinfo, _POLLERR);
+        goto has_err;
+    }
+
+    if ((evt = poll_checkevt(pinfo, evt))) {
+        poll_setrevt(pinfo, evt);
+        goto has_event;
+    }
+
+    return 0;
+
+has_err:
+    if ((pinfo->flags & _POLLEE_RM_ON_ERR)) {
+        current_rmiopoll(pld);
+        return 1;
+    }
+    // otherwise the error is handled like an ordinary event below
+
+has_event:
+    // one-shot unless _POLLEE_ALWAYS: the poller is removed once it fires
+    if (!(pinfo->flags & _POLLEE_ALWAYS)) {
+        current_rmiopoll(pld);
+    }
+
+    return 1;
+}
+
+/*
+ * Poll every entry of `pinfos` once.
+ * Returns the number of entries that reported an event or error.
+ */
+static int
+__do_poll_round(struct poll_info* pinfos, int ninfo)
+{
+    int nc = 0;
+    for (int i = 0; i < ninfo; i++) {
+        struct poll_info* pinfo = &pinfos[i];
+
+        if (__do_poll(pinfo, pinfo->pld)) {
+            nc++;
+        }
+    }
+
+    return nc;
+}
+
+/*
+ * Poll every slot of the current process with the interest set in `pinfo`.
+ * On the first hit, stores the slot index in pinfo->pld and returns 1;
+ * returns 0 if no slot reported anything.
+ */
+static int
+__do_poll_all(struct poll_info* pinfo)
+{
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        if (!__do_poll(pinfo, i)) {
+            continue;
+        }
+
+        pinfo->pld = i;
+        return 1;
+    }
+
+    return 0;
+}
+
+#define fd2dev(fd) device_cast((fd)->file->inode->data)
+
+/* Find the first free slot in `ctx`; returns its index, or -1 if none. */
+static int
+__alloc_pld(struct iopoll* ctx)
+{
+    if (!ctx->pollers) {
+        return -1;
+    }
+
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        if (!ctx->pollers[i]) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Install a poller for each file descriptor in `ds`.
+ * Every entry of `ds` is overwritten in place with the poll descriptor
+ * assigned to it, or with the error code if it could not be installed.
+ * Returns the number of descriptors that could not be installed.
+ */
+static int
+__append_pollers(int* ds, int npoller)
+{
+    int err = 0, nc = 0;
+    struct v_fd* fd_s;
+    for (int i = 0; i < npoller; i++) {
+        int* fd = &ds[i];
+        if ((err = vfs_getfd(*fd, &fd_s))) {
+            *fd = err;
+            nc++;
+            continue;
+        }
+
+        int pld = iopoll_install(__current->pid, &__current->pollctx, fd_s);
+        if (pld < 0) {
+            nc++;
+        }
+
+        *fd = pld;
+    }
+
+    return nc;
+}
+
+/* Mark the current process as blocked and yield the CPU. */
+static void
+__wait_until_event(void)
+{
+    block_current();
+    sched_yieldk();
+}
+
+/*
+ * Set up an empty polling context.
+ * Slots are cleared explicitly because a NULL slot means "free".
+ */
+void
+iopoll_init(struct iopoll* ctx)
+{
+    ctx->pollers = valloc(sizeof(*ctx->pollers) * MAX_POLLER_COUNT);
+    ctx->n_poller = 0;
+
+    if (!ctx->pollers) {
+        return;
+    }
+
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        ctx->pollers[i] = NULL;
+    }
+}
+
+/* Release every poller owned by `ctx`, then the slot table itself. */
+void
+iopoll_free(pid_t pid, struct iopoll* ctx)
+{
+    if (!ctx->pollers) {
+        return;
+    }
+
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        struct iopoller* poller = ctx->pollers[i];
+        if (poller) {
+            vfs_pclose(poller->file_ref, pid);
+            llist_delete(&poller->evt_listener);
+            vfree(poller);
+        }
+    }
+
+    // The table is freed once, after the loop: freeing it inside the loop
+    // made later iterations read freed memory and free it again.
+    vfree(ctx->pollers);
+    ctx->pollers = NULL;
+    ctx->n_poller = 0;
+}
+
+/* Resume every process listening on `pollers_q` that proc_hanged() reports. */
+void
+iopoll_wake_pollers(poll_evt_q* pollers_q)
+{
+    struct iopoller *pos, *n;
+    llist_for_each(pos, n, pollers_q, evt_listener)
+    {
+        struct proc_info* proc = get_process(pos->pid);
+        if (proc_hanged(proc)) {
+            resume_process(proc);
+        }
+
+        assert(!proc_terminated(proc));
+    }
+}
+
+/*
+ * Remove the poller registered under `pld`: detach it from its event
+ * source, close its file reference and free the slot.
+ * Returns 0 on success, ENOENT if `pld` does not name a poller.
+ */
+int
+iopoll_remove(pid_t pid, struct iopoll* ctx, int pld)
+{
+    // `pld` can come straight from user space (_SPOLL_RM): range-check it
+    struct iopoller* poller = iopoll_getpoller(ctx, pld);
+    if (!poller) {
+        return ENOENT;
+    }
+
+    // unlink first, or the event source keeps a dangling listener
+    llist_delete(&poller->evt_listener);
+    vfs_pclose(poller->file_ref, pid);
+    vfree(poller);
+    ctx->pollers[pld] = NULL;
+    ctx->n_poller--;
+
+    return 0;
+}
+
+/*
+ * Register `fd`'s file in a free slot of `pollctx` on behalf of `pid`.
+ * Takes a reference on the file and, when the file is backed by a device,
+ * subscribes the new poller to that device's poller queue.
+ * Returns the poll descriptor, EMFILE if no slot is free, or ENOMEM if the
+ * poller cannot be allocated.
+ */
+int
+iopoll_install(pid_t pid, struct iopoll* pollctx, struct v_fd* fd)
+{
+    int pld = __alloc_pld(pollctx);
+    if (pld < 0) {
+        return EMFILE;
+    }
+
+    struct iopoller* iop = valloc(sizeof(struct iopoller));
+    if (!iop) {
+        // NOTE(review): assumes ENOMEM is defined alongside EMFILE/ENOENT
+        return ENOMEM;
+    }
+
+    *iop = (struct iopoller){
+        .file_ref = fd->file,
+        .pid = pid,
+    };
+
+    // self-link the node so llist_delete() stays safe even if the poller
+    // never joins a device queue (generic files)
+    llist_init_head(&iop->evt_listener);
+
+    vfs_ref_file(fd->file);
+    pollctx->pollers[pld] = iop;
+    pollctx->n_poller++;
+
+    struct device* dev;
+    if ((dev = fd2dev(fd))) {
+        iopoll_listen_on(iop, &dev->pollers);
+    } else {
+        // TODO handle generic file
+    }
+
+    return pld;
+}
+
+/*
+ * pollctl: single entry point of the polling facility.
+ *
+ *   _SPOLL_ADD       (int* ds, int nds)
+ *   _SPOLL_RM        (int pld)
+ *   _SPOLL_WAIT      (struct poll_info* pinfos, int npinfos, int timeout)
+ *   _SPOLL_WAIT_ANY  (struct poll_info* pinfo, int timeout)
+ *
+ * A negative timeout waits without a deadline. The timeout is added
+ * directly to clock_systime(), so it is presumably expressed in that
+ * clock's unit (TODO confirm).
+ *
+ * NOTE(review): user pointers are used without validation; the WAIT paths
+ * leave a positive count in `retcode`, so confirm that DO_STATUS() passes
+ * it through; and nothing visible here resumes a blocked waiter when only
+ * the deadline expires.
+ */
+__DEFINE_LXSYSCALL2(int, pollctl, int, action, va_list, va)
+{
+    int retcode = 0;
+    switch (action) {
+        case _SPOLL_ADD: {
+            int* ds = va_arg(va, int*);
+            int nds = va_arg(va, int);
+            retcode = __append_pollers(ds, nds);
+        } break;
+        case _SPOLL_RM: {
+            int pld = va_arg(va, int);
+            retcode = iopoll_remove(__current->pid, &__current->pollctx, pld);
+        } break;
+        case _SPOLL_WAIT: {
+            struct poll_info* pinfos = va_arg(va, struct poll_info*);
+            int npinfos = va_arg(va, int);
+            int timeout = va_arg(va, int);
+
+            time_t t1 = clock_systime() + timeout;
+            // assignment, not comparison: keep polling while nothing fired
+            while (!(retcode = __do_poll_round(pinfos, npinfos))) {
+                // stop once the deadline has passed
+                if (timeout >= 0 && t1 <= clock_systime()) {
+                    break;
+                }
+                __wait_until_event();
+            }
+        } break;
+        case _SPOLL_WAIT_ANY: {
+            struct poll_info* pinfo = va_arg(va, struct poll_info*);
+            int timeout = va_arg(va, int);
+
+            time_t t1 = clock_systime() + timeout;
+            while (!(retcode = __do_poll_all(pinfo))) {
+                if (timeout >= 0 && t1 <= clock_systime()) {
+                    break;
+                }
+                __wait_until_event();
+            }
+        } break;
+        default:
+            retcode = EINVAL;
+            break;
+    }
+
+    return DO_STATUS(retcode);
+}
diff --git a/lunaix-os/kernel/mm/mmap.c b/lunaix-os/kernel/mm/mmap.c index afb4246..d39269c 100644 --- a/lunaix-os/kernel/mm/mmap.c +++ b/lunaix-os/kernel/mm/mmap.c @@ -255,7 +255,10 @@ mem_unmap_region(ptr_t mnt, struct mm_region* region) ((vmr)->start > (addr) && ((addr) + (len)) > (vmr)->end) static void -__unmap_overlapped_cases(struct mm_region* vmr, ptr_t* addr, size_t* length) +__unmap_overlapped_cases(ptr_t mnt, + struct mm_region* vmr, + ptr_t* addr, + size_t* length) { // seg start, umapped segement start ptr_t seg_start = *addr, umps_start = 0; @@ -267,6 +270,8 @@ __unmap_overlapped_cases(struct mm_region* vmr, ptr_t* addr, size_t* length) if (CASE_HITI(vmr, seg_start, seg_len)) { size_t new_start = seg_start + seg_len; + + // Require a split if (new_start < vmr->end) { struct mm_region* region = region_dup(vmr); if (region->mfile) { @@ -293,8 +298,14 @@ __unmap_overlapped_cases(struct mm_region* vmr, ptr_t* addr, size_t* length) shrink = vmr->end - vmr->start; umps_len = shrink; umps_start = vmr->start; - } else { - fail("invalid case"); + } + + mem_sync_pages(mnt, vmr, vmr->start, umps_len, 0); + for (size_t i = 0; i < umps_len; i += PG_SIZE) { + ptr_t pa = vmm_del_mapping(mnt, vmr->start + i); + if (pa) { + pmm_free_page(vmr->proc_vms->pid, pa); + } } vmr->start += displ; @@ -331,7 +342,7 @@ mem_unmap(ptr_t mnt, vm_regions_t* regions, ptr_t addr, size_t length) while (&pos->head != regions && length) { n = container_of(pos->head.next, typeof(*pos), head); - __unmap_overlapped_cases(pos, &cur_addr, &length); + __unmap_overlapped_cases(mnt, pos, &cur_addr, &length); pos = n; } diff --git a/lunaix-os/kernel/process/sched.c 
b/lunaix-os/kernel/process/sched.c @@ -324,6 +324,8 @@ alloc_process() llist_init_head(&proc->children); llist_init_head(&proc->grp_member); llist_init_head(&proc->sleep.sleepers); + + iopoll_init(&proc->pollctx); waitq_init(&proc->waitqueue); sched_ctx._procs[i] = proc; @@ -373,6 +375,8 @@ destroy_process(pid_t pid) llist_delete(&proc->tasks); llist_delete(&proc->sleep.sleepers); + iopoll_free(pid, &proc->pollctx); + taskfs_invalidate(pid); if (proc->cwd) { diff --git a/lunaix-os/makefile b/lunaix-os/makefile index af07f60..779a38e 100644 --- a/lunaix-os/makefile +++ b/lunaix-os/makefile @@ -75,7 +75,6 @@ instable: all all-debug: bootable-debug @echo "Dumping the disassembled kernel code to $(kbuild_dir)/kdump.txt" - @i686-elf-objdump -S $(kbin) > $(kbuild_dir)/kdump.txt clean: @rm -rf $(kbuild_dir) || exit 1