+ 内存管理与按需分页(Demand Paging)
+ 键盘输入
+ 多进程
-+ 50个常见的Linux/POSIX系统调用([附录1](#appendix1))
++ 54个常见的Linux/POSIX系统调用([附录1](#appendix1))
+ 用户模式
+ 信号机制
+ PCI 3.0
2. `mmap(2)`
2. `munmap(2)`
2. `execve(2)`
+3. `poll(2)` (via `pollctl`)
+3. `epoll_create(2)` (via `pollctl`)
+3. `epoll_ctl(2)` (via `pollctl`)
+3. `epoll_wait(2)` (via `pollctl`)
**LunaixOS自有**
2. `geterrno`
3. `realpathat`
4. `syslog`
+5. `pollctl`
( **※**:该系统调用暂未经过测试 )
memset((void*)ptr, 0, PG_SIZE);
- int errno = 0;
- if (mseg_off < hit_region->flen) {
- errno = file->ops->read_page(file->inode, (void*)ptr, mfile_off);
- }
+ int errno = file->ops->read_page(file->inode, (void*)ptr, mfile_off);
if (errno < 0) {
ERROR("fail to populate page (%d)", errno);
.long __lxsys_munmap
.long __lxsys_execve
.long __lxsys_fstat /* 55 */
+ .long __lxsys_pollctl
2:
.rept __SYSCALL_MAX - (2b - 1b)/4
.long 0
#include <lunaix/ds/ldga.h>
#include <lunaix/ds/llist.h>
#include <lunaix/ds/mutex.h>
+#include <lunaix/iopoll.h>
#include <lunaix/types.h>
#include <usr/lunaix/device.h>
struct device
{
+ /* -- device structing -- */
+
u32_t magic;
struct llist_header siblings;
struct llist_header children;
struct device* parent;
- mutex_t lock;
- // TODO investigate event polling
+ /* -- device state -- */
+
+ mutex_t lock;
struct hstr name;
struct devident ident;
char name_val[DEVICE_NAME_SIZE];
void* underlay;
+ /* -- polling -- */
+ int poll_evflags;
+ poll_evt_q pollers;
+
struct
{
// TODO Think about where will they fit.
int (*read_page)(struct device* dev, void* buf, size_t offset);
int (*write_page)(struct device* dev, void* buf, size_t offset);
int (*exec_cmd)(struct device* dev, u32_t req, va_list args);
+ int (*poll)(struct device* dev);
} ops;
};
--- /dev/null
+#ifndef __LUNAIX_IOPOLL_H
+#define __LUNAIX_IOPOLL_H
+
+#include <lunaix/device.h>
+#include <lunaix/ds/llist.h>
+#include <lunaix/fs.h>
+
+#include <usr/lunaix/poll.h>
+
+typedef struct llist_header poll_evt_q;
+
+struct poll_opts /* NOTE(review): not referenced by any code visible in this patch -- confirm it is still needed */
+{
+ struct pollfd** upoll; /* presumably the user-supplied pollfd array; struct pollfd is not declared in the visible headers -- TODO confirm */
+ int upoll_num; /* presumably the number of entries in upoll -- TODO confirm */
+ int timeout; /* presumably the wait limit; unit not established here -- TODO confirm */
+};
+
+struct iopoller /* one registered poller: ties a file reference to the process id that polls it */
+{
+ poll_evt_q evt_listener; /* list node that iopoll_listen_on() appends to an event source's queue */
+ struct v_file* file_ref; /* the file being polled */
+ pid_t pid; /* id of the polling process; iopoll_wake_pollers() resolves it with get_process() */
+};
+
+struct iopoll /* per-process polling context (embedded in struct proc_info as pollctx) */
+{
+ struct iopoller** pollers; /* slot table indexed by poller descriptor (pld); a NULL slot is unused */
+ int n_poller; /* count of installed pollers */
+};
+
+/* Subscribe `listener` to the event queue `source` by appending its list node. */
+static inline void
+iopoll_listen_on(struct iopoller* listener, poll_evt_q* source)
+{
+    poll_evt_q* node = &listener->evt_listener;
+
+    llist_append(source, node);
+}
+
+void
+iopoll_wake_pollers(poll_evt_q*);
+
+void
+iopoll_init(struct iopoll*);
+
+void
+iopoll_free(pid_t, struct iopoll*);
+
+int
+iopoll_install(pid_t, struct iopoll*, struct v_fd*);
+
+int
+iopoll_remove(pid_t, struct iopoll*, int);
+
+/* Raise the bits of `evt` in the returned-events mask of `pinfo`. */
+static inline void
+poll_setrevt(struct poll_info* pinfo, int evt)
+{
+    /* Clearing the bits before OR-ing them back in is a no-op, so OR directly. */
+    pinfo->revents |= evt;
+}
+
+/* Return the subset of `evt` that the caller asked for in pinfo->events (0 if none). */
+static inline int
+poll_checkevt(struct poll_info* pinfo, int evt)
+{
+    int requested = pinfo->events;
+
+    return requested & evt;
+}
+
+#endif /* __LUNAIX_IOPOLL_H */
// mapped file offset
off_t foff;
// mapped file length
- u32_t flen;
+ u32_t flen; // XXX it seems that we don't need this actually..
ptr_t start;
ptr_t end;
#include <lunaix/clock.h>
#include <lunaix/ds/waitq.h>
#include <lunaix/fs.h>
+#include <lunaix/iopoll.h>
#include <lunaix/mm/mm.h>
#include <lunaix/mm/region.h>
#include <lunaix/signal.h>
struct v_fdtable* fdtable;
struct v_dnode* cwd;
pid_t pgid;
+
+ struct iopoll pollctx;
};
extern volatile struct proc_info* __current;
+#define resume_process(proc) (proc)->state = PS_READY
+#define pause_process(proc) (proc)->state = PS_PAUSED
+#define block_process(proc) (proc)->state = PS_BLOCKED
+
static inline void
block_current()
{
- __current->state = PS_BLOCKED;
+ block_process(__current);
}
static inline void
pause_current()
{
- __current->state = PS_PAUSED;
+ pause_process(__current);
}
static inline void
resume_current()
{
- __current->state = PS_RUNNING;
+ resume_process(__current);
}
/**
--- /dev/null
+#ifndef __LUNAIX_UPOLL_H
+#define __LUNAIX_UPOLL_H
+
+struct poll_info /* one entry exchanged with the pollctl wait actions */
+{
+ int pld; /* poller descriptor: index of the poller slot this entry refers to */
+ short events; /* mask of _POLL* events the caller is interested in */
+ short revents; /* mask of _POLL* events reported back to the caller */
+ int flags; /* _POLLEE_* behaviour bits */
+};
+
+#define _POLLIN (1)
+#define _POLLPRI (1 << 1)
+#define _POLLOUT (1 << 2)
+#define _POLLRDHUP (1 << 3)
+#define _POLLERR (1 << 4)
+#define _POLLHUP (1 << 5)
+#define _POLLNVAL (1 << 6)
+
+#define _SPOLL_ADD 0
+#define _SPOLL_RM 1
+#define _SPOLL_WAIT 2
+#define _SPOLL_WAIT_ANY 3
+
+#define _POLLEE_ALWAYS 1
+#define _POLLEE_RM_ON_ERR (1 << 1)
+
+#endif /* __LUNAIX_UPOLL_H */
#define __SYSCALL_execve 54
#define __SYSCALL_fstat 55
+#define __SYSCALL_pollctl 56
#define __SYSCALL_MAX 0x100
--- /dev/null
+#include <lunaix/clock.h>
+#include <lunaix/device.h>
+#include <lunaix/fs.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/process.h>
+#include <lunaix/sched.h>
+#include <lunaix/spike.h>
+#include <lunaix/syscall.h>
+#include <lunaix/syscall_utils.h>
+
+#define MAX_POLLER_COUNT 16
+
+/* Remove poller slot `pld` from the calling process's polling context. */
+static inline void
+current_rmiopoll(int pld)
+{
+    pid_t self = __current->pid;
+
+    iopoll_remove(self, &__current->pollctx, pld);
+}
+
+/*
+ * Look up the poller stored in slot `pld` of `ctx`.
+ * Returns NULL when `pld` is outside [0, MAX_POLLER_COUNT) or the slot is empty.
+ */
+static struct iopoller*
+iopoll_getpoller(struct iopoll* ctx, int pld)
+{
+    int in_range = (0 <= pld) && (pld < MAX_POLLER_COUNT);
+
+    return in_range ? ctx->pollers[pld] : NULL;
+}
+
+/*
+ * Poll one poller of the current process exactly once.
+ *
+ * pinfo: entry whose `events` mask is consulted and whose `revents` mask is
+ *        updated with whatever fired.
+ * pld:   poller descriptor (slot index) to examine.
+ *
+ * Returns 1 when an event of interest or an error was recorded in
+ * pinfo->revents, 0 otherwise (including when `pld` names no poller).
+ *
+ * Unless _POLLEE_ALWAYS is set the poller is one-shot and is removed once it
+ * has reported. With _POLLEE_RM_ON_ERR it is also removed on error.
+ */
+static inline int
+__do_poll(struct poll_info* pinfo, int pld)
+{
+    struct iopoller* poller = iopoll_getpoller(&__current->pollctx, pld);
+    if (!poller) {
+        return 0;
+    }
+
+    struct device* dev;
+    int evt = 0;
+
+    if ((dev = device_cast(poller->file_ref->inode->data))) {
+        // The driver's answer must be kept: it is the only source of `evt`.
+        // A driver without a poll op simply never reports anything.
+        if (dev->ops.poll) {
+            evt = dev->ops.poll(dev);
+        }
+    } else {
+        // TODO handle generic file
+        /*
+            N.B. In Linux, polling on any non-device mapped file causes the
+           poller to return immediately; in other words, the I/O signal on
+           such a file is always active, which makes it useless for
+           monitoring file modifications. Polling for such modifications
+           must go through the inotify_* API instead, which is not that
+           elegant as it breaks the nice consistency that *poll(2) should
+           have. Let's see what we can do in Lunaix.
+        */
+    }
+
+    // A negative result from the driver is treated as an error.
+    if (evt < 0) {
+        poll_setrevt(pinfo, _POLLERR);
+        goto has_err;
+    }
+
+    // Report only the events the caller asked for.
+    if ((evt = poll_checkevt(pinfo, evt))) {
+        poll_setrevt(pinfo, evt);
+        goto has_event;
+    }
+
+    return 0;
+
+has_err:
+    if ((pinfo->flags & _POLLEE_RM_ON_ERR)) {
+        current_rmiopoll(pld);
+        return 1;
+    }
+    // Otherwise an error is handled like any other reported event.
+
+has_event:
+    if (!(pinfo->flags & _POLLEE_ALWAYS)) {
+        current_rmiopoll(pld);
+    }
+
+    return 1;
+}
+
+/*
+ * Poll every entry of `pinfos` once.
+ *
+ * pinfos: array of `ninfo` entries; each names its poller through `pld`.
+ *
+ * Returns how many entries reported an event or an error.
+ */
+static int
+__do_poll_round(struct poll_info* pinfos, int ninfo)
+{
+    int nc = 0;
+
+    for (int i = 0; i < ninfo; i++) {
+        struct poll_info* pinfo = &pinfos[i];
+
+        if (__do_poll(pinfo, pinfo->pld)) {
+            nc++;
+        }
+    }
+
+    return nc;
+}
+
+/*
+ * Poll every slot of the current process in ascending order and stop at the
+ * first one that reports. Its descriptor is stored in pinfo->pld.
+ *
+ * Returns 1 when some poller reported, 0 when none did.
+ */
+static int
+__do_poll_all(struct poll_info* pinfo)
+{
+    int pld = 0;
+
+    while (pld < MAX_POLLER_COUNT) {
+        if (__do_poll(pinfo, pld)) {
+            pinfo->pld = pld;
+            return 1;
+        }
+        pld++;
+    }
+
+    return 0;
+}
+
+#define fd2dev(fd) device_cast((fd)->file->inode->data)
+
+/*
+ * Find the lowest unused poller slot of the current process.
+ * Returns the slot index, or -1 when all MAX_POLLER_COUNT slots are taken.
+ */
+static int
+__alloc_pld()
+{
+    int slot = 0;
+
+    while (slot < MAX_POLLER_COUNT && __current->pollctx.pollers[slot]) {
+        slot++;
+    }
+
+    return slot < MAX_POLLER_COUNT ? slot : -1;
+}
+
+/*
+ * Install a poller for each file descriptor in `ds`.
+ *
+ * ds:      in/out array of `npoller` ints. On entry each element is a file
+ *          descriptor; on return it holds the poller descriptor allocated
+ *          for it, or the error code of the step that failed.
+ *
+ * Returns the number of entries that could not be installed.
+ */
+static int
+__append_pollers(int* ds, int npoller)
+{
+    int failures = 0;
+    struct v_fd* fd_s;
+
+    for (int i = 0; i < npoller; i++) {
+        int status = vfs_getfd(ds[i], &fd_s);
+        int failed = status != 0;
+
+        if (!failed) {
+            status = iopoll_install(__current->pid, &__current->pollctx, fd_s);
+            failed = status < 0;
+        }
+
+        ds[i] = status;
+        failures += failed;
+    }
+
+    return failures;
+}
+
+/*
+ * Put the current process into the blocked state and give up the CPU.
+ * Control comes back once the process has been made runnable again
+ * (iopoll_wake_pollers() does so for pollers of a signalled source).
+ *
+ * Always returns 0. The function is declared int, so it must return a value.
+ */
+static int
+__wait_until_event()
+{
+    block_current();
+    sched_yieldk();
+
+    return 0;
+}
+
+/*
+ * Prepare an empty polling context: allocate the slot table and mark every
+ * slot unused.
+ *
+ * The table must start out all-NULL because a non-NULL slot means "poller
+ * installed" to the rest of this file; valloc() is not assumed to hand back
+ * zeroed memory.
+ *
+ * NOTE(review): on allocation failure ctx->pollers is left NULL and there is
+ * no way to report it through this void interface -- callers of the other
+ * iopoll_* functions do not check for that.
+ */
+void
+iopoll_init(struct iopoll* ctx)
+{
+    ctx->n_poller = 0;
+    ctx->pollers = valloc(sizeof(*ctx->pollers) * MAX_POLLER_COUNT);
+    if (!ctx->pollers) {
+        return;
+    }
+
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        ctx->pollers[i] = NULL;
+    }
+}
+
+/*
+ * Tear down the polling context of process `pid`: release every installed
+ * poller, then the slot table itself.
+ *
+ * The table is freed exactly once, after the loop. Freeing it inside the
+ * loop would make every later iteration read freed memory and free it again.
+ */
+void
+iopoll_free(pid_t pid, struct iopoll* ctx)
+{
+    if (!ctx->pollers) {
+        return;
+    }
+
+    for (int i = 0; i < MAX_POLLER_COUNT; i++) {
+        struct iopoller* poller = ctx->pollers[i];
+        if (!poller) {
+            continue;
+        }
+
+        // Only pollers of device files were ever linked into a queue (see
+        // iopoll_install). Unlink before the file reference is dropped.
+        if (device_cast(poller->file_ref->inode->data)) {
+            llist_delete(&poller->evt_listener);
+        }
+
+        vfs_pclose(poller->file_ref, pid);
+        vfree(poller);
+        ctx->pollers[i] = NULL;
+    }
+
+    vfree(ctx->pollers);
+    ctx->pollers = NULL;
+    ctx->n_poller = 0;
+}
+
+/*
+ * Make every process listening on `pollers_q` runnable again.
+ * Each listener's owner is resolved through its pid; owners that are
+ * currently hanged are resumed. A terminated owner must never be found on
+ * the queue, which is asserted.
+ */
+void
+iopoll_wake_pollers(poll_evt_q* pollers_q)
+{
+    struct iopoller *cur, *next;
+    llist_for_each(cur, next, pollers_q, evt_listener)
+    {
+        struct proc_info* owner = get_process(cur->pid);
+
+        if (proc_hanged(owner)) {
+            resume_process(owner);
+        }
+
+        assert(!proc_terminated(owner));
+    }
+}
+
+/*
+ * Remove the poller in slot `pld` of `ctx` and release what it holds.
+ *
+ * pid: process on whose behalf the file reference is closed.
+ * pld: poller descriptor; may come straight from user space (_SPOLL_RM), so
+ *      it is range-checked before it is used as an index.
+ *
+ * Returns 0 on success, EINVAL when `pld` is out of range, ENOENT when the
+ * slot is empty.
+ */
+int
+iopoll_remove(pid_t pid, struct iopoll* ctx, int pld)
+{
+    if (pld < 0 || pld >= MAX_POLLER_COUNT) {
+        return EINVAL;
+    }
+
+    struct iopoller* poller = ctx->pollers[pld];
+    if (!poller) {
+        return ENOENT;
+    }
+
+    // A poller of a device file sits on that device's listener queue (see
+    // iopoll_install). Unlink it before freeing, otherwise the queue keeps a
+    // dangling node that iopoll_wake_pollers() would later walk into.
+    if (device_cast(poller->file_ref->inode->data)) {
+        llist_delete(&poller->evt_listener);
+    }
+
+    vfs_pclose(poller->file_ref, pid);
+    vfree(poller);
+    ctx->pollers[pld] = NULL;
+    ctx->n_poller--;
+
+    return 0;
+}
+
+/*
+ * Install a poller on the file behind `fd` for process `pid`.
+ *
+ * Takes an extra reference on the file. When the file is a device, the new
+ * poller is also subscribed to that device's listener queue.
+ *
+ * Returns the allocated poller descriptor on success, EMFILE when every slot
+ * is in use, ENOMEM when the poller cannot be allocated.
+ *
+ * NOTE(review): the slot is taken from __current's context, not from
+ * `pollctx`; every caller visible here passes &__current->pollctx, so the two
+ * coincide -- confirm before calling this on behalf of another process.
+ */
+int
+iopoll_install(pid_t pid, struct iopoll* pollctx, struct v_fd* fd)
+{
+    int pld = __alloc_pld();
+    if (pld < 0) {
+        return EMFILE;
+    }
+
+    struct iopoller* iop = valloc(sizeof(*iop));
+    if (!iop) {
+        return ENOMEM;
+    }
+
+    *iop = (struct iopoller){
+        .file_ref = fd->file,
+        .pid = pid,
+    };
+
+    // Give the node a valid (empty) list state even if it never joins a
+    // queue, so it is never left as a zeroed, unlinked header.
+    llist_init_head(&iop->evt_listener);
+
+    vfs_ref_file(fd->file);
+    __current->pollctx.pollers[pld] = iop;
+    __current->pollctx.n_poller++;
+
+    struct device* dev;
+    if ((dev = fd2dev(fd))) {
+        iopoll_listen_on(iop, &dev->pollers);
+    } else {
+        // TODO handle generic file
+    }
+
+    return pld;
+}
+
+/*
+ * pollctl -- single entry point for I/O polling control.
+ *
+ * action selects the operation; the remaining arguments are read from `va`:
+ *   _SPOLL_ADD      (int* ds, int nds)
+ *       install a poller for each fd in ds; ds is rewritten in place with
+ *       poller descriptors / error codes.
+ *   _SPOLL_RM       (int pld)
+ *       remove one poller.
+ *   _SPOLL_WAIT     (struct poll_info* pinfos, int npinfos, int timeout)
+ *       wait until at least one of the listed pollers reports.
+ *   _SPOLL_WAIT_ANY (struct poll_info* pinfo, int timeout)
+ *       wait until any poller of the process reports; its descriptor is
+ *       stored in pinfo->pld.
+ *
+ * A negative timeout waits without limit. The timeout is added directly to
+ * clock_systime(), so it is in whatever unit that clock counts.
+ *
+ * NOTE(review): for the wait actions retcode holds the number of ready
+ * pollers (0 on timeout) when it reaches DO_STATUS() -- confirm that
+ * DO_STATUS() passes a positive count through rather than treating it as an
+ * error code.
+ * NOTE(review): a blocked waiter is only made runnable by
+ * iopoll_wake_pollers(); nothing visible here wakes it when a finite
+ * timeout elapses -- confirm.
+ */
+__DEFINE_LXSYSCALL2(int, pollctl, int, action, va_list, va)
+{
+    int retcode = 0;
+    switch (action) {
+        case _SPOLL_ADD: {
+            int* ds = va_arg(va, int*);
+            int nds = va_arg(va, int);
+            retcode = __append_pollers(ds, nds);
+        } break;
+        case _SPOLL_RM: {
+            int pld = va_arg(va, int);
+            retcode = iopoll_remove(__current->pid, &__current->pollctx, pld);
+        } break;
+        case _SPOLL_WAIT: {
+            struct poll_info* pinfos = va_arg(va, struct poll_info*);
+            int npinfos = va_arg(va, int);
+            int timeout = va_arg(va, int);
+
+            time_t t1 = clock_systime() + timeout;
+            // Assign (not compare): keep polling until something is ready.
+            while (!(retcode = __do_poll_round(pinfos, npinfos))) {
+                // Give up once the deadline has been reached.
+                if (timeout >= 0 && clock_systime() >= t1) {
+                    break;
+                }
+                __wait_until_event();
+            }
+        } break;
+        case _SPOLL_WAIT_ANY: {
+            struct poll_info* pinfo = va_arg(va, struct poll_info*);
+            int timeout = va_arg(va, int);
+
+            time_t t1 = clock_systime() + timeout;
+            // Assign (not compare): keep polling until something is ready.
+            while (!(retcode = __do_poll_all(pinfo))) {
+                // Give up once the deadline has been reached.
+                if (timeout >= 0 && clock_systime() >= t1) {
+                    break;
+                }
+                __wait_until_event();
+            }
+        } break;
+        default:
+            retcode = EINVAL;
+            break;
+    }
+
+    return DO_STATUS(retcode);
+}
\ No newline at end of file
((vmr)->start > (addr) && ((addr) + (len)) > (vmr)->end)
static void
-__unmap_overlapped_cases(struct mm_region* vmr, ptr_t* addr, size_t* length)
+__unmap_overlapped_cases(ptr_t mnt,
+ struct mm_region* vmr,
+ ptr_t* addr,
+ size_t* length)
{
// segment start, unmapped segment start
ptr_t seg_start = *addr, umps_start = 0;
if (CASE_HITI(vmr, seg_start, seg_len)) {
size_t new_start = seg_start + seg_len;
+
+ // Require a split
if (new_start < vmr->end) {
struct mm_region* region = region_dup(vmr);
if (region->mfile) {
shrink = vmr->end - vmr->start;
umps_len = shrink;
umps_start = vmr->start;
- } else {
- fail("invalid case");
+ }
+
+ mem_sync_pages(mnt, vmr, vmr->start, umps_len, 0);
+ for (size_t i = 0; i < umps_len; i += PG_SIZE) {
+ ptr_t pa = vmm_del_mapping(mnt, vmr->start + i);
+ if (pa) {
+ pmm_free_page(vmr->proc_vms->pid, pa);
+ }
}
vmr->start += displ;
while (&pos->head != regions && length) {
n = container_of(pos->head.next, typeof(*pos), head);
- __unmap_overlapped_cases(pos, &cur_addr, &length);
+ __unmap_overlapped_cases(mnt, pos, &cur_addr, &length);
pos = n;
}
llist_init_head(&proc->children);
llist_init_head(&proc->grp_member);
llist_init_head(&proc->sleep.sleepers);
+
+ iopoll_init(&proc->pollctx);
waitq_init(&proc->waitqueue);
sched_ctx._procs[i] = proc;
llist_delete(&proc->tasks);
llist_delete(&proc->sleep.sleepers);
+ iopoll_free(pid, &proc->pollctx);
+
taskfs_invalidate(pid);
if (proc->cwd) {
all-debug: bootable-debug
@echo "Dumping the disassembled kernel code to $(kbuild_dir)/kdump.txt"
- @i686-elf-objdump -S $(kbin) > $(kbuild_dir)/kdump.txt
clean:
@rm -rf $(kbuild_dir) || exit 1