+ struct hbucket* slot;
+ struct v_inode *pos, *n, *found = NULL;
+
+ cache_atomic_read(&sb->i_cache,
+ {
+ slot = &sb->i_cache.pool[i_id & VFS_HASH_MASK];
+
+ hashtable_bucket_foreach(slot, pos, n, hash_list)
+ {
+ if (pos->id != i_id) {
+ continue;
+ }
+
+ lru_use_one(inode_lru, &pos->lru);
+ found = pos;
+ break;
+ }
+ });
+
+ return found;
+}
+
+/**
+ * Insert an inode into its inode cache.
+ *
+ * Passes the cache returned by inode_cache(inode), the inode's id and the
+ * inode's hash_list node to vfs_vncache_add. The id is presumably the hash
+ * key used for later lookups -- confirm against vfs_vncache_add.
+ *
+ * @param inode the inode to insert
+ */
+void
+vfs_i_addhash(struct v_inode* inode)
+{
+ vfs_vncache_add(inode_cache(inode), inode->id, &inode->hash_list);
+}
+
+/**
+ * Allocate and initialise a fresh inode belonging to the superblock `sb`.
+ *
+ * The inode object is taken from inode_pile. If the first attempt fails,
+ * half of inode_lru is evicted and the allocation is retried once. The
+ * object is then zeroed, its lock and list heads are initialised, the
+ * superblock's init_inode hook is run, the three timestamps are set to the
+ * current unix time, and finally the inode is assigned to `sb` and
+ * registered with inode_lru.
+ *
+ * @param sb owning superblock; sb->ops.init_inode must be set (asserted)
+ * @return the new inode, or NULL when no inode object could be obtained
+ *         even after eviction
+ */
+struct v_inode*
+vfs_i_alloc(struct v_superblock* sb)
+{
+    struct v_inode* inode;
+
+    assert(sb->ops.init_inode);
+
+    inode = cake_grab(inode_pile);
+    if (!inode) {
+        // pile exhausted: make room by evicting from the LRU, retry once
+        lru_evict_half(inode_lru);
+        inode = cake_grab(inode_pile);
+    }
+
+    if (!inode) {
+        return NULL;
+    }
+
+    memset(inode, 0, sizeof(*inode));
+    mutex_init(&inode->lock);
+    llist_init_head(&inode->xattrs);
+    llist_init_head(&inode->aka_dnodes);
+
+    // file-system specific setup runs before the timestamps are stamped,
+    // so the values below override anything the hook wrote to them
+    sb->ops.init_inode(sb, inode);
+
+    inode->ctime = clock_unixtime();
+    inode->mtime = inode->ctime;
+    inode->atime = inode->ctime;
+
+    vfs_i_assign_sb(inode, sb);
+    lru_use_one(inode_lru, &inode->lru);
+
+    return inode;
+}
+
+/**
+ * Tear down an inode and hand its memory back to inode_pile.
+ *
+ * In order: the page cache (if any) is released and freed, the inode's
+ * destruct hook (if set) is invoked, vfs_sb_unref is called on the owning
+ * superblock, the inode is unlinked from its hash list and from inode_lru,
+ * and the object itself is released.
+ *
+ * @param inode the inode to free; it is released to inode_pile on return
+ */
+void
+vfs_i_free(struct v_inode* inode)
+{
+ if (inode->pg_cache) {
+ pcache_release(inode->pg_cache);
+ vfree(inode->pg_cache);
+ }
+
+ // There is no need to sync the inode here.
+ // If an inode can be freed, it must have been properly closed,
+ // hence it must have been synced already.
+ if (inode->destruct) {
+ inode->destruct(inode);
+ }
+
+ vfs_sb_unref(inode->sb);
+
+ // detach from the lookup structures before the memory is given back
+ hlist_delete(&inode->hash_list);
+ lru_remove(inode_lru, &inode->lru);
+
+ cake_release(inode_pile, inode);
+}
+
+/* ---- System call definition and support ---- */
+
+// Create the name if it does not already exist.
+#define FLOC_MAYBE_MKNAME 1
+
+// The name must not already exist (EEXIST otherwise); it is then created.
+#define FLOC_MKNAME 2
+
+// Do not follow a symbolic link.
+#define FLOC_NOFOLLOW 4
+
+/**
+ * Fetch the descriptor entry that `fd` refers to in the current process's
+ * fd table.
+ *
+ * @param fd   descriptor number; rejected up front if TEST_FD(fd) fails
+ * @param fd_s out: receives the table entry (may be NULL). Left untouched
+ *             when `fd` fails TEST_FD.
+ * @return 0 when a non-NULL entry was found, EBADF otherwise
+ */
+int
+vfs_getfd(int fd, struct v_fd** fd_s)
+{
+    struct v_fdtable* table;
+    struct v_fd* entry;
+
+    if (!TEST_FD(fd)) {
+        return EBADF;
+    }
+
+    table = __current->fdtable;
+
+    // the slot is read while the table lock is held
+    lock_fdtable(table);
+    entry = table->fds[fd];
+    unlock_fdtable(table);
+
+    *fd_s = entry;
+
+    return entry ? 0 : EBADF;
+}
+
+/**
+ * Create a node for `dnode` under `parent` through the parent's create hook.
+ *
+ * @param parent inode of the containing directory; parent->ops->create is
+ *               called unconditionally
+ * @param dnode  dnode naming the node to create
+ * @param itype  type of the node, passed through to the create hook
+ * @param dev    currently unused by this function
+ * @return 0 on success, otherwise the non-zero status of the create hook
+ */
+static int
+__vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
+            unsigned int itype, dev_t* dev)
+{
+    // the hook's status is the only outcome, so hand it back unchanged
+    return parent->ops->create(parent, dnode, itype);
+}
+
+/**
+ * Result of a file lookup performed by __vfs_try_locate_file.
+ */
+struct file_locator {
+ // dnode of the directory that contains the located name
+ struct v_dnode* dir;
+ // dnode of the located file, or the newly allocated dnode when `fresh`
+ struct v_dnode* file;
+ // true when the name did not exist and a new dnode was allocated for it;
+ // while set, `dir` is still locked (see __floc_try_unlock)
+ bool fresh;
+};
+
+/**
+ * @brief Release the directory lock held by a file locator, if any.
+ *
+ * When the file to be located does not exist and one of the FLOC_*MKNAME
+ * flags is set, the parent dnode stays locked (floc->fresh is true) until
+ * the caller has finished creating the file. This helper drops that lock;
+ * it does nothing when floc->fresh is false.
+ *
+ * @param floc locator filled in by __vfs_try_locate_file
+ */
+static inline void
+__floc_try_unlock(struct file_locator* floc)
+{
+    if (!floc->fresh) {
+        // the directory lock was already dropped by the lookup itself
+        return;
+    }
+
+    assert(floc->dir);
+    unlock_dnode(floc->dir);
+}
+
+/**
+ * Locate the file named by `path`, optionally preparing a new dnode for it
+ * when it does not exist.
+ *
+ * The parent directory is resolved first and locked, then the last path
+ * component is looked up inside it.
+ *
+ *  - Name exists: the directory lock is dropped and floc->dir / floc->file
+ *    are filled in with floc->fresh == false. If FLOC_MKNAME was requested
+ *    this is an error (EEXIST) instead.
+ *  - Name missing and FLOC_MAYBE_MKNAME or FLOC_MKNAME is set: the directory
+ *    is checked for writability, a new dnode is allocated and added to the
+ *    dcache, and the function returns with floc->fresh == true and the
+ *    directory STILL LOCKED. The caller must finish creating the file and
+ *    then call __floc_try_unlock.
+ *  - Name missing and no MKNAME flag: ENOENT.
+ *
+ * On every error return the directory lock has been released, and `floc`
+ * holds dir == NULL, file == NULL, fresh == false, so a stray
+ * __floc_try_unlock on it cannot unlock the directory a second time.
+ *
+ * @param path    path to resolve
+ * @param floc    out: lookup result, see above
+ * @param options bitwise OR of FLOC_* flags
+ * @return 0 on success, otherwise the error code of the failing step
+ */
+static int
+__vfs_try_locate_file(const char* path,
+                      struct file_locator* floc,
+                      int options)
+{
+    char name_str[VFS_NAME_MAXLEN];
+    struct v_dnode *fdir, *file;
+    struct hstr name = HSTR(name_str, 0);
+    int errno, woption = 0;
+
+    if ((options & FLOC_NOFOLLOW)) {
+        woption |= VFS_WALK_NOFOLLOW;
+        // strip it so that `options` only carries the FLOC_*MKNAME bits,
+        // which makes the `!options` test below meaningful
+        options &= ~FLOC_NOFOLLOW;
+    }
+
+    // start from a well-defined result so error paths never expose
+    // indeterminate pointers or a stale `fresh` flag
+    floc->dir = NULL;
+    floc->file = NULL;
+    floc->fresh = false;
+
+    name_str[0] = 0;
+    errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
+    if (errno) {
+        return errno;
+    }
+
+    lock_dnode(fdir);
+
+    errno = vfs_walk(fdir, name.value, &file, NULL, woption);
+
+    if (errno && errno != ENOENT) {
+        goto error;
+    }
+
+    if (!errno && (options & FLOC_MKNAME)) {
+        errno = EEXIST;
+        goto error;
+    }
+
+    if (!errno) {
+        // the file is present, no need to hold the directory lock
+        unlock_dnode(fdir);
+        goto done;
+    }
+
+    // errno == ENOENT
+    if (!options) {
+        goto error;
+    }
+
+    errno = vfs_check_writable(fdir);
+    if (errno) {
+        goto error;
+    }
+
+    file = vfs_d_alloc(fdir, &name);
+
+    if (!file) {
+        errno = ENOMEM;
+        goto error;
+    }
+
+    vfs_dcache_add(fdir, file);
+
+    // only now is there a new dnode and a directory lock for the caller
+    // to own; setting this earlier would leave `fresh` true on the ENOMEM
+    // path, where the lock has already been released
+    floc->fresh = true;
+
+done:
+    floc->dir = fdir;
+    floc->file = file;
+
+    return errno;
+
+error:
+    unlock_dnode(fdir);
+    return errno;
+}
+
+
+/**
+ * Decide whether the current user may unlink `dnode`.
+ *
+ *  - If the svtx bit is NOT set in the inode's acl, the answer is whether
+ *    the inode itself is writable.
+ *  - If svtx is set, the owner of the inode (current_euid() matches its
+ *    uid) may always unlink; anybody else needs write access to both the
+ *    inode and its parent's inode.
+ *
+ * @param dnode the dnode to be unlinked; dnode->inode and
+ *              dnode->parent->inode are dereferenced unconditionally
+ * @return true if unlinking is permitted
+ */
+static bool
+__check_unlinkable(struct v_dnode* dnode)
+{
+    struct v_dnode* dir = dnode->parent;
+    int mode = dnode->inode->acl;
+    bool self_writable, dir_writable;
+
+    // both checks are evaluated up front, regardless of which is needed
+    self_writable = check_allow_write(dnode->inode);
+    dir_writable = check_allow_write(dir->inode);
+
+    if (fsacl_test(mode, svtx)) {
+        if (current_euid() == dnode->inode->uid) {
+            return true;
+        }
+
+        return self_writable && dir_writable;
+    }
+
+    return self_writable;
+}
+
+/**
+ * Open the file at `path` and install it in the current process's fd table.
+ *
+ * The file is located with __vfs_try_locate_file. With FO_CREATE, a missing
+ * file is created as a regular file (VFS_IFFILE) through __vfs_mknod. The
+ * dnode is then opened, and a descriptor entry is filled in and stored in
+ * the slot returned by vfs_alloc_fdslot.
+ *
+ * Failure handling:
+ *  - If anything fails while the locator is still "fresh" (new dnode
+ *    allocated, parent directory locked), the new dnode is freed and the
+ *    directory is unlocked before returning.
+ *  - The descriptor entry is allocated before any file is created, so an
+ *    out-of-memory condition is reported as ENOMEM without side effects on
+ *    the file system, and the entry is released on every later failure.
+ *
+ * @param path    path of the file to open
+ * @param options FO_* / O_* open flags; also stored as the entry's flags
+ * @return the new file descriptor on success, otherwise the error code of
+ *         the failing step
+ */
+int
+vfs_do_open(const char* path, int options)
+{
+    int errno, fd, loptions = 0;
+    struct v_dnode *dentry, *file;
+    struct v_file* ofile = NULL;
+    struct file_locator floc;
+    struct v_inode* inode;
+    struct v_fd* fd_s;
+
+    // NOTE(review): FO_NOFOLLOW is ignored whenever FO_CREATE is set because
+    // of the `else` -- confirm whether that is intended.
+    if ((options & FO_CREATE)) {
+        loptions |= FLOC_MAYBE_MKNAME;
+    } else if ((options & FO_NOFOLLOW)) {
+        loptions |= FLOC_NOFOLLOW;
+    }
+
+    errno = __vfs_try_locate_file(path, &floc, loptions);
+    if (errno) {
+        return errno;
+    }
+
+    file = floc.file;
+    dentry = floc.dir;
+
+    // from here on a fresh locator owns a directory lock and a new dnode,
+    // so every failure must go through the cleanup labels below
+    if ((errno = vfs_alloc_fdslot(&fd))) {
+        goto abort_locate;
+    }
+
+    fd_s = cake_grab(fd_pile);
+    if (!fd_s) {
+        errno = ENOMEM;
+        goto abort_locate;
+    }
+    memset(fd_s, 0, sizeof(*fd_s));
+
+    if (floc.fresh) {
+        errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
+        if (errno) {
+            goto abort_fd;
+        }
+
+        // the file is now properly created; the directory can be released
+        __floc_try_unlock(&floc);
+    }
+
+    if ((errno = vfs_open(file, &ofile))) {
+        cake_release(fd_pile, fd_s);
+        return errno;
+    }
+
+    inode = ofile->inode;
+    lock_inode(inode);
+
+    if ((options & O_TRUNC)) {
+        file->inode->fsize = 0;
+    }
+
+    if (vfs_get_dtype(inode->itype) == DT_DIR) {
+        ofile->f_pos = 0;
+    }
+
+    fd_s->file = ofile;
+    fd_s->flags = options;
+    __current->fdtable->fds[fd] = fd_s;
+
+    unlock_inode(inode);
+
+    return fd;
+
+abort_fd:
+    cake_release(fd_pile, fd_s);
+abort_locate:
+    if (floc.fresh) {
+        // undo the half-made name: drop the new dnode, then the dir lock
+        vfs_d_free(file);
+        __floc_try_unlock(&floc);
+    }
+    return errno;
+}
+
+/**
+ * `open` system call entry point: forwards to vfs_do_open and converts its
+ * result with DO_STATUS_OR_RETURN.
+ *
+ * Presumably DO_STATUS_OR_RETURN distinguishes an error code from a valid
+ * descriptor -- confirm against its definition.
+ */
+__DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
+{
+ int errno = vfs_do_open(path, options);
+ return DO_STATUS_OR_RETURN(errno);
+}
+
+/**
+ * `close` system call entry point.
+ *
+ * Looks up the descriptor entry for `fd`, closes the underlying file, then
+ * releases the entry to fd_pile and clears the table slot. If the lookup or
+ * the close fails, the entry and slot are left untouched. In all cases the
+ * resulting status is reported through DO_STATUS.
+ */
+__DEFINE_LXSYSCALL1(int, close, int, fd)
+{
+    struct v_fd* fd_s;
+    int status;
+
+    status = vfs_getfd(fd, &fd_s);
+
+    if (!status) {
+        status = vfs_close(fd_s->file);
+    }
+
+    if (!status) {
+        // the file is closed: retire the entry and free up the slot
+        cake_release(fd_pile, fd_s);
+        __current->fdtable->fds[fd] = 0;
+    }
+
+    return DO_STATUS(status);
+}
+
+void
+__vfs_readdir_callback(struct dir_context* dctx,
+ const char* name,
+ const int len,
+ const int dtype)
+{
+ struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
+ strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));