3 * @author Lunaixsky (zelong56@gmail.com)
4 * @brief Lunaix virtual file system - an abstraction layer for all file system.
8 * @copyright Copyright (c) 2022
12 // Welcome to The Mountain O'Shit! :)
15 TODO vfs & device todos checklist
17 It is overseen by Twilight Sparkle ;)
19 1. Get inodes hooked into lru (CHECKED)
20 2. Get dnodes hooked into lru (CHECKED)
21 3. Get inodes properly hashed so they can be reused by underlying fs (CHECKED)
22 4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
23 [good idea] or a constructor/destructor pattern in cake allocator ?
24 5. (mount) Figure out a way to identify a busy mount point before unmount
25 maybe a unified mount_point structure that maintain a referencing
26 counter on any dnodes within the subtree? Such a counter will only
27 increment if a file is opened or a dnode is being used as working
28 directory and decrementing conversely. (CHECKED)
29 6. (mount) Ability to track all mount points (including sub-mounts)
30 so we can be confident to clean up everything when we
32 7. (mount) Figure out a way to acquire the device represented by a dnode.
33 so it can be used to mount. (e.g. we wish to get `struct device*`
34 out of the dnode at /dev/sda)
35 [tip] we should pay attention at twifs and add a private_data field
36 under struct v_dnode? (CHECKED)
37 8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
38 9. (mount) (future) Ability to mount anything? e.g. Linux can mount a disk
39 image file using a so called "loopback" pseudo device. Maybe
40 we can do similar thing in Lunaix? A block device emulation
41 above the regular file when we mount it on.
42 10. (device) device number (dev_t) allocation
43 [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
46 #include <klibc/string.h>
47 #include <lunaix/foptions.h>
48 #include <lunaix/fs.h>
49 #include <lunaix/mm/cake.h>
50 #include <lunaix/mm/valloc.h>
51 #include <lunaix/process.h>
52 #include <lunaix/spike.h>
53 #include <lunaix/syscall.h>
54 #include <lunaix/syscall_utils.h>
56 #include <lunaix/fs/twifs.h>
58 #include <usr/lunaix/dirent_defs.h>
// Inode "touch" kinds for __vfs_touch_inode: refresh atime vs mtime.
60 #define INODE_ACCESSED 0
61 #define INODE_MODIFY 1
// Slab ("cake") allocators for the core VFS objects; created in vfs_init.
63 static struct cake_pile* dnode_pile;
64 static struct cake_pile* inode_pile;
65 static struct cake_pile* file_pile;
66 static struct cake_pile* superblock_pile;
67 static struct cake_pile* fd_pile;
// Root dnode of the whole VFS tree ("/"); allocated and pinned in vfs_init.
69 struct v_dnode* vfs_sysroot = NULL;
// LRU zones driving eviction of cached dnodes/inodes (see __vfs_try_evict_*).
71 struct lru_zone *dnode_lru, *inode_lru;
// Pre-hashed well-known path components: "..", "." and the empty name.
73 struct hstr vfs_ddot = HSTR("..", 2);
74 struct hstr vfs_dot = HSTR(".", 1);
75 struct hstr vfs_empty = HSTR("", 0);
// Forward declarations of the LRU eviction callbacks (defined below).
78 __vfs_try_evict_dnode(struct lru_node* obj);
81 __vfs_try_evict_inode(struct lru_node* obj);
// VFS bootstrap: create the object caches and LRU zones, pre-hash the
// well-known names, and build the permanently-pinned root dnode.
86 // Create dedicated cake piles for these objects instead of using valloc,
// so that internal fragmentation is minimised.
87 dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
88 inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
89 file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
90 fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
// NOTE(review): the result is not stored into superblock_pile on this
// visible line — the assignment may be on an elided line; confirm
// against the full source.
92 cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
94 dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
95 inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
97 hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
98 hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
// The root is its own parent and holds a reference so it is never evicted.
101 vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
102 vfs_sysroot->parent = vfs_sysroot;
104 vfs_ref_dnode(vfs_sysroot);
// Pick the dcache hash bucket for a (parent, name-hash) pair, mixing the
// parent pointer into the hash so same-named siblings land apart.
107 static inline struct hbucket*
108 __dcache_hash(struct v_dnode* parent, u32_t* hash)
110 struct hbucket* d_cache;
113 d_cache = parent->super_block->d_cache;
// NOTE(review): `_hash` is declared/seeded on an elided line — presumably
// from *hash, with *hash updated for the caller; confirm in full source.
115 _hash = _hash ^ (_hash >> VFS_HASHBITS);
116 _hash += (u32_t)__ptr(parent);
119 return &d_cache[_hash & VFS_HASH_MASK];
// Flush an inode without taking its lock: commit dirty pages, then let
// the fs-specific sync op (if any) write back metadata.
123 __sync_inode_nolock(struct v_inode* inode)
125 pcache_commit_all(inode);
128 if (inode->ops->sync) {
129 errno = inode->ops->sync(inode);
// Resolve `str` among `parent`'s cached children. "" and "." refer to
// the parent itself; ".." short-circuits to the grandparent; everything
// else is a hash-bucket scan keyed on (name hash, parent).
136 vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
138 if (!str->len || HSTR_EQ(str, &vfs_dot))
141 if (HSTR_EQ(str, &vfs_ddot)) {
142 return parent->parent;
145 u32_t hash = str->hash;
146 struct hbucket* slot = __dcache_hash(parent, &hash);
148 struct v_dnode *pos, *n;
149 hashtable_bucket_foreach(slot, pos, n, hash_list)
151 if (pos->name.hash == hash && pos->parent == parent) {
// Stamp an access/modify time on the inode and refresh its LRU slot.
159 __vfs_touch_inode(struct v_inode* inode, const int type)
161 if (type == INODE_MODIFY) {
162 inode->mtime = clock_unixtime();
165 else if (type == INODE_ACCESSED) {
166 inode->atime = clock_unixtime();
169 lru_use_one(inode_lru, &inode->lru);
// Insert `dnode` as a child of `parent` in the dcache and hash table.
// The fresh entry starts with a single reference held by the cache.
173 vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
177 dnode->ref_count = 1;
178 dnode->parent = parent;
179 llist_append(&parent->children, &dnode->siblings);
181 struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
182 hlist_add(&bucket->head, &dnode->hash_list);
// Detach `dnode` from the dcache: only legal while the cache holds the
// sole reference (asserted), leaving an orphaned, zero-ref dnode.
186 vfs_dcache_remove(struct v_dnode* dnode)
189 assert(dnode->ref_count == 1);
191 llist_delete(&dnode->siblings);
192 llist_delete(&dnode->aka_list);
193 hlist_delete(&dnode->hash_list);
195 dnode->parent = NULL;
196 dnode->ref_count = 0;
// Move `dnode` under `new_parent`, recomputing its name hash first so
// the re-insert lands in the correct bucket.
200 vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
204 hstr_rehash(&dnode->name, HSTR_FULL_HASH);
205 vfs_dcache_remove(dnode);
206 vfs_dcache_add(new_parent, dnode);
// Open the file behind `dnode`: allocate a v_file, lazily attach a page
// cache for regular files, then invoke the fs-specific open op.
210 vfs_open(struct v_dnode* dnode, struct v_file** file)
212 struct v_inode* inode = dnode->inode;
// Reject dnodes without a backing inode or without an open op.
214 if (!inode || !inode->ops->open) {
220 struct v_file* vfile = cake_grab(file_pile);
221 memset(vfile, 0, sizeof(*vfile));
223 vfile->dnode = dnode;
224 vfile->inode = inode;
225 vfile->ref_count = 1;
226 vfile->ops = inode->default_fops;
// Page cache is created on the first open of a regular file only.
228 if (check_regfile_node(inode) && !inode->pg_cache) {
229 struct pcache* pcache = vzalloc(sizeof(struct pcache));
231 pcache->master = inode;
232 inode->pg_cache = pcache;
// If the fs open fails, roll back the v_file allocation.
235 int errno = inode->ops->open(inode, vfile);
237 cake_release(file_pile, vfile);
// Success: the opened file pins its dnode until closed.
239 vfs_ref_dnode(dnode);
// Re-bind `assign_to` to `inode`, detaching it from any previous inode
// (whose link_count drops accordingly).
251 vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
253 if (assign_to->inode) {
254 llist_delete(&assign_to->aka_list);
255 assign_to->inode->link_count--;
// NOTE(review): the new inode's link_count increment is on an elided
// line — confirm against the full source.
258 llist_append(&inode->aka_dnodes, &assign_to->aka_list);
259 assign_to->inode = inode;
// Hard-link `to_link`'s inode under the new `name` dnode via the fs
// link op; hard links may not cross file system boundaries.
264 vfs_link(struct v_dnode* to_link, struct v_dnode* name)
268 if ((errno = vfs_check_writable(to_link))) {
272 lock_inode(to_link->inode);
273 if (to_link->super_block->root != name->super_block->root) {
275 } else if (!to_link->inode->ops->link) {
277 } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
278 vfs_assign_inode(name, to_link->inode);
280 unlock_inode(to_link->inode);
// Close `file` on behalf of process `pid`: release its dnode reference,
// flush the page cache, and sync the inode once no opener remains.
286 vfs_pclose(struct v_file* file, pid_t pid)
288 struct v_inode* inode;
// Duplicated fd: just drop one reference and keep the file open.
293 if (vfs_check_duped_file(file)) {
294 vfs_unref_file(file);
// Release the inode lock on behalf of `pid` (see rationale below):
300 * This happened when process is terminated while blocking on read.
301 * In that case, the process is still holding the inode lock and it
302 will never get released.
303 * The unlocking should also include ownership check.
305 * To see why, consider two process both open the same file both with
307 * Process A: busy on reading x
308 * Process B: do nothing with x
309 * Assuming that, after a very short time, process B get terminated
310 * while process A is still busy in it's reading business. By this
311 * design, the inode lock of this file x is get released by B rather
312 * than A. And this will cause a probable race condition on A if other
313 * process is writing to this file later after B exit.
315 mutex_unlock_for(&inode->lock, pid);
317 // now regain lock for inode syncing
321 if ((errno = file->ops->close(file))) {
325 vfs_unref_dnode(file->dnode);
326 cake_release(file_pile, file);
328 pcache_commit_all(inode);
// Only flush inode metadata once the last opener is gone.
331 if (!inode->open_count) {
332 __sync_inode_nolock(inode);
// Close `file` for the calling process.
341 vfs_close(struct v_file* file)
343 return vfs_pclose(file, __current->pid);
// Return a v_fd descriptor object to its cache.
347 vfs_free_fd(struct v_fd* fd)
349 cake_release(fd_pile, fd);
// Sync a single inode (page cache plus fs metadata).
353 vfs_isync(struct v_inode* inode)
357 int errno = __sync_inode_nolock(inode);
// fsync entry point: the file's dnode must be writable to sync.
365 vfs_fsync(struct v_file* file)
368 if ((errno = vfs_check_writable(file->dnode))) {
372 return vfs_isync(file->inode);
// Find the lowest free slot in the current process's fd table.
376 vfs_alloc_fdslot(int* fd)
378 for (size_t i = 0; i < VFS_MAX_FD; i++) {
379 if (!__current->fdtable->fds[i]) {
// Allocate and zero a fresh superblock with empty inode/dnode hash
// tables. NOTE(review): the enclosing function signature (likely
// vfs_sb_alloc) is not visible in this view.
390 struct v_superblock* sb = cake_grab(superblock_pile);
391 memset(sb, 0, sizeof(*sb));
392 llist_init_head(&sb->sb_list);
394 sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
395 sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
// Take a reference on the superblock.
402 vfs_sb_ref(struct v_superblock* sb)
// Drop a reference; destroy the superblock once the count reaches zero.
408 vfs_sb_unref(struct v_superblock* sb)
410 assert(sb->ref_count);
// Still referenced after the drop: nothing more to do.
413 if (likely(sb->ref_count)) {
// Last reference: let the fs release its private state, then free.
417 if (sb->ops.release) {
424 cake_release(superblock_pile, sb);
// LRU callback: a dnode may only be evicted when unreferenced.
428 __vfs_try_evict_dnode(struct lru_node* obj)
430 struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
432 if (!dnode->ref_count) {
// LRU callback: an inode may be evicted when no link nor opener remains.
440 __vfs_try_evict_inode(struct lru_node* obj)
442 struct v_inode* inode = container_of(obj, struct v_inode, lru);
444 if (!inode->link_count && !inode->open_count) {
// Allocate a dnode named `name` under `parent` (NULL parent only for the
// root), retrying once after evicting half the dnode LRU on exhaustion.
452 vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
454 struct v_dnode* dnode = cake_grab(dnode_pile);
456 lru_evict_half(dnode_lru);
458 if (!(dnode = cake_grab(dnode_pile))) {
463 memset(dnode, 0, sizeof(*dnode));
464 llist_init_head(&dnode->children);
465 llist_init_head(&dnode->siblings);
466 llist_init_head(&dnode->aka_list);
467 mutex_init(&dnode->lock);
// The name is copied into an owned buffer (freed in vfs_d_free).
469 dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
471 hstrcpy(&dnode->name, name);
// Inherit superblock and mount point from the parent, when present.
474 vfs_d_assign_sb(dnode, parent->super_block);
475 dnode->mnt = parent->mnt;
478 lru_use_one(dnode_lru, &dnode->lru);
// Free a dnode: drop its inode link, unhook it (and detach its direct
// children) from the dcache, then release all owned resources.
484 vfs_d_free(struct v_dnode* dnode)
486 assert(dnode->ref_count == 1);
489 assert(dnode->inode->link_count > 0);
490 dnode->inode->link_count--;
493 vfs_dcache_remove(dnode);
494 // Make sure the children de-referencing their parent.
495 // With lru presented, the eviction will be propagated over the entire
496 // detached subtree eventually
497 struct v_dnode *pos, *n;
498 llist_for_each(pos, n, &dnode->children, siblings)
500 vfs_dcache_remove(pos);
// Give the fs a chance to tear down fs-private state.
503 if (dnode->destruct) {
504 dnode->destruct(dnode);
507 vfs_sb_unref(dnode->super_block);
508 vfree((void*)dnode->name.value);
509 cake_release(dnode_pile, dnode);
// Look up a cached inode by id in a superblock's inode hash table.
513 vfs_i_find(struct v_superblock* sb, u32_t i_id)
515 struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
516 struct v_inode *pos, *n;
517 hashtable_bucket_foreach(slot, pos, n, hash_list)
519 if (pos->id == i_id) {
// A hit also refreshes the inode's LRU position.
520 lru_use_one(inode_lru, &pos->lru);
// (Re-)insert an inode into its superblock's hash table keyed by id.
529 vfs_i_addhash(struct v_inode* inode)
531 struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
// Delete first so re-hashing an already-hashed inode is safe.
533 hlist_delete(&inode->hash_list);
534 hlist_add(&slot->head, &inode->hash_list);
// Allocate an inode on `sb`, retrying once after LRU eviction; the fs
// must supply init_inode to finish fs-specific setup.
538 vfs_i_alloc(struct v_superblock* sb)
540 assert(sb->ops.init_inode);
542 struct v_inode* inode;
543 if (!(inode = cake_grab(inode_pile))) {
544 lru_evict_half(inode_lru);
545 if (!(inode = cake_grab(inode_pile))) {
550 memset(inode, 0, sizeof(*inode));
551 mutex_init(&inode->lock);
552 llist_init_head(&inode->xattrs);
553 llist_init_head(&inode->aka_dnodes);
555 sb->ops.init_inode(sb, inode);
// New inode: all three timestamps start at creation time.
557 inode->ctime = clock_unixtime();
558 inode->atime = inode->ctime;
559 inode->mtime = inode->ctime;
561 vfs_i_assign_sb(inode, sb);
562 lru_use_one(inode_lru, &inode->lru);
// Free an inode: tear down its page cache and fs-private state, then
// unhash and release it.
567 vfs_i_free(struct v_inode* inode)
569 if (inode->pg_cache) {
570 pcache_release(inode->pg_cache);
571 vfree(inode->pg_cache);
573 // we don't need to sync inode.
574 // If an inode can be free, then it must be properly closed.
575 // Hence it must be synced already!
576 if (inode->destruct) {
577 inode->destruct(inode);
580 vfs_sb_unref(inode->sb);
581 hlist_delete(&inode->hash_list);
582 cake_release(inode_pile, inode);
585 /* ---- System call definition and support ---- */
// File-locator option flags for __vfs_try_locate_file:
587 // make a new name when not exists
588 #define FLOC_MAYBE_MKNAME 1
590 // name must be non-exist and made.
591 #define FLOC_MKNAME 2
// do not follow a trailing symlink
594 #define FLOC_NOFOLLOW 4
// Translate a numeric fd into the current process's v_fd entry.
597 vfs_getfd(int fd, struct v_fd** fd_s)
599 if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
// Create a node of type `itype`, named by `dnode`, inside `parent` via
// the fs create op. `dev` carries the device id for device nodes.
606 __vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
607 unsigned int itype, dev_t* dev)
611 errno = parent->ops->create(parent, dnode, itype)
// Result of a path lookup: the located file (and, on elided lines, its
// parent directory `dir`).
619 struct file_locator {
621 struct v_dnode* file;
626 * @brief unlock the file locator (floc) if possible.
627 * If the file to be located does not exist, and
628 * any FLOC_*MKNAME flag is set, then the parent
629 * dnode will be locked until the file has been properly
630 * finalised by subsequent logic.
635 __floc_try_unlock(struct file_locator* floc)
639 unlock_dnode(floc->dir);
// Resolve `path` into a file_locator, optionally creating the terminal
// name (FLOC_MKNAME / FLOC_MAYBE_MKNAME) and/or refusing to follow a
// trailing symlink (FLOC_NOFOLLOW).
644 __vfs_try_locate_file(const char* path,
645 struct file_locator* floc,
648 char name_str[VFS_NAME_MAXLEN];
649 struct v_dnode *fdir, *file;
650 struct hstr name = HSTR(name_str, 0);
651 int errno, woption = 0;
653 if ((options & FLOC_NOFOLLOW)) {
654 woption |= VFS_WALK_NOFOLLOW;
655 options &= ~FLOC_NOFOLLOW;
// Walk to the parent directory first, extracting the terminal name.
660 errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
// Then resolve the terminal name itself; ENOENT is tolerated when a
// MKNAME flag allows creating it.
667 errno = vfs_walk(fdir, name.value, &file, NULL, woption);
669 if (errno && errno != ENOENT) {
// FLOC_MKNAME demands a fresh name: an existing file is an error.
673 if (!errno && (options & FLOC_MKNAME)) {
679 // the file present, no need to hold the directory lock
// Creation path: parent must be writable; the new dnode enters the dcache.
689 errno = vfs_check_writable(fdir);
696 file = vfs_d_alloc(fdir, &name);
703 vfs_dcache_add(fdir, file);
// POSIX-style unlink permission check: writable self and parent, plus
// the sticky-bit (svtx) owner restriction on the containing directory.
718 __check_unlinkable(struct v_dnode* dnode)
721 bool wr_self, wr_parent;
722 struct v_dnode* parent;
724 parent = dnode->parent;
725 acl = dnode->inode->acl;
727 wr_self = check_allow_write(dnode->inode);
728 wr_parent = check_allow_write(parent->inode);
// No sticky bit: plain write checks decide (result on elided lines).
730 if (!fsacl_test(acl, svtx)) {
// Sticky bit set: the file's owner may still unlink.
734 if (current_euid() == dnode->inode->uid) {
738 return wr_self && wr_parent;
// Open `path` with FO_* options and install the result into a free fd
// slot of the current process; creates the file for FO_CREATE.
742 vfs_do_open(const char* path, int options)
744 int errno, fd, loptions = 0;
745 struct v_dnode *dentry, *file;
746 struct v_file* ofile = NULL;
747 struct file_locator floc;
748 struct v_inode* inode;
750 if ((options & FO_CREATE)) {
751 loptions |= FLOC_MAYBE_MKNAME;
752 } else if ((options & FO_NOFOLLOW)) {
753 loptions |= FLOC_NOFOLLOW;
756 errno = __vfs_try_locate_file(path, &floc, loptions);
758 if (errno || (errno = vfs_alloc_fdslot(&fd))) {
// Newly created name: materialise it as a regular file node.
766 errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
769 __floc_try_unlock(&floc);
773 __floc_try_unlock(&floc);
777 if ((errno = vfs_open(file, &ofile))) {
781 inode = ofile->inode;
784 struct v_fd* fd_s = cake_grab(fd_pile);
785 memset(fd_s, 0, sizeof(*fd_s));
// NOTE(review): truncation only resets the cached fsize here; on-disk
// truncation presumably happens on elided lines or in the fs driver.
787 if ((options & O_TRUNC)) {
788 file->inode->fsize = 0;
791 if (vfs_get_dtype(inode->itype) == DT_DIR) {
796 fd_s->flags = options;
797 __current->fdtable->fds[fd] = fd_s;
// open(2): thin wrapper over vfs_do_open.
804 __DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
806 int errno = vfs_do_open(path, options);
807 return DO_STATUS_OR_RETURN(errno);
// close(2): close the file, then free and clear the fd slot.
810 __DEFINE_LXSYSCALL1(int, close, int, fd)
814 if ((errno = vfs_getfd(fd, &fd_s))) {
818 if ((errno = vfs_close(fd_s->file))) {
822 cake_release(fd_pile, fd_s);
823 __current->fdtable->fds[fd] = 0;
826 return DO_STATUS(errno);
// readdir callback: copy one entry (name, type) into the user's dirent.
830 __vfs_readdir_callback(struct dir_context* dctx,
835 struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
// NOTE(review): strncpy may leave d_name unterminated when
// len >= DIRENT_NAME_MAX_LEN — confirm termination on elided lines.
836 strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
838 dent->d_type = dtype;
// sys_readdir: read one entry from a readable directory fd.
841 __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
846 if ((errno = vfs_getfd(fd, &fd_s))) {
850 struct v_inode* inode = fd_s->file->inode;
854 if (!check_directory_node(inode)) {
859 if (!check_allow_read(inode)) {
864 struct dir_context dctx = (struct dir_context) {
866 .read_complete_callback = __vfs_readdir_callback
// fs readdir returns 1 when an entry was produced.
869 if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
879 return DO_STATUS_OR_RETURN(errno);
// read(2): route through the page cache unless the node is a sequential
// device or FO_DIRECT is set; advances f_pos by the bytes read.
882 __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
886 struct v_inode* inode;
888 if ((errno = vfs_getfd(fd, &fd_s))) {
// Directories cannot be read(2); the caller must be allowed to read.
892 struct v_file* file = fd_s->file;
893 if (check_directory_node(file->inode)) {
898 if (!check_allow_read(file->inode)) {
906 __vfs_touch_inode(inode, INODE_ACCESSED);
908 if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
909 errno = file->ops->read(inode, buf, count, file->f_pos);
911 errno = pcache_read(inode, buf, count, file->f_pos);
// On success errno carries the byte count (negative means error).
915 file->f_pos += errno;
923 return DO_STATUS(errno);
// write(2): mirror of read(2), honouring O_APPEND and growing fsize.
926 __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
930 if ((errno = vfs_getfd(fd, &fd_s))) {
934 struct v_inode* inode;
935 struct v_file* file = fd_s->file;
937 if ((errno = vfs_check_writable(file->dnode))) {
941 if (check_directory_node(file->inode)) {
949 __vfs_touch_inode(inode, INODE_MODIFY);
// O_APPEND: every write starts at the current end of file.
950 if ((fd_s->flags & O_APPEND)) {
951 file->f_pos = inode->fsize;
954 if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
955 errno = file->ops->write(inode, buf, count, file->f_pos);
957 errno = pcache_write(inode, buf, count, file->f_pos);
961 file->f_pos += errno;
962 inode->fsize = MAX(inode->fsize, file->f_pos);
971 return DO_STATUS(errno);
// lseek(2): compute the new position (with signed-overflow checks via
// sadd_of) and let the fs seek op validate and apply it.
974 __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
978 if ((errno = vfs_getfd(fd, &fd_s))) {
982 struct v_file* file = fd_s->file;
983 struct v_inode* inode = file->inode;
985 if (!file->ops->seek) {
990 if (!check_allow_read(inode)) {
998 int fpos = file->f_pos;
// Directories have no meaningful size: FSEEK_END degrades to FSEEK_SET.
1000 if (vfs_get_dtype(inode->itype) == DT_DIR) {
1001 options = (options != FSEEK_END) ? options : FSEEK_SET;
1006 overflow = sadd_of((int)file->f_pos, offset, &fpos);
1009 overflow = sadd_of((int)inode->fsize, offset, &fpos);
1020 errno = file->ops->seek(file, fpos);
1023 unlock_inode(inode);
1026 return DO_STATUS(errno);
// Build the absolute path of `dnode` into `buf` by recursing towards the
// root; `depth` bounds the recursion (ENAMETOOLONG on overflow).
1030 vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
1037 return ENAMETOOLONG;
// The root is its own parent — recursion stops there.
1042 if (dnode->parent != dnode) {
1043 len = vfs_get_path(dnode->parent, buf, size, depth + 1);
// Append a path delimiter unless one is already in place.
1050 if (!len || buf[len - 1] != VFS_PATH_DELIM) {
1051 buf[len++] = VFS_PATH_DELIM;
1054 size_t cpy_size = MIN(dnode->name.len, size - len);
1055 strncpy(buf + len, dnode->name.value, cpy_size);
// Read the target of a symlink dnode into `buf` (at most `size` bytes).
1062 vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
1065 struct v_inode* inode = dnode->inode;
1067 if (!check_symlink_node(inode)) {
1071 if (!inode->ops->read_symlink) {
1075 if (!check_allow_read(inode)) {
// read_symlink returns the link length (negative on error).
1081 int errno = inode->ops->read_symlink(inode, &link);
1083 strncpy(buf, link, MIN(size, (size_t)errno));
1086 unlock_inode(inode);
// Map an inode type to its dirent DT_* type.
1091 vfs_get_dtype(int itype)
1093 int dtype = DT_FILE;
1094 if (check_itype(itype, VFS_IFSYMLINK)) {
1095 dtype |= DT_SYMLINK;
1098 if (check_itype(itype, VFS_IFDIR)) {
// realpathat(2): resolve the absolute path of an open fd's dnode.
1108 __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
1112 if ((errno = vfs_getfd(fd, &fd_s))) {
1116 struct v_dnode* dnode;
1117 errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
1124 return DO_STATUS(errno);
// readlink(2): walk without following the final symlink, then read it.
1127 __DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
1130 struct v_dnode* dnode;
1131 if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1132 errno = vfs_readlink(dnode, buf, size);
1139 return DO_STATUS(errno);
// readlinkat(2): like readlink(2), relative to the directory at `dirfd`.
1142 __DEFINE_LXSYSCALL4(
1143 int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1147 if ((errno = vfs_getfd(dirfd, &fd_s))) {
1151 pathname = pathname ? pathname : "";
1153 struct v_dnode* dnode;
1154 if (!(errno = vfs_walk(
1155 fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
// BUG(review): this reads the link of the dirfd's own dnode rather than
// the walked `dnode` — readlink(2) above uses the walked result; per
// POSIX readlinkat this should almost certainly be
// vfs_readlink(dnode, buf, size). Confirm and fix in full source.
1156 errno = vfs_readlink(fd_s->file->dnode, buf, size);
1164 return DO_STATUS(errno);
1169 When we perform an operation that could affect the layout of a
1170 directory (i.e., rename, mkdir, rmdir), we must lock the parent dir
1171 whenever possible. This blocks any ongoing path walking from reaching
1172 it, hence avoiding any partial state.
// rmdir(2): remove an empty, unreferenced directory; the parent is
// locked to block concurrent path walks (see the comment above).
1175 __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
1178 struct v_dnode* dnode;
1179 if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1180 return DO_STATUS(errno);
1185 if (!__check_unlinkable(dnode)) {
1190 if ((errno = vfs_check_writable(dnode))) {
// Read-only file systems refuse removal outright.
1194 if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
// Busy check: referenced elsewhere or still open means failure.
1199 if (dnode->ref_count > 1 || dnode->inode->open_count) {
// Only empty directories may be removed.
1204 if (!llist_empty(&dnode->children)) {
1209 struct v_dnode* parent = dnode->parent;
1217 lock_inode(parent->inode);
1219 if (check_directory_node(dnode->inode)) {
1220 errno = parent->inode->ops->rmdir(parent->inode, dnode);
1222 vfs_dcache_remove(dnode);
1228 unlock_inode(parent->inode);
1229 unlock_dnode(parent);
1232 unlock_dnode(dnode);
1233 return DO_STATUS(errno);
// mkdir(2): create a directory under the walked parent via the fs
// mkdir op, then publish it in the dcache.
1236 __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
1239 struct v_dnode *parent, *dir;
1240 char name_value[VFS_NAME_MAXLEN];
1241 struct hstr name = HHSTR(name_value, 0, 0);
1243 if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
// A successful walk of the full path means the name already exists.
1247 if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {
1252 if ((errno = vfs_check_writable(parent))) {
1256 if (!(dir = vfs_d_alloc(parent, &name))) {
1261 struct v_inode* inode = parent->inode;
1266 if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
1268 } else if (!inode->ops->mkdir) {
1270 } else if (!check_directory_node(inode)) {
1272 } else if (!(errno = inode->ops->mkdir(inode, dir))) {
1273 vfs_dcache_add(parent, dir);
1280 unlock_inode(inode);
1281 unlock_dnode(parent);
1283 return DO_STATUS(errno);
// Shared unlink path for unlink(2)/unlinkat(2): refuse busy or
// protected nodes, then delegate to the fs unlink op.
1287 __vfs_do_unlink(struct v_dnode* dnode)
1290 struct v_inode* inode = dnode->inode;
1292 if (dnode->ref_count > 1) {
1296 if (!__check_unlinkable(dnode)) {
1300 if ((errno = vfs_check_writable(dnode))) {
// Open files cannot be unlinked; directories go through rmdir instead.
1306 if (inode->open_count) {
1308 } else if (!check_directory_node(inode)) {
1309 errno = inode->ops->unlink(inode, dnode);
1317 unlock_inode(inode);
// unlink(2): resolve the path then delegate.
1322 __DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
1325 struct v_dnode* dnode;
1326 if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1330 errno = __vfs_do_unlink(dnode);
1333 return DO_STATUS(errno);
// unlinkat(2): same, relative to an open directory fd.
1336 __DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
1340 if ((errno = vfs_getfd(fd, &fd_s))) {
1344 struct v_dnode* dnode;
1345 if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
1346 errno = __vfs_do_unlink(dnode);
1350 return DO_STATUS(errno);
// link(2): locate the existing file, create the new name, then hard-link.
1353 __DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
1356 struct file_locator floc;
1357 struct v_dnode *to_link, *name_file;
1359 errno = __vfs_try_locate_file(oldpath, &floc, 0);
1364 __floc_try_unlock(&floc);
1366 to_link = floc.file;
// The new name must not already exist (FLOC_MKNAME).
1367 errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
1372 name_file = floc.file;
1373 errno = vfs_link(to_link, name_file);
// Linking failed: discard the freshly made name dnode.
1375 vfs_d_free(name_file);
1379 __floc_try_unlock(&floc);
1380 return DO_STATUS(errno);
// fsync(2): flush one open file.
1383 __DEFINE_LXSYSCALL1(int, fsync, int, fildes)
1388 if (!(errno = vfs_getfd(fildes, &fd_s))) {
1389 errno = vfs_fsync(fd_s->file);
1392 return DO_STATUS(errno);
// Duplicate a v_fd entry, sharing (and re-referencing) the open file.
1396 vfs_dup_fd(struct v_fd* old, struct v_fd** new)
1399 struct v_fd* copied = cake_grab(fd_pile);
1401 memcpy(copied, old, sizeof(struct v_fd));
1403 vfs_ref_file(old->file);
// dup2(2) core: close whatever occupies newfd, then clone oldfd into it.
1411 vfs_dup2(int oldfd, int newfd)
// Duplicating onto itself is a no-op.
1413 if (newfd == oldfd) {
1418 struct v_fd *oldfd_s, *newfd_s;
1419 if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1423 if (!TEST_FD(newfd)) {
1428 newfd_s = __current->fdtable->fds[newfd];
1429 if (newfd_s && (errno = vfs_close(newfd_s->file))) {
1433 if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1434 __current->fdtable->fds[newfd] = newfd_s;
1439 return DO_STATUS(errno);
// dup2(2): direct wrapper.
1442 __DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
1444 return vfs_dup2(oldfd, newfd);
// dup(2): clone oldfd into the lowest free fd slot.
1447 __DEFINE_LXSYSCALL1(int, dup, int, oldfd)
1450 struct v_fd *oldfd_s, *newfd_s;
1451 if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1455 if (!(errno = vfs_alloc_fdslot(&newfd)) &&
1456 !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1457 __current->fdtable->fds[newfd] = newfd_s;
1462 return DO_STATUS(errno);
// symlink(2): create a fresh symlink node, then store its target string
// through the fs set_symlink op.
1465 __DEFINE_LXSYSCALL2(
1466 int, symlink, const char*, pathname, const char*, link_target)
1469 struct file_locator floc;
1470 struct v_dnode *file;
1471 struct v_inode *f_ino;
1473 errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
1479 errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
1485 f_ino = file->inode;
1489 errno = vfs_check_writable(file);
1494 if (!f_ino->ops->set_symlink) {
1501 errno = f_ino->ops->set_symlink(f_ino, link_target);
1503 unlock_inode(f_ino);
1506 __floc_try_unlock(&floc);
1507 return DO_STATUS(errno);
// Change `proc`'s working directory to `dnode` (caller holds the dnode
// lock); swaps which dnode the cwd reference pins.
1511 vfs_do_chdir_nolock(struct proc_info* proc, struct v_dnode* dnode)
1513 if (!check_directory_node(dnode->inode)) {
// Drop the old cwd pin before taking the new one.
1518 vfs_unref_dnode(proc->cwd);
1521 vfs_ref_dnode(dnode);
// Locked wrapper around vfs_do_chdir_nolock.
1528 vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
1534 errno = vfs_do_chdir_nolock(proc, dnode);
1536 unlock_dnode(dnode);
// chdir(2): resolve the path then change the current process's cwd.
1541 __DEFINE_LXSYSCALL1(int, chdir, const char*, path)
1543 struct v_dnode* dnode;
1546 if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1550 errno = vfs_do_chdir((struct proc_info*)__current, dnode);
1553 return DO_STATUS(errno);
// fchdir(2): same, using an already-open directory fd.
1556 __DEFINE_LXSYSCALL1(int, fchdir, int, fd)
1561 if ((errno = vfs_getfd(fd, &fd_s))) {
1565 errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);
1568 return DO_STATUS(errno);
// chroot(2): move both the cwd and the process root to `dnode`.
1572 __DEFINE_LXSYSCALL1(int, chroot, const char*, path)
1575 struct v_dnode* dnode;
1576 if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1582 errno = vfs_do_chdir_nolock(__current, dnode);
1584 unlock_dnode(dnode);
1588 __current->root = dnode;
1590 unlock_dnode(dnode);
1593 return DO_STATUS(errno);
// getcwd(2): emit "/" when no cwd is set, otherwise render the full path.
1596 __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
1607 if (!__current->cwd) {
1608 *buf = VFS_PATH_DELIM;
1611 len = vfs_get_path(__current->cwd, buf, size, 0);
1623 syscall_result(errno);
// Rename `current` onto `target`: same-inode is a silent no-op; busy
// dnodes and cross-filesystem renames are rejected. Lock order: child
// first, then old parent, then new parent.
1628 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1631 if (current->inode->id == target->inode->id) {
1636 if ((errno = vfs_check_writable(current))) {
// Either side still referenced elsewhere: refuse (busy).
1640 if (current->ref_count > 1 || target->ref_count > 1) {
// No cross-filesystem rename.
1644 if (current->super_block != target->super_block) {
1648 struct v_dnode* oldparent = current->parent;
1649 struct v_dnode* newparent = target->parent;
1651 lock_dnode(current);
1654 lock_dnode(oldparent);
1656 lock_dnode(newparent);
// The replaced target must be a leaf (empty) dnode.
1658 if (!llist_empty(&target->children)) {
1660 unlock_dnode(target);
1665 current->inode->ops->rename(current->inode, current, target))) {
1666 unlock_dnode(target);
// re-position current
// (encoding note: "¤t" below is mojibake for "&current" — the
// "&curren" prefix was decoded as the HTML entity; restore in source.)
1671 hstrcpy(¤t->name, &target->name);
1672 vfs_dcache_rehash(newparent, current);
1677 unlock_dnode(target);
1680 unlock_dnode(current);
1682 unlock_dnode(oldparent);
1684 unlock_dnode(newparent);
// rename(2): resolve source and destination (fabricating the destination
// dnode on ENOENT), then delegate to vfs_do_rename.
1689 __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
1691 struct v_dnode *cur, *target_parent, *target;
// Heap-allocated name buffer; released at the end of the syscall.
1692 struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);
1695 if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {
1699 if ((errno = vfs_walk(
1700 __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {
1704 errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
// Destination missing: create a dcache entry for the new name.
1705 if (errno == ENOENT) {
1706 target = vfs_d_alloc(target_parent, &name);
1707 vfs_dcache_add(target_parent, target);
1717 errno = vfs_do_rename(cur, target);
1720 vfree((void*)name.value);
1721 return DO_STATUS(errno);
// fstat(2): copy inode metadata into the user's file_stat, including
// the represented device identity for device nodes.
1724 __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
1729 if ((errno = vfs_getfd(fd, &fds))) {
1733 struct v_inode* vino = fds->file->inode;
1734 struct device* fdev = vino->sb->dev;
1736 stat->st_ino = vino->id;
1737 stat->st_blocks = vino->lb_usage;
1738 stat->st_size = vino->fsize;
1739 stat->st_blksize = vino->sb->blksize;
1740 stat->st_nlink = vino->link_count;
1741 stat->st_uid = vino->uid;
1742 stat->st_gid = vino->gid;
1744 stat->st_ctim = vino->ctime;
1745 stat->st_atim = vino->atime;
1746 stat->st_mtim = vino->mtime;
// Mode packs the inode type in the upper half and the ACL bits below.
1748 stat->st_mode = (vino->itype << 16) | vino->acl;
1750 stat->st_ioblksize = PAGE_SIZE;
// Device node: also report the represented device as st_rdev.
1752 if (check_device_node(vino)) {
1753 struct device* rdev = resolve_device(vino->data);
1759 stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
1760 .unique = rdev->ident.unique,
1761 .index = dev_uid(rdev) };
1765 stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
1766 .unique = fdev->ident.unique,
1767 .index = dev_uid(fdev) };
1771 return DO_STATUS(errno);
// fchmodat(2): change the mode bits of the walked node; non-root callers
// are restricted to the rwx permission mask.
1774 __DEFINE_LXSYSCALL4(int, fchmodat, int, fd,
1775 const char*, path, int, mode, int, flags)
1778 struct v_dnode *dnode;
1779 struct v_inode* inode;
1781 errno = vfs_walkat(fd, path, flags, &dnode);
1786 errno = vfs_check_writable(dnode);
1791 inode = dnode->inode;
// Only root may set bits beyond rwx (e.g. setuid/setgid/sticky).
1794 if (!current_is_root()) {
1795 mode = mode & FSACL_RWXMASK;
1799 __vfs_touch_inode(inode, INODE_MODIFY);
1801 unlock_inode(inode);
1804 return DO_STATUS(errno);
// fchownat(2): change the owner/group of the walked node.
1807 __DEFINE_LXSYSCALL5(int, fchownat, int, fd,
1808 const char*, path, uid_t, uid, gid_t, gid, int, flags)
1811 struct v_dnode *dnode;
1812 struct v_inode *inode;
1814 errno = vfs_walkat(fd, path, flags, &dnode);
1819 errno = vfs_check_writable(dnode);
1824 inode = dnode->inode;
// NOTE(review): the actual uid/gid assignment is on elided lines.
1829 __vfs_touch_inode(inode, INODE_MODIFY);
1831 unlock_inode(inode);
1834 return DO_STATUS(errno);
// faccessat(2): check `amode` access using either the effective
// (AT_EACCESS) or the real user/group identity.
1837 __DEFINE_LXSYSCALL4(int, faccessat, int, fd,
1838 const char*, path, int, amode, int, flags)
1841 struct v_dnode *dnode;
1842 struct v_inode *inode;
1843 struct user_scope* uscope;
1848 errno = vfs_walkat(fd, path, flags, &dnode);
1853 if ((flags & AT_EACCESS)) {
1854 tuid = current_euid();
1855 tgid = current_egid();
1858 uscope = current_user_scope();
1859 tuid = uscope->ruid;
1860 tgid = uscope->rgid;
1863 inode = dnode->inode;
// Mask the node's ACL down to what the chosen identity is granted.
1867 acl &= check_acl_between(inode->uid, inode->gid, tuid, tgid);
1873 return DO_STATUS(errno);