3 * @author Lunaixsky (zelong56@gmail.com)
* @brief Lunaix virtual file system - an abstraction layer for all file systems.
8 * @copyright Copyright (c) 2022
12 // Welcome to The Mountain O'Shit! :)
15 TODO vfs & device todos checklist
17 It is overseen by Twilight Sparkle ;)
19 1. Get inodes hooked into lru (CHECKED)
20 2. Get dnodes hooked into lru (CHECKED)
3. Get inodes properly hashed so they can be reused by the underlying fs (CHECKED)
22 4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
23 [good idea] or a constructor/destructor pattern in cake allocator ?
24 5. (mount) Figure out a way to identify a busy mount point before unmount
25 maybe a unified mount_point structure that maintain a referencing
26 counter on any dnodes within the subtree? Such a counter will only
increment if a file is opened or a dnode is being used as working
directory, and decrement conversely. (CHECKED)
29 6. (mount) Ability to track all mount points (including sub-mounts)
30 so we can be confident to clean up everything when we
32 7. (mount) Figure out a way to acquire the device represented by a dnode.
33 so it can be used to mount. (e.g. we wish to get `struct device*`
34 out of the dnode at /dev/sda)
35 [tip] we should pay attention at twifs and add a private_data field
36 under struct v_dnode? (CHECKED)
37 8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
38 9. (mount) (future) Ability to mount any thing? e.g. Linux can mount a disk
39 image file using a so called "loopback" pseudo device. Maybe
40 we can do similar thing in Lunaix? A block device emulation
41 above the regular file when we mount it on.
42 10. (device) device number (dev_t) allocation
43 [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
46 #include <klibc/string.h>
47 #include <lunaix/foptions.h>
48 #include <lunaix/fs.h>
49 #include <lunaix/mm/cake.h>
50 #include <lunaix/mm/valloc.h>
51 #include <lunaix/process.h>
52 #include <lunaix/spike.h>
53 #include <lunaix/syscall.h>
54 #include <lunaix/syscall_utils.h>
56 #include <lunaix/fs/twifs.h>
58 #include <usr/lunaix/dirent_defs.h>
/* Selectors for __vfs_touch_inode(): which timestamp to refresh */
#define INODE_ACCESSED 0
#define INODE_MODIFY 1

/* Dedicated slab-style (cake) allocators for the core VFS objects */
static struct cake_pile* dnode_pile;
static struct cake_pile* inode_pile;
static struct cake_pile* file_pile;
static struct cake_pile* superblock_pile;
static struct cake_pile* fd_pile;

/* Root dnode of the whole VFS tree; created during vfs_init() */
struct v_dnode* vfs_sysroot = NULL;

/* LRU zones that drive eviction of cached dnodes and inodes */
struct lru_zone *dnode_lru, *inode_lru;

/* Pre-hashed path-component constants: "..", "." and the empty name */
struct hstr vfs_ddot = HSTR("..", 2);
struct hstr vfs_dot = HSTR(".", 1);
struct hstr vfs_empty = HSTR("", 0);

/* Forward declarations of the LRU eviction callbacks (defined below) */
__vfs_try_evict_dnode(struct lru_node* obj);

__vfs_try_evict_inode(struct lru_node* obj);
86 // 为他们专门创建一个蛋糕堆,而不使用valloc,这样我们可以最小化内碎片的产生
87 dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
88 inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
89 file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
90 fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
92 cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
94 dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
95 inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
97 hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
98 hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
101 vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
102 vfs_sysroot->parent = vfs_sysroot;
104 vfs_ref_dnode(vfs_sysroot);
static inline struct hbucket*
__dcache_hash(struct v_dnode* parent, u32_t* hash)
    struct hbucket* d_cache;

    // dnode-cache buckets live on the parent's superblock
    d_cache = parent->super_block->d_cache;

    // fold high bits in, then mix the parent's address so identical
    // names under different parents land in different buckets
    _hash = _hash ^ (_hash >> VFS_HASHBITS);
    _hash += (u32_t)__ptr(parent);

    return &d_cache[_hash & VFS_HASH_MASK];
// Flush an inode without taking its lock: commit dirty page-cache pages
// first, then let the filesystem sync its own metadata (if supported).
__sync_inode_nolock(struct v_inode* inode)
    pcache_commit_all(inode);

    if (inode->ops->sync) {
        errno = inode->ops->sync(inode);
// Look a child name up in the dnode cache of `parent`.
vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
    // "" and "." resolve trivially (result on elided lines)
    if (!str->len || HSTR_EQ(str, &vfs_dot))

    // ".." short-circuits to the parent's parent
    if (HSTR_EQ(str, &vfs_ddot)) {
        return parent->parent;

    u32_t hash = str->hash;
    struct hbucket* slot = __dcache_hash(parent, &hash);

    // scan the bucket; a hit must match both name hash and parent
    struct v_dnode *pos, *n;
    hashtable_bucket_foreach(slot, pos, n, hash_list)
        if (pos->name.hash == hash && pos->parent == parent) {
// Stamp the inode's mtime or atime (per `type`), then mark it
// recently-used in the inode LRU.
__vfs_touch_inode(struct v_inode* inode, const int type)
    if (type == INODE_MODIFY) {
        inode->mtime = clock_unixtime();
    else if (type == INODE_ACCESSED) {
        inode->atime = clock_unixtime();

    lru_use_one(inode_lru, &inode->lru);
// Insert `dnode` into the cache as a child of `parent`, taking the
// initial reference on behalf of the cache.
vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
    dnode->ref_count = 1;
    dnode->parent = parent;
    llist_append(&parent->children, &dnode->siblings);

    // __dcache_hash receives a pointer to the name hash, so it may
    // update dnode->name.hash in place
    struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
    hlist_add(&bucket->head, &dnode->hash_list);
// Detach a dnode from the cache; only the cache's own reference may
// remain at this point.
vfs_dcache_remove(struct v_dnode* dnode)
    assert(dnode->ref_count == 1);

    // unlink from parent's children, the inode alias list and the bucket
    llist_delete(&dnode->siblings);
    llist_delete(&dnode->aka_list);
    hlist_delete(&dnode->hash_list);

    dnode->parent = NULL;
    dnode->ref_count = 0;
// Move a dnode under a new parent: recompute its name hash, then
// remove and re-insert it so it lands in the right bucket.
vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
    hstr_rehash(&dnode->name, HSTR_FULL_HASH);
    vfs_dcache_remove(dnode);
    vfs_dcache_add(new_parent, dnode);
// Open the file behind `dnode`, producing a v_file in *file.
vfs_open(struct v_dnode* dnode, struct v_file** file)
    struct v_inode* inode = dnode->inode;

    // an attached inode and an fs-provided open() are mandatory
    if (!inode || !inode->ops->open) {

    struct v_file* vfile = cake_grab(file_pile);
    memset(vfile, 0, sizeof(*vfile));

    vfile->dnode = dnode;
    vfile->inode = inode;
    vfile->ref_count = 1;
    vfile->ops = inode->default_fops;

    // lazily create the page cache for regular files
    if (check_regfile_node(inode) && !inode->pg_cache) {
        struct pcache* pcache = vzalloc(sizeof(struct pcache));
        pcache->master = inode;
        inode->pg_cache = pcache;

    int errno = inode->ops->open(inode, vfile);
    // on open() failure the file object is released (error branch
    // partly elided); on success the dnode gains a reference
    cake_release(file_pile, vfile);
    vfs_ref_dnode(dnode);
// Bind `inode` to the dnode `assign_to`, detaching any inode that was
// previously bound (and dropping its link count).
vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
    if (assign_to->inode) {
        llist_delete(&assign_to->aka_list);
        assign_to->inode->link_count--;

    // record this dnode as one of the inode's names (hard-link alias)
    llist_append(&inode->aka_dnodes, &assign_to->aka_list);
    assign_to->inode = inode;
// Create a hard link: make `name` another dnode for to_link's inode.
vfs_link(struct v_dnode* to_link, struct v_dnode* name)
    if ((errno = vfs_check_writable(to_link))) {

    lock_inode(to_link->inode);
    // cross-filesystem hard links are rejected
    if (to_link->super_block->root != name->super_block->root) {
    } else if (!to_link->inode->ops->link) {
    } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
        // fs-level link succeeded: reflect it in the VFS layer
        vfs_assign_inode(name, to_link->inode);
    unlock_inode(to_link->inode);
// Close `file` on behalf of process `pid` (which may differ from the
// caller when reaping a terminated process).
vfs_pclose(struct v_file* file, pid_t pid)
    struct v_inode* inode;

    /*
     * This happens when a process is terminated while blocking on read.
     * In that case, the process is still holding the inode lock and it
       will never get released.
     * The unlocking should also include an ownership check.

     * To see why, consider two processes that both open the same file,
     * Process A: busy reading x
     * Process B: does nothing with x
     * Assuming that, after a very short time, process B gets terminated
     * while process A is still busy with its reading. By this
     * design, the inode lock of this file x gets released by B rather
     * than A. And this will cause a probable race condition on A if another
     * process writes to this file later, after B exits.
     */
    mutex_unlock_for(&inode->lock, pid);

    // a dup'ed file only drops one reference here
    if (vfs_check_duped_file(file)) {
        vfs_unref_file(file);

    if ((errno = file->ops->close(file))) {

    vfs_unref_dnode(file->dnode);
    cake_release(file_pile, file);

    /*
       if the current inode is not being locked by other
       threads that do not share the same open context,
       then we can try to do the sync opportunistically
    */
    if (mutex_on_hold(&inode->lock)) {

    pcache_commit_all(inode);

    // last opener gone: flush the inode itself as well
    if (!inode->open_count) {
        __sync_inode_nolock(inode);
// Close `file` on behalf of the current process.
vfs_close(struct v_file* file)
    return vfs_pclose(file, __current->pid);
// Return a file-descriptor object to its cake pile.
vfs_free_fd(struct v_fd* fd)
    cake_release(fd_pile, fd);
// Sync a single inode (locking presumably happens on elided lines).
vfs_isync(struct v_inode* inode)
    int errno = __sync_inode_nolock(inode);
// fsync an open file: requires a writable filesystem, then delegates
// to the inode-level sync.
vfs_fsync(struct v_file* file)
    if ((errno = vfs_check_writable(file->dnode))) {

    return vfs_isync(file->inode);
// Find the first free slot in the current process's fd table.
vfs_alloc_fdslot(int* fd)
    for (size_t i = 0; i < VFS_MAX_FD; i++) {
        if (!__current->fdtable->fds[i]) {
    // allocate a zeroed superblock plus its inode/dnode hash tables
    // (enclosing function signature is on elided lines)
    struct v_superblock* sb = cake_grab(superblock_pile);
    memset(sb, 0, sizeof(*sb));
    llist_init_head(&sb->sb_list);

    sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
    sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
// Take a reference on the superblock (body on elided lines).
vfs_sb_ref(struct v_superblock* sb)
// Drop a superblock reference; on the last one, let the fs release its
// private state and free the object.
vfs_sb_unref(struct v_superblock* sb)
    assert(sb->ref_count);

    // still referenced elsewhere: nothing more to do
    if (likely(sb->ref_count)) {

    if (sb->ops.release) {

    cake_release(superblock_pile, sb);
// LRU callback: a dnode is evictable only when nobody references it.
__vfs_try_evict_dnode(struct lru_node* obj)
    struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);

    if (!dnode->ref_count) {
// LRU callback: an inode is evictable only with no links and no openers.
__vfs_try_evict_inode(struct lru_node* obj)
    struct v_inode* inode = container_of(obj, struct v_inode, lru);

    if (!inode->link_count && !inode->open_count) {
// Allocate and initialise a dnode named `name` under `parent`.
vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
    struct v_dnode* dnode = cake_grab(dnode_pile);
    // on allocation pressure: evict half the dnode LRU and retry once
    lru_evict_half(dnode_lru);
    if (!(dnode = cake_grab(dnode_pile))) {

    memset(dnode, 0, sizeof(*dnode));
    llist_init_head(&dnode->children);
    llist_init_head(&dnode->siblings);
    llist_init_head(&dnode->aka_list);
    mutex_init(&dnode->lock);

    // the name buffer is owned by the dnode; freed in vfs_d_free()
    dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);

    hstrcpy(&dnode->name, name);

    // inherit superblock and mount point from the parent (parent may be
    // NULL for the sysroot — guard presumably on an elided line)
    vfs_d_assign_sb(dnode, parent->super_block);
    dnode->mnt = parent->mnt;

    lru_use_one(dnode_lru, &dnode->lru);
// Destroy a dnode: drop its inode link, detach it (and its children)
// from the cache, run the fs destructor, and release all storage.
vfs_d_free(struct v_dnode* dnode)
    assert(dnode->ref_count == 1);

    // this name held one hard-link count on the inode
    assert(dnode->inode->link_count > 0);
    dnode->inode->link_count--;

    vfs_dcache_remove(dnode);
    // Make sure the children de-reference their parent.
    // With lru presented, the eviction will be propagated over the entire
    // detached subtree eventually
    struct v_dnode *pos, *n;
    llist_for_each(pos, n, &dnode->children, siblings)
        vfs_dcache_remove(pos);

    if (dnode->destruct) {
        dnode->destruct(dnode);

    vfs_sb_unref(dnode->super_block);
    vfree((void*)dnode->name.value);
    cake_release(dnode_pile, dnode);
// Look an inode up by id in the superblock's inode hash cache.
vfs_i_find(struct v_superblock* sb, u32_t i_id)
    struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
    struct v_inode *pos, *n;
    hashtable_bucket_foreach(slot, pos, n, hash_list)
        if (pos->id == i_id) {
            // cache hit: refresh the inode's LRU position
            lru_use_one(inode_lru, &pos->lru);
// (Re-)insert an inode into its superblock's hash cache, keyed by id.
vfs_i_addhash(struct v_inode* inode)
    struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];

    // drop any stale membership first so re-hashing is idempotent
    hlist_delete(&inode->hash_list);
    hlist_add(&slot->head, &inode->hash_list);
// Allocate a fresh inode for superblock `sb` and let the fs initialise it.
vfs_i_alloc(struct v_superblock* sb)
    // the filesystem must provide an inode initialiser
    assert(sb->ops.init_inode);

    struct v_inode* inode;
    if (!(inode = cake_grab(inode_pile))) {
        // under pressure: evict half the inode LRU and retry once
        lru_evict_half(inode_lru);
        if (!(inode = cake_grab(inode_pile))) {

    memset(inode, 0, sizeof(*inode));
    mutex_init(&inode->lock);
    llist_init_head(&inode->xattrs);
    llist_init_head(&inode->aka_dnodes);

    sb->ops.init_inode(sb, inode);

    // a fresh inode starts with all three timestamps at "now"
    inode->ctime = clock_unixtime();
    inode->atime = inode->ctime;
    inode->mtime = inode->ctime;

    vfs_i_assign_sb(inode, sb);
    lru_use_one(inode_lru, &inode->lru);
// Destroy an inode: tear down its page cache, run the fs destructor,
// unhash it, and return it to the pile.
vfs_i_free(struct v_inode* inode)
    if (inode->pg_cache) {
        pcache_release(inode->pg_cache);
        vfree(inode->pg_cache);

    // we don't need to sync the inode:
    // if an inode can be freed, then it must be properly closed,
    // hence it must be synced already!
    if (inode->destruct) {
        inode->destruct(inode);

    vfs_sb_unref(inode->sb);
    hlist_delete(&inode->hash_list);
    cake_release(inode_pile, inode);
594 /* ---- System call definition and support ---- */
596 // make a new name when not exists
597 #define FLOC_MAYBE_MKNAME 1
// the name must not already exist and will be created.
600 #define FLOC_MKNAME 2
603 #define FLOC_NOFOLLOW 4
// Translate a numeric fd into its v_fd slot for the current process.
vfs_getfd(int fd, struct v_fd** fd_s)
    // a valid index with an allocated slot is the success path
    if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
// Create an on-fs node of type `itype` for `dnode`, delegating to the
// parent inode's create() operation.
__vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
            unsigned int itype, dev_t* dev)
    errno = parent->ops->create(parent, dnode, itype);
struct file_locator {
    // the located (or freshly created) dnode; a `dir` member for the
    // parent directory exists on elided lines (see __floc_try_unlock)
    struct v_dnode* file;
635 * @brief unlock the file locator (floc) if possible.
* If the file to be located does not exist, and
637 * any FLOC_*MKNAME flag is set, then the parent
638 * dnode will be locked until the file has been properly
639 * finalised by subsequent logic.
// Release the parent-directory lock held by a file locator.
__floc_try_unlock(struct file_locator* floc)
    unlock_dnode(floc->dir);
// Resolve `path` into a file_locator, optionally creating the final
// component when a FLOC_*MKNAME option is set.
__vfs_try_locate_file(const char* path,
                      struct file_locator* floc,
    char name_str[VFS_NAME_MAXLEN];
    struct v_dnode *fdir, *file;
    struct hstr name = HSTR(name_str, 0);
    int errno, woption = 0;

    // translate FLOC_NOFOLLOW into the path walker's flag space
    if ((options & FLOC_NOFOLLOW)) {
        woption |= VFS_WALK_NOFOLLOW;
        options &= ~FLOC_NOFOLLOW;

    // first resolve the parent directory, capturing the last component
    errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);

    // then resolve the final component itself
    errno = vfs_walk(fdir, name.value, &file, NULL, woption);

    // only ENOENT may proceed to the creation path below
    if (errno && errno != ENOENT) {

    if ((options & FLOC_MKNAME)) {

    errno = vfs_check_writable(fdir);

    // fabricate and cache a dnode for the name being created
    file = vfs_d_alloc(fdir, &name);

    vfs_dcache_add(fdir, file);
// Decide whether the caller may unlink `dnode`, honouring the sticky
// bit (svtx) on the containing directory.
__check_unlinkable(struct v_dnode* dnode)
    bool wr_self, wr_parent;
    struct v_dnode* parent;

    parent = dnode->parent;
    acl = dnode->inode->acl;

    wr_self = check_allow_write(dnode->inode);
    wr_parent = check_allow_write(parent->inode);

    // no sticky bit: plain write permissions decide
    if (!fsacl_test(acl, svtx)) {

    // sticky set: the owner may still unlink
    if (current_euid() == dnode->inode->uid) {

    return wr_self && wr_parent;
// Core of open(2): locate (or create) the file, open it and install a
// descriptor; returns the fd or a negative/error status via elided paths.
vfs_do_open(const char* path, int options)
    int errno, fd, loptions = 0;
    struct v_dnode *dentry, *file;
    struct v_file* ofile = NULL;
    struct file_locator floc;
    struct v_inode* inode;

    // map open(2) flags onto file-locator options
    if ((options & FO_CREATE)) {
        loptions |= FLOC_MAYBE_MKNAME;
    } else if ((options & FO_NOFOLLOW)) {
        loptions |= FLOC_NOFOLLOW;

    errno = __vfs_try_locate_file(path, &floc, loptions);

    if (errno || (errno = vfs_alloc_fdslot(&fd))) {

    // a freshly created name needs its on-fs node materialised
    errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);

    __floc_try_unlock(&floc);

    __floc_try_unlock(&floc);

    if ((errno = vfs_open(file, &ofile))) {

    inode = ofile->inode;

    struct v_fd* fd_s = cake_grab(fd_pile);
    memset(fd_s, 0, sizeof(*fd_s));

    // NOTE(review): truncation only resets fsize here — presumably the
    // backing blocks are reclaimed elsewhere; confirm
    if ((options & O_TRUNC)) {
        file->inode->fsize = 0;

    if (vfs_get_dtype(inode->itype) == DT_DIR) {

    fd_s->flags = options;
    __current->fdtable->fds[fd] = fd_s;
// open(2): thin syscall wrapper around vfs_do_open().
__DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
    int errno = vfs_do_open(path, options);
    return DO_STATUS_OR_RETURN(errno);
// close(2): close the file, then release the descriptor slot.
__DEFINE_LXSYSCALL1(int, close, int, fd)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    if ((errno = vfs_close(fd_s->file))) {

    // descriptor object freed and table slot cleared only on success
    cake_release(fd_pile, fd_s);
    __current->fdtable->fds[fd] = 0;

    return DO_STATUS(errno);
// dir_context callback: copy one directory entry into the lx_dirent
// supplied through dctx->cb_data.
__vfs_readdir_callback(struct dir_context* dctx,
    struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
    // NOTE(review): strncpy may leave d_name unterminated when
    // len >= DIRENT_NAME_MAX_LEN — confirm termination elsewhere
    strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
    dent->d_type = dtype;
// sys_readdir: read the next directory entry of `fd` into `dent`.
__DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_inode* inode = fd_s->file->inode;

    // must be a directory the caller may read
    if (!check_directory_node(inode)) {

    if (!check_allow_read(inode)) {

    struct dir_context dctx = (struct dir_context) {
        .read_complete_callback = __vfs_readdir_callback

    // per the check below, readdir() returning 1 signals success here
    if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {

    return DO_STATUS_OR_RETURN(errno);
// read(2): read `count` bytes at the file cursor into `buf`.
__DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
    struct v_inode* inode;

    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_file* file = fd_s->file;
    // directories cannot be read(2)
    if (check_directory_node(file->inode)) {

    if (!check_allow_read(file->inode)) {

    __vfs_touch_inode(inode, INODE_ACCESSED);

    // sequential devices and FO_DIRECT bypass the page cache
    if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
        errno = file->ops->read(inode, buf, count, file->f_pos);
        errno = pcache_read(inode, buf, count, file->f_pos);

    // a positive errno is the byte count here; advance the cursor
    file->f_pos += errno;

    return DO_STATUS(errno);
// write(2): write `count` bytes from `buf` at the file cursor.
__DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_inode* inode;
    struct v_file* file = fd_s->file;

    if ((errno = vfs_check_writable(file->dnode))) {

    // directories cannot be written via write(2)
    if (check_directory_node(file->inode)) {

    __vfs_touch_inode(inode, INODE_MODIFY);
    // O_APPEND: every write starts at the current end of file
    if ((fd_s->flags & O_APPEND)) {
        file->f_pos = inode->fsize;

    // sequential devices and FO_DIRECT bypass the page cache
    if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
        errno = file->ops->write(inode, buf, count, file->f_pos);
        errno = pcache_write(inode, buf, count, file->f_pos);

    file->f_pos += errno;
    // grow the recorded size when the cursor passed old EOF
    inode->fsize = MAX(inode->fsize, file->f_pos);

    return DO_STATUS(errno);
// lseek(2): reposition the file cursor with overflow-checked arithmetic.
__DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_file* file = fd_s->file;
    struct v_inode* inode = file->inode;

    if (!file->ops->seek) {

    if (!check_allow_read(inode)) {

    int fpos = file->f_pos;

    // directories: end-relative seeks degrade to absolute ones
    if (vfs_get_dtype(inode->itype) == DT_DIR) {
        options = (options != FSEEK_END) ? options : FSEEK_SET;

    // CUR-relative: signed-add with overflow detection
    overflow = sadd_of((int)file->f_pos, offset, &fpos);

    // END-relative: offset from the recorded file size
    overflow = sadd_of((int)inode->fsize, offset, &fpos);

    errno = file->ops->seek(file, fpos);

    unlock_inode(inode);

    return DO_STATUS(errno);
// Build the absolute path of `dnode` into buf by recursing to the root
// (the sysroot is its own parent, which terminates the recursion).
vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
    return ENAMETOOLONG;

    if (dnode->parent != dnode) {
        len = vfs_get_path(dnode->parent, buf, size, depth + 1);

    // emit exactly one delimiter between components
    if (!len || buf[len - 1] != VFS_PATH_DELIM) {
        buf[len++] = VFS_PATH_DELIM;

    // append this component, clamped to the remaining space
    size_t cpy_size = MIN(dnode->name.len, size - len);
    strncpy(buf + len, dnode->name.value, cpy_size);
// Read the target of a symlink dnode into `buf` (at most `size` bytes).
vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
    struct v_inode* inode = dnode->inode;

    if (!check_symlink_node(inode)) {

    if (!inode->ops->read_symlink) {

    if (!check_allow_read(inode)) {

    // per the copy below, a non-negative return is the link length
    int errno = inode->ops->read_symlink(inode, &link);
    // NOTE(review): strncpy may not NUL-terminate when the link fills
    // the buffer — confirm the caller's expectation
    strncpy(buf, link, MIN(size, (size_t)errno));

    unlock_inode(inode);
// Map an inode type (VFS_IF*) onto a dirent type (DT_*).
vfs_get_dtype(int itype)
    int dtype = DT_FILE;
    if (check_itype(itype, VFS_IFSYMLINK)) {
        dtype |= DT_SYMLINK;

    if (check_itype(itype, VFS_IFDIR)) {
// realpathat: resolve the absolute path of the dnode behind `fd`.
__DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_dnode* dnode;
    errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);

    return DO_STATUS(errno);
// readlink(2): NOFOLLOW walk so the link itself (not its target) is read.
__DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
    struct v_dnode* dnode;
    if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
        errno = vfs_readlink(dnode, buf, size);

    return DO_STATUS(errno);
1142 __DEFINE_LXSYSCALL4(
1143 int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1147 if ((errno = vfs_getfd(dirfd, &fd_s))) {
1151 pathname = pathname ? pathname : "";
1153 struct v_dnode* dnode;
1154 if (!(errno = vfs_walk(
1155 fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1156 errno = vfs_readlink(fd_s->file->dnode, buf, size);
1164 return DO_STATUS(errno);
1169 When we perform operation that could affect the layout of
1170 directory (i.e., rename, mkdir, rmdir). We must lock the parent dir
whenever possible. This will block any ongoing path walking from reaching
it, hence avoiding any partial state.
// rmdir(2): remove an empty, unreferenced directory.
__DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
    struct v_dnode* dnode;
    if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
        return DO_STATUS(errno);

    // permission / sticky-bit check
    if (!__check_unlinkable(dnode)) {

    if ((errno = vfs_check_writable(dnode))) {

    // read-only filesystems refuse removal
    if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {

    // busy: someone else references the dnode or holds the dir open
    if (dnode->ref_count > 1 || dnode->inode->open_count) {

    // only empty directories may be removed
    if (!llist_empty(&dnode->children)) {

    struct v_dnode* parent = dnode->parent;

    lock_inode(parent->inode);

    // fs-level removal, then drop it from the dnode cache
    if (check_directory_node(dnode->inode)) {
        errno = parent->inode->ops->rmdir(parent->inode, dnode);
        vfs_dcache_remove(dnode);

    unlock_inode(parent->inode);
    unlock_dnode(parent);

    unlock_dnode(dnode);
    return DO_STATUS(errno);
// mkdir(2): create a new directory under the resolved parent.
__DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
    struct v_dnode *parent, *dir;
    char name_value[VFS_NAME_MAXLEN];
    struct hstr name = HHSTR(name_value, 0, 0);

    // resolve the parent, capturing the final component into `name`
    if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {

    // a successful walk means the name already exists (EEXIST path elided)
    if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {

    if ((errno = vfs_check_writable(parent))) {

    if (!(dir = vfs_d_alloc(parent, &name))) {

    struct v_inode* inode = parent->inode;

    // precondition chain: writable fs, mkdir supported, parent is a dir
    if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
    } else if (!inode->ops->mkdir) {
    } else if (!check_directory_node(inode)) {
    } else if (!(errno = inode->ops->mkdir(inode, dir))) {
        vfs_dcache_add(parent, dir);

    unlock_inode(inode);
    unlock_dnode(parent);

    return DO_STATUS(errno);
// Shared implementation for unlink(2)/unlinkat(2).
__vfs_do_unlink(struct v_dnode* dnode)
    struct v_inode* inode = dnode->inode;

    // refuse while other holders still reference this name
    if (dnode->ref_count > 1) {

    if (!__check_unlinkable(dnode)) {

    if ((errno = vfs_check_writable(dnode))) {

    // open files cannot be unlinked; directories need rmdir instead
    if (inode->open_count) {
    } else if (!check_directory_node(inode)) {
        errno = inode->ops->unlink(inode, dnode);

    unlock_inode(inode);
// unlink(2): resolve the path, then delegate to __vfs_do_unlink().
__DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
    struct v_dnode* dnode;
    if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {

    errno = __vfs_do_unlink(dnode);

    return DO_STATUS(errno);
// unlinkat(2): like unlink, but resolved relative to the dir behind fd.
__DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    struct v_dnode* dnode;
    if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
        errno = __vfs_do_unlink(dnode);

    return DO_STATUS(errno);
// link(2): create newpath as a hard link to oldpath.
__DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
    struct file_locator floc;
    struct v_dnode *to_link, *name_file;

    // locate the existing file
    errno = __vfs_try_locate_file(oldpath, &floc, 0);

    __floc_try_unlock(&floc);

    to_link = floc.file;
    // create the new name — it must not exist yet (FLOC_MKNAME)
    errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);

    name_file = floc.file;
    errno = vfs_link(to_link, name_file);
    // roll back the freshly created name on failure
    vfs_d_free(name_file);

    __floc_try_unlock(&floc);
    return DO_STATUS(errno);
// fsync(2): thin wrapper around vfs_fsync().
__DEFINE_LXSYSCALL1(int, fsync, int, fildes)
    if (!(errno = vfs_getfd(fildes, &fd_s))) {
        errno = vfs_fsync(fd_s->file);

    return DO_STATUS(errno);
// Duplicate a descriptor object; the copy shares (and pins) the same
// open file via an extra reference.
vfs_dup_fd(struct v_fd* old, struct v_fd** new)
    struct v_fd* copied = cake_grab(fd_pile);

    memcpy(copied, old, sizeof(struct v_fd));

    vfs_ref_file(old->file);
// Core of dup2(2): make `newfd` refer to the same open file as `oldfd`.
vfs_dup2(int oldfd, int newfd)
    // duplicating onto itself is a no-op
    if (newfd == oldfd) {

    struct v_fd *oldfd_s, *newfd_s;
    if ((errno = vfs_getfd(oldfd, &oldfd_s))) {

    if (!TEST_FD(newfd)) {

    // close whatever currently occupies newfd
    newfd_s = __current->fdtable->fds[newfd];
    if (newfd_s && (errno = vfs_close(newfd_s->file))) {

    if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
        __current->fdtable->fds[newfd] = newfd_s;

    return DO_STATUS(errno);
// dup2(2): thin syscall wrapper.
__DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
    return vfs_dup2(oldfd, newfd);
// dup(2): duplicate oldfd into the lowest free descriptor slot.
__DEFINE_LXSYSCALL1(int, dup, int, oldfd)
    struct v_fd *oldfd_s, *newfd_s;
    if ((errno = vfs_getfd(oldfd, &oldfd_s))) {

    // find a free slot, then duplicate into it
    if (!(errno = vfs_alloc_fdslot(&newfd)) &&
        !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
        __current->fdtable->fds[newfd] = newfd_s;

    return DO_STATUS(errno);
// symlink(2): create `pathname` as a symlink pointing at `link_target`.
__DEFINE_LXSYSCALL2(
    int, symlink, const char*, pathname, const char*, link_target)
    struct file_locator floc;
    struct v_dnode *file;
    struct v_inode *f_ino;

    // the link name must not exist yet (FLOC_MKNAME)
    errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);

    // materialise the on-fs symlink node
    errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);

    f_ino = file->inode;

    errno = vfs_check_writable(file);

    if (!f_ino->ops->set_symlink) {

    errno = f_ino->ops->set_symlink(f_ino, link_target);

    unlock_inode(f_ino);

    __floc_try_unlock(&floc);
    return DO_STATUS(errno);
// Change `proc`'s working directory without taking the dnode lock:
// swap the reference from the old cwd to the new one.
vfs_do_chdir_nolock(struct proc_info* proc, struct v_dnode* dnode)
    // cwd must be a directory
    if (!check_directory_node(dnode->inode)) {

    vfs_unref_dnode(proc->cwd);

    vfs_ref_dnode(dnode);
// Locked chdir variant (lock acquisition is on elided lines).
vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
    errno = vfs_do_chdir_nolock(proc, dnode);

    unlock_dnode(dnode);
// chdir(2): resolve the path, then change the current process's cwd.
__DEFINE_LXSYSCALL1(int, chdir, const char*, path)
    struct v_dnode* dnode;

    if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {

    errno = vfs_do_chdir((struct proc_info*)__current, dnode);

    return DO_STATUS(errno);
// fchdir(2): change cwd to the directory behind an open fd.
__DEFINE_LXSYSCALL1(int, fchdir, int, fd)
    if ((errno = vfs_getfd(fd, &fd_s))) {

    errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);

    return DO_STATUS(errno);
// chroot(2): make the resolved dnode the process's new root (and cwd).
__DEFINE_LXSYSCALL1(int, chroot, const char*, path)
    struct v_dnode* dnode;
    if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {

    errno = vfs_do_chdir_nolock(__current, dnode);

    unlock_dnode(dnode);

    __current->root = dnode;

    unlock_dnode(dnode);

    return DO_STATUS(errno);
// getcwd(2): write the current working directory path into buf.
__DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
    // no cwd recorded yet: report just the path delimiter (root)
    if (!__current->cwd) {
        *buf = VFS_PATH_DELIM;

    len = vfs_get_path(__current->cwd, buf, size, 0);

    syscall_result(errno);
1628 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1631 if (current->inode->id == target->inode->id) {
1636 if ((errno = vfs_check_writable(current))) {
1640 if (current->ref_count > 1 || target->ref_count > 1) {
1644 if (current->super_block != target->super_block) {
1648 struct v_dnode* oldparent = current->parent;
1649 struct v_dnode* newparent = target->parent;
1651 lock_dnode(current);
1654 lock_dnode(oldparent);
1656 lock_dnode(newparent);
1658 if (!llist_empty(&target->children)) {
1660 unlock_dnode(target);
1665 current->inode->ops->rename(current->inode, current, target))) {
1666 unlock_dnode(target);
1670 // re-position current
1671 hstrcpy(¤t->name, &target->name);
1672 vfs_dcache_rehash(newparent, current);
1677 unlock_dnode(target);
1680 unlock_dnode(current);
1682 unlock_dnode(oldparent);
1684 unlock_dnode(newparent);
// rename(2): resolve both paths, fabricating a destination dnode when
// the new name does not exist, then delegate to vfs_do_rename().
__DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
    struct v_dnode *cur, *target_parent, *target;
    // heap-allocated name buffer; released at the end of this handler
    struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);

    if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {

    // resolve the destination's parent, capturing the final component
    if ((errno = vfs_walk(
        __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {

    errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
    if (errno == ENOENT) {
        // destination absent: fabricate and cache a dnode for it
        target = vfs_d_alloc(target_parent, &name);
        vfs_dcache_add(target_parent, target);

    errno = vfs_do_rename(cur, target);

    vfree((void*)name.value);
    return DO_STATUS(errno);
// fstat(2): fill `stat` from the inode behind `fd`.
__DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
    if ((errno = vfs_getfd(fd, &fds))) {

    struct v_inode* vino = fds->file->inode;
    struct device* fdev = vino->sb->dev;

    *stat = (struct file_stat){.st_ino = vino->id,
                               .st_blocks = vino->lb_usage,
                               .st_size = vino->fsize,
                               .mode = vino->itype,
                               .st_ioblksize = PAGE_SIZE,
                               .st_blksize = vino->sb->blksize};

    // device nodes additionally report the represented device (st_rdev)
    if (check_device_node(vino)) {
        struct device* rdev = resolve_device(vino->data);

        stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
                                .unique = rdev->ident.unique,
                                .index = dev_uid(rdev) };

    // st_dev identifies the device hosting the filesystem itself
    stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
                           .unique = fdev->ident.unique,
                           .index = dev_uid(fdev) };

    return DO_STATUS(errno);
// fchmodat(2): change the permission bits of the file at `path`
// (resolved relative to `fd`).
__DEFINE_LXSYSCALL4(int, fchmodat, int, fd,
                    const char*, path, int, mode, int, flags)
    struct v_dnode *dnode;
    struct v_inode* inode;

    errno = vfs_walkat(fd, path, flags, &dnode);

    errno = vfs_check_writable(dnode);

    inode = dnode->inode;

    // non-root callers may only change the rwx permission bits
    if (!current_is_root()) {
        mode = mode & FSACL_RWXMASK;

    __vfs_touch_inode(inode, INODE_MODIFY);

    unlock_inode(inode);

    return DO_STATUS(errno);
// fchownat(2): change ownership of the file at `path` (resolved
// relative to `fd`); the actual uid/gid assignment is on elided lines.
__DEFINE_LXSYSCALL5(int, fchownat, int, fd,
                    const char*, path, uid_t, uid, gid_t, gid, int, flags)
    struct v_dnode *dnode;
    struct v_inode *inode;

    errno = vfs_walkat(fd, path, flags, &dnode);

    errno = vfs_check_writable(dnode);

    inode = dnode->inode;

    __vfs_touch_inode(inode, INODE_MODIFY);

    unlock_inode(inode);

    return DO_STATUS(errno);
// faccessat(2): check accessibility of `path` in mode `amode`;
// AT_EACCESS selects effective ids, otherwise the real ids are used.
__DEFINE_LXSYSCALL4(int, faccessat, int, fd,
                    const char*, path, int, amode, int, flags)
    struct v_dnode *dnode;
    struct v_inode *inode;
    struct user_scope* uscope;

    errno = vfs_walkat(fd, path, flags, &dnode);

    // effective-id check requested
    if ((flags & AT_EACCESS)) {
        tuid = current_euid();
        tgid = current_egid();

    // default: check against the real uid/gid
    uscope = current_user_scope();
    tuid = uscope->ruid;
    tgid = uscope->rgid;

    inode = dnode->inode;

    // narrow the acl to what this uid/gid pair is granted
    acl &= check_acl_between(inode->uid, inode->gid, tuid, tgid);

    return DO_STATUS(errno);