X-Git-Url: https://scm.lunaixsky.com/lunaix-os.git/blobdiff_plain/270869139db617e29a35bb9ded41087bb702f9ac..0fd474df7001837bde53da0e42e83081827c9641:/lunaix-os/kernel/fs/vfs.c diff --git a/lunaix-os/kernel/fs/vfs.c b/lunaix-os/kernel/fs/vfs.c index 415e36b..32014d3 100644 --- a/lunaix-os/kernel/fs/vfs.c +++ b/lunaix-os/kernel/fs/vfs.c @@ -57,14 +57,16 @@ #include +#define INODE_ACCESSED 0 +#define INODE_MODIFY 1 + static struct cake_pile* dnode_pile; static struct cake_pile* inode_pile; static struct cake_pile* file_pile; static struct cake_pile* superblock_pile; static struct cake_pile* fd_pile; -struct v_dnode* vfs_sysroot; -static struct hbucket* dnode_cache; +struct v_dnode* vfs_sysroot = NULL; struct lru_zone *dnode_lru, *inode_lru; @@ -89,8 +91,6 @@ vfs_init() superblock_pile = cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0); - dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); - dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode); inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode); @@ -100,19 +100,23 @@ vfs_init() // 创建一个根dnode。 vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty); vfs_sysroot->parent = vfs_sysroot; - atomic_fetch_add(&vfs_sysroot->ref_count, 1); + + vfs_ref_dnode(vfs_sysroot); } static inline struct hbucket* __dcache_hash(struct v_dnode* parent, u32_t* hash) { - u32_t _hash = *hash; - // 确保低位更加随机 + struct hbucket* d_cache; + u32_t _hash; + + d_cache = parent->super_block->d_cache; + _hash = *hash; _hash = _hash ^ (_hash >> VFS_HASHBITS); - // 与parent的指针值做加法,来减小碰撞的可能性。 _hash += (u32_t)__ptr(parent); + *hash = _hash; - return &dnode_cache[_hash & VFS_HASH_MASK]; + return &d_cache[_hash & VFS_HASH_MASK]; } static inline int @@ -151,12 +155,26 @@ vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str) return NULL; } +static void +__vfs_touch_inode(struct v_inode* inode, const int type) +{ + if (type == INODE_MODIFY) { + inode->mtime = clock_unixtime(); + } + + else if (type == INODE_ACCESSED) { + inode->atime = clock_unixtime(); + } + + lru_use_one(inode_lru, &inode->lru); +} + void vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode) { assert(parent); - atomic_fetch_add(&dnode->ref_count, 1); + dnode->ref_count = 1; dnode->parent = parent; llist_append(&parent->children, &dnode->siblings); @@ -175,7 +193,7 @@ vfs_dcache_remove(struct v_dnode* dnode) hlist_delete(&dnode->hash_list); dnode->parent = NULL; - atomic_fetch_sub(&dnode->ref_count, 1); + dnode->ref_count = 0; } void @@ -191,12 +209,12 @@ vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode) int vfs_open(struct v_dnode* dnode, struct v_file** file) { - if (!dnode->inode || !dnode->inode->ops->open) { + struct v_inode* inode = dnode->inode; + + if (!inode || !inode->ops->open) { return ENOTSUP; } - struct v_inode* inode = dnode->inode; - lock_inode(inode); struct v_file* vfile = cake_grab(file_pile); @@ -204,10 +222,10 @@ vfs_open(struct v_dnode* dnode, struct v_file** file) vfile->dnode = dnode; vfile->inode = inode; - vfile->ref_count = ATOMIC_VAR_INIT(1); + vfile->ref_count = 1; vfile->ops = inode->default_fops; - if (check_file_node(inode) && !inode->pg_cache) { + if (check_regfile_node(inode) && !inode->pg_cache) { struct pcache* pcache = vzalloc(sizeof(struct pcache)); pcache_init(pcache); pcache->master = inode; @@ -218,9 +236,8 @@ vfs_open(struct v_dnode* dnode, struct v_file** file) if (errno) { cake_release(file_pile, vfile); } else { - atomic_fetch_add(&dnode->ref_count, 1); + vfs_ref_dnode(dnode); inode->open_count++; - mnt_mkbusy(dnode->mnt); *file = vfile; } @@ -270,12 +287,7 @@ vfs_pclose(struct v_file* file, pid_t pid) { struct v_inode* inode; int errno = 0; - - if (file->ref_count > 1) { - atomic_fetch_sub(&file->ref_count, 1); - return 0; - } - + inode = file->inode; /* @@ -296,29 +308,41 @@ vfs_pclose(struct v_file* file, pid_t pid) * process is writing to this file later after B exit. */ - if (mutex_on_hold(&inode->lock)) { - mutex_unlock_for(&inode->lock, pid); + mutex_unlock_for(&inode->lock, pid); + + if (vfs_check_duped_file(file)) { + vfs_unref_file(file); + return 0; } - lock_inode(inode); - - pcache_commit_all(inode); if ((errno = file->ops->close(file))) { - goto unlock; + goto done; } - atomic_fetch_sub(&file->dnode->ref_count, 1); + vfs_unref_dnode(file->dnode); + cake_release(file_pile, file); + + /* + if the current inode is not being locked by other + threads that does not share same open context, + then we can try to do sync opportunistically + */ + if (mutex_on_hold(&inode->lock)) { + goto done; + } + + lock_inode(inode); + + pcache_commit_all(inode); inode->open_count--; if (!inode->open_count) { __sync_inode_nolock(inode); } - mnt_chillax(file->dnode->mnt); - cake_release(file_pile, file); - -unlock: unlock_inode(inode); + +done: return errno; } @@ -375,7 +399,10 @@ vfs_sb_alloc() struct v_superblock* sb = cake_grab(superblock_pile); memset(sb, 0, sizeof(*sb)); llist_init_head(&sb->sb_list); + sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); + sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); + sb->ref_count = 1; return sb; } @@ -387,12 +414,12 @@ vfs_sb_ref(struct v_superblock* sb) } void -vfs_sb_free(struct v_superblock* sb) +vfs_sb_unref(struct v_superblock* sb) { assert(sb->ref_count); sb->ref_count--; - if (sb->ref_count) { + if (likely(sb->ref_count)) { return; } @@ -401,6 +428,8 @@ vfs_sb_free(struct v_superblock* sb) } vfree(sb->i_cache); + vfree(sb->d_cache); + cake_release(superblock_pile, sb); } @@ -446,7 +475,6 @@ vfs_d_alloc(struct v_dnode* parent, struct hstr* name) llist_init_head(&dnode->aka_list); mutex_init(&dnode->lock); - dnode->ref_count = ATOMIC_VAR_INIT(0); dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0); hstrcpy(&dnode->name, name); @@ -485,7 +513,7 @@ vfs_d_free(struct v_dnode* dnode) dnode->destruct(dnode); } - vfs_sb_free(dnode->super_block); + vfs_sb_unref(dnode->super_block); vfree((void*)dnode->name.value); cake_release(dnode_pile, dnode); } @@ -558,7 +586,7 @@ vfs_i_free(struct v_inode* inode) inode->destruct(inode); } - vfs_sb_free(inode->sb); + vfs_sb_unref(inode->sb); hlist_delete(&inode->hash_list); cake_release(inode_pile, inode); } @@ -685,6 +713,31 @@ done: return errno; } + +static bool +__check_unlinkable(struct v_dnode* dnode) +{ + int acl; + bool wr_self, wr_parent; + struct v_dnode* parent; + + parent = dnode->parent; + acl = dnode->inode->acl; + + wr_self = check_allow_write(dnode->inode); + wr_parent = check_allow_write(parent->inode); + + if (!fsacl_test(acl, svtx)) { + return wr_self; + } + + if (current_euid() == dnode->inode->uid) { + return true; + } + + return wr_self && wr_parent; +} + int vfs_do_open(const char* path, int options) { @@ -803,6 +856,11 @@ __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent) goto unlock; } + if (!check_allow_read(inode)) { + errno = EPERM; + goto unlock; + } + struct dir_context dctx = (struct dir_context) { .cb_data = dent, .read_complete_callback = __vfs_readdir_callback @@ -825,6 +883,8 @@ __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count) { int errno = 0; struct v_fd* fd_s; + struct v_inode* inode; + if ((errno = vfs_getfd(fd, &fd_s))) { goto done; } @@ -835,23 +895,29 @@ __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count) goto done; } - lock_inode(file->inode); + if (!check_allow_read(file->inode)) { + errno = EPERM; + goto done; + } + + inode = file->inode; + lock_inode(inode); - file->inode->atime = clock_unixtime(); + __vfs_touch_inode(inode, INODE_ACCESSED); - if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) { - errno = file->ops->read(file->inode, buf, count, file->f_pos); + if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) { + errno = file->ops->read(inode, buf, count, file->f_pos); } else { - errno = pcache_read(file->inode, buf, count, file->f_pos); + errno = pcache_read(inode, buf, count, file->f_pos); } if (errno > 0) { file->f_pos += errno; - unlock_inode(file->inode); + unlock_inode(inode); return errno; } - unlock_inode(file->inode); + unlock_inode(inode); done: return DO_STATUS(errno); @@ -880,7 +946,7 @@ __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count) inode = file->inode; lock_inode(inode); - inode->mtime = clock_unixtime(); + __vfs_touch_inode(inode, INODE_MODIFY); if ((fd_s->flags & O_APPEND)) { file->f_pos = inode->fsize; } @@ -921,6 +987,11 @@ __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options) goto done; } + if (!check_allow_read(inode)) { + errno = EPERM; + goto done; + } + lock_inode(inode); int overflow = 0; @@ -1001,6 +1072,10 @@ vfs_readlink(struct v_dnode* dnode, char* buf, size_t size) return ENOTSUP; } + if (!check_allow_read(inode)) { + return EPERM; + } + lock_inode(inode); int errno = inode->ops->read_symlink(inode, &link); @@ -1107,6 +1182,11 @@ __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname) lock_dnode(dnode); + if (!__check_unlinkable(dnode)) { + errno = EPERM; + goto done; + } + if ((errno = vfs_check_writable(dnode))) { goto done; } @@ -1203,7 +1283,7 @@ done: return DO_STATUS(errno); } -int +static int __vfs_do_unlink(struct v_dnode* dnode) { int errno; @@ -1213,6 +1293,10 @@ __vfs_do_unlink(struct v_dnode* dnode) return EBUSY; } + if (!__check_unlinkable(dnode)) { + return EPERM; + } + if ((errno = vfs_check_writable(dnode))) { return errno; } @@ -1316,7 +1400,7 @@ vfs_dup_fd(struct v_fd* old, struct v_fd** new) memcpy(copied, old, sizeof(struct v_fd)); - atomic_fetch_add(&old->file->ref_count, 1); + vfs_ref_file(old->file); *new = copied; @@ -1423,53 +1507,34 @@ done: return DO_STATUS(errno); } -void -vfs_ref_file(struct v_file* file) -{ - atomic_fetch_add(&file->ref_count, 1); -} - -void -vfs_ref_dnode(struct v_dnode* dnode) +static int +vfs_do_chdir_nolock(struct proc_info* proc, struct v_dnode* dnode) { - atomic_fetch_add(&dnode->ref_count, 1); - - if (dnode->mnt) { - mnt_mkbusy(dnode->mnt); + if (!check_directory_node(dnode->inode)) { + return ENOTDIR; } -} -void -vfs_unref_dnode(struct v_dnode* dnode) -{ - atomic_fetch_sub(&dnode->ref_count, 1); - if (dnode->mnt) { - mnt_chillax(dnode->mnt); + if (proc->cwd) { + vfs_unref_dnode(proc->cwd); } + + vfs_ref_dnode(dnode); + proc->cwd = dnode; + + return 0; } -int +static int vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode) { int errno = 0; lock_dnode(dnode); - if (!check_directory_node(dnode->inode)) { - errno = ENOTDIR; - goto done; - } - - if (proc->cwd) { - vfs_unref_dnode(proc->cwd); - } - - vfs_ref_dnode(dnode); - proc->cwd = dnode; + errno = vfs_do_chdir_nolock(proc, dnode); unlock_dnode(dnode); -done: return errno; } @@ -1503,6 +1568,31 @@ done: return DO_STATUS(errno); } + +__DEFINE_LXSYSCALL1(int, chroot, const char*, path) +{ + int errno; + struct v_dnode* dnode; + if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) { + return errno; + } + + lock_dnode(dnode); + + errno = vfs_do_chdir_nolock(__current, dnode); + if (errno) { + unlock_dnode(dnode); + goto done; + } + + __current->root = dnode; + + unlock_dnode(dnode); + +done: + return DO_STATUS(errno); +} + __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size) { int errno = 0; @@ -1643,29 +1733,140 @@ __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat) struct v_inode* vino = fds->file->inode; struct device* fdev = vino->sb->dev; - *stat = (struct file_stat){.st_ino = vino->id, - .st_blocks = vino->lb_usage, - .st_size = vino->fsize, - .mode = vino->itype, - .st_ioblksize = PAGE_SIZE, - .st_blksize = vino->sb->blksize}; + stat->st_ino = vino->id; + stat->st_blocks = vino->lb_usage; + stat->st_size = vino->fsize; + stat->st_blksize = vino->sb->blksize; + stat->st_nlink = vino->link_count; + stat->st_uid = vino->uid; + stat->st_gid = vino->gid; + + stat->st_ctim = vino->ctime; + stat->st_atim = vino->atime; + stat->st_mtim = vino->mtime; + + stat->st_mode = (vino->itype << 16) | vino->acl; + + stat->st_ioblksize = PAGE_SIZE; if (check_device_node(vino)) { struct device* rdev = resolve_device(vino->data); - if (!rdev || rdev->magic != DEV_STRUCT_MAGIC) { + if (!rdev) { errno = EINVAL; goto done; } stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp, .unique = rdev->ident.unique, - .index = rdev->dev_uid}; + .index = dev_uid(rdev) }; } if (fdev) { stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp, .unique = fdev->ident.unique, - .index = fdev->dev_uid}; + .index = dev_uid(fdev) }; + } + +done: + return DO_STATUS(errno); +} + +__DEFINE_LXSYSCALL4(int, fchmodat, int, fd, + const char*, path, int, mode, int, flags) +{ + int errno; + struct v_dnode *dnode; + struct v_inode* inode; + + errno = vfs_walkat(fd, path, flags, &dnode); + if (errno) { + goto done; + } + + errno = vfs_check_writable(dnode); + if (errno) { + return errno; + } + + inode = dnode->inode; + lock_inode(inode); + + if (!current_is_root()) { + mode = mode & FSACL_RWXMASK; + } + + inode->acl = mode; + __vfs_touch_inode(inode, INODE_MODIFY); + + unlock_inode(inode); + +done: + return DO_STATUS(errno); +} + +__DEFINE_LXSYSCALL5(int, fchownat, int, fd, + const char*, path, uid_t, uid, gid_t, gid, int, flags) +{ + int errno; + struct v_dnode *dnode; + struct v_inode *inode; + + errno = vfs_walkat(fd, path, flags, &dnode); + if (errno) { + goto done; + } + + errno = vfs_check_writable(dnode); + if (errno) { + return errno; + } + + inode = dnode->inode; + lock_inode(inode); + + inode->uid = uid; + inode->gid = gid; + __vfs_touch_inode(inode, INODE_MODIFY); + + unlock_inode(inode); + +done: + return DO_STATUS(errno); +} + +__DEFINE_LXSYSCALL4(int, faccessat, int, fd, + const char*, path, int, amode, int, flags) +{ + int errno, acl; + struct v_dnode *dnode; + struct v_inode *inode; + struct user_scope* uscope; + + uid_t tuid; + gid_t tgid; + + errno = vfs_walkat(fd, path, flags, &dnode); + if (errno) { + goto done; + } + + if ((flags & AT_EACCESS)) { + tuid = current_euid(); + tgid = current_egid(); + } + else { + uscope = current_user_scope(); + tuid = uscope->ruid; + tgid = uscope->rgid; + } + + inode = dnode->inode; + + acl = inode->acl; + acl &= amode; + acl &= check_acl_between(inode->uid, inode->gid, tuid, tgid); + if (!acl) { + errno = EACCESS; } done: