From 871af48a7d8d1a8cca7b27e0e15d1dfa030bd172 Mon Sep 17 00:00:00 2001 From: Lunaixsky Date: Mon, 27 Jan 2025 12:15:27 +0000 Subject: [PATCH 1/1] Fix: stale dnode caching instance cause locked-up upon accessing (#52) * demote dnode cache to superblock level * refactor the vmnt and mnt_point semantics * change hashing function for lowering standard deviation of key distribution amongst small amount of buckets (<32) * vfs: ref counter fix up --- lunaix-os/includes/lunaix/fs.h | 86 +++++++++++++++++++++++------- lunaix-os/kernel/fs/mount.c | 97 ++++++++++++++++++++-------------- lunaix-os/kernel/fs/vfs.c | 76 ++++++++++---------------- lunaix-os/libs/hash.c | 11 ++-- 4 files changed, 159 insertions(+), 111 deletions(-) diff --git a/lunaix-os/includes/lunaix/fs.h b/lunaix-os/includes/lunaix/fs.h index a33b1d0..6feb2aa 100644 --- a/lunaix-os/includes/lunaix/fs.h +++ b/lunaix-os/includes/lunaix/fs.h @@ -14,8 +14,6 @@ #include #include -#include - #include #define VFS_NAME_MAXLEN 128 @@ -126,6 +124,8 @@ struct v_superblock struct filesystem* fs; struct blkbuf_cache* blks; struct hbucket* i_cache; + struct hbucket* d_cache; + void* data; unsigned int ref_count; size_t blksize; @@ -211,7 +211,7 @@ struct v_file struct v_dnode* dnode; struct llist_header* f_list; u32_t f_pos; - atomic_ulong ref_count; + unsigned long ref_count; void* data; struct v_file_ops* ops; // for caching }; @@ -285,9 +285,11 @@ struct v_dnode struct llist_header aka_list; struct llist_header children; struct llist_header siblings; + struct v_superblock* super_block; struct v_mount* mnt; - atomic_ulong ref_count; + + unsigned long ref_count; void* data; @@ -430,15 +432,15 @@ struct v_superblock* vfs_sb_alloc(); void -vfs_sb_free(struct v_superblock* sb); +vfs_sb_unref(struct v_superblock* sb); void vfs_sb_ref(struct v_superblock* sb); #define vfs_assign_sb(sb_accessor, sb) \ ({ \ - if (sb_accessor) { \ - vfs_sb_free(sb_accessor); \ + if (likely(sb_accessor)) { \ + vfs_sb_unref(sb_accessor); \ } \ vfs_sb_ref(((sb_accessor) = (sb))); \ }) @@ -455,6 +457,20 @@ vfs_d_assign_sb(struct v_dnode* dnode, struct v_superblock* sb) vfs_assign_sb(dnode->super_block, sb); } +static inline void +vfs_d_assign_vmnt(struct v_dnode* dnode, struct v_mount* vmnt) +{ + if (dnode->mnt) { + assert_msg(dnode->mnt->mnt_point != dnode, + "vmnt must be detached first"); + } + + dnode->mnt = vmnt; + + if (likely(vmnt)) + vfs_d_assign_sb(dnode, vmnt->super_block); +} + static inline void vfs_vmnt_assign_sb(struct v_mount* vmnt, struct v_superblock* sb) { @@ -462,7 +478,7 @@ vfs_vmnt_assign_sb(struct v_mount* vmnt, struct v_superblock* sb) } struct v_dnode* -vfs_d_alloc(); +vfs_d_alloc(struct v_dnode* parent, struct hstr* name); void vfs_d_free(struct v_dnode* dnode); @@ -488,15 +504,6 @@ vfs_getfd(int fd, struct v_fd** fd_s); int vfs_get_dtype(int itype); -void -vfs_ref_dnode(struct v_dnode* dnode); - -void -vfs_ref_file(struct v_file* file); - -void -vfs_unref_dnode(struct v_dnode* dnode); - int vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth); @@ -540,8 +547,49 @@ mnt_chillax(struct v_mount* mnt); int vfs_mount_root(const char* fs_name, struct device* device); -struct v_mount* -vfs_create_mount(struct v_mount* parent, struct v_dnode* mnt_point); +static inline bool +mnt_check_busy(struct v_mount* mnt) +{ + return mnt->busy_counter > 1; +} + +static inline void +vfs_ref_dnode(struct v_dnode* dnode) +{ + dnode->ref_count++; + + if (likely(dnode->mnt)) { + mnt_mkbusy(dnode->mnt); + } +} + +static inline void +vfs_unref_dnode(struct v_dnode* dnode) +{ + dnode->ref_count--; + + if (likely(dnode->mnt)) { + mnt_chillax(dnode->mnt); + } +} + +static inline void +vfs_ref_file(struct v_file* file) +{ + file->ref_count++; +} + +static inline void +vfs_unref_file(struct v_file* file) +{ + file->ref_count--; +} + +static inline bool +vfs_check_duped_file(struct v_file* file) +{ + return file->ref_count > 1; +} int vfs_check_writable(struct v_dnode* dnode); diff --git a/lunaix-os/kernel/fs/mount.c b/lunaix-os/kernel/fs/mount.c index c673e84..282ae78 100644 --- a/lunaix-os/kernel/fs/mount.c +++ b/lunaix-os/kernel/fs/mount.c @@ -11,8 +11,16 @@ LOG_MODULE("fs") struct llist_header all_mnts = { .next = &all_mnts, .prev = &all_mnts }; -struct v_mount* -vfs_create_mount(struct v_mount* parent, struct v_dnode* mnt_point) +static void +__vfs_attach_vmnt(struct v_dnode* mnt_point, struct v_mount* vmnt) +{ + vmnt->mnt_point = mnt_point; + vfs_d_assign_vmnt(mnt_point, vmnt); + vfs_ref_dnode(mnt_point); +} + +static struct v_mount* +__vfs_create_mount(struct v_mount* parent, struct v_superblock* mnt_sb) { struct v_mount* mnt = vzalloc(sizeof(struct v_mount)); if (!mnt) { @@ -25,8 +33,7 @@ vfs_create_mount(struct v_mount* parent, struct v_dnode* mnt_point) mutex_init(&mnt->lock); mnt->parent = parent; - mnt->mnt_point = mnt_point; - vfs_vmnt_assign_sb(mnt, mnt_point->super_block); + vfs_vmnt_assign_sb(mnt, mnt_sb); if (parent) { mnt_mkbusy(parent); @@ -34,28 +41,40 @@ vfs_create_mount(struct v_mount* parent, struct v_dnode* mnt_point) llist_append(&parent->submnts, &mnt->sibmnts); mutex_unlock(&mnt->parent->lock); } - - atomic_fetch_add(&mnt_point->ref_count, 1); return mnt; } -void -__vfs_release_vmnt(struct v_mount* mnt) +static void +__vfs_detach_vmnt(struct v_mount* mnt) { assert(llist_empty(&mnt->submnts)); + vfs_unref_dnode(mnt->mnt_point); + assert(!mnt->busy_counter); + if (mnt->parent) { mnt_chillax(mnt->parent); } + mnt->mnt_point->mnt = NULL; + llist_delete(&mnt->sibmnts); llist_delete(&mnt->list); - atomic_fetch_sub(&mnt->mnt_point->ref_count, 1); vfree(mnt); } -int +static inline void +__detach_node_cache_ref(struct hbucket* bucket) +{ + if (!bucket->head) { + return; + } + + bucket->head->pprev = 0; +} + +static int __vfs_do_unmount(struct v_mount* mnt) { int errno = 0; @@ -67,17 +86,18 @@ __vfs_do_unmount(struct v_mount* mnt) // detached the inodes from cache, and let lru policy to recycle them for (size_t i = 0; i < VFS_HASHTABLE_SIZE; i++) { - struct hbucket* bucket = &sb->i_cache[i]; - if (!bucket->head) { - continue; - } - bucket->head->pprev = 0; + __detach_node_cache_ref(&sb->i_cache[i]); + __detach_node_cache_ref(&sb->d_cache[i]); } - mnt->mnt_point->mnt = mnt->parent; + struct v_dnode *pos, *next; + llist_for_each(pos, next, &mnt->mnt_point->children, siblings) + { + vfs_d_free(pos); + } - vfs_sb_free(sb); - __vfs_release_vmnt(mnt); + __vfs_detach_vmnt(mnt); + vfs_d_assign_vmnt(mnt->mnt_point, mnt->parent); return errno; } @@ -163,28 +183,31 @@ vfs_mount_fsat(struct filesystem* fs, } int errno = 0; - char* dev_name = "sys"; - char* fsname = HSTR_VAL(fs->fs_name); + char* dev_name; + char* fsname; + struct v_mount *parent_mnt, *vmnt; + struct v_superblock *sb; - struct v_mount* parent_mnt = mnt_point->mnt; - struct v_superblock *sb = vfs_sb_alloc(), - *old_sb = mnt_point->super_block; + fsname = HSTR_VAL(fs->fs_name); + parent_mnt = mnt_point->mnt; + sb = vfs_sb_alloc(); - if (device) { - dev_name = device->name_val; - } + dev_name = device ? device->name_val : "sys"; // prepare v_superblock for fs::mount invoke sb->dev = device; sb->fs = fs; sb->root = mnt_point; - vfs_d_assign_sb(mnt_point, sb); + + vmnt = __vfs_create_mount(parent_mnt, sb); - if (!(mnt_point->mnt = vfs_create_mount(parent_mnt, mnt_point))) { + if (!vmnt) { errno = ENOMEM; goto cleanup; } + __vfs_attach_vmnt(mnt_point, vmnt); + mnt_point->mnt->flags = options; if (!(errno = fs->mount(sb, mnt_point))) { kprintf("mount: dev=%s, fs=%s, mode=%d", @@ -193,16 +216,14 @@ vfs_mount_fsat(struct filesystem* fs, goto cleanup; } - vfs_sb_free(old_sb); return errno; cleanup: ERROR("failed mount: dev=%s, fs=%s, mode=%d, err=%d", dev_name, fsname, options, errno); - vfs_d_assign_sb(mnt_point, old_sb); - vfs_sb_free(sb); - __vfs_release_vmnt(mnt_point->mnt); + __vfs_detach_vmnt(mnt_point->mnt); + vfs_d_assign_vmnt(mnt_point, parent_mnt); mnt_point->mnt = parent_mnt; @@ -248,7 +269,9 @@ int vfs_unmount_at(struct v_dnode* mnt_point) { int errno = 0; - struct v_superblock* sb = mnt_point->super_block; + struct v_superblock* sb; + + sb = mnt_point->super_block; if (!sb) { return EINVAL; } @@ -257,15 +280,11 @@ vfs_unmount_at(struct v_dnode* mnt_point) return EINVAL; } - if (mnt_point->mnt->busy_counter) { + if (mnt_check_busy(mnt_point->mnt)) { return EBUSY; } - if (!(errno = __vfs_do_unmount(mnt_point->mnt))) { - atomic_fetch_sub(&mnt_point->ref_count, 1); - } - - return errno; + return __vfs_do_unmount(mnt_point->mnt); } int diff --git a/lunaix-os/kernel/fs/vfs.c b/lunaix-os/kernel/fs/vfs.c index 0eec6bf..1696d6e 100644 --- a/lunaix-os/kernel/fs/vfs.c +++ b/lunaix-os/kernel/fs/vfs.c @@ -64,7 +64,6 @@ static struct cake_pile* superblock_pile; static struct cake_pile* fd_pile; struct v_dnode* vfs_sysroot; -static struct hbucket* dnode_cache; struct lru_zone *dnode_lru, *inode_lru; @@ -89,8 +88,6 @@ vfs_init() superblock_pile = cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0); - dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); - dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode); inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode); @@ -100,19 +97,23 @@ vfs_init() // 创建一个根dnode。 vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty); vfs_sysroot->parent = vfs_sysroot; - atomic_fetch_add(&vfs_sysroot->ref_count, 1); + + vfs_ref_dnode(vfs_sysroot); } static inline struct hbucket* __dcache_hash(struct v_dnode* parent, u32_t* hash) { - u32_t _hash = *hash; - // 确保低位更加随机 + struct hbucket* d_cache; + u32_t _hash; + + d_cache = parent->super_block->d_cache; + _hash = *hash; _hash = _hash ^ (_hash >> VFS_HASHBITS); - // 与parent的指针值做加法,来减小碰撞的可能性。 _hash += (u32_t)__ptr(parent); + *hash = _hash; - return &dnode_cache[_hash & VFS_HASH_MASK]; + return &d_cache[_hash & VFS_HASH_MASK]; } static inline int @@ -156,7 +157,7 @@ vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode) { assert(parent); - atomic_fetch_add(&dnode->ref_count, 1); + dnode->ref_count = 1; dnode->parent = parent; llist_append(&parent->children, &dnode->siblings); @@ -175,7 +176,7 @@ vfs_dcache_remove(struct v_dnode* dnode) hlist_delete(&dnode->hash_list); dnode->parent = NULL; - atomic_fetch_sub(&dnode->ref_count, 1); + dnode->ref_count = 0; } void @@ -204,10 +205,10 @@ vfs_open(struct v_dnode* dnode, struct v_file** file) vfile->dnode = dnode; vfile->inode = inode; - vfile->ref_count = ATOMIC_VAR_INIT(1); + vfile->ref_count = 1; vfile->ops = inode->default_fops; - if (check_file_node(inode) && !inode->pg_cache) { + if (check_regfile_node(inode) && !inode->pg_cache) { struct pcache* pcache = vzalloc(sizeof(struct pcache)); pcache_init(pcache); pcache->master = inode; @@ -218,9 +219,8 @@ vfs_open(struct v_dnode* dnode, struct v_file** file) if (errno) { cake_release(file_pile, vfile); } else { - atomic_fetch_add(&dnode->ref_count, 1); + vfs_ref_dnode(dnode); inode->open_count++; - mnt_mkbusy(dnode->mnt); *file = vfile; } @@ -293,8 +293,8 @@ vfs_pclose(struct v_file* file, pid_t pid) mutex_unlock_for(&inode->lock, pid); - if (file->ref_count > 1) { - atomic_fetch_sub(&file->ref_count, 1); + if (vfs_check_duped_file(file)) { + vfs_unref_file(file); return 0; } @@ -302,8 +302,7 @@ vfs_pclose(struct v_file* file, pid_t pid) goto done; } - atomic_fetch_sub(&file->dnode->ref_count, 1); - mnt_chillax(file->dnode->mnt); + vfs_unref_dnode(file->dnode); cake_release(file_pile, file); /* @@ -383,7 +382,10 @@ vfs_sb_alloc() struct v_superblock* sb = cake_grab(superblock_pile); memset(sb, 0, sizeof(*sb)); llist_init_head(&sb->sb_list); + sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); + sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket)); + sb->ref_count = 1; return sb; } @@ -395,12 +397,12 @@ vfs_sb_ref(struct v_superblock* sb) } void -vfs_sb_free(struct v_superblock* sb) +vfs_sb_unref(struct v_superblock* sb) { assert(sb->ref_count); sb->ref_count--; - if (sb->ref_count) { + if (likely(sb->ref_count)) { return; } @@ -409,6 +411,8 @@ vfs_sb_free(struct v_superblock* sb) } vfree(sb->i_cache); + vfree(sb->d_cache); + cake_release(superblock_pile, sb); } @@ -454,7 +458,6 @@ vfs_d_alloc(struct v_dnode* parent, struct hstr* name) llist_init_head(&dnode->aka_list); mutex_init(&dnode->lock); - dnode->ref_count = ATOMIC_VAR_INIT(0); dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0); hstrcpy(&dnode->name, name); @@ -493,7 +496,7 @@ vfs_d_free(struct v_dnode* dnode) dnode->destruct(dnode); } - vfs_sb_free(dnode->super_block); + vfs_sb_unref(dnode->super_block); vfree((void*)dnode->name.value); cake_release(dnode_pile, dnode); } @@ -566,7 +569,7 @@ vfs_i_free(struct v_inode* inode) inode->destruct(inode); } - vfs_sb_free(inode->sb); + vfs_sb_unref(inode->sb); hlist_delete(&inode->hash_list); cake_release(inode_pile, inode); } @@ -1324,7 +1327,7 @@ vfs_dup_fd(struct v_fd* old, struct v_fd** new) memcpy(copied, old, sizeof(struct v_fd)); - atomic_fetch_add(&old->file->ref_count, 1); + vfs_ref_file(old->file); *new = copied; @@ -1431,31 +1434,6 @@ done: return DO_STATUS(errno); } -void -vfs_ref_file(struct v_file* file) -{ - atomic_fetch_add(&file->ref_count, 1); -} - -void -vfs_ref_dnode(struct v_dnode* dnode) -{ - atomic_fetch_add(&dnode->ref_count, 1); - - if (dnode->mnt) { - mnt_mkbusy(dnode->mnt); - } -} - -void -vfs_unref_dnode(struct v_dnode* dnode) -{ - atomic_fetch_sub(&dnode->ref_count, 1); - if (dnode->mnt) { - mnt_chillax(dnode->mnt); - } -} - int vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode) { diff --git a/lunaix-os/libs/hash.c b/lunaix-os/libs/hash.c index 3a0555f..7182ab6 100644 --- a/lunaix-os/libs/hash.c +++ b/lunaix-os/libs/hash.c @@ -2,9 +2,12 @@ #include /** - * @brief Simple string hash function + * @brief Simple string hash function (sdbm) * - * ref: https://stackoverflow.com/a/7666577 + * ref: http://www.cse.yorku.ca/~oz/hash.html + * + * sdbm has lower standard deviation in bucket distribution + * than djb2 (previously used) for low bucket size (16, 32). * * @param str * @return unsigned int @@ -15,11 +18,11 @@ strhash_32(const char* str, u32_t truncate_to) if (!str) return 0; - u32_t hash = 5381; + u32_t hash = 0; int c; while ((c = *str++)) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + hash = (hash << 6) + (hash << 16) + c - hash; return hash >> (HASH_SIZE_BITS - truncate_to); } \ No newline at end of file -- 2.27.0