From 4cf445cb569be5f1f078ea27fd5d1b172402f44f Mon Sep 17 00:00:00 2001 From: Lunaixsky Date: Sat, 3 May 2025 01:45:01 +0100 Subject: [PATCH 1/1] fix: ext2 directory insertion; racing on inode create use nesting lock for v_inode and v_dnode locking. hold the lock on the parent directory upon creating inode to prevent contention across multiple processes that partake in similar actions. fix the edge case of dirent insertion with data block boundary crossing. fix the incorrect calculation between fsblock index and ext2 data block/inode index. new test case: fragfile - random file spammer --- lunaix-os/includes/lunaix/ds/mutex.h | 5 +- lunaix-os/includes/lunaix/fs.h | 8 +- lunaix-os/kernel/ds/mutex.c | 19 ++- lunaix-os/kernel/fs/LConfig | 2 + lunaix-os/kernel/fs/ext2/LConfig | 11 ++ lunaix-os/kernel/fs/ext2/dir.c | 174 ++++++++++++++++----------- lunaix-os/kernel/fs/ext2/ext2.h | 15 ++- lunaix-os/kernel/fs/ext2/file.c | 4 +- lunaix-os/kernel/fs/ext2/group.c | 5 +- lunaix-os/kernel/fs/ext2/inode.c | 148 +++++++++++++++++------ lunaix-os/kernel/fs/vfs.c | 56 ++++----- lunaix-os/usr/LBuild | 2 + lunaix-os/usr/fragfile.c | 57 +++++++++ 13 files changed, 357 insertions(+), 149 deletions(-) create mode 100644 lunaix-os/kernel/fs/ext2/LConfig create mode 100644 lunaix-os/usr/fragfile.c diff --git a/lunaix-os/includes/lunaix/ds/mutex.h b/lunaix-os/includes/lunaix/ds/mutex.h index 510f1fb..304cc6d 100644 --- a/lunaix-os/includes/lunaix/ds/mutex.h +++ b/lunaix-os/includes/lunaix/ds/mutex.h @@ -6,7 +6,7 @@ typedef struct mutex_s { - atomic_ulong lk; + atomic_uint lk; pid_t owner; } mutex_t; @@ -37,4 +37,7 @@ mutex_unlock_nested(mutex_t* mutex); void mutex_unlock_for(mutex_t* mutex, pid_t pid); +bool +mutex_trylock(mutex_t* mutex); + #endif /* __LUNAIX_MUTEX_H */ diff --git a/lunaix-os/includes/lunaix/fs.h b/lunaix-os/includes/lunaix/fs.h index bba939c..3369ae3 100644 --- a/lunaix-os/includes/lunaix/fs.h +++ b/lunaix-os/includes/lunaix/fs.h @@ -62,17 +62,17 @@ ('0' <= (chr) && (chr) <= 
'9') || (chr) == '.' || (chr) == '_' || \ (chr) == '-' || (chr) == ':') -#define unlock_inode(inode) mutex_unlock(&inode->lock) +#define unlock_inode(inode) mutex_unlock_nested(&inode->lock) #define lock_inode(inode) \ ({ \ - mutex_lock(&inode->lock); \ + mutex_lock_nested(&inode->lock); \ lru_use_one(inode_lru, &inode->lru); \ }) -#define unlock_dnode(dnode) mutex_unlock(&dnode->lock) +#define unlock_dnode(dnode) mutex_unlock_nested(&dnode->lock) #define lock_dnode(dnode) \ ({ \ - mutex_lock(&dnode->lock); \ + mutex_lock_nested(&dnode->lock); \ lru_use_one(dnode_lru, &dnode->lru); \ }) diff --git a/lunaix-os/kernel/ds/mutex.c b/lunaix-os/kernel/ds/mutex.c index 41e131d..4aac422 100644 --- a/lunaix-os/kernel/ds/mutex.c +++ b/lunaix-os/kernel/ds/mutex.c @@ -2,6 +2,12 @@ #include #include +#define __do_lock(mutex) /* bump the lock count and record the current process as owner */ \ + ({ \ + atomic_fetch_add(&(mutex)->lk, 1);\ + (mutex)->owner = __current->pid; \ + }) + static inline bool must_inline __mutex_check_owner(mutex_t* mutex) { @@ -15,8 +21,7 @@ __mutext_lock(mutex_t* mutex) preempt_current(); } - atomic_fetch_add(&mutex->lk, 1); - mutex->owner = __current->pid; + __do_lock(mutex); } static inline void must_inline @@ -32,6 +37,16 @@ mutex_lock(mutex_t* mutex) __mutext_lock(mutex); } +bool +mutex_trylock(mutex_t* mutex) +{ + if (atomic_load(&mutex->lk)) /* NOTE(review): the load and the later add are two separate steps - confirm no other context can take the lock in between */ + return false; + + __do_lock(mutex); + return true; +} + void mutex_unlock(mutex_t* mutex) { diff --git a/lunaix-os/kernel/fs/LConfig b/lunaix-os/kernel/fs/LConfig index 194e186..880d6c6 100644 --- a/lunaix-os/kernel/fs/LConfig +++ b/lunaix-os/kernel/fs/LConfig @@ -19,3 +19,5 @@ def file_system(): type(bool) default(True) + +include("ext2") \ No newline at end of file diff --git a/lunaix-os/kernel/fs/ext2/LConfig b/lunaix-os/kernel/fs/ext2/LConfig new file mode 100644 index 0000000..f5cb0a6 --- /dev/null +++ b/lunaix-os/kernel/fs/ext2/LConfig @@ -0,0 +1,11 @@ + +@Collection("ext2") +def ext2_fs(): + add_to_collection(file_system) + + @Term("Debug Messages") + def 
ext2_debug_msg(): + type(bool) + default(False) + + return v(fs_ext2) \ No newline at end of file diff --git a/lunaix-os/kernel/fs/ext2/dir.c b/lunaix-os/kernel/fs/ext2/dir.c index a6af51e..e09a17b 100644 --- a/lunaix-os/kernel/fs/ext2/dir.c +++ b/lunaix-os/kernel/fs/ext2/dir.c @@ -66,89 +66,128 @@ __dirent_realsize(struct ext2b_dirent* dirent) return sizeof(*dirent) - sizeof(dirent->name) + dirent->name_len; } -#define DIRENT_SLOT_MID 0 -#define DIRENT_SLOT_LAST 1 -#define DIRENT_SLOT_EMPTY 2 +#define DIRENT_INSERT 0 +#define DIRENT_APPEND 1 + +#define DIRENT_ALIGNMENT sizeof(int) + +struct dirent_locator +{ + size_t search_size; + + int state; + struct ext2_dnode result; + size_t new_prev_reclen; + size_t db_pos; +}; + + +static inline void must_inline +__init_locator(struct dirent_locator* loc, size_t search_size) +{ + *loc = (struct dirent_locator) { .search_size = search_size }; +} static int -__find_free_dirent_slot(struct v_inode* inode, size_t size, - struct ext2_dnode* e_dnode_out, size_t *reclen) +__find_free_dirent_slot(struct v_inode* inode, struct dirent_locator* loc) { - struct ext2_iterator iter; + struct ext2_iterator dbit; struct ext2b_dirent *dir = NULL; + struct ext2_dnode* result; + bbuf_t prev_buf = bbuf_null; bool found = false; - ext2db_itbegin(&iter, inode); - - size_t sz = 0; + size_t sz = 0, aligned = 0; unsigned int rec = 0, total_rec = 0; + unsigned int dir_size; - while (!found && ext2db_itnext(&iter)) + aligned = ROUNDUP(loc->search_size, DIRENT_ALIGNMENT); + result = &loc->result; + + ext2db_itbegin(&dbit, inode, DBIT_MODE_BLOCK); + + while (!found && ext2db_itnext(&dbit)) { rec = 0; do { - dir = (struct ext2b_dirent*)offset(iter.data, rec); + dir = (struct ext2b_dirent*)offset(dbit.data, rec); sz = dir->rec_len - __dirent_realsize(dir); - sz = ROUNDDOWN(sz, 4); - if (sz >= size) { + sz = ROUNDDOWN(sz, DIRENT_ALIGNMENT); + if ((signed)sz >= (signed)aligned) { found = true; break; } rec += dir->rec_len; total_rec += dir->rec_len; - } 
while(rec < iter.blksz); + } while(rec < dbit.blksz); if (likely(prev_buf)) { fsblock_put(prev_buf); } - prev_buf = fsblock_take(iter.sel_buf); + prev_buf = fsblock_take(dbit.sel_buf); } + ext2_debug("dr_find_slot: found=%d, blk_off=%d, off=%d, gap=%d, blk=%d/%d", + found, rec, total_rec, sz, dbit.pos - 1, dbit.end_pos); + + loc->db_pos = dbit.pos - 1; + if (blkbuf_nullbuf(prev_buf)) { // this dir is brand new - return DIRENT_SLOT_EMPTY; + loc->state = DIRENT_APPEND; + goto done; } - e_dnode_out->prev = (struct ext2_dnode_sub) { + dir_size = ROUNDUP(__dirent_realsize(dir), 4); + loc->new_prev_reclen = dir_size; + + result->prev = (struct ext2_dnode_sub) { .buf = fsblock_take(prev_buf), .dirent = dir }; if (!found) { // if prev is the last, and no more space left behind. - assert_fs(rec == iter.blksz); + assert_fs(rec == dbit.blksz); - e_dnode_out->self.buf = bbuf_null; - ext2db_itend(&iter); - return itstate_sel(&iter, DIRENT_SLOT_LAST); - } - - unsigned int dir_size; + result->self.buf = bbuf_null; + ext2db_itend(&dbit); - dir_size = ROUNDUP(__dirent_realsize(dir), 4); - *reclen = dir_size; + loc->state = DIRENT_APPEND; + goto done; + } - rec = total_rec + dir_size; - dir = (struct ext2b_dirent*)offset(iter.data, rec); + rec += dir_size; + dir = (struct ext2b_dirent*)offset(dbit.data, rec); - e_dnode_out->self = (struct ext2_dnode_sub) { - .buf = fsblock_take(iter.sel_buf), + result->self = (struct ext2_dnode_sub) { + .buf = fsblock_take(dbit.sel_buf), .dirent = dir }; - ext2db_itend(&iter); - return DIRENT_SLOT_MID; + ext2db_itend(&dbit); + + loc->state = DIRENT_INSERT; + +done: + return itstate_sel(&dbit, 0); } static inline void -__destruct_ext2_dnode(struct ext2_dnode* e_dno) +__release_dnode_blocks(struct ext2_dnode* e_dno) { fsblock_put(e_dno->prev.buf); fsblock_put(e_dno->self.buf); +} + +static inline void +__destruct_ext2_dnode(struct ext2_dnode* e_dno) +{ + __release_dnode_blocks(e_dno); vfree(e_dno); } @@ -452,54 +491,35 @@ ext2dr_insert(struct v_inode* 
this, struct ext2b_dirent* dirent, { int errno; size_t size, new_reclen, old_reclen; - struct ext2_inode* e_self; struct ext2_dnode* e_dno; struct ext2b_dirent* prev_dirent; + struct dirent_locator locator; bbuf_t buf; - e_self = EXT2_INO(this); - e_dno = vzalloc(sizeof(*e_dno)); - size = __dirent_realsize(dirent); - errno = __find_free_dirent_slot(this, size, e_dno, &new_reclen); + __init_locator(&locator, size); + + errno = __find_free_dirent_slot(this, &locator); if (errno < 0) { goto failed; } - if (errno == DIRENT_SLOT_EMPTY) { - if ((errno = ext2db_acquire(this, 0, &buf))) { + e_dno = &locator.result; + new_reclen = locator.new_prev_reclen; + old_reclen = fsapi_block_size(this->sb); + + if (locator.state != DIRENT_INSERT) + { + if ((errno = ext2db_acquire(this, locator.db_pos, &buf))) goto failed; - } this->fsize += fsapi_block_size(this->sb); ext2ino_update(this); - old_reclen = fsapi_block_size(this->sb); e_dno->self.buf = buf; - e_dno->self.dirent = blkbuf_data(buf); - - goto place_dir; + e_dno->self.dirent = block_buffer(buf, struct ext2b_dirent); } - prev_dirent = e_dno->prev.dirent; - old_reclen = prev_dirent->rec_len; - - if (errno == DIRENT_SLOT_LAST) { - // prev is last record - if ((errno = ext2db_alloc(this, &buf))) { - goto failed; - } - - this->fsize += fsapi_block_size(this->sb); - ext2ino_update(this); - - new_reclen = __dirent_realsize(prev_dirent); - new_reclen = ROUNDUP(new_reclen, sizeof(int)); - e_dno->self = (struct ext2_dnode_sub) { - .buf = buf, - .dirent = block_buffer(buf, struct ext2b_dirent) - }; - } /* --- +--------+ --- @@ -519,17 +539,29 @@ ext2dr_insert(struct v_inode* this, struct ext2b_dirent* dirent, +--------+ */ - old_reclen -= new_reclen; - prev_dirent->rec_len = new_reclen; - fsblock_dirty(e_dno->prev.buf); + else + { + prev_dirent = e_dno->prev.dirent; + old_reclen = prev_dirent->rec_len; + old_reclen -= new_reclen; + + prev_dirent->rec_len = new_reclen; + fsblock_dirty(e_dno->prev.buf); + } + + ext2_debug("dr_insert: 
state=%d, blk=%d, prev_rlen=%d, new_rlen=%d", + locator.state, locator.db_pos, new_reclen, old_reclen); -place_dir: - dirent->rec_len = ROUNDUP(old_reclen, sizeof(int)); + assert_fs(new_reclen > 0); + assert_fs(old_reclen > 0); + + dirent->rec_len = old_reclen; + memcpy(e_dno->self.dirent, dirent, size); fsblock_dirty(e_dno->self.buf); if (!e_dno_out) { - __destruct_ext2_dnode(e_dno); + __release_dnode_blocks(e_dno); } else { *e_dno_out = e_dno; @@ -538,7 +570,7 @@ place_dir: return errno; failed: - __destruct_ext2_dnode(e_dno); + __release_dnode_blocks(e_dno); return errno; } diff --git a/lunaix-os/kernel/fs/ext2/ext2.h b/lunaix-os/kernel/fs/ext2/ext2.h index 1f12315..93996cd 100644 --- a/lunaix-os/kernel/fs/ext2/ext2.h +++ b/lunaix-os/kernel/fs/ext2/ext2.h @@ -7,6 +7,13 @@ #include #include +#ifdef CONFIG_EXT2_DEBUG_MSG +# include +# define ext2_debug(fmt, ...) kprintf_v("ext2", fmt, ##__VA_ARGS__) +#else +# define ext2_debug(fmt, ...) +#endif + #define FEAT_COMPRESSION 0b00000001 #define FEAT_RESIZE_INO 0b00000010 #define FEAT_FILETYPE 0b00000100 @@ -269,7 +276,8 @@ struct ext2_inode bbuf_t buf; // partial inotab that holds this inode unsigned int inds_lgents; // log2(# of block in an indirection level) unsigned int ino_id; - size_t indirect_blocks; + size_t nr_fsblks; + size_t nr_indblks; size_t isize; struct ext2b_inode* ino; // raw ext2 inode @@ -405,8 +413,11 @@ ext2ino_linkto(struct ext2_inode* e_ino, struct ext2b_dirent* dirent) fsblock_dirty(e_ino->buf); } +#define DBIT_MODE_ISIZE 0 +#define DBIT_MODE_BLOCK 1 + void -ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode); +ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode, int mode); void ext2db_itend(struct ext2_iterator* iter); diff --git a/lunaix-os/kernel/fs/ext2/file.c b/lunaix-os/kernel/fs/ext2/file.c index 0fe4d3d..d94371d 100644 --- a/lunaix-os/kernel/fs/ext2/file.c +++ b/lunaix-os/kernel/fs/ext2/file.c @@ -96,7 +96,7 @@ ext2_inode_read(struct v_inode *inode, blksz = 
e_sb->block_size; end = fpos + len; - ext2db_itbegin(&iter, inode); + ext2db_itbegin(&iter, inode, DBIT_MODE_ISIZE); ext2db_itffw(&iter, fpos / blksz); while (fpos < end && ext2db_itnext(&iter)) { @@ -134,7 +134,7 @@ ext2_inode_read_page(struct v_inode *inode, void *buffer, size_t fpos) n = PAGE_SIZE / e_sb->block_size; transfer_sz = MIN(PAGE_SIZE, e_sb->block_size); - ext2db_itbegin(&iter, inode); + ext2db_itbegin(&iter, inode, DBIT_MODE_ISIZE); ext2db_itffw(&iter, blk_start); while (n-- && ext2db_itnext(&iter)) diff --git a/lunaix-os/kernel/fs/ext2/group.c b/lunaix-os/kernel/fs/ext2/group.c index b77d6b7..277d690 100644 --- a/lunaix-os/kernel/fs/ext2/group.c +++ b/lunaix-os/kernel/fs/ext2/group.c @@ -89,8 +89,8 @@ __try_load_bitmap(struct v_superblock* vsb, struct ext2_sbinfo* ext2sb; struct ext2_bmp* bmp; struct llist_header* flist, *flist_entry; + unsigned int bmp_blk_id, bmp_size; bbuf_t buf; - unsigned int blk_id, bmp_blk_id, bmp_size; ext2sb = EXT2_SB(vsb); @@ -111,8 +111,7 @@ __try_load_bitmap(struct v_superblock* vsb, flist = &ext2sb->free_list_sel[type]; flist_entry = &gd->free_list_sel[type]; - blk_id = ext2_datablock(vsb, bmp_blk_id); - buf = fsblock_get(vsb, blk_id); + buf = fsblock_get(vsb, bmp_blk_id); if (blkbuf_errbuf(buf)) { return false; } diff --git a/lunaix-os/kernel/fs/ext2/inode.c b/lunaix-os/kernel/fs/ext2/inode.c index 5871d07..b784b38 100644 --- a/lunaix-os/kernel/fs/ext2/inode.c +++ b/lunaix-os/kernel/fs/ext2/inode.c @@ -128,8 +128,65 @@ __btlb_flushall(struct ext2_inode* e_inode) } } +/** + * Obtain the number of indirect blocks that contains + * pointers to next level blocks. 
+ * + * Let N be the number of ids that a data block can hold, + * then the total number of data blocks assigned (reserved) + * to the inode: + * + * i_blocks = 12 + (N + 1) + (N^2 + N + 1) + (N^3 + N^2 + N + 1) + */ +static int +__get_nr_indblks(struct ext2_sbinfo* sb, size_t fsblks) +{ + ssize_t blks; + int nr_ents; + int nr_inds = 0, n, acc_nr; /* nr_inds is only ever accumulated with +=, so it must start at zero */ + + blks = (ssize_t)fsblks; + nr_ents = sb->block_size / sizeof(int); + acc_nr = 1; + + if (blks <= 12) + return 0; + + blks -= 12; + + if (blks > 0) // order-1 indirection + { + n = MIN(ICEIL(blks, nr_ents), acc_nr); + blks -= n * nr_ents; + + nr_inds += 1; + acc_nr *= nr_ents; + } + + if (blks > 0) // order-2 indirection + { + n = MIN(ICEIL(blks, nr_ents), acc_nr); + blks -= n * nr_ents; + + nr_inds += n + 1; + acc_nr *= nr_ents; + } + + if (blks > 0) // order-3 indirection + { + n = MIN(ICEIL(blks, nr_ents), acc_nr); /* cap at acc_nr leaf tables, same as the lower orders; anything left over trips the assert below */ + blks -= n * nr_ents; + + nr_inds += n + ICEIL(n, nr_ents) + 1; + } + + assert_fs(blks <= 0); + + return nr_inds; +} + void -ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode) +ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode, int mode) { struct ext2_inode* e_ino; @@ -138,8 +195,12 @@ ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode) .pos = 0, .inode = inode, .blksz = inode->sb->blksize, - .end_pos = ICEIL(e_ino->isize, inode->sb->blksize) }; + + if (mode == DBIT_MODE_ISIZE) + iter->end_pos = ICEIL(e_ino->isize, inode->sb->blksize); + else + iter->end_pos = e_ino->nr_fsblks - e_ino->nr_indblks; } void @@ -186,14 +247,13 @@ ext2db_itnext(struct ext2_iterator* iter) fsblock_put(iter->sel_buf); } - buf = ext2db_get(iter->inode, iter->pos); + buf = ext2db_get(iter->inode, iter->pos++); iter->sel_buf = buf; if (!buf || !ext2_itcheckbuf(iter)) { return false; } - iter->pos++; iter->data = blkbuf_data(buf); return true; @@ -371,7 +431,7 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index) struct ext2b_inode* b_inode; struct ext2_inode* 
inode; unsigned int ind_ents; - size_t inds_blks; + size_t nr_linked; sb = gd->sb; b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_index); @@ -391,11 +451,11 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index) } if (b_inode->i_blocks) { - inds_blks = (size_t)b_inode->i_blocks; - inds_blks -= ICEIL(inode->isize, 512); - inds_blks /= (sb->block_size / 512); + nr_linked = (size_t)b_inode->i_blocks; + nr_linked /= (sb->block_size / 512); - inode->indirect_blocks = inds_blks; + inode->nr_fsblks = nr_linked; + inode->nr_indblks = __get_nr_indblks(sb, nr_linked); } ind_ents = sb->block_size / sizeof(int); @@ -404,6 +464,8 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index) inode->inds_lgents = ilog2(ind_ents); inode->ino_id = gd->ino_base + to_ext2ino_id(ino_index); + ext2_debug("ino(%d): isize=%lu, nr_blk=%lu, nr_inds=%lu", + inode->ino_id, inode->isize, inode->nr_fsblks, inode->nr_indblks); return inode; } @@ -654,7 +716,9 @@ __update_inode_size(struct v_inode* inode, size_t size) { struct ext2b_inode* b_ino; struct ext2_inode* e_ino; + struct ext2_sbinfo* sb; + sb = EXT2_SB(inode->sb); e_ino = EXT2_INO(inode); b_ino = e_ino->ino; @@ -668,8 +732,8 @@ __update_inode_size(struct v_inode* inode, size_t size) b_ino->i_size = size; } - b_ino->i_blocks = ICEIL(size, 512); - b_ino->i_blocks += e_ino->indirect_blocks; + b_ino->i_blocks = e_ino->nr_fsblks * (sb->block_size / 512); + fsblock_dirty(e_ino->buf); } int @@ -738,6 +802,9 @@ ext2_link(struct v_inode* this, struct v_dnode* new_name) new_name->data = e_dno; vfs_assign_inode(new_name, this); + // linking a dnode to parent could result new data block allocated + ext2_sync_inode(parent); + done: return errno; } @@ -760,6 +827,8 @@ ext2_unlink(struct v_inode* this, struct v_dnode* name) return errno; } + // unlink a dnode from parent will not free the allocated data blocks + // rather, it leads to fragmentation return ext2ino_free(this); } @@ -784,6 +853,9 @@ 
__walkstate_set_stack(struct walk_state* state, int depth, state->stack.indices[depth] = index; } +#define WALKMODE_ALLOC 0b01 +#define WALKMODE_NOBTLB 0b10 + /** * @brief Walk the indrection chain given the position of data block * relative to the inode. Upon completed, walk_state will be @@ -794,20 +866,26 @@ __walkstate_set_stack(struct walk_state* state, int depth, * (i.e., a leaf block), then the state is the indirect block that * containing the ID of that leaf block. * - * If `resolve` is set, it will resolve any absence encountered - * during the walk by allocating and chaining indirect block. - * It require the file system is mounted writable. + * Two modes can be specified to alter the walk process: + * + * WALKMODE_ALLOC + * resolve any absence encountered + * during the walk by allocating and chaining indirect block + * + * WALKMODE_NOBTLB + * Ignore the cached result, always perform a complete walk. + * This does not by pass the cache entirely, lower level caches + * like block buffer (blkio request cache) will be used transparently * * @param inode inode to walk * @param pos flattened data block position to be located * @param state contain the walk result - * @param resolve whether to auto allocate the indirection structure during - * walk if `pos` is not exist. 
+ * @param mode walk mode * @return int */ static int __walk_indirects(struct v_inode* inode, unsigned int pos, - struct walk_state* state, bool resolve, bool full_walk) + struct walk_state* state, int mode) { int errno; int inds, stride, shifts, level; @@ -816,12 +894,13 @@ __walk_indirects(struct v_inode* inode, unsigned int pos, struct ext2b_inode* b_inode; struct v_superblock* vsb; bbuf_t table, next_table; + bool alloc; e_inode = EXT2_INO(inode); b_inode = e_inode->ino; vsb = inode->sb; level = 0; - resolve = resolve && !EXT2_SB(vsb)->read_only; + alloc = (mode & WALKMODE_ALLOC) && !EXT2_SB(vsb)->read_only; if (pos < 12) { index = pos; @@ -847,7 +926,7 @@ __walk_indirects(struct v_inode* inode, unsigned int pos, } // bTLB cache the last level indirect block - if (!full_walk && (table = __btlb_hit(e_inode, pos))) { + if (!(mode & WALKMODE_NOBTLB) && (table = __btlb_hit(e_inode, pos))) { level = inds; index = pos & ((1 << stride) - 1); slotref = &block_buffer(table, u32_t)[index]; @@ -867,7 +946,7 @@ __walk_indirects(struct v_inode* inode, unsigned int pos, next = *slotref; if (!next) { - if (!resolve) { + if (!alloc) { goto _return; } @@ -876,7 +955,6 @@ __walk_indirects(struct v_inode* inode, unsigned int pos, return errno; } - e_inode->indirect_blocks++; *slotref = fsblock_id(next_table); fsblock_dirty(table); } @@ -894,7 +972,6 @@ __walk_indirects(struct v_inode* inode, unsigned int pos, assert(shifts >= 0); index = (pos & mask) >> shifts; - slotref = &block_buffer(table, u32_t)[index]; shifts -= stride; @@ -927,7 +1004,7 @@ ext2db_get(struct v_inode* inode, unsigned int data_pos) ext2walk_init_state(&state); - errno = __walk_indirects(inode, data_pos, &state, false, false); + errno = __walk_indirects(inode, data_pos, &state, 0); if (errno) { return (bbuf_t)INVL_BUFFER; } @@ -953,7 +1030,7 @@ ext2db_acquire(struct v_inode* inode, unsigned int data_pos, bbuf_t* out) ext2walk_init_state(&state); - errno = __walk_indirects(inode, data_pos, &state, true, false); 
+ errno = __walk_indirects(inode, data_pos, &state, WALKMODE_ALLOC); if (errno) { return errno; } @@ -987,36 +1064,36 @@ done: int ext2db_alloc(struct v_inode* inode, bbuf_t* out) { - int free_ino_idx; + int next_free; struct ext2_gdesc* gd; struct ext2_inode* e_inode; struct v_superblock* vsb; - free_ino_idx = ALLOC_FAIL; + next_free = ALLOC_FAIL; e_inode = EXT2_INO(inode); vsb = inode->sb; gd = e_inode->blk_grp; - free_ino_idx = ext2gd_alloc_block(gd); + next_free = ext2gd_alloc_block(gd); // locality alloc failed, try entire fs - if (!valid_bmp_slot(free_ino_idx)) { - free_ino_idx = ext2db_alloc_slot(vsb, &gd); + if (!valid_bmp_slot(next_free)) { + next_free = ext2db_alloc_slot(vsb, &gd); } - if (!valid_bmp_slot(free_ino_idx)) { + if (!valid_bmp_slot(next_free)) { return EDQUOT; } - free_ino_idx += gd->base; - free_ino_idx = ext2_datablock(vsb, free_ino_idx); - free_ino_idx = to_ext2ino_id(free_ino_idx); + next_free += gd->base; + next_free = ext2_datablock(vsb, next_free); - bbuf_t buf = fsblock_get(vsb, free_ino_idx); + bbuf_t buf = fsblock_get(vsb, next_free); if (blkbuf_errbuf(buf)) { return EIO; } + e_inode->nr_fsblks++; *out = buf; return 0; } @@ -1067,7 +1144,6 @@ ext2ino_resizing(struct v_inode* inode, size_t new_size) } __update_inode_size(inode, new_size); - fsblock_dirty(e_ino->buf); if (check_symlink_node(inode)) { return 0; @@ -1080,7 +1156,7 @@ ext2ino_resizing(struct v_inode* inode, size_t new_size) ext2walk_init_state(&state); pos = new_size / fsapi_block_size(inode->sb); - errno = __walk_indirects(inode, pos, &state, false, true); + errno = __walk_indirects(inode, pos, &state, WALKMODE_NOBTLB); if (errno) { return errno; } diff --git a/lunaix-os/kernel/fs/vfs.c b/lunaix-os/kernel/fs/vfs.c index 32014d3..9cc78f4 100644 --- a/lunaix-os/kernel/fs/vfs.c +++ b/lunaix-os/kernel/fs/vfs.c @@ -290,6 +290,11 @@ vfs_pclose(struct v_file* file, pid_t pid) inode = file->inode; + if (vfs_check_duped_file(file)) { + vfs_unref_file(file); + return 0; + } + /* * 
Prevent dead lock. * This happened when process is terminated while blocking on read. @@ -307,13 +312,11 @@ vfs_pclose(struct v_file* file, pid_t pid) * than A. And this will cause a probable race condition on A if other * process is writing to this file later after B exit. */ - mutex_unlock_for(&inode->lock, pid); - - if (vfs_check_duped_file(file)) { - vfs_unref_file(file); - return 0; - } + + // now regain lock for inode syncing + + lock_inode(inode); if ((errno = file->ops->close(file))) { goto done; @@ -322,17 +325,6 @@ vfs_pclose(struct v_file* file, pid_t pid) vfs_unref_dnode(file->dnode); cake_release(file_pile, file); - /* - if the current inode is not being locked by other - threads that does not share same open context, - then we can try to do sync opportunistically - */ - if (mutex_on_hold(&inode->lock)) { - goto done; - } - - lock_inode(inode); - pcache_commit_all(inode); inode->open_count--; @@ -340,9 +332,8 @@ vfs_pclose(struct v_file* file, pid_t pid) __sync_inode_nolock(inode); } - unlock_inode(inode); - done: + unlock_inode(inode); return errno; } @@ -671,27 +662,33 @@ __vfs_try_locate_file(const char* path, return errno; } + lock_dnode(fdir); + errno = vfs_walk(fdir, name.value, &file, NULL, woption); if (errno && errno != ENOENT) { - goto done; + goto error; + } + + if (!errno && (options & FLOC_MKNAME)) { + errno = EEXIST; + goto error; } if (!errno) { - if ((options & FLOC_MKNAME)) { - errno = EEXIST; - } + // the file present, no need to hold the directory lock + unlock_dnode(fdir); goto done; } // errno == ENOENT if (!options) { - goto done; + goto error; } errno = vfs_check_writable(fdir); if (errno) { - goto done; + goto error; } floc->fresh = true; @@ -699,17 +696,20 @@ __vfs_try_locate_file(const char* path, file = vfs_d_alloc(fdir, &name); if (!file) { - return ENOMEM; + errno = ENOMEM; + goto error; } - lock_dnode(fdir); - vfs_dcache_add(fdir, file); done: floc->dir = fdir; floc->file = file; + + return errno; +error: + 
unlock_dnode(fdir); return errno; } diff --git a/lunaix-os/usr/LBuild b/lunaix-os/usr/LBuild index 4b0617c..320bbeb 100644 --- a/lunaix-os/usr/LBuild +++ b/lunaix-os/usr/LBuild @@ -9,6 +9,8 @@ sources([ "maze", "mkdir", "rm", + "testfork", + "fragfile", ]) compile_opts([ diff --git a/lunaix-os/usr/fragfile.c b/lunaix-os/usr/fragfile.c new file mode 100644 index 0000000..8ceb2d5 --- /dev/null +++ b/lunaix-os/usr/fragfile.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include + +static char alphabets[] = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "01234567890"; + +int main() +{ + unsigned int buf[4096]; + char name[8]; + int fd = open("/dev/rand", O_RDONLY); + + if (mkdir("testdir") && errno != EEXIST) + { + printf("Unable to mkdir %d\n", errno); + _exit(1); + } + + if (chdir("testdir")) + { + printf("Unable to chdir %d\n", errno); + _exit(1); + } + + int cnt = 0; + for (int i = 0; i < 1; i++) + { + int n = read(fd, buf, 4096 * sizeof(int)); + int j = 0, k = 0; + while (j < 4096) { + name[k++] = alphabets[buf[j++] % 63]; + + if (k < 7) { + continue; + } + + k = 0; + cnt++; + name[7] = 0; + + printf("[%03d] creating: %s\n", cnt, name); + int fd2 = open(name, O_RDONLY | O_CREAT); + if (fd2 < 0) { + printf("Unable to open %d\n", errno); + continue; + } + + close(fd2); + } + } + + return 0; +} \ No newline at end of file -- 2.27.0