Fix file system racing and ext2 directory insertion (#58)

author Lunaixsky <lunaixsky@qq.com>

Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)

committer GitHub <noreply@github.com>

Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)
author Lunaixsky <lunaixsky@qq.com>
Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)
committer GitHub <noreply@github.com>
Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)
diff --git a/lunaix-os/includes/lunaix/ds/mutex.h b/lunaix-os/includes/lunaix/ds/mutex.h

index 510f1fb6b13a2825c4154a9cf23c60fa917064b0..304cc6de7cb1ae5a6830cf2722b9acd89a0fa648 100644 (file)
--- a/lunaix-os/includes/lunaix/ds/mutex.h
+++ b/lunaix-os/includes/lunaix/ds/mutex.h
@@ -6,7 +6,7 @@
  
  typedef struct mutex_s
  {
-    atomic_ulong lk;
+    atomic_uint lk;
      pid_t owner;
  } mutex_t;
  
@@ -37,4 +37,7 @@ mutex_unlock_nested(mutex_t* mutex);
  void
  mutex_unlock_for(mutex_t* mutex, pid_t pid);
  
+bool
+mutex_trylock(mutex_t* mutex);
+
  #endif /* __LUNAIX_MUTEX_H */
diff --git a/lunaix-os/includes/lunaix/ds/rwlock.h b/lunaix-os/includes/lunaix/ds/rwlock.h

index 7dda9ad263abb1e1f30b5349c137c5dbbc0d54b8..ac0d0393b2e15555ff97f331ce3507b59169becf 100644 (file)
--- a/lunaix-os/includes/lunaix/ds/rwlock.h
+++ b/lunaix-os/includes/lunaix/ds/rwlock.h
@@ -13,6 +13,9 @@ typedef struct rwlock_s
      waitq_t waiting_writers;
  } rwlock_t;
  
+void
+rwlock_init(rwlock_t* rwlock);
+
  void
  rwlock_begin_read(rwlock_t* rwlock);
  
diff --git a/lunaix-os/includes/lunaix/ds/spinlock.h b/lunaix-os/includes/lunaix/ds/spinlock.h

index 557f310bf37245dcfa73b3d58afd60119df76d2e..cea310a4fab1f4434a19c809bd16a2e8746dd05a 100644 (file)
--- a/lunaix-os/includes/lunaix/ds/spinlock.h
+++ b/lunaix-os/includes/lunaix/ds/spinlock.h
@@ -8,6 +8,9 @@ struct spinlock
      volatile bool flag;
  };
  
+#define DEFINE_SPINLOCK(name)   \
+    struct spinlock name = { .flag = false }
+
  typedef struct spinlock spinlock_t;
  
  /*
diff --git a/lunaix-os/includes/lunaix/fs.h b/lunaix-os/includes/lunaix/fs.h

index bba939cadc997221427692dad2caebfec4d41e11..003f41058248e490a6db4cf1a33f7ee5dd93870b 100644 (file)
--- a/lunaix-os/includes/lunaix/fs.h
+++ b/lunaix-os/includes/lunaix/fs.h
@@ -1,8 +1,6 @@
  #ifndef __LUNAIX_VFS_H
  #define __LUNAIX_VFS_H
  
-#include <lunaix/clock.h>
-#include <lunaix/device.h>
  #include <lunaix/ds/btrie.h>
  #include <lunaix/ds/hashtable.h>
  #include <lunaix/ds/hstr.h>
@@ -10,6 +8,10 @@
  #include <lunaix/ds/llist.h>
  #include <lunaix/ds/lru.h>
  #include <lunaix/ds/mutex.h>
+#include <lunaix/ds/rwlock.h>
+
+#include <lunaix/clock.h>
+#include <lunaix/device.h>
  #include <lunaix/status.h>
  #include <lunaix/spike.h>
  #include <lunaix/bcache.h>
@@ -62,20 +64,25 @@
       ('0' <= (chr) && (chr) <= '9') || (chr) == '.' || (chr) == '_' ||         \
       (chr) == '-' || (chr) == ':')
  
-#define unlock_inode(inode) mutex_unlock(&inode->lock)
+#define unlock_inode(inode) mutex_unlock_nested(&inode->lock)
  #define lock_inode(inode)                                                      \
      ({                                                                         \
-        mutex_lock(&inode->lock);                                              \
+        mutex_lock_nested(&inode->lock);                                       \
          lru_use_one(inode_lru, &inode->lru);                                   \
      })
  
-#define unlock_dnode(dnode) mutex_unlock(&dnode->lock)
+#define unlock_dnode(dnode) mutex_unlock_nested(&dnode->lock)
  #define lock_dnode(dnode)                                                      \
      ({                                                                         \
-        mutex_lock(&dnode->lock);                                              \
+        mutex_lock_nested(&dnode->lock);                                       \
          lru_use_one(dnode_lru, &dnode->lru);                                   \
      })
  
+#define dnode_atomic(dnode, ops)    \
+    do { lock_dnode(dnode); ops; unlock_dnode(dnode); } while(0)
+
+#define locked_node(node) mutex_on_hold(&(node)->lock)
+
  #define assert_fs(cond) assert_p(cond, "FS")
  #define fail_fs(msg) fail_p(msg, "FS")
  
@@ -117,6 +124,28 @@ struct fs_iter
      struct filesystem* fs;
  };
  
+struct vncache
+{
+    struct hbucket* pool;   // bucket array of the cache (presumably hashed by key, see vfs_vncache_add — confirm)
+    rwlock_t lock;          // held by cache_atomic_read()/cache_atomic_write() and vncache_lock_*() around accesses
+};
+#define cache_atomic_read(cache, ops)           \
+    do {                                        \
+        rwlock_begin_read(&(cache)->lock);      \
+        ops;                                    \
+        rwlock_end_read(&(cache)->lock);        \
+    } while (0)
+
+#define cache_atomic_write(cache, ops)          \
+    do {                                        \
+        rwlock_begin_write(&(cache)->lock);     \
+        ops;                                    \
+        rwlock_end_write(&(cache)->lock);       \
+    } while (0)
+
+#define dnode_cache(dnode)   (&(dnode)->super_block->d_cache)
+#define inode_cache(inode)   (&(inode)->sb->i_cache)
+
  struct v_superblock
  {
      struct llist_header sb_list;
@@ -124,8 +153,8 @@ struct v_superblock
      struct v_dnode* root;
      struct filesystem* fs;
      struct blkbuf_cache* blks;
-    struct hbucket* i_cache;
-    struct hbucket* d_cache;
+    struct vncache i_cache;
+    struct vncache d_cache;
      
      void* data;
      unsigned int ref_count;
@@ -305,7 +334,10 @@ struct v_dnode
  struct v_fdtable
  {
      struct v_fd* fds[VFS_MAX_FD];
+    mutex_t lock;   // inter-threads contention
  };
+#define lock_fdtable(fdtab)     mutex_lock(&(fdtab)->lock)
+#define unlock_fdtable(fdtab)   mutex_unlock(&(fdtab)->lock)
  
  struct pcache
  {
@@ -364,6 +396,21 @@ fsm_itend(struct fs_iter* iterator)
      iterator->fs = NULL;
  }
  
+void
+vfs_vncache_init(struct vncache* cache);
+
+void
+vfs_vncache_free(struct vncache* cache);
+
+void
+vfs_vncache_add(struct vncache* cache, size_t key, struct hlist_node* node);
+
+#define vncache_lock_read(cache)    rwlock_begin_read(&(cache)->lock);
+#define vncache_unlock_read(cache)  rwlock_end_read(&(cache)->lock);
+
+#define vncache_lock_write(cache)    rwlock_begin_write(&(cache)->lock);
+#define vncache_unlock_write(cache)  rwlock_end_write(&(cache)->lock);
+
  void
  vfs_init();
  
@@ -655,6 +702,18 @@ void
  xattr_addcache(struct v_inode* inode, struct v_xattr_entry* xattr);
  
  
+/* --- fdtable --- */
+
+struct v_fdtable*
+fdtable_create();
+
+void
+fdtable_copy(struct v_fdtable* dest, struct v_fdtable* src);
+
+void
+fdtable_free(struct v_fdtable* table);
+
+
  /* --- misc stuff --- */
  
  #define check_itype(to_check, itype)    \
diff --git a/lunaix-os/kernel/ds/mutex.c b/lunaix-os/kernel/ds/mutex.c

index 41e131d6d57db71bdfec25a412bf125da10dde45..4aac422191e5f3cc21721e18c59c7e9ba23c2642 100644 (file)
--- a/lunaix-os/kernel/ds/mutex.c
+++ b/lunaix-os/kernel/ds/mutex.c
@@ -2,6 +2,12 @@
  #include <lunaix/process.h>
  #include <lunaix/kpreempt.h>
  
+#define __do_lock(mutex)                    \
+    ({  /* add one hold, record the owner */\
+        atomic_fetch_add(&(mutex)->lk, 1);  \
+        (mutex)->owner = __current->pid;    \
+    })
+
  static inline bool must_inline
  __mutex_check_owner(mutex_t* mutex)
  {
@@ -15,8 +21,7 @@ __mutext_lock(mutex_t* mutex)
          preempt_current();
      }
  
-    atomic_fetch_add(&mutex->lk, 1);
-    mutex->owner = __current->pid;
+    __do_lock(mutex);
  }
  
  static inline void must_inline
@@ -32,6 +37,16 @@ mutex_lock(mutex_t* mutex)
      __mutext_lock(mutex);
  }
  
+bool
+mutex_trylock(mutex_t* mutex)
+{   // Non-blocking acquire: returns true if the lock was taken, false if it is already held.
+    unsigned int unheld = 0;    // CAS 0 -> 1: the free-check and the acquire are one atomic step
+    if (!atomic_compare_exchange_strong(&mutex->lk, &unheld, 1))
+        return false;
+    mutex->owner = __current->pid;
+    return true;
+}
+
  void
  mutex_unlock(mutex_t* mutex)
  {
diff --git a/lunaix-os/kernel/fs/LConfig b/lunaix-os/kernel/fs/LConfig

index 194e1868a7ab0737b1fcfe093f08108e954b67c5..880d6c68a102c502c6d7f7d66c8d9f8a83d58806 100644 (file)
--- a/lunaix-os/kernel/fs/LConfig
+++ b/lunaix-os/kernel/fs/LConfig
@@ -19,3 +19,5 @@ def file_system():
          type(bool)
          default(True)
  
+
+include("ext2")
+\ No newline at end of file
diff --git a/lunaix-os/kernel/fs/ext2/LConfig b/lunaix-os/kernel/fs/ext2/LConfig

new file mode 100644 (file)

index 0000000..f5cb0a6
--- /dev/null
+++ b/lunaix-os/kernel/fs/ext2/LConfig
@@ -0,0 +1,11 @@
+
+@Collection("ext2")
+def ext2_fs():
+    add_to_collection(file_system)
+
+    @Term("Debug Messages")
+    def ext2_debug_msg():
+        type(bool)
+        default(False)
+
+    return v(fs_ext2)
+\ No newline at end of file
diff --git a/lunaix-os/kernel/fs/ext2/alloc.c b/lunaix-os/kernel/fs/ext2/alloc.c

index 29961410ff24b4d55030906ecc0ef5266be818a9..62dfb6b92bb657103fb5ca124f670df75c9cab5d 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/alloc.c
+++ b/lunaix-os/kernel/fs/ext2/alloc.c
@@ -4,12 +4,20 @@ static inline unsigned int
  __ext2_global_slot_alloc(struct v_superblock* vsb, int type_sel, 
                           struct ext2_gdesc** gd_out)
  {
+    int alloc;
      struct ext2_sbinfo* sb;
      struct ext2_gdesc *pos;
      struct llist_header *header;
      
+    alloc = ALLOC_FAIL;
      sb = EXT2_SB(vsb);
+
+    ext2sb_lock(sb);
      header = &sb->free_list_sel[type_sel];
+
+    // we have used up all available inodes/blocks
+    if (llist_empty(header))
+        goto done;
      
      if (type_sel == GDESC_INO_SEL) {
          pos = list_entry(header->next, struct ext2_gdesc, free_grps_ino);
@@ -18,12 +26,14 @@ __ext2_global_slot_alloc(struct v_superblock* vsb, int type_sel,
          pos = list_entry(header->next, struct ext2_gdesc, free_grps_blk);
      }
  
-    int alloc = ext2gd_alloc_slot(pos, type_sel);
+    alloc = ext2gd_alloc_slot(pos, type_sel);
  
      if (valid_bmp_slot(alloc)) {
          *gd_out = pos;
      }
  
+done:
+    ext2sb_unlock(sb);
      return alloc;
  }
  
@@ -45,13 +55,15 @@ ext2gd_alloc_slot(struct ext2_gdesc* gd, int type_sel)
      struct ext2_bmp* bmp;
      struct ext2_sbinfo *sb;
      int alloc;
+
+    ext2gd_lock(gd);
      
      sb = gd->sb;
      bmp = &gd->bmps[type_sel];
-    alloc = ext2bmp_alloc_one(bmp);
+    alloc = ext2bmp_alloc_nolock(bmp);
      
      if (alloc < 0) {
-        return alloc;
+        goto done;
      }
  
      if (!ext2bmp_check_free(bmp)) {
@@ -66,8 +78,11 @@ ext2gd_alloc_slot(struct ext2_gdesc* gd, int type_sel)
          sb->raw->s_free_blk_cnt--;
      }
  
-    fsblock_dirty(gd->buf);
-    fsblock_dirty(sb->buf);
+    ext2gd_schedule_sync(gd);
+    ext2sb_schedule_sync(sb);
+
+done:
+    ext2gd_unlock(gd);
      return alloc;
  }
  
@@ -77,7 +92,9 @@ ext2gd_free_slot(struct ext2_gdesc* gd, int type_sel, int slot)
      struct llist_header *free_ent, *free_list;
      struct ext2_sbinfo *sb;
  
-    ext2bmp_free_one(&gd->bmps[type_sel], slot);
+    ext2gd_lock(gd);
+
+    ext2bmp_free_nolock(&gd->bmps[type_sel], slot);
  
      sb = gd->sb;
      free_ent  = &gd->free_list_sel[slot];
@@ -86,6 +103,7 @@ ext2gd_free_slot(struct ext2_gdesc* gd, int type_sel, int slot)
          llist_append(free_list, free_ent);
      }
  
+    // FIXME might need arch-dependent impl for atomic operations
      if (type_sel == GDESC_INO_SEL) {
          gd->info->bg_free_ino_cnt++;
          sb->raw->s_free_ino_cnt++;
@@ -94,6 +112,8 @@ ext2gd_free_slot(struct ext2_gdesc* gd, int type_sel, int slot)
          sb->raw->s_free_blk_cnt++;
      }
  
-    fsblock_dirty(gd->buf);
-    fsblock_dirty(sb->buf);
+    ext2gd_schedule_sync(gd);
+    ext2sb_schedule_sync(sb);
+
+    ext2gd_unlock(gd);
  }
 \ No newline at end of file
diff --git a/lunaix-os/kernel/fs/ext2/dir.c b/lunaix-os/kernel/fs/ext2/dir.c

index a6af51ee93aa5f64b718a9662fe234b3ee01592d..9058c8a5e8dffdb66334cf15d15037a2c5b706e4 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/dir.c
+++ b/lunaix-os/kernel/fs/ext2/dir.c
@@ -57,98 +57,138 @@ done:
  _ret:
      fsblock_put(prev_buf);
      ext2dr_itend(&iter);
+    
      return itstate_sel(&iter, errno);
  }
  
-static size_t
+static inline size_t
  __dirent_realsize(struct ext2b_dirent* dirent)
  {
      return sizeof(*dirent) - sizeof(dirent->name) + dirent->name_len;
  }
  
-#define DIRENT_SLOT_MID     0
-#define DIRENT_SLOT_LAST    1
-#define DIRENT_SLOT_EMPTY   2
+#define DIRENT_INSERT     0
+#define DIRENT_APPEND     1
+
+#define DIRENT_ALIGNMENT    sizeof(int)
+
+struct dirent_locator
+{
+    size_t search_size;         // in:  byte size of the dirent to place (set by __init_locator)
+
+    int state;                  // out: DIRENT_INSERT or DIRENT_APPEND
+    struct ext2_dnode result;   // out: prev/self slots located by the search, with their block buffers
+    size_t new_prev_reclen;     // out: real size of the preceding dirent rounded up; becomes its rec_len on insert
+    size_t db_pos;              // out: data-block iterator position minus one when the search stopped
+};
+
+
+static inline void must_inline
+__init_locator(struct dirent_locator* loc, size_t search_size)
+{
+    *loc = (struct dirent_locator) { .search_size = search_size };
+}
  
  static int
-__find_free_dirent_slot(struct v_inode* inode, size_t size, 
-                        struct ext2_dnode* e_dnode_out, size_t *reclen)
+__find_free_dirent_slot(struct v_inode* inode, struct dirent_locator* loc)
  {
-    struct ext2_iterator iter;
+    struct ext2_iterator dbit;
      struct ext2b_dirent *dir = NULL;
+    struct ext2_dnode* result;
+    
      bbuf_t prev_buf = bbuf_null;
      bool found = false;
  
-    ext2db_itbegin(&iter, inode);
-
-    size_t sz = 0;
+    size_t sz = 0, aligned = 0;
      unsigned int rec = 0, total_rec = 0;
+    unsigned int dir_size;
+
+    aligned = ROUNDUP(loc->search_size, DIRENT_ALIGNMENT);
+    result  = &loc->result;
+
+    ext2db_itbegin(&dbit, inode, DBIT_MODE_BLOCK);
  
-    while (!found && ext2db_itnext(&iter))
+    while (!found && ext2db_itnext(&dbit))
      {
          rec = 0;
          do {
-            dir = (struct ext2b_dirent*)offset(iter.data, rec);
+            dir = (struct ext2b_dirent*)offset(dbit.data, rec);
  
              sz = dir->rec_len - __dirent_realsize(dir);
-            sz = ROUNDDOWN(sz, 4);
-            if (sz >= size) {
+            sz = ROUNDDOWN(sz, DIRENT_ALIGNMENT);
+            if ((signed)sz >= (signed)aligned) {
                  found = true;
                  break;
              }
  
              rec += dir->rec_len;
              total_rec += dir->rec_len;
-        } while(rec < iter.blksz);
+        } while(rec < dbit.blksz);
  
          if (likely(prev_buf)) {
              fsblock_put(prev_buf);
          }
          
-        prev_buf = fsblock_take(iter.sel_buf);
+        prev_buf = fsblock_take(dbit.sel_buf);
      }
  
+    ext2_debug("dr_find_slot: found=%d, blk_off=%d, off=%d, gap=%d, blk=%d/%d", 
+                found, rec, total_rec, sz, dbit.pos - 1, dbit.end_pos);
+
+    loc->db_pos = dbit.pos - 1;
+
      if (blkbuf_nullbuf(prev_buf)) {
          // this dir is brand new
-        return DIRENT_SLOT_EMPTY;
+        loc->state = DIRENT_APPEND;
+        goto done;
      }
  
-    e_dnode_out->prev = (struct ext2_dnode_sub) {
+    dir_size = ROUNDUP(__dirent_realsize(dir), 4);
+    loc->new_prev_reclen = dir_size;
+
+    result->prev = (struct ext2_dnode_sub) {
          .buf = fsblock_take(prev_buf),
          .dirent = dir
      };
  
      if (!found) {
          // if prev is the last, and no more space left behind.
-        assert_fs(rec == iter.blksz);
+        assert_fs(rec == dbit.blksz);
          
-        e_dnode_out->self.buf = bbuf_null;
-        ext2db_itend(&iter);
-        return itstate_sel(&iter, DIRENT_SLOT_LAST);
-    }
+        result->self.buf = bbuf_null;
+        ext2db_itend(&dbit);
  
-    unsigned int dir_size;
-
-    dir_size = ROUNDUP(__dirent_realsize(dir), 4);
-    *reclen = dir_size;
+        loc->state = DIRENT_APPEND;
+        goto done;
+    }
  
-    rec = total_rec + dir_size;
-    dir = (struct ext2b_dirent*)offset(iter.data, rec);
+    rec += dir_size;
+    dir = (struct ext2b_dirent*)offset(dbit.data, rec);
      
-    e_dnode_out->self = (struct ext2_dnode_sub) {
-        .buf = fsblock_take(iter.sel_buf),
+    result->self = (struct ext2_dnode_sub) {
+        .buf = fsblock_take(dbit.sel_buf),
          .dirent = dir
      };
  
-    ext2db_itend(&iter);
-    return DIRENT_SLOT_MID;
+    ext2db_itend(&dbit);
+
+    loc->state = DIRENT_INSERT;
+
+done:
+    return itstate_sel(&dbit, 0);
  }
  
  static inline void
-__destruct_ext2_dnode(struct ext2_dnode* e_dno)
+__release_dnode_blocks(struct ext2_dnode* e_dno)
  {
      fsblock_put(e_dno->prev.buf);
      fsblock_put(e_dno->self.buf);
+}
+
+static inline void
+__destruct_ext2_dnode(struct ext2_dnode* e_dno)
+{
+    __release_dnode_blocks(e_dno);
      vfree(e_dno);
  }
  
@@ -452,54 +492,35 @@ ext2dr_insert(struct v_inode* this, struct ext2b_dirent* dirent,
  {
      int errno;
      size_t size, new_reclen, old_reclen;
-    struct ext2_inode* e_self;
      struct ext2_dnode*  e_dno;
      struct ext2b_dirent* prev_dirent;
+    struct dirent_locator locator;
      bbuf_t buf;
  
-    e_self = EXT2_INO(this);
-    e_dno  = vzalloc(sizeof(*e_dno));
-    
      size = __dirent_realsize(dirent);
-    errno = __find_free_dirent_slot(this, size, e_dno, &new_reclen);
+    __init_locator(&locator, size);
+    
+    errno = __find_free_dirent_slot(this, &locator);
      if (errno < 0) {
          goto failed;
      }
  
-    if (errno == DIRENT_SLOT_EMPTY) {
-        if ((errno = ext2db_acquire(this, 0, &buf))) {
+    e_dno = &locator.result;
+    new_reclen = locator.new_prev_reclen;
+    old_reclen = fsapi_block_size(this->sb);
+
+    if (locator.state != DIRENT_INSERT) 
+    {
+        if ((errno = ext2db_acquire(this, locator.db_pos, &buf)))
              goto failed;
-        }
  
          this->fsize += fsapi_block_size(this->sb);
          ext2ino_update(this);
          
-        old_reclen = fsapi_block_size(this->sb);
          e_dno->self.buf = buf;
-        e_dno->self.dirent = blkbuf_data(buf);
-
-        goto place_dir;
+        e_dno->self.dirent = block_buffer(buf, struct ext2b_dirent);
      }
  
-    prev_dirent = e_dno->prev.dirent;
-    old_reclen = prev_dirent->rec_len;
-
-    if (errno == DIRENT_SLOT_LAST) {
-        // prev is last record
-        if ((errno = ext2db_alloc(this, &buf))) {
-            goto failed;
-        }
-
-        this->fsize += fsapi_block_size(this->sb);
-        ext2ino_update(this);
-
-        new_reclen = __dirent_realsize(prev_dirent);
-        new_reclen = ROUNDUP(new_reclen, sizeof(int));
-        e_dno->self = (struct ext2_dnode_sub) {
-            .buf = buf,
-            .dirent = block_buffer(buf, struct ext2b_dirent)
-        };
-    }
  
      /*
                     --- +--------+ ---
@@ -519,17 +540,29 @@ ext2dr_insert(struct v_inode* this, struct ext2b_dirent* dirent,
                         +--------+
      */
  
-    old_reclen -= new_reclen;
-    prev_dirent->rec_len = new_reclen;
-    fsblock_dirty(e_dno->prev.buf);
+    else
+    {
+        prev_dirent = e_dno->prev.dirent;
+        old_reclen  = prev_dirent->rec_len;
+        old_reclen -= new_reclen;
+
+        prev_dirent->rec_len = new_reclen;
+        fsblock_dirty(e_dno->prev.buf);
+    }
+
+    ext2_debug("dr_insert: state=%d, blk=%d, prev_rlen=%d, new_rlen=%d", 
+                locator.state, locator.db_pos, new_reclen, old_reclen);
  
-place_dir:
-    dirent->rec_len = ROUNDUP(old_reclen, sizeof(int));
+    assert_fs(new_reclen > 0);
+    assert_fs(old_reclen > 0);
+
+    dirent->rec_len = old_reclen;
+    
      memcpy(e_dno->self.dirent, dirent, size);
      fsblock_dirty(e_dno->self.buf);
  
      if (!e_dno_out) {
-        __destruct_ext2_dnode(e_dno);
+        __release_dnode_blocks(e_dno);
      }
      else {
          *e_dno_out = e_dno;
@@ -538,7 +571,7 @@ place_dir:
      return errno;
  
  failed:
-    __destruct_ext2_dnode(e_dno);
+    __release_dnode_blocks(e_dno);
      return errno;
  }
  
diff --git a/lunaix-os/kernel/fs/ext2/ext2.h b/lunaix-os/kernel/fs/ext2/ext2.h

index 1f123151eac9ef08527bce3757202dfc339637df..8a6e8523a7e7c41609fee71c21b843ec7a8f3972 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/ext2.h
+++ b/lunaix-os/kernel/fs/ext2/ext2.h
@@ -6,6 +6,14 @@
  #include <lunaix/ds/llist.h>
  #include <lunaix/ds/hashtable.h>
  #include <lunaix/ds/lru.h>
+#include <lunaix/ds/mutex.h>
+
+#ifdef CONFIG_EXT2_DEBUG_MSG
+#   include <lunaix/syslog.h>
+#   define ext2_debug(fmt, ...)  kprintf_v("ext2", fmt, ##__VA_ARGS__)
+#else
+#   define ext2_debug(fmt, ...)
+#endif
  
  #define FEAT_COMPRESSION      0b00000001
  #define FEAT_RESIZE_INO       0b00000010
@@ -202,6 +210,8 @@ struct ext2_sbinfo
          struct llist_header gds;
          GDESC_FREE_LISTS;
      };
+
+    mutex_t lock;
  };
  #define EXT2_SB(vsb) (fsapi_impl_data(vsb, struct ext2_sbinfo))
  
@@ -234,6 +244,8 @@ struct ext2_gdesc
      struct ext2_sbinfo* sb;
      bbuf_t buf;
      bcobj_t cache_ref;
+
+    mutex_t lock;
  };
  
  /*
@@ -269,7 +281,8 @@ struct ext2_inode
      bbuf_t buf;                  // partial inotab that holds this inode
      unsigned int inds_lgents;       // log2(# of block in an indirection level)
      unsigned int ino_id;
-    size_t indirect_blocks;
+    size_t nr_fsblks;
+    size_t nr_indblks;
      size_t isize;
  
      struct ext2b_inode* ino;        // raw ext2 inode
@@ -290,6 +303,8 @@ struct ext2_inode
      // prefetched block for 1st order of indirection
      bbuf_t ind_ord1;
      char* symlink;
+
+    // No lock required, it shares lock context with v_inode.
  };
  #define EXT2_INO(v_inode) (fsapi_impl_data(v_inode, struct ext2_inode))
  
@@ -303,6 +318,8 @@ struct ext2_dnode
  {
      struct ext2_dnode_sub self;
      struct ext2_dnode_sub prev;
+
+    // No lock required, it shares lock context with v_dnode.
  };
  #define EXT2_DNO(v_dnode) (fsapi_impl_data(v_dnode, struct ext2_dnode))
  
@@ -336,7 +353,6 @@ struct ext2_iterator
  struct ext2_file
  {
      struct ext2_iterator iter;
-    struct ext2_inode* b_ino;
  };
  #define EXT2_FILE(v_file) (fsapi_impl_data(v_file, struct ext2_file))
  
@@ -371,6 +387,27 @@ ext2_feature(struct v_superblock* vsb, unsigned int feat)
      return !!(EXT2_SB(vsb)->all_feature & feat);
  }
  
+/* ************   Superblock   ************ */
+
+static inline void
+ext2sb_schedule_sync(struct ext2_sbinfo* sb)
+{
+    fsblock_dirty(sb->buf);
+}
+
+static inline void must_inline
+ext2sb_lock(struct ext2_sbinfo* sb)
+{
+    mutex_lock(&sb->lock);
+}
+
+static inline void must_inline
+ext2sb_unlock(struct ext2_sbinfo* sb)
+{
+    mutex_unlock(&sb->lock);
+}
+
+
  /* ************   Inodes   ************ */
  
  void
@@ -405,8 +442,20 @@ ext2ino_linkto(struct ext2_inode* e_ino, struct ext2b_dirent* dirent)
      fsblock_dirty(e_ino->buf);
  }
  
+static inline void
+ext2ino_schedule_sync(struct ext2_inode* ino)
+{
+    fsblock_dirty(ino->buf);
+}
+
+
+/* ************* Data blocks ************* */
+
+#define DBIT_MODE_ISIZE 0
+#define DBIT_MODE_BLOCK 1
+
  void
-ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode);
+ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode, int mode);
  
  void
  ext2db_itend(struct ext2_iterator* iter);
@@ -486,14 +535,33 @@ void
  ext2gd_release_gdt(struct v_superblock* vsb);
  
  int
-ext2gd_take(struct v_superblock* vsb, 
+ext2gd_take_at(struct v_superblock* vsb, 
                 unsigned int index, struct ext2_gdesc** out);
  
+static inline struct ext2_gdesc*
+ext2gd_take(struct ext2_gdesc* gd) {
+    bcache_refonce(gd->cache_ref);  // NOTE(review): presumably balanced by bcache_return() in ext2gd_put() — confirm
+
+    return gd;  // hands the same descriptor back so the call can be used inside an assignment
+}
+
  static inline void
  ext2gd_put(struct ext2_gdesc* gd) {
      bcache_return(gd->cache_ref);
  }
  
+static inline void must_inline
+ext2gd_lock(struct ext2_gdesc* gd)
+{
+    mutex_lock(&gd->lock);
+}
+
+static inline void must_inline
+ext2gd_unlock(struct ext2_gdesc* gd)
+{
+    mutex_unlock(&gd->lock);
+}
+
  
  /* ************ Directory ************ */
  
@@ -599,23 +667,6 @@ ext2_get_symlink(struct v_inode *this, const char **path_out);
  int
  ext2_set_symlink(struct v_inode *this, const char *target);
  
-/* ***********   Bitmap   *********** */
-
-void
-ext2bmp_init(struct ext2_bmp* e_bmp, bbuf_t bmp_buf, unsigned int nr_bits);
-
-bool
-ext2bmp_check_free(struct ext2_bmp* e_bmp);
-
-int
-ext2bmp_alloc_one(struct ext2_bmp* e_bmp);
-
-void
-ext2bmp_free_one(struct ext2_bmp* e_bmp, unsigned int pos);
-
-void
-ext2bmp_discard(struct ext2_bmp* e_bmp);
-
  /* ***********   Allocations   *********** */
  
  #define ALLOC_FAIL -1
@@ -656,6 +707,13 @@ ext2gd_free_block(struct ext2_gdesc* gd, int slot)
      ext2gd_free_slot(gd, GDESC_BLK_SEL, slot);
  }
  
+static inline void
+ext2gd_schedule_sync(struct ext2_gdesc* gd)
+{   // Marks the descriptor buffer and both bitmap buffers dirty, regardless of which one changed.
+    fsblock_dirty(gd->buf);
+    fsblock_dirty(gd->ino_bmp.raw);     // NOTE(review): assumes the inode bitmap is loaded (raw is valid) — confirm
+    fsblock_dirty(gd->blk_bmp.raw);     // NOTE(review): same assumption for the block bitmap — confirm
+}
  
  /**
   * @brief Allocate a free inode
@@ -705,4 +763,26 @@ int
  ext2db_alloc_slot(struct v_superblock* vsb, struct ext2_gdesc** gd_out);
  
  
+/* ***********   Bitmap   *********** */
+
+void
+ext2bmp_init(struct ext2_bmp* e_bmp, bbuf_t bmp_buf, unsigned int nr_bits);
+
+int
+ext2bmp_alloc_nolock(struct ext2_bmp* e_bmp);
+
+void
+ext2bmp_free_nolock(struct ext2_bmp* e_bmp, unsigned int pos);
+
+void
+ext2bmp_discard_nolock(struct ext2_bmp* e_bmp);
+
+static inline bool
+ext2bmp_check_free(struct ext2_bmp* e_bmp)
+{
+    assert(e_bmp->raw);
+
+    return valid_bmp_slot(e_bmp->next_free);
+}
+
  #endif /* __LUNAIX_EXT2_H */
diff --git a/lunaix-os/kernel/fs/ext2/file.c b/lunaix-os/kernel/fs/ext2/file.c

index 0fe4d3d72fda26cfe7eb0e0fe616d1ff1c852935..bd14785d6391934fc1a8ebbd7bde4701b78ad874 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/file.c
+++ b/lunaix-os/kernel/fs/ext2/file.c
@@ -12,7 +12,6 @@ ext2_open_inode(struct v_inode* inode, struct v_file* file)
      struct ext2_file* e_file;
  
      e_file = valloc(sizeof(*e_file));
-    e_file->b_ino = EXT2_INO(inode);
      
      file->data = e_file;
  
@@ -96,7 +95,7 @@ ext2_inode_read(struct v_inode *inode,
      blksz = e_sb->block_size;
      end = fpos + len;
  
-    ext2db_itbegin(&iter, inode);
+    ext2db_itbegin(&iter, inode, DBIT_MODE_ISIZE);
      ext2db_itffw(&iter, fpos / blksz);
  
      while (fpos < end && ext2db_itnext(&iter)) {
@@ -134,7 +133,7 @@ ext2_inode_read_page(struct v_inode *inode, void *buffer, size_t fpos)
      n = PAGE_SIZE / e_sb->block_size;
      transfer_sz = MIN(PAGE_SIZE, e_sb->block_size);
  
-    ext2db_itbegin(&iter, inode);
+    ext2db_itbegin(&iter, inode, DBIT_MODE_ISIZE);
      ext2db_itffw(&iter, blk_start);
  
      while (n-- && ext2db_itnext(&iter)) 
diff --git a/lunaix-os/kernel/fs/ext2/group.c b/lunaix-os/kernel/fs/ext2/group.c

index b77d6b7c0b70d711eabe24eb7d5ec56859cb93b3..d11e510030c4d401ca8a14e5e95222bac453aa38 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/group.c
+++ b/lunaix-os/kernel/fs/ext2/group.c
@@ -89,8 +89,8 @@ __try_load_bitmap(struct v_superblock* vsb,
      struct ext2_sbinfo* ext2sb;
      struct ext2_bmp* bmp;
      struct llist_header* flist, *flist_entry;
+    unsigned int bmp_blk_id, bmp_size;
      bbuf_t buf;
-    unsigned int blk_id, bmp_blk_id, bmp_size;
  
      ext2sb = EXT2_SB(vsb);
  
@@ -111,8 +111,7 @@ __try_load_bitmap(struct v_superblock* vsb,
      flist = &ext2sb->free_list_sel[type];
      flist_entry = &gd->free_list_sel[type];
  
-    blk_id = ext2_datablock(vsb, bmp_blk_id);
-    buf    = fsblock_get(vsb, blk_id);
+    buf = fsblock_get(vsb, bmp_blk_id);
      if (blkbuf_errbuf(buf)) {
          return false;
      }
@@ -127,7 +126,7 @@ __try_load_bitmap(struct v_superblock* vsb,
  }
  
  int
-ext2gd_take(struct v_superblock* vsb, 
+ext2gd_take_at(struct v_superblock* vsb, 
                 unsigned int index, struct ext2_gdesc** out)
  {
      bbuf_t part, buf;
@@ -170,6 +169,8 @@ ext2gd_take(struct v_superblock* vsb,
          .ino_base = index * ext2sb->raw->s_ino_per_grp
      };
  
+    mutex_init(&gd->lock);
+
      *out = gd;
      
      if (!ext2sb->read_only) {
@@ -236,16 +237,8 @@ ext2bmp_init(struct ext2_bmp* e_bmp, bbuf_t bmp_buf, unsigned int nr_bits)
      __ext2bmp_update_next_free_cell(e_bmp);
  }
  
-bool
-ext2bmp_check_free(struct ext2_bmp* e_bmp)
-{
-    assert(e_bmp->raw);
-
-    return valid_bmp_slot(e_bmp->next_free);
-}
-
  int
-ext2bmp_alloc_one(struct ext2_bmp* e_bmp)
+ext2bmp_alloc_nolock(struct ext2_bmp* e_bmp)
  {
      assert(e_bmp->raw);
      
@@ -276,7 +269,7 @@ ext2bmp_alloc_one(struct ext2_bmp* e_bmp)
  }
  
  void
-ext2bmp_free_one(struct ext2_bmp* e_bmp, unsigned int pos)
+ext2bmp_free_nolock(struct ext2_bmp* e_bmp, unsigned int pos)
  {
      assert(e_bmp->raw);
  
@@ -292,7 +285,7 @@ ext2bmp_free_one(struct ext2_bmp* e_bmp, unsigned int pos)
  }
  
  void
-ext2bmp_discard(struct ext2_bmp* e_bmp)
+ext2bmp_discard_nolock(struct ext2_bmp* e_bmp)
  {
      assert(e_bmp->raw);
  
diff --git a/lunaix-os/kernel/fs/ext2/inode.c b/lunaix-os/kernel/fs/ext2/inode.c

index 5871d07d8f6822b79c479c3f30e0dfc305487eeb..0956b1e71c5355fb40f76a94f794aa9a78d87b8d 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/inode.c
+++ b/lunaix-os/kernel/fs/ext2/inode.c
@@ -128,8 +128,65 @@ __btlb_flushall(struct ext2_inode* e_inode)
      }
  }
  
+/**
+ * Estimate how many of an inode's file-system blocks are indirect
+ * (pointer-holding) blocks rather than data blocks.
+ *
+ * With N = block_size / sizeof(int) ids per block, the walk mirrors
+ *   i_blocks = 12 + (N + 1) + (N^2 + N + 1) + (N^3 + N^2 + N + 1)
+ *
+ * @param sb      supplies block_size;  @param fsblks  blocks linked to the inode
+ * @return number of indirect blocks; 0 when fsblks <= 12 (direct blocks only)
+ */
+static int
+__get_nr_indblks(struct ext2_sbinfo* sb, size_t fsblks)
+{
+    ssize_t blks;
+    int nr_ents;
+    int nr_inds = 0, n, acc_nr;   // nr_inds is accumulated with += below, so it must start at 0
+
+    blks    = (ssize_t)fsblks;
+    nr_ents = sb->block_size / sizeof(int);
+    acc_nr  = 1;
+
+    if (blks <= 12)
+        return 0;
+
+    blks -= 12;
+
+    if (blks > 0) // order-1 indirection
+    {
+        n = MIN(ICEIL(blks, nr_ents), acc_nr);
+        blks  -= n * nr_ents;
+
+        nr_inds += 1;
+        acc_nr  *= nr_ents;
+    }
+
+    if (blks > 0) // order-2 indirection
+    {
+        n = MIN(ICEIL(blks, nr_ents), acc_nr);
+        blks  -= n * nr_ents;
+
+        nr_inds += n + 1;
+        acc_nr  *= nr_ents;
+    }
+
+    if (blks > 0) // order-3 indirection
+    {
+        n = MAX(ICEIL(blks, nr_ents), acc_nr); // NOTE(review): orders 1-2 cap with MIN; confirm MAX is intended
+        blks  -= n * nr_ents;
+
+        nr_inds += n + ICEIL(n, nr_ents) + 1;
+    }
+
+    assert_fs(blks <= 0);
+
+    return nr_inds;
+}
+
  void
-ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode)
+ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode, int mode)
  {
      struct ext2_inode* e_ino;
  
@@ -138,8 +195,12 @@ ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode)
          .pos = 0,
          .inode = inode,
          .blksz = inode->sb->blksize,
-        .end_pos = ICEIL(e_ino->isize, inode->sb->blksize)
      };
+
+    if (mode == DBIT_MODE_ISIZE)
+        iter->end_pos = ICEIL(e_ino->isize, inode->sb->blksize);
+    else
+        iter->end_pos = e_ino->nr_fsblks - e_ino->nr_indblks;
  }
  
  void
@@ -186,14 +247,13 @@ ext2db_itnext(struct ext2_iterator* iter)
          fsblock_put(iter->sel_buf);
      }
  
-    buf = ext2db_get(iter->inode, iter->pos);
+    buf = ext2db_get(iter->inode, iter->pos++);
      iter->sel_buf = buf;
  
      if (!buf || !ext2_itcheckbuf(iter)) {
          return false;
      }
  
-    iter->pos++;
      iter->data = blkbuf_data(buf);
  
      return true;
@@ -331,7 +391,7 @@ __get_group_desc(struct v_superblock* vsb, int ino,
      sb = EXT2_SB(vsb);
  
      blkgrp_id = to_fsblock_id(ino) / sb->raw->s_ino_per_grp;
-    return ext2gd_take(vsb, blkgrp_id, gd_out);
+    return ext2gd_take_at(vsb, blkgrp_id, gd_out);
  }
  
  static struct ext2b_inode*
@@ -371,7 +431,7 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
      struct ext2b_inode* b_inode;
      struct ext2_inode* inode;
      unsigned int ind_ents;
-    size_t inds_blks;
+    size_t nr_linked;
  
      sb = gd->sb;
      b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_index);
@@ -383,7 +443,7 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
      inode->btlb      = vzalloc(sizeof(struct ext2_btlb));
      inode->buf       = ino_tab;
      inode->ino       = b_inode;
-    inode->blk_grp   = gd;
+    inode->blk_grp   = ext2gd_take(gd);
      inode->isize     = b_inode->i_size;
  
      if (ext2_feature(vsb, FEAT_LARGE_FILE)) {
@@ -391,11 +451,11 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
      }
  
      if (b_inode->i_blocks) {
-        inds_blks  = (size_t)b_inode->i_blocks;
-        inds_blks -= ICEIL(inode->isize, 512);
-        inds_blks /= (sb->block_size / 512);
+        nr_linked  = (size_t)b_inode->i_blocks;
+        nr_linked /= (sb->block_size / 512);
  
-        inode->indirect_blocks = inds_blks;
+        inode->nr_fsblks = nr_linked;
+        inode->nr_indblks = __get_nr_indblks(sb, nr_linked);
      }
  
      ind_ents = sb->block_size / sizeof(int);
@@ -404,6 +464,8 @@ __create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
      inode->inds_lgents = ilog2(ind_ents);
      inode->ino_id = gd->ino_base + to_ext2ino_id(ino_index);
  
+    ext2_debug("ino(%d): isize=%lu, nr_blk=%lu, nr_inds=%lu",
+                    inode->ino_id, inode->isize, inode->nr_fsblks, inode->nr_indblks);
      return inode;
  }
  
@@ -530,7 +592,7 @@ __free_block_at(struct v_superblock *vsb, unsigned int block_pos)
      sb = EXT2_SB(vsb);
      gd_index = block_pos / sb->raw->s_blk_per_grp;
  
-    if ((errno = ext2gd_take(vsb, gd_index, &gd))) {
+    if ((errno = ext2gd_take_at(vsb, gd_index, &gd))) {
          return errno;
      }
  
@@ -654,7 +716,9 @@ __update_inode_size(struct v_inode* inode, size_t size)
  {
      struct ext2b_inode* b_ino;
      struct ext2_inode*  e_ino;
+    struct ext2_sbinfo* sb;
  
+    sb    = EXT2_SB(inode->sb);
      e_ino = EXT2_INO(inode);
      b_ino = e_ino->ino;
  
@@ -668,8 +732,8 @@ __update_inode_size(struct v_inode* inode, size_t size)
          b_ino->i_size  = size;
      }
  
-    b_ino->i_blocks = ICEIL(size, 512);
-    b_ino->i_blocks += e_ino->indirect_blocks;
+    b_ino->i_blocks = e_ino->nr_fsblks * (sb->block_size / 512);
+    fsblock_dirty(e_ino->buf);
  }
  
  int
@@ -738,6 +802,9 @@ ext2_link(struct v_inode* this, struct v_dnode* new_name)
      new_name->data = e_dno;
      vfs_assign_inode(new_name, this);
  
+    // linking a dnode to its parent may cause a new data block to be allocated
+    ext2_sync_inode(parent);
+
  done:
      return errno;
  }
@@ -760,6 +827,8 @@ ext2_unlink(struct v_inode* this, struct v_dnode* name)
          return errno;
      }
  
+    // unlinking a dnode from its parent does not free the allocated data
+    //  blocks; rather, it leads to fragmentation
      return ext2ino_free(this);
  }
  
@@ -784,6 +853,9 @@ __walkstate_set_stack(struct walk_state* state, int depth,
      state->stack.indices[depth] = index;
  }
  
+#define WALKMODE_ALLOC  0b01
+#define WALKMODE_NOBTLB 0b10
+
  /**
   * @brief Walk the indrection chain given the position of data block
   *        relative to the inode. Upon completed, walk_state will be
@@ -794,20 +866,26 @@ __walkstate_set_stack(struct walk_state* state, int depth,
   *        (i.e., a leaf block), then the state is the indirect block that
   *        containing the ID of that leaf block.
   *        
- *        If `resolve` is set, it will resolve any absence encountered
- *        during the walk by allocating and chaining indirect block.
- *        It require the file system is mounted writable.
+ *        Two modes can be specified to alter the walk process:
+ * 
+ *        WALKMODE_ALLOC
+ *          resolve any absence encountered
+ *          during the walk by allocating and chaining indirect block
+ *        
+ *        WALKMODE_NOBTLB
+ *          Ignore the cached result, always perform a complete walk.
+ *          This does not bypass the cache entirely; lower level caches
+ *          like block buffer (blkio request cache) will be used transparently
   * 
   * @param inode     inode to walk
   * @param pos       flattened data block position to be located
   * @param state     contain the walk result
- * @param resolve   whether to auto allocate the indirection structure during 
- *                  walk if `pos` is not exist.
+ * @param mode      walk mode
   * @return int 
   */
  static int
  __walk_indirects(struct v_inode* inode, unsigned int pos,
-                 struct walk_state* state, bool resolve, bool full_walk)
+                 struct walk_state* state, int mode)
  {
      int errno;
      int inds, stride, shifts, level;
@@ -816,12 +894,13 @@ __walk_indirects(struct v_inode* inode, unsigned int pos,
      struct ext2b_inode* b_inode;
      struct v_superblock* vsb;
      bbuf_t table, next_table;
+    bool alloc;
  
      e_inode = EXT2_INO(inode);
      b_inode = e_inode->ino;
      vsb = inode->sb;
      level = 0;
-    resolve = resolve && !EXT2_SB(vsb)->read_only;
+    alloc = (mode & WALKMODE_ALLOC) && !EXT2_SB(vsb)->read_only;
  
      if (pos < 12) {
          index = pos;
@@ -847,7 +926,7 @@ __walk_indirects(struct v_inode* inode, unsigned int pos,
      }
  
      // bTLB cache the last level indirect block
-    if (!full_walk && (table = __btlb_hit(e_inode, pos))) {
+    if (!(mode & WALKMODE_NOBTLB) && (table = __btlb_hit(e_inode, pos))) {
          level = inds;
          index = pos & ((1 << stride) - 1);
          slotref = &block_buffer(table, u32_t)[index];
@@ -867,7 +946,7 @@ __walk_indirects(struct v_inode* inode, unsigned int pos,
  
          next = *slotref;
          if (!next) {
-            if (!resolve) {
+            if (!alloc) {
                  goto _return;
              }
  
@@ -876,7 +955,6 @@ __walk_indirects(struct v_inode* inode, unsigned int pos,
                  return errno;
              }
  
-            e_inode->indirect_blocks++;
              *slotref = fsblock_id(next_table);
              fsblock_dirty(table);
          }
@@ -894,7 +972,6 @@ __walk_indirects(struct v_inode* inode, unsigned int pos,
          assert(shifts >= 0);
  
          index = (pos & mask) >> shifts;
-
          slotref = &block_buffer(table, u32_t)[index];
  
          shifts -= stride;
@@ -927,7 +1004,7 @@ ext2db_get(struct v_inode* inode, unsigned int data_pos)
  
      ext2walk_init_state(&state);
  
-    errno = __walk_indirects(inode, data_pos, &state, false, false);
+    errno = __walk_indirects(inode, data_pos, &state, 0);
      if (errno) {
          return (bbuf_t)INVL_BUFFER;
      }
@@ -953,7 +1030,7 @@ ext2db_acquire(struct v_inode* inode, unsigned int data_pos, bbuf_t* out)
  
      ext2walk_init_state(&state);
  
-    errno = __walk_indirects(inode, data_pos, &state, true, false);
+    errno = __walk_indirects(inode, data_pos, &state, WALKMODE_ALLOC);
      if (errno) {
          return errno;
      }
@@ -987,36 +1064,36 @@ done:
  int
  ext2db_alloc(struct v_inode* inode, bbuf_t* out)
  {
-    int free_ino_idx;
+    int next_free;
      struct ext2_gdesc* gd;
      struct ext2_inode* e_inode;
      struct v_superblock* vsb;
  
-    free_ino_idx = ALLOC_FAIL;
+    next_free = ALLOC_FAIL;
      e_inode = EXT2_INO(inode);
      vsb = inode->sb;
  
      gd = e_inode->blk_grp;
-    free_ino_idx = ext2gd_alloc_block(gd);
+    next_free = ext2gd_alloc_block(gd);
  
      // locality alloc failed, try entire fs
-    if (!valid_bmp_slot(free_ino_idx)) {
-        free_ino_idx = ext2db_alloc_slot(vsb, &gd);
+    if (!valid_bmp_slot(next_free)) {
+        next_free = ext2db_alloc_slot(vsb, &gd);
      }
  
-    if (!valid_bmp_slot(free_ino_idx)) {
+    if (!valid_bmp_slot(next_free)) {
          return EDQUOT;
      }
  
-    free_ino_idx += gd->base;
-    free_ino_idx = ext2_datablock(vsb, free_ino_idx);
-    free_ino_idx = to_ext2ino_id(free_ino_idx);
+    next_free += gd->base;
+    next_free = ext2_datablock(vsb, next_free);
      
-    bbuf_t buf = fsblock_get(vsb, free_ino_idx);
+    bbuf_t buf = fsblock_get(vsb, next_free);
      if (blkbuf_errbuf(buf)) {
          return EIO;
      }
  
+    e_inode->nr_fsblks++;
      *out = buf;
      return 0;
  }
@@ -1067,7 +1144,6 @@ ext2ino_resizing(struct v_inode* inode, size_t new_size)
      }
  
      __update_inode_size(inode, new_size);
-    fsblock_dirty(e_ino->buf);
  
      if (check_symlink_node(inode)) {
          return 0;
@@ -1080,7 +1156,7 @@ ext2ino_resizing(struct v_inode* inode, size_t new_size)
      ext2walk_init_state(&state);
  
      pos   = new_size / fsapi_block_size(inode->sb);
-    errno = __walk_indirects(inode, pos, &state, false, true);
+    errno = __walk_indirects(inode, pos, &state, WALKMODE_NOBTLB);
      if (errno) {
          return errno;
      }
diff --git a/lunaix-os/kernel/fs/ext2/mount.c b/lunaix-os/kernel/fs/ext2/mount.c

index 05fbdce029ea6243dd0f6c6fbde4b6bd1b2be01a..0d328e796df31a079cdc820391eeea9dd68ac66b 100644 (file)
--- a/lunaix-os/kernel/fs/ext2/mount.c
+++ b/lunaix-os/kernel/fs/ext2/mount.c
@@ -170,6 +170,8 @@ ext2_mount(struct v_superblock* vsb, struct v_dnode* mnt)
      ext2sb->raw = rawsb;
      ext2sb->all_feature = __translate_feature(rawsb);
  
+    mutex_init(&ext2sb->lock);
+
      fsapi_set_vsb_ops(vsb, &vsb_ops);
      fsapi_complete_vsb_setup(vsb, ext2sb);
  
@@ -188,6 +190,9 @@ ext2_mount(struct v_superblock* vsb, struct v_dnode* mnt)
          ext2sb->raw = offset(blkbuf_data(buf), EXT2_BASE_BLKSZ);
      }
  
+    ext2sb->raw->s_mnt_cnt++;
+    ext2sb->raw->s_mtime = clock_unixtime();
+
      ext2sb->buf = buf;
      vfree(rawsb);
      return 0;
diff --git a/lunaix-os/kernel/fs/mount.c b/lunaix-os/kernel/fs/mount.c

index b62b77faa8788d273888a7828933a56b0c1ffa13..1dee55a1d946626b6bf95e903153fc48b7d54d46 100644 (file)
--- a/lunaix-os/kernel/fs/mount.c
+++ b/lunaix-os/kernel/fs/mount.c
@@ -86,8 +86,8 @@ __vfs_do_unmount(struct v_mount* mnt)
  
      // detached the inodes from cache, and let lru policy to recycle them
      for (size_t i = 0; i < VFS_HASHTABLE_SIZE; i++) {
-        __detach_node_cache_ref(&sb->i_cache[i]);
-        __detach_node_cache_ref(&sb->d_cache[i]);
+        __detach_node_cache_ref(&sb->i_cache.pool[i]);
+        __detach_node_cache_ref(&sb->d_cache.pool[i]);
      }
  
      struct v_dnode *pos, *next;
diff --git a/lunaix-os/kernel/fs/vfs.c b/lunaix-os/kernel/fs/vfs.c

index 32014d34a871e2783ff696ef0bffdce663b4386e..2402cc57438e7b95a63944dd7dc7779656fe7928 100644 (file)
--- a/lunaix-os/kernel/fs/vfs.c
+++ b/lunaix-os/kernel/fs/vfs.c
@@ -102,21 +102,54 @@ vfs_init()
      vfs_sysroot->parent = vfs_sysroot;
      
      vfs_ref_dnode(vfs_sysroot);
+    lru_remove(dnode_lru, &vfs_sysroot->lru);
+}
+
+void
+vfs_vncache_init(struct vncache* cache)
+{
+    cache->pool = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
+    rwlock_init(&cache->lock);
+}
+
+void
+vfs_vncache_free(struct vncache* cache)
+{
+    // clear all other reader/writer
+    rwlock_begin_write(&cache->lock);
+    vfree(cache->pool);
+    
+    // the pool is already freed, and so is the lock
+}
+
+void
+vfs_vncache_add(struct vncache* cache, size_t key, struct hlist_node* node)
+{
+    struct hbucket* slot;
+
+    cache_atomic_write(cache, 
+    {
+        slot = &cache->pool[key & VFS_HASH_MASK];
+        hlist_delete(node);
+        hlist_add(&slot->head, node);
+    });
  }
  
  static inline struct hbucket*
-__dcache_hash(struct v_dnode* parent, u32_t* hash)
+__dcache_hash_nolock(struct v_dnode* parent, u32_t* hash)
  {
+    struct v_superblock* sb;
      struct hbucket* d_cache;
      u32_t _hash;
+
+    sb = parent->super_block;
      
-    d_cache = parent->super_block->d_cache;
      _hash = *hash;
      _hash = _hash ^ (_hash >> VFS_HASHBITS);
      _hash += (u32_t)__ptr(parent);
  
      *hash = _hash;
-    return &d_cache[_hash & VFS_HASH_MASK];
+    return &sb->d_cache.pool[_hash & VFS_HASH_MASK];
  }
  
  static inline int
@@ -135,6 +168,11 @@ __sync_inode_nolock(struct v_inode* inode)
  struct v_dnode*
  vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
  {
+    u32_t hash;
+    struct hbucket* slot;
+    struct v_dnode *pos, *n;
+    struct vncache *dcache;
+ 
      if (!str->len || HSTR_EQ(str, &vfs_dot))
          return parent;
  
@@ -142,16 +180,23 @@ vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
          return parent->parent;
      }
  
-    u32_t hash = str->hash;
-    struct hbucket* slot = __dcache_hash(parent, &hash);
+    hash = str->hash;
+    dcache = dnode_cache(parent);
+    
+    vncache_lock_read(dcache);
  
-    struct v_dnode *pos, *n;
+    slot = __dcache_hash_nolock(parent, &hash);
      hashtable_bucket_foreach(slot, pos, n, hash_list)
      {
-        if (pos->name.hash == hash && pos->parent == parent) {
-            return pos;
+        if (pos->name.hash != hash || pos->parent != parent) {
+            continue;
          }
+
+        vncache_unlock_read(dcache);
+        return pos;
      }
+
+    vncache_unlock_read(dcache);
      return NULL;
  }
  
@@ -172,14 +217,21 @@ __vfs_touch_inode(struct v_inode* inode, const int type)
  void
  vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
  {
+    struct hbucket* bucket;
+    struct vncache* cache;
+
      assert(parent);
+    assert(locked_node(parent));
  
      dnode->ref_count = 1;
      dnode->parent = parent;
      llist_append(&parent->children, &dnode->siblings);
  
-    struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
-    hlist_add(&bucket->head, &dnode->hash_list);
+    cache_atomic_write(dnode_cache(parent), 
+    {
+        bucket = __dcache_hash_nolock(parent, &dnode->name.hash);
+        hlist_add(&bucket->head, &dnode->hash_list);
+    });
  }
  
  void
@@ -190,7 +242,12 @@ vfs_dcache_remove(struct v_dnode* dnode)
  
      llist_delete(&dnode->siblings);
      llist_delete(&dnode->aka_list);
-    hlist_delete(&dnode->hash_list);
+    lru_remove(dnode_lru, &dnode->lru);
+
+    cache_atomic_write(dnode_cache(dnode),
+    {
+        hlist_delete(&dnode->hash_list);
+    });
  
      dnode->parent = NULL;
      dnode->ref_count = 0;
@@ -200,10 +257,14 @@ void
  vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
  {
      assert(new_parent);
+    assert(locked_node(new_parent));
  
-    hstr_rehash(&dnode->name, HSTR_FULL_HASH);
-    vfs_dcache_remove(dnode);
-    vfs_dcache_add(new_parent, dnode);
+    dnode_atomic(dnode, 
+    {
+        hstr_rehash(&dnode->name, HSTR_FULL_HASH);
+        vfs_dcache_remove(dnode);
+        vfs_dcache_add(new_parent, dnode);
+    });
  }
  
  int
@@ -250,6 +311,8 @@ vfs_open(struct v_dnode* dnode, struct v_file** file)
  void
  vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
  {
+    lock_dnode(assign_to);
+
      if (assign_to->inode) {
          llist_delete(&assign_to->aka_list);
          assign_to->inode->link_count--;
@@ -258,26 +321,33 @@ vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
      llist_append(&inode->aka_dnodes, &assign_to->aka_list);
      assign_to->inode = inode;
      inode->link_count++;
+
+    unlock_dnode(assign_to);
  }
  
  int
  vfs_link(struct v_dnode* to_link, struct v_dnode* name)
  {
      int errno;
+    struct v_inode* inode;
+
+    inode = to_link->inode;
  
      if ((errno = vfs_check_writable(to_link))) {
          return errno;
      }
  
-    lock_inode(to_link->inode);
+    lock_inode(inode);
+
      if (to_link->super_block->root != name->super_block->root) {
          errno = EXDEV;
-    } else if (!to_link->inode->ops->link) {
+    } else if (!inode->ops->link) {
          errno = ENOTSUP;
-    } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
-        vfs_assign_inode(name, to_link->inode);
+    } else if (!(errno = inode->ops->link(inode, name))) {
+        vfs_assign_inode(name, inode);
      }
-    unlock_inode(to_link->inode);
+
+    unlock_inode(inode);
  
      return errno;
  }
@@ -290,6 +360,11 @@ vfs_pclose(struct v_file* file, pid_t pid)
  
      inode = file->inode;
  
+    if (vfs_check_duped_file(file)) {
+        vfs_unref_file(file);
+        return 0;
+    }
+
      /*
       * Prevent dead lock.
       * This happened when process is terminated while blocking on read.
@@ -307,13 +382,11 @@ vfs_pclose(struct v_file* file, pid_t pid)
       * than A. And this will cause a probable race condition on A if other
       * process is writing to this file later after B exit.
      */
-
      mutex_unlock_for(&inode->lock, pid);
-    
-    if (vfs_check_duped_file(file)) {
-        vfs_unref_file(file);
-        return 0;
-    }
+
+    // now regain lock for inode syncing
+
+    lock_inode(inode);
  
      if ((errno = file->ops->close(file))) {
          goto done;
@@ -322,17 +395,6 @@ vfs_pclose(struct v_file* file, pid_t pid)
      vfs_unref_dnode(file->dnode);
      cake_release(file_pile, file);
  
-    /*
-        if the current inode is not being locked by other 
-        threads that does not share same open context,
-        then we can try to do sync opportunistically
-    */
-    if (mutex_on_hold(&inode->lock)) {
-        goto done;
-    }
-    
-    lock_inode(inode);
-
      pcache_commit_all(inode);
      inode->open_count--;
  
@@ -340,9 +402,8 @@ vfs_pclose(struct v_file* file, pid_t pid)
          __sync_inode_nolock(inode);
      }
  
-    unlock_inode(inode);
-
  done:
+    unlock_inode(inode);
      return errno;
  }
  
@@ -384,12 +445,22 @@ vfs_fsync(struct v_file* file)
  int
  vfs_alloc_fdslot(int* fd)
  {
+    struct v_fdtable* fdtab;
+
+    fdtab = __current->fdtable;
+    lock_fdtable(fdtab);
+
      for (size_t i = 0; i < VFS_MAX_FD; i++) {
-        if (!__current->fdtable->fds[i]) {
-            *fd = i;
-            return 0;
+        if (__current->fdtable->fds[i]) {
+            continue;
          }
+
+        *fd = i;
+        unlock_fdtable(fdtab);
+        return 0;
      }
+
+    unlock_fdtable(fdtab);
      return EMFILE;
  }
  
@@ -399,9 +470,9 @@ vfs_sb_alloc()
      struct v_superblock* sb = cake_grab(superblock_pile);
      memset(sb, 0, sizeof(*sb));
      llist_init_head(&sb->sb_list);
-    
-    sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
-    sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
+
+    vfs_vncache_init(&sb->i_cache);
+    vfs_vncache_init(&sb->d_cache);
  
      sb->ref_count = 1;
      return sb;
@@ -427,25 +498,36 @@ vfs_sb_unref(struct v_superblock* sb)
          sb->ops.release(sb);
      }
  
-    vfree(sb->i_cache);
-    vfree(sb->d_cache);
-    
+    vfs_vncache_free(&sb->i_cache);
+    vfs_vncache_free(&sb->d_cache);
+
      cake_release(superblock_pile, sb);
  }
  
-static int
+static inline bool
+__dnode_evictable(struct v_dnode* dnode)
+{
+    return dnode->ref_count == 1 
+        && llist_empty(&dnode->children);
+}
+
+static bool
  __vfs_try_evict_dnode(struct lru_node* obj)
  {
      struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
  
-    if (!dnode->ref_count) {
-        vfs_d_free(dnode);
-        return 1;
+    if (mutex_on_hold(&dnode->lock))
+        return false;
+
+    if (!__dnode_evictable(dnode)) {
+        return false;
      }
-    return 0;
+
+    vfs_d_free(dnode);
+    return true;
  }
  
-static int
+static bool
  __vfs_try_evict_inode(struct lru_node* obj)
  {
      struct v_inode* inode = container_of(obj, struct v_inode, lru);
@@ -493,13 +575,14 @@ void
  vfs_d_free(struct v_dnode* dnode)
  {
      assert(dnode->ref_count == 1);
-
+    
      if (dnode->inode) {
          assert(dnode->inode->link_count > 0);
          dnode->inode->link_count--;
      }
  
      vfs_dcache_remove(dnode);
+
      // Make sure the children de-referencing their parent.
      // With lru presented, the eviction will be propagated over the entire
      // detached subtree eventually
@@ -514,6 +597,7 @@ vfs_d_free(struct v_dnode* dnode)
      }
  
      vfs_sb_unref(dnode->super_block);
+    
      vfree((void*)dnode->name.value);
      cake_release(dnode_pile, dnode);
  }
@@ -521,26 +605,32 @@ vfs_d_free(struct v_dnode* dnode)
  struct v_inode*
  vfs_i_find(struct v_superblock* sb, u32_t i_id)
  {
-    struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
-    struct v_inode *pos, *n;
-    hashtable_bucket_foreach(slot, pos, n, hash_list)
+    struct hbucket* slot;
+    struct v_inode *pos, *n, *found = NULL;
+
+    cache_atomic_read(&sb->i_cache, 
      {
-        if (pos->id == i_id) {
+        slot = &sb->i_cache.pool[i_id & VFS_HASH_MASK];
+
+        hashtable_bucket_foreach(slot, pos, n, hash_list)
+        {
+            if (pos->id != i_id) {
+                continue;
+            }
+
              lru_use_one(inode_lru, &pos->lru);
-            return pos;
+            found = pos;
+            break;
          }
-    }
+    });
  
-    return NULL;
+    return found;
  }
  
  void
  vfs_i_addhash(struct v_inode* inode)
  {
-    struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
-
-    hlist_delete(&inode->hash_list);
-    hlist_add(&slot->head, &inode->hash_list);
+    vfs_vncache_add(inode_cache(inode), inode->id, &inode->hash_list);
  }
  
  struct v_inode*
@@ -569,6 +659,7 @@ vfs_i_alloc(struct v_superblock* sb)
  
      vfs_i_assign_sb(inode, sb);
      lru_use_one(inode_lru, &inode->lru);
+    
      return inode;
  }
  
@@ -579,6 +670,7 @@ vfs_i_free(struct v_inode* inode)
          pcache_release(inode->pg_cache);
          vfree(inode->pg_cache);
      }
+
      // we don't need to sync inode.
      // If an inode can be free, then it must be properly closed.
      // Hence it must be synced already!
@@ -587,7 +679,10 @@ vfs_i_free(struct v_inode* inode)
      }
  
      vfs_sb_unref(inode->sb);
+    
      hlist_delete(&inode->hash_list);
+    lru_remove(inode_lru, &inode->lru);
+
      cake_release(inode_pile, inode);
  }
  
@@ -605,10 +700,19 @@ vfs_i_free(struct v_inode* inode)
  int
  vfs_getfd(int fd, struct v_fd** fd_s)
  {
-    if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
-        return 0;
+    struct v_fdtable* fdtab;
+
+    if (!TEST_FD(fd)) {
+        return EBADF;
      }
-    return EBADF;
+
+    fdtab = __current->fdtable;
+
+    lock_fdtable(fdtab);
+    *fd_s = __current->fdtable->fds[fd];
+    unlock_fdtable(fdtab);
+
+    return !*fd_s ? EBADF : 0;
  }
  
  static int
@@ -671,27 +775,33 @@ __vfs_try_locate_file(const char* path,
          return errno;
      }
  
+    lock_dnode(fdir);
+
      errno = vfs_walk(fdir, name.value, &file, NULL, woption);
  
      if (errno && errno != ENOENT) {
-        goto done;
+        goto error;
+    }
+
+    if (!errno && (options & FLOC_MKNAME)) {
+        errno = EEXIST;
+        goto error;
      }
      
      if (!errno) {
-        if ((options & FLOC_MKNAME)) {
-            errno = EEXIST;
-        }
+        // the file is present, no need to hold the directory lock
+        unlock_dnode(fdir);
          goto done;
      }
  
      // errno == ENOENT
      if (!options) {
-        goto done;
+        goto error;
      }
  
      errno = vfs_check_writable(fdir);
      if (errno) {
-        goto done;
+        goto error;
      }
  
      floc->fresh = true;
@@ -699,17 +809,20 @@ __vfs_try_locate_file(const char* path,
      file = vfs_d_alloc(fdir, &name);
  
      if (!file) {
-        return ENOMEM;
+        errno = ENOMEM;
+        goto error;
      }
  
-    lock_dnode(fdir);
-
      vfs_dcache_add(fdir, file);
  
  done:
      floc->dir   = fdir;
      floc->file  = file;
+    
+    return errno;
  
+error:
+    unlock_dnode(fdir);
      return errno;
  }
  
@@ -869,6 +982,7 @@ __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
      if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
          goto unlock;
      }
+
      dent->d_offset++;
      fd_s->file->f_pos++;
  
@@ -1105,6 +1219,42 @@ vfs_get_dtype(int itype)
      return dtype;
  }
  
+struct v_fdtable*
+fdtable_create()
+{
+    struct v_fdtable* fdtab;
+
+    fdtab = vzalloc(sizeof(struct v_fdtable));
+    mutex_init(&fdtab->lock);
+
+    return fdtab;
+}
+
+void
+fdtable_copy(struct v_fdtable* dest, struct v_fdtable* src)
+{
+    lock_fdtable(dest);
+    lock_fdtable(src);
+    
+    for (size_t i = 0; i < VFS_MAX_FD; i++) {
+        struct v_fd* fd = src->fds[i];
+        if (!fd)
+            continue;
+        vfs_dup_fd(fd, &dest->fds[i]);
+    }
+
+    unlock_fdtable(dest);
+    unlock_fdtable(src);
+}
+
+void
+fdtable_free(struct v_fdtable* table)
+{
+    assert(!mutex_on_hold(&table->lock));
+
+    vfree(table);
+}
+
  __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
  {
      int errno;
@@ -1114,11 +1264,12 @@ __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
      }
  
      struct v_dnode* dnode;
-    errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
  
-    if (errno >= 0) {
-        return errno;
-    }
+    dnode = fd_s->file->dnode;
+
+    lock_dnode(dnode);
+    errno = vfs_get_path(dnode, buf, size, 0);
+    unlock_dnode(dnode);
  
  done:
      return DO_STATUS(errno);
@@ -1235,10 +1386,13 @@ done:
  
  __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
  {
-    int errno = 0;
+    int errno;
+    struct hstr name;
+    struct v_inode* inode;
      struct v_dnode *parent, *dir;
      char name_value[VFS_NAME_MAXLEN];
-    struct hstr name = HHSTR(name_value, 0, 0);
+
+    name = HHSTR(name_value, 0, 0);
  
      if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
          goto done;
@@ -1258,7 +1412,7 @@ __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
          goto done;
      }
  
-    struct v_inode* inode = parent->inode;
+    inode = parent->inode;
  
      lock_dnode(parent);
      lock_inode(inode);
@@ -1410,12 +1564,14 @@ vfs_dup_fd(struct v_fd* old, struct v_fd** new)
  int
  vfs_dup2(int oldfd, int newfd)
  {
+    int errno;
+    struct v_fdtable* fdtab;
+    struct v_fd *oldfd_s, *newfd_s;
+    
      if (newfd == oldfd) {
          return newfd;
      }
  
-    int errno;
-    struct v_fd *oldfd_s, *newfd_s;
      if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
          goto done;
      }
@@ -1425,16 +1581,26 @@ vfs_dup2(int oldfd, int newfd)
          goto done;
      }
  
-    newfd_s = __current->fdtable->fds[newfd];
+    fdtab = __current->fdtable;
+    lock_fdtable(fdtab);
+
+    newfd_s = fdtab->fds[newfd];
      if (newfd_s && (errno = vfs_close(newfd_s->file))) {
-        goto done;
+        goto unlock_and_done;
      }
  
-    if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
-        __current->fdtable->fds[newfd] = newfd_s;
-        return newfd;
+    if ((errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
+        goto unlock_and_done;
      }
  
+    fdtab->fds[newfd] = newfd_s;
+    
+    unlock_fdtable(fdtab);
+    return newfd;
+
+unlock_and_done:
+    unlock_fdtable(fdtab);
+
  done:
      return DO_STATUS(errno);
  }
@@ -1650,6 +1816,7 @@ vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
  
      lock_dnode(current);
      lock_dnode(target);
+
      if (oldparent)
          lock_dnode(oldparent);
      if (newparent)
@@ -1678,6 +1845,7 @@ vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
  
  cleanup:
      unlock_dnode(current);
+    
      if (oldparent)
          unlock_dnode(oldparent);
      if (newparent)
diff --git a/lunaix-os/kernel/process/fork.c b/lunaix-os/kernel/process/fork.c

index 674e5c64b43199ad5651cde82d00010f94e0aca3..d68a8f830d3768553d66f97ef2599dabfcdf255b 100644 (file)
--- a/lunaix-os/kernel/process/fork.c
+++ b/lunaix-os/kernel/process/fork.c
@@ -45,18 +45,6 @@ region_maybe_cow(struct mm_region* region)
      tlb_flush_vmr_all(region);
  }
  
-static inline void
-__dup_fdtable(struct proc_info* pcb)
-{
-    for (size_t i = 0; i < VFS_MAX_FD; i++) {
-        struct v_fd* fd = __current->fdtable->fds[i];
-        if (!fd)
-            continue;
-        vfs_dup_fd(fd, &pcb->fdtable->fds[i]);
-    }
-}
-
-
  static void
  __dup_kernel_stack(struct thread* thread, ptr_t vm_mnt)
  {
@@ -172,7 +160,7 @@ dup_proc()
          vfs_ref_dnode(pcb->cwd);
      }
  
-    __dup_fdtable(pcb);
+    fdtable_copy(pcb->fdtable, __current->fdtable);
      uscope_copy(&pcb->uscope, current_user_scope());
  
      struct proc_mm* mm = vmspace(pcb);
diff --git a/lunaix-os/usr/LBuild b/lunaix-os/usr/LBuild

index 4b0617cbab5417a0b167cb0f7a0ca6ed6b9c236a..320bbebd3c3da3711a373d254f1e6ca143d9a0bd 100644 (file)
--- a/lunaix-os/usr/LBuild
+++ b/lunaix-os/usr/LBuild
@@ -9,6 +9,8 @@ sources([
      "maze",
      "mkdir",
      "rm",
+    "testfork",
+    "fragfile",
  ])
  
  compile_opts([
diff --git a/lunaix-os/usr/fragfile.c b/lunaix-os/usr/fragfile.c

new file mode 100644 (file)

index 0000000..a67b94a
--- /dev/null
+++ b/lunaix-os/usr/fragfile.c
@@ -0,0 +1,99 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <lunaix/status.h>
+
+/*
+ * fragfile: fill a directory with many randomly named empty files.
+ *
+ * Creates (or reuses) "testdir", enters it, and then creates up to
+ * NR_REPEAT * NR_BUFSIZE / NR_NAME_LEN files whose NR_NAME_LEN-character
+ * names are derived from words read from /dev/rand. Stops early once
+ * opening a file fails with EDQUOT.
+ */
+
+static char alphabets[] = "abcdefghijklmnopqrstuvwxyz"
+                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                          "01234567890";
+
+#define NR_ALPHABETS (sizeof(alphabets) - 1)
+#define NR_BUFSIZE   4096
+#define NR_NAME_LEN  8
+#define NR_REPEAT    5
+
+int main()
+{
+    unsigned int buf[NR_BUFSIZE];
+    char name[NR_NAME_LEN + 1];
+    int fd = open("/dev/rand", O_RDONLY);
+
+    if (fd < 0)
+    {
+        printf("Unable to open /dev/rand %d\n", errno);
+        _exit(1);
+    }
+
+    if (mkdir("testdir") && errno != EEXIST)
+    {
+        printf("Unable to mkdir %d\n", errno);
+        _exit(1);
+    }
+
+    if (chdir("testdir"))
+    {
+        printf("Unable to chdir %d\n", errno);
+        _exit(1);
+    }
+
+    int nr_total = NR_REPEAT * NR_BUFSIZE / NR_NAME_LEN;
+
+    int cnt = 0;
+    for (int i = 0; i < NR_REPEAT; i++)
+    {
+        int n = read(fd, buf, sizeof(buf));
+        if (n <= 0)
+        {
+            printf("\nUnable to read /dev/rand %d\n", errno);
+            close(fd);
+            _exit(1);
+        }
+
+        // a short read fills only part of buf; never consume the rest
+        int nr_words = n / (int)sizeof(buf[0]);
+        int j = 0, k = 0;
+        while (j < nr_words) {
+            name[k++] = alphabets[buf[j++] % NR_ALPHABETS];
+
+            if (k < NR_NAME_LEN) {
+                continue;
+            }
+
+            k = 0;
+            cnt++;
+            name[NR_NAME_LEN] = 0;
+
+            printf("[%04d/%04d] creating: %s\r", cnt, nr_total, name);
+            int fd2 = open(name, O_RDONLY | O_CREAT);
+            
+            if (fd2 < 0) 
+            {
+                printf("\n");
+                if (errno == EDQUOT) {
+                    printf("Out of quota\n");
+                    close(fd);
+                    return 0;
+                }
+
+                printf("Unable to open %d\n", errno);
+                continue;
+            }
+
+            close(fd2);
+        }
+    }
+
+    close(fd);
+    printf("\n");
+    return 0;
+}
\ No newline at end of file
author	Lunaixsky <lunaixsky@qq.com>
	Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)
committer	GitHub <noreply@github.com>
	Sun, 4 May 2025 18:33:51 +0000 (19:33 +0100)
lunaix-os/includes/lunaix/ds/mutex.h		patch \| blob \| history
lunaix-os/includes/lunaix/ds/rwlock.h		patch \| blob \| history
lunaix-os/includes/lunaix/ds/spinlock.h		patch \| blob \| history
lunaix-os/includes/lunaix/fs.h		patch \| blob \| history
lunaix-os/kernel/ds/mutex.c		patch \| blob \| history
lunaix-os/kernel/fs/LConfig		patch \| blob \| history
lunaix-os/kernel/fs/ext2/LConfig	[new file with mode: 0644]	patch \| blob
lunaix-os/kernel/fs/ext2/alloc.c		patch \| blob \| history
lunaix-os/kernel/fs/ext2/dir.c		patch \| blob \| history
lunaix-os/kernel/fs/ext2/ext2.h		patch \| blob \| history
lunaix-os/kernel/fs/ext2/file.c		patch \| blob \| history
lunaix-os/kernel/fs/ext2/group.c		patch \| blob \| history
lunaix-os/kernel/fs/ext2/inode.c		patch \| blob \| history
lunaix-os/kernel/fs/ext2/mount.c		patch \| blob \| history
lunaix-os/kernel/fs/mount.c		patch \| blob \| history
lunaix-os/kernel/fs/vfs.c		patch \| blob \| history
lunaix-os/kernel/process/fork.c		patch \| blob \| history
lunaix-os/usr/LBuild		patch \| blob \| history
lunaix-os/usr/fragfile.c	[new file with mode: 0644]	patch \| blob