refactor: mount system reworked

[lunaix-os.git] / lunaix-os / kernel / fs / vfs.c
diff --git a/lunaix-os/kernel/fs/vfs.c b/lunaix-os/kernel/fs/vfs.c

index 958c76197279bccee66e26b08175a86cd3c82c77..e219c52ad6a23597aed9de5f49a258c389a0df6f 100644 (file)
--- a/lunaix-os/kernel/fs/vfs.c
+++ b/lunaix-os/kernel/fs/vfs.c
@@ -16,24 +16,25 @@
  
      It is overseen by Twilight Sparkle ;)
  
- 1. Get inodes hooked into lru
- 2. Get dnodes hooked into lru
+ 1. Get inodes hooked into lru (CHECKED)
+ 2. Get dnodes hooked into lru (CHECKED)
   3. Get inodes properly hashed so they can be reused by underling fs (CHECKED)
- 4. (lru) Add a callback function (or destructor) for eviction.
+ 4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
          [good idea] or a constructor/destructor pattern in cake allocator ?
   5. (mount) Figure out a way to identify a busy mount point before unmount
              maybe a unified mount_point structure that maintain a referencing
              counter on any dnodes within the subtree? Such a counter will only
              increament if a file is opened or a dnode is being used as working
-            directory and decreamenting conversely.
+            directory and decreamenting conversely. (CHECKED)
   6. (mount) Ability to track all mount points (including sub-mounts)
-            so we can be confident to clean up everything when we unmount.
+            so we can be confident to clean up everything when we
+            unmount. (CHECKED)
   7. (mount) Figure out a way to acquire the device represented by a dnode.
              so it can be used to mount. (e.g. we wish to get `struct device*`
              out of the dnode at /dev/sda)
              [tip] we should pay attention at twifs and add a private_data field
-            under struct v_dnode?
- 8. (mount) Then, we should refactor on mount/unmount mechanism.
+            under struct v_dnode? (CHECKED)
+ 8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
   9. (mount) (future) Ability to mount any thing? e.g. Linux can mount a disk
                      image file using a so called "loopback" pseudo device. Maybe
                      we can do similar thing in Lunaix? A block device emulation
@@ -56,16 +57,20 @@
  #include <lunaix/fs/twifs.h>
  
  #define PATH_DELIM '/'
-#define HASHTABLE_BITS 10
-#define HASHTABLE_SIZE (1 << HASHTABLE_BITS)
-#define HASH_MASK (HASHTABLE_SIZE - 1)
-#define HASHBITS (32 - HASHTABLE_BITS)
  
-#define lock_inode(inode) mutex_lock(&inode->lock)
  #define unlock_inode(inode) mutex_unlock(&inode->lock)
+#define lock_inode(inode)                                                      \
+    ({                                                                         \
+        mutex_lock(&inode->lock);                                              \
+        lru_use_one(inode_lru, &inode->lru);                                   \
+    })
  
-#define lock_dnode(dnode) mutex_lock(&dnode->lock)
  #define unlock_dnode(dnode) mutex_unlock(&dnode->lock)
+#define lock_dnode(dnode)                                                      \
+    ({                                                                         \
+        mutex_lock(&dnode->lock);                                              \
+        lru_use_one(dnode_lru, &dnode->lru);                                   \
+    })
  
  static struct cake_pile* dnode_pile;
  static struct cake_pile* inode_pile;
@@ -73,8 +78,10 @@ static struct cake_pile* file_pile;
  static struct cake_pile* superblock_pile;
  static struct cake_pile* fd_pile;
  
-static struct v_superblock* root_sb;
-static struct hbucket *dnode_cache, *inode_cache;
+static struct v_dnode* sysroot;
+static struct hbucket* dnode_cache;
+
+static struct lru_zone *dnode_lru, *inode_lru;
  
  struct hstr vfs_ddot = HSTR("..", 2);
  struct hstr vfs_dot = HSTR(".", 1);
@@ -86,6 +93,12 @@ vfs_sb_alloc();
  void
  vfs_sb_free(struct v_superblock* sb);
  
+static int
+__vfs_try_evict_dnode(struct lru_node* obj);
+
+static int
+__vfs_try_evict_inode(struct lru_node* obj);
+
  void
  vfs_init()
  {
@@ -97,16 +110,17 @@ vfs_init()
      superblock_pile =
        cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
  
-    dnode_cache = vzalloc(HASHTABLE_SIZE * sizeof(struct hbucket));
-    inode_cache = vzalloc(HASHTABLE_SIZE * sizeof(struct hbucket));
+    dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
+
+    dnode_lru = lru_new_zone(__vfs_try_evict_dnode);
+    inode_lru = lru_new_zone(__vfs_try_evict_inode);
  
      hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
      hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
  
-    // 创建一个根superblock，用来蕴含我们的根目录。
-    root_sb = vfs_sb_alloc();
-    root_sb->root = vfs_d_alloc();
-    root_sb->root->inode = vfs_i_alloc(root_sb, 0);
+    // 创建一个根dnode。
+    sysroot = vfs_d_alloc(NULL, &vfs_empty);
+    atomic_fetch_add(&sysroot->ref_count, 1);
  }
  
  inline struct hbucket*
@@ -116,9 +130,9 @@ __dcache_hash(struct v_dnode* parent, uint32_t* hash)
      // 与parent的指针值做加法，来减小碰撞的可能性。
      _hash += (uint32_t)parent;
      // 确保低位更加随机
-    _hash = _hash ^ (_hash >> HASHBITS);
+    _hash = _hash ^ (_hash >> VFS_HASHBITS);
      *hash = _hash;
-    return &dnode_cache[_hash & HASH_MASK];
+    return &dnode_cache[_hash & VFS_HASH_MASK];
  }
  
  struct v_dnode*
@@ -150,6 +164,7 @@ vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
      atomic_fetch_add(&dnode->ref_count, 1);
      dnode->parent = parent;
      llist_append(&parent->children, &dnode->siblings);
+
      struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
      hlist_add(&bucket->head, &dnode->hash_list);
  }
@@ -188,7 +203,7 @@ __vfs_walk(struct v_dnode* start,
          if ((walk_options & VFS_WALK_FSRELATIVE) && start) {
              start = start->super_block->root;
          } else {
-            start = root_sb->root;
+            start = sysroot;
          }
          i++;
      }
@@ -200,7 +215,7 @@ __vfs_walk(struct v_dnode* start,
      struct hstr name = HSTR(name_content, 0);
  
      char current = path[i++], lookahead;
-    while (current) {
+    while (current && current_level) {
          lookahead = path[i++];
          if (current != PATH_DELIM) {
              if (j >= VFS_NAME_MAXLEN - 1) {
@@ -239,9 +254,12 @@ __vfs_walk(struct v_dnode* start,
          dnode = vfs_dcache_lookup(current_level, &name);
  
          if (!dnode) {
-            dnode = vfs_d_alloc();
+            dnode = vfs_d_alloc(current_level, &name);
  
-            hstrcpy(&dnode->name, &name);
+            if (!dnode) {
+                errno = ENOMEM;
+                goto error;
+            }
  
              lock_inode(current_level->inode);
  
@@ -257,15 +275,13 @@ __vfs_walk(struct v_dnode* start,
                  }
              }
  
+            vfs_dcache_add(current_level, dnode);
              unlock_inode(current_level->inode);
  
              if (errno) {
                  unlock_dnode(current_level);
-                vfree(dnode->name.value);
-                goto error;
+                goto cleanup;
              }
-
-            vfs_dcache_add(current_level, dnode);
          }
  
          unlock_dnode(current_level);
@@ -279,8 +295,9 @@ __vfs_walk(struct v_dnode* start,
      *dentry = current_level;
      return 0;
  
-error:
+cleanup:
      vfs_d_free(dnode);
+error:
      *dentry = NULL;
      return errno;
  }
@@ -299,15 +316,18 @@ vfs_walk(struct v_dnode* start,
      int errno = __vfs_walk(start, path, &interim, component, options);
      int counter = 0;
  
-    while (!errno) {
+    while (!errno && interim->inode && (options & VFS_WALK_NOFOLLOW)) {
          if (counter >= VFS_MAX_SYMLINK) {
              errno = ELOOP;
              continue;
          }
          if ((interim->inode->itype & VFS_IFSYMLINK) &&
-            !(options & VFS_WALK_NOFOLLOW) &&
              interim->inode->ops.read_symlink) {
+
+            lock_inode(interim->inode);
              errno = interim->inode->ops.read_symlink(interim->inode, &pathname);
+            unlock_inode(interim->inode);
+
              if (errno) {
                  break;
              }
@@ -323,86 +343,6 @@ vfs_walk(struct v_dnode* start,
      return errno;
  }
  
-int
-vfs_mount(const char* target, const char* fs_name, struct device* device)
-{
-    int errno;
-    struct v_dnode* mnt;
-
-    if (!(errno = vfs_walk(__current->cwd, target, &mnt, NULL, 0))) {
-        errno = vfs_mount_at(fs_name, device, mnt);
-    }
-
-    return errno;
-}
-
-int
-vfs_unmount(const char* target)
-{
-    int errno;
-    struct v_dnode* mnt;
-
-    if (!(errno = vfs_walk(__current->cwd, target, &mnt, NULL, 0))) {
-        errno = vfs_unmount_at(mnt);
-    }
-
-    return errno;
-}
-
-int
-vfs_mount_at(const char* fs_name,
-             struct device* device,
-             struct v_dnode* mnt_point)
-{
-    if (!(mnt_point->inode->itype & VFS_IFDIR)) {
-        return ENOTDIR;
-    }
-
-    struct filesystem* fs = fsm_get(fs_name);
-    if (!fs) {
-        return ENODEV;
-    }
-
-    struct v_superblock* sb = vfs_sb_alloc();
-    sb->dev = device;
-    sb->fs_id = fs->fs_id;
-
-    int errno = 0;
-    if (!(errno = fs->mount(sb, mnt_point))) {
-        sb->fs = fs;
-        sb->root = mnt_point;
-        mnt_point->super_block = sb;
-        llist_append(&root_sb->sb_list, &sb->sb_list);
-    }
-
-    return errno;
-}
-
-int
-vfs_unmount_at(struct v_dnode* mnt_point)
-{
-    // FIXME deal with the detached dcache subtree
-    int errno = 0;
-    struct v_superblock* sb = mnt_point->super_block;
-    if (!sb) {
-        return EINVAL;
-    }
-
-    if (sb->root != mnt_point) {
-        return EINVAL;
-    }
-
-    if (!(errno = sb->fs->unmount(sb))) {
-        struct v_dnode* fs_root = sb->root;
-        vfs_dcache_remove(fs_root);
-
-        llist_delete(&sb->sb_list);
-        vfs_sb_free(sb);
-        vfs_d_free(fs_root);
-    }
-    return errno;
-}
-
  int
  vfs_open(struct v_dnode* dnode, struct v_file** file)
  {
@@ -411,6 +351,9 @@ vfs_open(struct v_dnode* dnode, struct v_file** file)
      }
  
      struct v_inode* inode = dnode->inode;
+
+    lock_inode(inode);
+
      struct v_file* vfile = cake_grab(file_pile);
      memset(vfile, 0, sizeof(*vfile));
  
@@ -432,13 +375,26 @@ vfs_open(struct v_dnode* dnode, struct v_file** file)
      } else {
          atomic_fetch_add(&dnode->ref_count, 1);
          inode->open_count++;
+        mnt_mkbusy(dnode->mnt);
  
          *file = vfile;
      }
  
+    unlock_inode(inode);
+
      return errno;
  }
  
+void
+vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
+{
+    if (assign_to->inode) {
+        assign_to->inode->link_count--;
+    }
+    assign_to->inode = inode;
+    inode->link_count++;
+}
+
  int
  vfs_link(struct v_dnode* to_link, struct v_dnode* name)
  {
@@ -450,8 +406,7 @@ vfs_link(struct v_dnode* to_link, struct v_dnode* name)
      } else if (!to_link->inode->ops.link) {
          errno = ENOTSUP;
      } else if (!(errno = to_link->inode->ops.link(to_link->inode, name))) {
-        name->inode = to_link->inode;
-        to_link->inode->link_count++;
+        vfs_assign_inode(name, to_link->inode);
      }
      unlock_inode(to_link->inode);
  
@@ -465,6 +420,7 @@ vfs_close(struct v_file* file)
      if (!file->ops.close || !(errno = file->ops.close(file))) {
          atomic_fetch_sub(&file->dnode->ref_count, 1);
          file->inode->open_count--;
+        mnt_chillax(file->dnode->mnt);
  
          pcache_commit_all(file->inode);
          cake_release(file_pile, file);
@@ -506,19 +462,53 @@ vfs_sb_alloc()
      struct v_superblock* sb = cake_grab(superblock_pile);
      memset(sb, 0, sizeof(*sb));
      llist_init_head(&sb->sb_list);
+    sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
      return sb;
  }
  
  void
  vfs_sb_free(struct v_superblock* sb)
  {
+    vfree(sb->i_cache);
      cake_release(superblock_pile, sb);
  }
  
+static int
+__vfs_try_evict_dnode(struct lru_node* obj)
+{
+    struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
+
+    if (!dnode->ref_count) {
+        vfs_d_free(dnode);
+        return 1;
+    }
+    return 0;
+}
+
+static int
+__vfs_try_evict_inode(struct lru_node* obj)
+{
+    struct v_inode* inode = container_of(obj, struct v_inode, lru);
+
+    if (!inode->link_count && !inode->open_count) {
+        vfs_i_free(inode);
+        return 1;
+    }
+    return 0;
+}
+
  struct v_dnode*
-vfs_d_alloc()
+vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
  {
      struct v_dnode* dnode = cake_grab(dnode_pile);
+    if (!dnode) {
+        lru_evict_half(dnode_lru);
+
+        if (!(dnode = cake_grab(dnode_pile))) {
+            return NULL;
+        }
+    }
+
      memset(dnode, 0, sizeof(*dnode));
      llist_init_head(&dnode->children);
      llist_init_head(&dnode->siblings);
@@ -527,20 +517,28 @@ vfs_d_alloc()
      dnode->ref_count = ATOMIC_VAR_INIT(0);
      dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
  
+    hstrcpy(&dnode->name, name);
+
+    if (parent) {
+        dnode->super_block = parent->super_block;
+    }
+
+    lru_use_one(dnode_lru, &dnode->lru);
+
      return dnode;
  }
  
  void
  vfs_d_free(struct v_dnode* dnode)
  {
-    if (dnode->ref_count) {
-        // it can be only freed if no one is refering
-        return;
-    }
-    if (dnode->inode && dnode->inode->link_count) {
+    assert(dnode->ref_count == 1);
+
+    if (dnode->inode) {
+        assert(dnode->inode->link_count > 0);
          dnode->inode->link_count--;
      }
  
+    vfs_dcache_remove(dnode);
      // Make sure the children de-referencing their parent.
      // With lru presented, the eviction will be propagated over the entire
      // detached subtree eventually
@@ -555,41 +553,52 @@ vfs_d_free(struct v_dnode* dnode)
  }
  
  struct v_inode*
-vfs_i_alloc(dev_t device_id, uint32_t inode_id)
+vfs_i_alloc(struct v_superblock* sb,
+            uint32_t inode_id,
+            void (*init)(struct v_inode* inode, void* data),
+            void* data)
  {
-    // 我们这里假设每个文件系统与设备是一一对应（毕竟一个分区不可能有两个不同的文件系统）
-    // 而每个文件系统所产生的 v_inode 缓存必须要和其他文件系统产生的区分开来。
-    // 这也就是说，每个 v_inode 的 id
-    // 必须要由设备ID，和该虚拟inode缓存所对应的物理inode
-    // 相对于其所在的文件系统的id，进行组成！
-    inode_id = hash_32(inode_id ^ device_id, HASH_SIZE_BITS);
-    inode_id = (inode_id >> HASHBITS) ^ inode_id;
-
-    struct hbucket* slot = &inode_cache[inode_id & HASH_MASK];
+    // 每个超级块儿维护一个inode缓存哈希表。
+    // 他们的hash value自然就是inode id了。
+    struct hbucket* slot = &sb->i_cache[inode_id & VFS_HASH_MASK];
      struct v_inode *pos, *n;
      hashtable_bucket_foreach(slot, pos, n, hash_list)
      {
          if (pos->id == inode_id) {
-            return pos;
+            goto done;
+        }
+    }
+
+    if (!(pos = cake_grab(inode_pile))) {
+        lru_evict_half(inode_lru);
+        if (!(pos = cake_grab(inode_pile))) {
+            return NULL;
          }
      }
  
-    pos = cake_grab(inode_pile);
      memset(pos, 0, sizeof(*pos));
  
      pos->id = inode_id;
-    pos->link_count = 1;
  
      mutex_init(&pos->lock);
  
+    init(pos, data);
+
      hlist_add(&slot->head, &pos->hash_list);
  
+done:
+    lru_use_one(inode_lru, &pos->lru);
      return pos;
  }
  
  void
  vfs_i_free(struct v_inode* inode)
  {
+    if (inode->pg_cache) {
+        pcache_release(inode->pg_cache);
+        vfree(inode->pg_cache);
+    }
+    inode->ops.sync(inode);
      hlist_delete(&inode->hash_list);
      cake_release(inode_pile, inode);
  }
@@ -632,18 +641,23 @@ __vfs_try_locate_file(const char* path,
      }
  
      struct v_dnode* parent = *fdir;
-    struct v_dnode* file_new = vfs_d_alloc();
-    hstrcpy(&file_new->name, &name);
+    struct v_dnode* file_new = vfs_d_alloc(parent, &name);
  
-    if (!(errno = parent->inode->ops.create(parent->inode, file_new))) {
-        *file = file_new;
+    if (!file_new) {
+        return ENOMEM;
+    }
+
+    lock_dnode(parent);
  
+    if (!(errno = parent->inode->ops.create(parent->inode, file_new))) {
          vfs_dcache_add(parent, file_new);
-        llist_append(&parent->children, &file_new->siblings);
+        *file = file_new;
      } else {
          vfs_d_free(file_new);
      }
  
+    unlock_dnode(parent);
+
      return errno;
  }
  
@@ -782,13 +796,14 @@ __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
      __SYSCALL_INTERRUPTIBLE(
        { errno = file->ops.read(file->inode, buf, count, file->f_pos); })
  
-    unlock_inode(file->inode);
-
      if (errno > 0) {
          file->f_pos += errno;
+        unlock_inode(file->inode);
          return errno;
      }
  
+    unlock_inode(file->inode);
+
  done:
      return DO_STATUS(errno);
  }
@@ -814,13 +829,14 @@ __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
      __SYSCALL_INTERRUPTIBLE(
        { errno = file->ops.write(file->inode, buf, count, file->f_pos); })
  
-    unlock_inode(file->inode);
-
      if (errno > 0) {
          file->f_pos += errno;
+        unlock_inode(file->inode);
          return errno;
      }
  
+    unlock_inode(file->inode);
+
  done:
      return DO_STATUS(errno);
  }
@@ -988,15 +1004,12 @@ __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
  
      lock_dnode(dnode);
  
-    if (dnode->parent)
-        lock_dnode(dnode->parent);
-
      if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
          errno = EROFS;
          goto done;
      }
  
-    if (dnode->ref_count || dnode->inode->open_count) {
+    if (dnode->ref_count > 1 || dnode->inode->open_count) {
          errno = EBUSY;
          goto done;
      }
@@ -1006,39 +1019,52 @@ __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
          goto done;
      }
  
-    lock_inode(dnode->inode);
+    struct v_dnode* parent = dnode->parent;
+
+    if (!parent) {
+        errno = EINVAL;
+        goto done;
+    }
+
+    lock_dnode(parent);
+    lock_inode(parent->inode);
  
      if ((dnode->inode->itype & VFS_IFDIR)) {
-        errno = dnode->inode->ops.rmdir(dnode->inode);
+        errno = parent->inode->ops.rmdir(parent->inode, dnode);
          if (!errno) {
              vfs_dcache_remove(dnode);
-            unlock_inode(dnode->inode);
-            vfs_d_free(dnode);
-
-            goto done;
          }
      } else {
          errno = ENOTDIR;
      }
  
-    unlock_inode(dnode->inode);
+    unlock_inode(parent->inode);
+    unlock_dnode(parent);
  
  done:
      unlock_dnode(dnode);
-    if (dnode->parent)
-        unlock_dnode(dnode->parent);
      return DO_STATUS(errno);
  }
  
  __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
  {
-    struct v_dnode *parent, *dir = vfs_d_alloc();
-    int errno =
-      vfs_walk(__current->cwd, path, &parent, &dir->name, VFS_WALK_PARENT);
-    if (errno) {
+    int errno = 0;
+    struct v_dnode *parent, *dir;
+    char name_value[VFS_NAME_MAXLEN];
+    struct hstr name = HHSTR(name_value, 0, 0);
+
+    if (!dir) {
+        errno = ENOMEM;
+        goto done;
+    }
+
+    if ((errno =
+           vfs_walk(__current->cwd, path, &parent, &name, VFS_WALK_PARENT))) {
          goto done;
      }
  
+    dir = vfs_d_alloc(parent, &name);
+
      lock_dnode(parent);
      lock_inode(parent->inode);
  
@@ -1049,7 +1075,7 @@ __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
      } else if (!(parent->inode->itype & VFS_IFDIR)) {
          errno = ENOTDIR;
      } else if (!(errno = parent->inode->ops.mkdir(parent->inode, dir))) {
-        llist_append(&parent->children, &dir->siblings);
+        vfs_dcache_add(parent, dir);
          goto cleanup;
      }
  
@@ -1081,8 +1107,6 @@ __vfs_do_unlink(struct v_dnode* dnode)
          //  symlink case
          errno = inode->ops.unlink(inode);
          if (!errno) {
-            inode->link_count--;
-            vfs_dcache_remove(dnode);
              vfs_d_free(dnode);
          }
      } else {
@@ -1244,14 +1268,14 @@ __DEFINE_LXSYSCALL2(int,
          errno = EROFS;
          goto done;
      }
-    if (!dnode->inode->ops.symlink) {
+    if (!dnode->inode->ops.set_symlink) {
          errno = ENOTSUP;
          goto done;
      }
  
      lock_inode(dnode->inode);
  
-    errno = dnode->inode->ops.symlink(dnode->inode, link_target);
+    errno = dnode->inode->ops.set_symlink(dnode->inode, link_target);
  
      unlock_inode(dnode->inode);
  
@@ -1272,10 +1296,12 @@ __vfs_do_chdir(struct v_dnode* dnode)
      }
  
      if (__current->cwd) {
-        atomic_fetch_add(&__current->cwd->ref_count, 1);
+        atomic_fetch_sub(&__current->cwd->ref_count, 1);
+        mnt_chillax(__current->cwd->mnt);
      }
  
-    atomic_fetch_sub(&dnode->ref_count, 1);
+    atomic_fetch_add(&dnode->ref_count, 1);
+    mnt_mkbusy(dnode->mnt);
      __current->cwd = dnode;
  
      unlock_dnode(dnode);
@@ -1389,7 +1415,7 @@ vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
      vfs_dcache_rehash(newparent, current);
  
      // detach target
-    vfs_dcache_remove(target);
+    vfs_d_free(target);
  
      unlock_dnode(target);
  
@@ -1420,16 +1446,19 @@ __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
  
      errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
      if (errno == ENOENT) {
-        target = vfs_d_alloc();
-        hstrcpy(&target->name, &name);
+        target = vfs_d_alloc(target_parent, &name);
+        vfs_dcache_add(target_parent, target);
      } else if (errno) {
          goto done;
      }
  
-    if (!(errno = vfs_do_rename(cur, target))) {
-        vfs_d_free(target);
+    if (!target) {
+        errno = ENOMEM;
+        goto done;
      }
  
+    errno = vfs_do_rename(cur, target);
+
  done:
      vfree(name.value);
      return DO_STATUS(errno);
@@ -1455,19 +1484,19 @@ __DEFINE_LXSYSCALL3(int,
          goto done;
      }
  
-    if (!(dev->inode->itype & VFS_IFVOLDEV)) {
-        errno = ENOTDEV;
-        goto done;
-    }
-
      if (mnt->ref_count > 1) {
          errno = EBUSY;
          goto done;
      }
  
-    // FIXME should not touch the underlying fs!
-    struct device* device =
-      (struct device*)((struct twifs_node*)dev->inode->data)->data;
+    // By our convention.
+    // XXX could we do better?
+    struct device* device = (struct device*)dev->data;
+
+    if (!(dev->inode->itype & VFS_IFVOLDEV) || !device) {
+        errno = ENOTDEV;
+        goto done;
+    }
  
      errno = vfs_mount_at(fstype, device, mnt);
  
  
      errno = vfs_mount_at(fstype, device, mnt);