lunaix-os/kernel/fs/vfs.c

   1 /**
   2  * @file vfs.c
   3  * @author Lunaixsky (zelong56@gmail.com)
   4  * @brief Lunaix virtual file system - an abstraction layer for all file system.
   5  * @version 0.1
   6  * @date 2022-07-24
   7  *
   8  * @copyright Copyright (c) 2022
   9  *
  10  */
  11
  12 // Welcome to The Mountain O'Shit! :)
  13
  14 /*
  15  TODO vfs & device todos checklist
  16
  17     It is overseen by Twilight Sparkle ;)
  18
  19  1. Get inodes hooked into lru (CHECKED)
  20  2. Get dnodes hooked into lru (CHECKED)
 3. Get inodes properly hashed so they can be reused by underlying fs (CHECKED)
  22  4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
  23         [good idea] or a constructor/destructor pattern in cake allocator ?
 5. (mount) Figure out a way to identify a busy mount point before unmount.
            Maybe a unified mount_point structure that maintains a reference
            counter over all dnodes within the subtree? Such a counter is only
            incremented when a file is opened or a dnode is used as a working
            directory, and decremented conversely. (CHECKED)
  29  6. (mount) Ability to track all mount points (including sub-mounts)
  30             so we can be confident to clean up everything when we
  31             unmount. (CHECKED)
  32  7. (mount) Figure out a way to acquire the device represented by a dnode.
  33             so it can be used to mount. (e.g. we wish to get `struct device*`
  34             out of the dnode at /dev/sda)
  35             [tip] we should pay attention at twifs and add a private_data field
  36             under struct v_dnode? (CHECKED)
  37  8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
  38  9. (mount) (future) Ability to mount any thing? e.g. Linux can mount a disk
  39                     image file using a so called "loopback" pseudo device. Maybe
  40                     we can do similar thing in Lunaix? A block device emulation
  41                     above the regular file when we mount it on.
  42  10. (device) device number (dev_t) allocation
  43             [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
  44 */
  45
  46 #include <klibc/string.h>
  47 #include <lunaix/foptions.h>
  48 #include <lunaix/fs.h>
  49 #include <lunaix/mm/cake.h>
  50 #include <lunaix/mm/valloc.h>
  51 #include <lunaix/process.h>
  52 #include <lunaix/spike.h>
  53 #include <lunaix/syscall.h>
  54 #include <lunaix/syscall_utils.h>
  55
  56 #include <lunaix/fs/twifs.h>
  57
  58 #include <usr/lunaix/dirent_defs.h>
  59
// Dedicated cake piles, one per VFS object type. All are created in vfs_init.
static struct cake_pile* dnode_pile;
static struct cake_pile* inode_pile;
static struct cake_pile* file_pile;
static struct cake_pile* superblock_pile;
static struct cake_pile* fd_pile;

// Root dnode of the tree; vfs_init makes it its own parent.
struct v_dnode* vfs_sysroot;
// Hash table of cached dnodes (VFS_HASHTABLE_SIZE buckets, see vfs_init).
static struct hbucket* dnode_cache;

// LRU zones for dnodes and inodes; their eviction callbacks are the
// __vfs_try_evict_* functions declared below.
struct lru_zone *dnode_lru, *inode_lru;

// Well-known path components. vfs_ddot and vfs_dot are re-hashed in vfs_init.
struct hstr vfs_ddot = HSTR("..", 2);
struct hstr vfs_dot = HSTR(".", 1);
struct hstr vfs_empty = HSTR("", 0);

// LRU eviction callback for dnodes: returns 1 if the dnode was freed.
static int
__vfs_try_evict_dnode(struct lru_node* obj);

// LRU eviction callback for inodes: returns 1 if the inode was freed.
static int
__vfs_try_evict_inode(struct lru_node* obj);
  80
  81 void
  82 vfs_init()
  83 {
  84     // 为他们专门创建一个蛋糕堆，而不使用valloc，这样我们可以最小化内碎片的产生
  85     dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
  86     inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
  87     file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
  88     fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
  89     superblock_pile =
  90       cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
  91
  92     dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
  93
  94     dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
  95     inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
  96
  97     hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
  98     hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
  99
 100     // 创建一个根dnode。
 101     vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
 102     vfs_sysroot->parent = vfs_sysroot;
 103     atomic_fetch_add(&vfs_sysroot->ref_count, 1);
 104 }
 105
 106 static inline struct hbucket*
 107 __dcache_hash(struct v_dnode* parent, u32_t* hash)
 108 {
 109     u32_t _hash = *hash;
 110     // 确保低位更加随机
 111     _hash = _hash ^ (_hash >> VFS_HASHBITS);
 112     // 与parent的指针值做加法，来减小碰撞的可能性。
 113     _hash += (u32_t)__ptr(parent);
 114     *hash = _hash;
 115     return &dnode_cache[_hash & VFS_HASH_MASK];
 116 }
 117
 118 static inline int
 119 __sync_inode_nolock(struct v_inode* inode)
 120 {
 121     pcache_commit_all(inode);
 122
 123     int errno = ENOTSUP;
 124     if (inode->ops->sync) {
 125         errno = inode->ops->sync(inode);
 126     }
 127
 128     return errno;
 129 }
 130
 131 struct v_dnode*
 132 vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
 133 {
 134     if (!str->len || HSTR_EQ(str, &vfs_dot))
 135         return parent;
 136
 137     if (HSTR_EQ(str, &vfs_ddot)) {
 138         return parent->parent;
 139     }
 140
 141     u32_t hash = str->hash;
 142     struct hbucket* slot = __dcache_hash(parent, &hash);
 143
 144     struct v_dnode *pos, *n;
 145     hashtable_bucket_foreach(slot, pos, n, hash_list)
 146     {
 147         if (pos->name.hash == hash && pos->parent == parent) {
 148             return pos;
 149         }
 150     }
 151     return NULL;
 152 }
 153
 154 void
 155 vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
 156 {
 157     assert(parent);
 158
 159     atomic_fetch_add(&dnode->ref_count, 1);
 160     dnode->parent = parent;
 161     llist_append(&parent->children, &dnode->siblings);
 162
 163     struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
 164     hlist_add(&bucket->head, &dnode->hash_list);
 165 }
 166
 167 void
 168 vfs_dcache_remove(struct v_dnode* dnode)
 169 {
 170     assert(dnode);
 171     assert(dnode->ref_count == 1);
 172
 173     llist_delete(&dnode->siblings);
 174     llist_delete(&dnode->aka_list);
 175     hlist_delete(&dnode->hash_list);
 176
 177     dnode->parent = NULL;
 178     atomic_fetch_sub(&dnode->ref_count, 1);
 179 }
 180
 181 void
 182 vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
 183 {
 184     assert(new_parent);
 185
 186     hstr_rehash(&dnode->name, HSTR_FULL_HASH);
 187     vfs_dcache_remove(dnode);
 188     vfs_dcache_add(new_parent, dnode);
 189 }
 190
 191 int
 192 vfs_open(struct v_dnode* dnode, struct v_file** file)
 193 {
 194     if (!dnode->inode || !dnode->inode->ops->open) {
 195         return ENOTSUP;
 196     }
 197
 198     struct v_inode* inode = dnode->inode;
 199
 200     lock_inode(inode);
 201
 202     struct v_file* vfile = cake_grab(file_pile);
 203     memset(vfile, 0, sizeof(*vfile));
 204
 205     vfile->dnode = dnode;
 206     vfile->inode = inode;
 207     vfile->ref_count = ATOMIC_VAR_INIT(1);
 208     vfile->ops = inode->default_fops;
 209
 210     if (check_file_node(inode) && !inode->pg_cache) {
 211         struct pcache* pcache = vzalloc(sizeof(struct pcache));
 212         pcache_init(pcache);
 213         pcache->master = inode;
 214         inode->pg_cache = pcache;
 215     }
 216
 217     int errno = inode->ops->open(inode, vfile);
 218     if (errno) {
 219         cake_release(file_pile, vfile);
 220     } else {
 221         atomic_fetch_add(&dnode->ref_count, 1);
 222         inode->open_count++;
 223         mnt_mkbusy(dnode->mnt);
 224
 225         *file = vfile;
 226     }
 227
 228     unlock_inode(inode);
 229
 230     return errno;
 231 }
 232
 233 void
 234 vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
 235 {
 236     if (assign_to->inode) {
 237         llist_delete(&assign_to->aka_list);
 238         assign_to->inode->link_count--;
 239     }
 240
 241     llist_append(&inode->aka_dnodes, &assign_to->aka_list);
 242     assign_to->inode = inode;
 243     inode->link_count++;
 244 }
 245
 246 int
 247 vfs_link(struct v_dnode* to_link, struct v_dnode* name)
 248 {
 249     int errno;
 250
 251     if ((errno = vfs_check_writable(to_link))) {
 252         return errno;
 253     }
 254
 255     lock_inode(to_link->inode);
 256     if (to_link->super_block->root != name->super_block->root) {
 257         errno = EXDEV;
 258     } else if (!to_link->inode->ops->link) {
 259         errno = ENOTSUP;
 260     } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
 261         vfs_assign_inode(name, to_link->inode);
 262     }
 263     unlock_inode(to_link->inode);
 264
 265     return errno;
 266 }
 267
/**
 * @brief Close `file` on behalf of process `pid`.
 *
 * If other references to the file remain, only the reference count is
 * dropped. Otherwise the fs `close` hook is invoked, the dnode reference
 * and mount busy mark taken at open time are released, the v_file is
 * freed, and the inode is synced if it has no remaining openers.
 *
 * @param file file to close
 * @param pid process the close is performed for
 * @return 0 on success, or the error returned by the fs `close` hook
 */
int
vfs_pclose(struct v_file* file, pid_t pid)
{
    struct v_inode* inode;
    int errno = 0;

    inode = file->inode;

    /*
     * Prevent deadlock.
     * This happens when a process is terminated while blocked in a read.
     * In that case the process still holds the inode lock, and the lock
     * would never be released.
     * The unlock must also include an ownership check.
     *
     * To see why, consider two processes that both opened the same file,
     * both with fd=x.
     *      Process A: busy reading x
     *      Process B: does nothing with x
     * Suppose that, shortly afterwards, process B is terminated while
     * process A is still busy reading. Without the ownership check, the
     * inode lock of file x would be released by B rather than A, which
     * may cause a race on A if another process writes to this file after
     * B has exited.
     */

    // presumably a no-op unless the lock is owned by `pid` — verify
    // against the mutex implementation
    mutex_unlock_for(&inode->lock, pid);

    // Still referenced elsewhere: just drop our reference.
    if (file->ref_count > 1) {
        atomic_fetch_sub(&file->ref_count, 1);
        return 0;
    }

    if ((errno = file->ops->close(file))) {
        goto done;
    }

    // Undo what vfs_open did: dnode reference, mount busy mark, v_file.
    atomic_fetch_sub(&file->dnode->ref_count, 1);
    mnt_chillax(file->dnode->mnt);
    cake_release(file_pile, file);

    /*
        If the inode is not currently locked by another thread that does
        not share the same open context, we can try to sync
        opportunistically.

        NOTE(review): when the lock is on hold we bail out here, so
        inode->open_count is not decremented on this path even though the
        file has already been released — confirm this is intended.
    */
    if (mutex_on_hold(&inode->lock)) {
        goto done;
    }

    lock_inode(inode);

    // NOTE(review): __sync_inode_nolock below commits the page cache
    // again, so this commit is repeated when open_count reaches zero.
    pcache_commit_all(inode);
    inode->open_count--;

    if (!inode->open_count) {
        __sync_inode_nolock(inode);
    }

    unlock_inode(inode);

done:
    return errno;
}
 332
 333 int
 334 vfs_close(struct v_file* file)
 335 {
 336     return vfs_pclose(file, __current->pid);
 337 }
 338
 339 void
 340 vfs_free_fd(struct v_fd* fd)
 341 {
 342     cake_release(fd_pile, fd);
 343 }
 344
 345 int
 346 vfs_isync(struct v_inode* inode)
 347 {
 348     lock_inode(inode);
 349
 350     int errno = __sync_inode_nolock(inode);
 351
 352     unlock_inode(inode);
 353
 354     return errno;
 355 }
 356
 357 int
 358 vfs_fsync(struct v_file* file)
 359 {
 360     int errno;
 361     if ((errno = vfs_check_writable(file->dnode))) {
 362         return errno;
 363     }
 364
 365     return vfs_isync(file->inode);
 366 }
 367
 368 int
 369 vfs_alloc_fdslot(int* fd)
 370 {
 371     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 372         if (!__current->fdtable->fds[i]) {
 373             *fd = i;
 374             return 0;
 375         }
 376     }
 377     return EMFILE;
 378 }
 379
 380 struct v_superblock*
 381 vfs_sb_alloc()
 382 {
 383     struct v_superblock* sb = cake_grab(superblock_pile);
 384     memset(sb, 0, sizeof(*sb));
 385     llist_init_head(&sb->sb_list);
 386     sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 387     sb->ref_count = 1;
 388     return sb;
 389 }
 390
 391 void
 392 vfs_sb_ref(struct v_superblock* sb)
 393 {
 394     sb->ref_count++;
 395 }
 396
 397 void
 398 vfs_sb_free(struct v_superblock* sb)
 399 {
 400     assert(sb->ref_count);
 401
 402     sb->ref_count--;
 403     if (sb->ref_count) {
 404         return;
 405     }
 406
 407     if (sb->ops.release) {
 408         sb->ops.release(sb);
 409     }
 410
 411     vfree(sb->i_cache);
 412     cake_release(superblock_pile, sb);
 413 }
 414
 415 static int
 416 __vfs_try_evict_dnode(struct lru_node* obj)
 417 {
 418     struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
 419
 420     if (!dnode->ref_count) {
 421         vfs_d_free(dnode);
 422         return 1;
 423     }
 424     return 0;
 425 }
 426
 427 static int
 428 __vfs_try_evict_inode(struct lru_node* obj)
 429 {
 430     struct v_inode* inode = container_of(obj, struct v_inode, lru);
 431
 432     if (!inode->link_count && !inode->open_count) {
 433         vfs_i_free(inode);
 434         return 1;
 435     }
 436     return 0;
 437 }
 438
 439 struct v_dnode*
 440 vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
 441 {
 442     struct v_dnode* dnode = cake_grab(dnode_pile);
 443     if (!dnode) {
 444         lru_evict_half(dnode_lru);
 445
 446         if (!(dnode = cake_grab(dnode_pile))) {
 447             return NULL;
 448         }
 449     }
 450
 451     memset(dnode, 0, sizeof(*dnode));
 452     llist_init_head(&dnode->children);
 453     llist_init_head(&dnode->siblings);
 454     llist_init_head(&dnode->aka_list);
 455     mutex_init(&dnode->lock);
 456
 457     dnode->ref_count = ATOMIC_VAR_INIT(0);
 458     dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
 459
 460     hstrcpy(&dnode->name, name);
 461
 462     if (parent) {
 463         vfs_d_assign_sb(dnode, parent->super_block);
 464         dnode->mnt = parent->mnt;
 465     }
 466
 467     lru_use_one(dnode_lru, &dnode->lru);
 468
 469     return dnode;
 470 }
 471
/**
 * @brief Tear down a dnode: unlink it from its inode, the dcache and its
 *        children, run its destructor and release its memory.
 *
 * The dnode must hold exactly one reference (asserted), which is the
 * count a dnode has after vfs_dcache_add and no other user.
 *
 * NOTE(review): callers that pass a dnode which was never added to the
 * dcache (ref_count == 0) will trip the assertion below — confirm every
 * call site satisfies it.
 */
void
vfs_d_free(struct v_dnode* dnode)
{
    assert(dnode->ref_count == 1);

    // This name no longer links to the inode.
    if (dnode->inode) {
        assert(dnode->inode->link_count > 0);
        dnode->inode->link_count--;
    }

    vfs_dcache_remove(dnode);
    // Make sure the children stop referencing their parent.
    // With the LRU in place, eviction will eventually propagate over the
    // entire detached subtree.
    struct v_dnode *pos, *n;
    llist_for_each(pos, n, &dnode->children, siblings)
    {
        vfs_dcache_remove(pos);
    }

    if (dnode->destruct) {
        dnode->destruct(dnode);
    }

    // NOTE(review): super_block is only assigned in vfs_d_alloc when a
    // parent is given — confirm it cannot be NULL here.
    vfs_sb_free(dnode->super_block);
    vfree((void*)dnode->name.value);
    cake_release(dnode_pile, dnode);
}
 500
 501 struct v_inode*
 502 vfs_i_find(struct v_superblock* sb, u32_t i_id)
 503 {
 504     struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
 505     struct v_inode *pos, *n;
 506     hashtable_bucket_foreach(slot, pos, n, hash_list)
 507     {
 508         if (pos->id == i_id) {
 509             lru_use_one(inode_lru, &pos->lru);
 510             return pos;
 511         }
 512     }
 513
 514     return NULL;
 515 }
 516
 517 void
 518 vfs_i_addhash(struct v_inode* inode)
 519 {
 520     struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
 521
 522     hlist_delete(&inode->hash_list);
 523     hlist_add(&slot->head, &inode->hash_list);
 524 }
 525
 526 struct v_inode*
 527 vfs_i_alloc(struct v_superblock* sb)
 528 {
 529     assert(sb->ops.init_inode);
 530
 531     struct v_inode* inode;
 532     if (!(inode = cake_grab(inode_pile))) {
 533         lru_evict_half(inode_lru);
 534         if (!(inode = cake_grab(inode_pile))) {
 535             return NULL;
 536         }
 537     }
 538
 539     memset(inode, 0, sizeof(*inode));
 540     mutex_init(&inode->lock);
 541     llist_init_head(&inode->xattrs);
 542     llist_init_head(&inode->aka_dnodes);
 543
 544     sb->ops.init_inode(sb, inode);
 545
 546     inode->ctime = clock_unixtime();
 547     inode->atime = inode->ctime;
 548     inode->mtime = inode->ctime;
 549
 550     vfs_i_assign_sb(inode, sb);
 551     lru_use_one(inode_lru, &inode->lru);
 552     return inode;
 553 }
 554
 555 void
 556 vfs_i_free(struct v_inode* inode)
 557 {
 558     if (inode->pg_cache) {
 559         pcache_release(inode->pg_cache);
 560         vfree(inode->pg_cache);
 561     }
 562     // we don't need to sync inode.
 563     // If an inode can be free, then it must be properly closed.
 564     // Hence it must be synced already!
 565     if (inode->destruct) {
 566         inode->destruct(inode);
 567     }
 568
 569     vfs_sb_free(inode->sb);
 570     hlist_delete(&inode->hash_list);
 571     cake_release(inode_pile, inode);
 572 }
 573
 574 /* ---- System call definition and support ---- */
 575
// Option bits for __vfs_try_locate_file.

// Create the name if it does not exist yet.
#define FLOC_MAYBE_MKNAME 1

// The name must not exist and will be created; fails with EEXIST otherwise.
#define FLOC_MKNAME 2

// Do not follow symlinks while walking the path.
#define FLOC_NOFOLLOW 4
 584
 585 int
 586 vfs_getfd(int fd, struct v_fd** fd_s)
 587 {
 588     if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
 589         return 0;
 590     }
 591     return EBADF;
 592 }
 593
 594 static int
 595 __vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
 596             unsigned int itype, dev_t* dev)
 597 {
 598     int errno;
 599
 600     errno = parent->ops->create(parent, dnode, itype);
 601     if (errno) {
 602         return errno;
 603     }
 604
 605     return 0;
 606 }
 607
// Result of __vfs_try_locate_file.
struct file_locator {
    // dnode of the directory that contains (or will contain) the file
    struct v_dnode* dir;
    // dnode of the located or newly allocated file
    struct v_dnode* file;
    // true when `file` was newly created by the locator; see
    // __floc_try_unlock for the locking consequence
    bool fresh;
};
 613
 614 /**
 615  * @brief unlock the file locator (floc) if possible.
 616  *        If the file to be located if not exists, and
 617  *        any FLOC_*MKNAME flag is set, then the parent
 618  *        dnode will be locked until the file has been properly
 619  *        finalised by subsequent logic.
 620  *
 621  * @param floc
 622  */
 623 static inline void
 624 __floc_try_unlock(struct file_locator* floc)
 625 {
 626     if (floc->fresh) {
 627         assert(floc->dir);
 628         unlock_dnode(floc->dir);
 629     }
 630 }
 631
 632 static int
 633 __vfs_try_locate_file(const char* path,
 634                       struct file_locator* floc,
 635                       int options)
 636 {
 637     char name_str[VFS_NAME_MAXLEN];
 638     struct v_dnode *fdir, *file;
 639     struct hstr name = HSTR(name_str, 0);
 640     int errno, woption = 0;
 641
 642     if ((options & FLOC_NOFOLLOW)) {
 643         woption |= VFS_WALK_NOFOLLOW;
 644         options &= ~FLOC_NOFOLLOW;
 645     }
 646
 647     floc->fresh = false;
 648     name_str[0] = 0;
 649     errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
 650     if (errno) {
 651         return errno;
 652     }
 653
 654     errno = vfs_walk(fdir, name.value, &file, NULL, woption);
 655
 656     if (errno && errno != ENOENT) {
 657         goto done;
 658     }
 659
 660     if (!errno) {
 661         if ((options & FLOC_MKNAME)) {
 662             errno = EEXIST;
 663         }
 664         goto done;
 665     }
 666
 667     // errno == ENOENT
 668     if (!options) {
 669         goto done;
 670     }
 671
 672     errno = vfs_check_writable(fdir);
 673     if (errno) {
 674         goto done;
 675     }
 676
 677     floc->fresh = true;
 678
 679     file = vfs_d_alloc(fdir, &name);
 680
 681     if (!file) {
 682         return ENOMEM;
 683     }
 684
 685     lock_dnode(fdir);
 686
 687     vfs_dcache_add(fdir, file);
 688
 689 done:
 690     floc->dir   = fdir;
 691     floc->file  = file;
 692
 693     return errno;
 694 }
 695
 696 int
 697 vfs_do_open(const char* path, int options)
 698 {
 699     int errno, fd, loptions = 0;
 700     struct v_dnode *dentry, *file;
 701     struct v_file* ofile = NULL;
 702     struct file_locator floc;
 703     struct v_inode* inode;
 704
 705     if ((options & FO_CREATE)) {
 706         loptions |= FLOC_MAYBE_MKNAME;
 707     } else if ((options & FO_NOFOLLOW)) {
 708         loptions |= FLOC_NOFOLLOW;
 709     }
 710
 711     errno = __vfs_try_locate_file(path, &floc, loptions);
 712
 713     if (errno || (errno = vfs_alloc_fdslot(&fd))) {
 714         return errno;
 715     }
 716
 717     file   = floc.file;
 718     dentry = floc.dir;
 719
 720     if (floc.fresh) {
 721         errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
 722         if (errno) {
 723             vfs_d_free(file);
 724             __floc_try_unlock(&floc);
 725             return errno;
 726         }
 727
 728         __floc_try_unlock(&floc);
 729     }
 730
 731
 732     if ((errno = vfs_open(file, &ofile))) {
 733         return errno;
 734     }
 735
 736     inode = ofile->inode;
 737     lock_inode(inode);
 738
 739     struct v_fd* fd_s = cake_grab(fd_pile);
 740     memset(fd_s, 0, sizeof(*fd_s));
 741
 742     if ((options & O_TRUNC)) {
 743         file->inode->fsize = 0;
 744     }
 745
 746     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 747         ofile->f_pos = 0;
 748     }
 749
 750     fd_s->file = ofile;
 751     fd_s->flags = options;
 752     __current->fdtable->fds[fd] = fd_s;
 753
 754     unlock_inode(inode);
 755
 756     return fd;
 757 }
 758
 759 __DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
 760 {
 761     int errno = vfs_do_open(path, options);
 762     return DO_STATUS_OR_RETURN(errno);
 763 }
 764
 765 __DEFINE_LXSYSCALL1(int, close, int, fd)
 766 {
 767     struct v_fd* fd_s;
 768     int errno = 0;
 769     if ((errno = vfs_getfd(fd, &fd_s))) {
 770         goto done_err;
 771     }
 772
 773     if ((errno = vfs_close(fd_s->file))) {
 774         goto done_err;
 775     }
 776
 777     cake_release(fd_pile, fd_s);
 778     __current->fdtable->fds[fd] = 0;
 779
 780 done_err:
 781     return DO_STATUS(errno);
 782 }
 783
 784 void
 785 __vfs_readdir_callback(struct dir_context* dctx,
 786                        const char* name,
 787                        const int len,
 788                        const int dtype)
 789 {
 790     struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
 791     strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
 792     dent->d_nlen = len;
 793     dent->d_type = dtype;
 794 }
 795
 796 __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
 797 {
 798     struct v_fd* fd_s;
 799     int errno;
 800
 801     if ((errno = vfs_getfd(fd, &fd_s))) {
 802         goto done;
 803     }
 804
 805     struct v_inode* inode = fd_s->file->inode;
 806
 807     lock_inode(inode);
 808
 809     if (!check_directory_node(inode)) {
 810         errno = ENOTDIR;
 811         goto unlock;
 812     }
 813
 814     struct dir_context dctx = (struct dir_context) {
 815         .cb_data = dent,
 816         .read_complete_callback = __vfs_readdir_callback
 817     };
 818
 819     if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
 820         goto unlock;
 821     }
 822     dent->d_offset++;
 823     fd_s->file->f_pos++;
 824
 825 unlock:
 826     unlock_inode(inode);
 827
 828 done:
 829     return DO_STATUS_OR_RETURN(errno);
 830 }
 831
 832 __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
 833 {
 834     int errno = 0;
 835     struct v_fd* fd_s;
 836     if ((errno = vfs_getfd(fd, &fd_s))) {
 837         goto done;
 838     }
 839
 840     struct v_file* file = fd_s->file;
 841     if (check_directory_node(file->inode)) {
 842         errno = EISDIR;
 843         goto done;
 844     }
 845
 846     lock_inode(file->inode);
 847
 848     file->inode->atime = clock_unixtime();
 849
 850     if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) {
 851         errno = file->ops->read(file->inode, buf, count, file->f_pos);
 852     } else {
 853         errno = pcache_read(file->inode, buf, count, file->f_pos);
 854     }
 855
 856     if (errno > 0) {
 857         file->f_pos += errno;
 858         unlock_inode(file->inode);
 859         return errno;
 860     }
 861
 862     unlock_inode(file->inode);
 863
 864 done:
 865     return DO_STATUS(errno);
 866 }
 867
 868 __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
 869 {
 870     int errno = 0;
 871     struct v_fd* fd_s;
 872     if ((errno = vfs_getfd(fd, &fd_s))) {
 873         goto done;
 874     }
 875
 876     struct v_inode* inode;
 877     struct v_file* file = fd_s->file;
 878
 879     if ((errno = vfs_check_writable(file->dnode))) {
 880         goto done;
 881     }
 882
 883     if (check_directory_node(file->inode)) {
 884         errno = EISDIR;
 885         goto done;
 886     }
 887
 888     inode = file->inode;
 889     lock_inode(inode);
 890
 891     inode->mtime = clock_unixtime();
 892     if ((fd_s->flags & O_APPEND)) {
 893         file->f_pos = inode->fsize;
 894     }
 895
 896     if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
 897         errno = file->ops->write(inode, buf, count, file->f_pos);
 898     } else {
 899         errno = pcache_write(inode, buf, count, file->f_pos);
 900     }
 901
 902     if (errno > 0) {
 903         file->f_pos += errno;
 904         inode->fsize = MAX(inode->fsize, file->f_pos);
 905
 906         unlock_inode(inode);
 907         return errno;
 908     }
 909
 910     unlock_inode(inode);
 911
 912 done:
 913     return DO_STATUS(errno);
 914 }
 915
 916 __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
 917 {
 918     int errno = 0;
 919     struct v_fd* fd_s;
 920     if ((errno = vfs_getfd(fd, &fd_s))) {
 921         goto done;
 922     }
 923
 924     struct v_file* file = fd_s->file;
 925     struct v_inode* inode = file->inode;
 926
 927     if (!file->ops->seek) {
 928         errno = ENOTSUP;
 929         goto done;
 930     }
 931
 932     lock_inode(inode);
 933
 934     int overflow = 0;
 935     int fpos = file->f_pos;
 936
 937     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 938         options = (options != FSEEK_END) ? options : FSEEK_SET;
 939     }
 940
 941     switch (options) {
 942         case FSEEK_CUR:
 943             overflow = sadd_of((int)file->f_pos, offset, &fpos);
 944             break;
 945         case FSEEK_END:
 946             overflow = sadd_of((int)inode->fsize, offset, &fpos);
 947             break;
 948         case FSEEK_SET:
 949             fpos = offset;
 950             break;
 951     }
 952
 953     if (overflow) {
 954         errno = EOVERFLOW;
 955     }
 956     else {
 957         errno = file->ops->seek(file, fpos);
 958     }
 959
 960     unlock_inode(inode);
 961
 962 done:
 963     return DO_STATUS(errno);
 964 }
 965
 966 int
 967 vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
 968 {
 969     if (!dnode) {
 970         return 0;
 971     }
 972
 973     if (depth > 64) {
 974         return ENAMETOOLONG;
 975     }
 976
 977     size_t len = 0;
 978
 979     if (dnode->parent != dnode) {
 980         len = vfs_get_path(dnode->parent, buf, size, depth + 1);
 981     }
 982
 983     if (len >= size) {
 984         return len;
 985     }
 986
 987     if (!len || buf[len - 1] != VFS_PATH_DELIM) {
 988         buf[len++] = VFS_PATH_DELIM;
 989     }
 990
 991     size_t cpy_size = MIN(dnode->name.len, size - len);
 992     strncpy(buf + len, dnode->name.value, cpy_size);
 993     len += cpy_size;
 994
 995     return len;
 996 }
 997
 998 int
 999 vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
1000 {
1001     const char* link;
1002     struct v_inode* inode = dnode->inode;
1003
1004     if (!check_symlink_node(inode)) {
1005         return EINVAL;
1006     }
1007
1008     if (!inode->ops->read_symlink) {
1009         return ENOTSUP;
1010     }
1011
1012     lock_inode(inode);
1013
1014     int errno = inode->ops->read_symlink(inode, &link);
1015     if (errno >= 0) {
1016         strncpy(buf, link, MIN(size, (size_t)errno));
1017     }
1018
1019     unlock_inode(inode);
1020     return errno;
1021 }
1022
1023 int
1024 vfs_get_dtype(int itype)
1025 {
1026     int dtype = DT_FILE;
1027     if (check_itype(itype, VFS_IFSYMLINK)) {
1028         dtype |= DT_SYMLINK;
1029     }
1030
1031     if (check_itype(itype, VFS_IFDIR)) {
1032         dtype |= DT_DIR;
1033         return dtype;
1034     }
1035
1036     // TODO other types
1037
1038     return dtype;
1039 }
1040
1041 __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
1042 {
1043     int errno;
1044     struct v_fd* fd_s;
1045     if ((errno = vfs_getfd(fd, &fd_s))) {
1046         goto done;
1047     }
1048
1049     struct v_dnode* dnode;
1050     errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
1051
1052     if (errno >= 0) {
1053         return errno;
1054     }
1055
1056 done:
1057     return DO_STATUS(errno);
1058 }
1059
1060 __DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
1061 {
1062     int errno;
1063     struct v_dnode* dnode;
1064     if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1065         errno = vfs_readlink(dnode, buf, size);
1066     }
1067
1068     if (errno >= 0) {
1069         return errno;
1070     }
1071
1072     return DO_STATUS(errno);
1073 }
1074
1075 __DEFINE_LXSYSCALL4(
1076   int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1077 {
1078     int errno;
1079     struct v_fd* fd_s;
1080     if ((errno = vfs_getfd(dirfd, &fd_s))) {
1081         goto done;
1082     }
1083
1084     pathname = pathname ? pathname : "";
1085
1086     struct v_dnode* dnode;
1087     if (!(errno = vfs_walk(
1088             fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1089         errno = vfs_readlink(fd_s->file->dnode, buf, size);
1090     }
1091
1092     if (errno >= 0) {
1093         return errno;
1094     }
1095
1096 done:
1097     return DO_STATUS(errno);
1098 }
1099
/*
    NOTE
    When we perform an operation that could affect the layout of a
    directory (i.e., rename, mkdir, rmdir), we must lock the parent dir
    whenever possible. This blocks any ongoing path walk from reaching
    it, and hence avoids exposing any partial state.
*/
1107
/**
 * rmdir(2): remove the empty directory at `pathname`.
 *
 * Errors: any error from the path walk or vfs_check_writable; EROFS for
 * a read-only fs type; EBUSY if the dnode is referenced elsewhere or its
 * inode is open; ENOTEMPTY if it still has cached children; EINVAL if it
 * has no parent; ENOTDIR if it is not a directory; or the error from the
 * fs `rmdir` hook.
 */
__DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
{
    int errno;
    struct v_dnode* dnode;
    if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
        return DO_STATUS(errno);
    }

    // Lock order: the victim dnode first, then its parent, then the
    // parent's inode.
    lock_dnode(dnode);

    if ((errno = vfs_check_writable(dnode))) {
        goto done;
    }

    if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
        errno = EROFS;
        goto done;
    }

    // More than the dcache's own reference, or open files: still in use.
    if (dnode->ref_count > 1 || dnode->inode->open_count) {
        errno = EBUSY;
        goto done;
    }

    if (!llist_empty(&dnode->children)) {
        errno = ENOTEMPTY;
        goto done;
    }

    struct v_dnode* parent = dnode->parent;

    if (!parent) {
        errno = EINVAL;
        goto done;
    }

    // NOTE(review): for the root dnode parent == dnode, which would lock
    // the same dnode twice — confirm this path is unreachable.
    lock_dnode(parent);
    lock_inode(parent->inode);

    if (check_directory_node(dnode->inode)) {
        // NOTE(review): the `rmdir` hook is called without a NULL check.
        errno = parent->inode->ops->rmdir(parent->inode, dnode);
        if (!errno) {
            vfs_dcache_remove(dnode);
        }
    } else {
        errno = ENOTDIR;
    }

    unlock_inode(parent->inode);
    unlock_dnode(parent);

done:
    unlock_dnode(dnode);
    return DO_STATUS(errno);
}
1163
1164 __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
1165 {
1166     int errno = 0;
1167     struct v_dnode *parent, *dir;
1168     char name_value[VFS_NAME_MAXLEN];
1169     struct hstr name = HHSTR(name_value, 0, 0);
1170
1171     if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
1172         goto done;
1173     }
1174
1175     if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {
1176         errno = EEXIST;
1177         goto done;
1178     }
1179
1180     if ((errno = vfs_check_writable(parent))) {
1181         goto done;
1182     }
1183
1184     if (!(dir = vfs_d_alloc(parent, &name))) {
1185         errno = ENOMEM;
1186         goto done;
1187     }
1188
1189     struct v_inode* inode = parent->inode;
1190
1191     lock_dnode(parent);
1192     lock_inode(inode);
1193
1194     if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
1195         errno = ENOTSUP;
1196     } else if (!inode->ops->mkdir) {
1197         errno = ENOTSUP;
1198     } else if (!check_directory_node(inode)) {
1199         errno = ENOTDIR;
1200     } else if (!(errno = inode->ops->mkdir(inode, dir))) {
1201         vfs_dcache_add(parent, dir);
1202         goto cleanup;
1203     }
1204
1205     vfs_d_free(dir);
1206
1207 cleanup:
1208     unlock_inode(inode);
1209     unlock_dnode(parent);
1210 done:
1211     return DO_STATUS(errno);
1212 }
1213
1214 int
1215 __vfs_do_unlink(struct v_dnode* dnode)
1216 {
1217     int errno;
1218     struct v_inode* inode = dnode->inode;
1219
1220     if (dnode->ref_count > 1) {
1221         return EBUSY;
1222     }
1223
1224     if ((errno = vfs_check_writable(dnode))) {
1225         return errno;
1226     }
1227
1228     lock_inode(inode);
1229
1230     if (inode->open_count) {
1231         errno = EBUSY;
1232     } else if (!check_directory_node(inode)) {
1233         errno = inode->ops->unlink(inode, dnode);
1234         if (!errno) {
1235             vfs_d_free(dnode);
1236         }
1237     } else {
1238         errno = EISDIR;
1239     }
1240
1241     unlock_inode(inode);
1242
1243     return errno;
1244 }
1245
1246 __DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
1247 {
1248     int errno;
1249     struct v_dnode* dnode;
1250     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1251         goto done;
1252     }
1253
1254     errno = __vfs_do_unlink(dnode);
1255
1256 done:
1257     return DO_STATUS(errno);
1258 }
1259
1260 __DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
1261 {
1262     int errno;
1263     struct v_fd* fd_s;
1264     if ((errno = vfs_getfd(fd, &fd_s))) {
1265         goto done;
1266     }
1267
1268     struct v_dnode* dnode;
1269     if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
1270         errno = __vfs_do_unlink(dnode);
1271     }
1272
1273 done:
1274     return DO_STATUS(errno);
1275 }
1276
1277 __DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
1278 {
1279     int errno;
1280     struct file_locator floc;
1281     struct v_dnode *to_link, *name_file;
1282
1283     errno = __vfs_try_locate_file(oldpath, &floc, 0);
1284     if (errno) {
1285         goto done;
1286     }
1287
1288     __floc_try_unlock(&floc);
1289
1290     to_link = floc.file;
1291     errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
1292     if (!errno) {
1293         goto done;
1294     }
1295
1296     name_file = floc.file;
1297     errno = vfs_link(to_link, name_file);
1298     if (errno) {
1299         vfs_d_free(name_file);
1300     }
1301
1302 done:
1303     __floc_try_unlock(&floc);
1304     return DO_STATUS(errno);
1305 }
1306
1307 __DEFINE_LXSYSCALL1(int, fsync, int, fildes)
1308 {
1309     int errno;
1310     struct v_fd* fd_s;
1311
1312     if (!(errno = vfs_getfd(fildes, &fd_s))) {
1313         errno = vfs_fsync(fd_s->file);
1314     }
1315
1316     return DO_STATUS(errno);
1317 }
1318
1319 int
1320 vfs_dup_fd(struct v_fd* old, struct v_fd** new)
1321 {
1322     int errno = 0;
1323     struct v_fd* copied = cake_grab(fd_pile);
1324
1325     memcpy(copied, old, sizeof(struct v_fd));
1326
1327     atomic_fetch_add(&old->file->ref_count, 1);
1328
1329     *new = copied;
1330
1331     return errno;
1332 }
1333
1334 int
1335 vfs_dup2(int oldfd, int newfd)
1336 {
1337     if (newfd == oldfd) {
1338         return newfd;
1339     }
1340
1341     int errno;
1342     struct v_fd *oldfd_s, *newfd_s;
1343     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1344         goto done;
1345     }
1346
1347     if (!TEST_FD(newfd)) {
1348         errno = EBADF;
1349         goto done;
1350     }
1351
1352     newfd_s = __current->fdtable->fds[newfd];
1353     if (newfd_s && (errno = vfs_close(newfd_s->file))) {
1354         goto done;
1355     }
1356
1357     if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1358         __current->fdtable->fds[newfd] = newfd_s;
1359         return newfd;
1360     }
1361
1362 done:
1363     return DO_STATUS(errno);
1364 }
1365
1366 __DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
1367 {
1368     return vfs_dup2(oldfd, newfd);
1369 }
1370
1371 __DEFINE_LXSYSCALL1(int, dup, int, oldfd)
1372 {
1373     int errno, newfd;
1374     struct v_fd *oldfd_s, *newfd_s;
1375     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1376         goto done;
1377     }
1378
1379     if (!(errno = vfs_alloc_fdslot(&newfd)) &&
1380         !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1381         __current->fdtable->fds[newfd] = newfd_s;
1382         return newfd;
1383     }
1384
1385 done:
1386     return DO_STATUS(errno);
1387 }
1388
1389 __DEFINE_LXSYSCALL2(
1390   int, symlink, const char*, pathname, const char*, link_target)
1391 {
1392     int errno;
1393     struct file_locator floc;
1394     struct v_dnode *file;
1395     struct v_inode *f_ino;
1396
1397     errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
1398     if (errno) {
1399         goto done;
1400     }
1401
1402     file = floc.file;
1403     errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
1404     if (errno) {
1405         vfs_d_free(file);
1406         goto done;
1407     }
1408
1409     f_ino = file->inode;
1410
1411     assert(f_ino);
1412
1413     errno = vfs_check_writable(file);
1414     if (errno) {
1415         goto done;
1416     }
1417
1418     if (!f_ino->ops->set_symlink) {
1419         errno = ENOTSUP;
1420         goto done;
1421     }
1422
1423     lock_inode(f_ino);
1424
1425     errno = f_ino->ops->set_symlink(f_ino, link_target);
1426
1427     unlock_inode(f_ino);
1428
1429 done:
1430     __floc_try_unlock(&floc);
1431     return DO_STATUS(errno);
1432 }
1433
1434 void
1435 vfs_ref_file(struct v_file* file)
1436 {
1437     atomic_fetch_add(&file->ref_count, 1);
1438 }
1439
1440 void
1441 vfs_ref_dnode(struct v_dnode* dnode)
1442 {
1443     atomic_fetch_add(&dnode->ref_count, 1);
1444
1445     if (dnode->mnt) {
1446         mnt_mkbusy(dnode->mnt);
1447     }
1448 }
1449
1450 void
1451 vfs_unref_dnode(struct v_dnode* dnode)
1452 {
1453     atomic_fetch_sub(&dnode->ref_count, 1);
1454     if (dnode->mnt) {
1455         mnt_chillax(dnode->mnt);
1456     }
1457 }
1458
1459 int
1460 vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
1461 {
1462     int errno = 0;
1463
1464     lock_dnode(dnode);
1465
1466     if (!check_directory_node(dnode->inode)) {
1467         errno = ENOTDIR;
1468         goto done;
1469     }
1470
1471     if (proc->cwd) {
1472         vfs_unref_dnode(proc->cwd);
1473     }
1474
1475     vfs_ref_dnode(dnode);
1476     proc->cwd = dnode;
1477
1478     unlock_dnode(dnode);
1479
1480 done:
1481     return errno;
1482 }
1483
1484 __DEFINE_LXSYSCALL1(int, chdir, const char*, path)
1485 {
1486     struct v_dnode* dnode;
1487     int errno = 0;
1488
1489     if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1490         goto done;
1491     }
1492
1493     errno = vfs_do_chdir((struct proc_info*)__current, dnode);
1494
1495 done:
1496     return DO_STATUS(errno);
1497 }
1498
1499 __DEFINE_LXSYSCALL1(int, fchdir, int, fd)
1500 {
1501     struct v_fd* fd_s;
1502     int errno = 0;
1503
1504     if ((errno = vfs_getfd(fd, &fd_s))) {
1505         goto done;
1506     }
1507
1508     errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);
1509
1510 done:
1511     return DO_STATUS(errno);
1512 }
1513
1514 __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
1515 {
1516     int errno = 0;
1517     char* ret_ptr = 0;
1518     if (size < 2) {
1519         errno = ERANGE;
1520         goto done;
1521     }
1522
1523     size_t len = 0;
1524
1525     if (!__current->cwd) {
1526         *buf = VFS_PATH_DELIM;
1527         len = 1;
1528     } else {
1529         len = vfs_get_path(__current->cwd, buf, size, 0);
1530         if (len == size) {
1531             errno = ERANGE;
1532             goto done;
1533         }
1534     }
1535
1536     buf[len] = '\0';
1537
1538     ret_ptr = buf;
1539
1540 done:
1541     syscall_result(errno);
1542     return ret_ptr;
1543 }
1544
1545 int
1546 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1547 {
1548     int errno = 0;
1549     if (current->inode->id == target->inode->id) {
1550         // hard link
1551         return 0;
1552     }
1553
1554     if ((errno = vfs_check_writable(current))) {
1555         return errno;
1556     }
1557
1558     if (current->ref_count > 1 || target->ref_count > 1) {
1559         return EBUSY;
1560     }
1561
1562     if (current->super_block != target->super_block) {
1563         return EXDEV;
1564     }
1565
1566     struct v_dnode* oldparent = current->parent;
1567     struct v_dnode* newparent = target->parent;
1568
1569     lock_dnode(current);
1570     lock_dnode(target);
1571     if (oldparent)
1572         lock_dnode(oldparent);
1573     if (newparent)
1574         lock_dnode(newparent);
1575
1576     if (!llist_empty(&target->children)) {
1577         errno = ENOTEMPTY;
1578         unlock_dnode(target);
1579         goto cleanup;
1580     }
1581
1582     if ((errno =
1583            current->inode->ops->rename(current->inode, current, target))) {
1584         unlock_dnode(target);
1585         goto cleanup;
1586     }
1587
1588     // re-position current
1589     hstrcpy(&current->name, &target->name);
1590     vfs_dcache_rehash(newparent, current);
1591
1592     // detach target
1593     vfs_d_free(target);
1594
1595     unlock_dnode(target);
1596
1597 cleanup:
1598     unlock_dnode(current);
1599     if (oldparent)
1600         unlock_dnode(oldparent);
1601     if (newparent)
1602         unlock_dnode(newparent);
1603
1604     return errno;
1605 }
1606
1607 __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
1608 {
1609     struct v_dnode *cur, *target_parent, *target;
1610     struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);
1611     int errno = 0;
1612
1613     if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {
1614         goto done;
1615     }
1616
1617     if ((errno = vfs_walk(
1618            __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {
1619         goto done;
1620     }
1621
1622     errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
1623     if (errno == ENOENT) {
1624         target = vfs_d_alloc(target_parent, &name);
1625         vfs_dcache_add(target_parent, target);
1626     } else if (errno) {
1627         goto done;
1628     }
1629
1630     if (!target) {
1631         errno = ENOMEM;
1632         goto done;
1633     }
1634
1635     errno = vfs_do_rename(cur, target);
1636
1637 done:
1638     vfree((void*)name.value);
1639     return DO_STATUS(errno);
1640 }
1641
1642 __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
1643 {
1644     int errno = 0;
1645     struct v_fd* fds;
1646
1647     if ((errno = vfs_getfd(fd, &fds))) {
1648         goto done;
1649     }
1650
1651     struct v_inode* vino = fds->file->inode;
1652     struct device* fdev = vino->sb->dev;
1653
1654     *stat = (struct file_stat){.st_ino = vino->id,
1655                                .st_blocks = vino->lb_usage,
1656                                .st_size = vino->fsize,
1657                                .mode = vino->itype,
1658                                .st_ioblksize = PAGE_SIZE,
1659                                .st_blksize = vino->sb->blksize};
1660
1661     if (check_device_node(vino)) {
1662         struct device* rdev = resolve_device(vino->data);
1663         if (!rdev) {
1664             errno = EINVAL;
1665             goto done;
1666         }
1667
1668         stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
1669                                 .unique = rdev->ident.unique,
1670                                 .index = dev_uid(rdev) };
1671     }
1672
1673     if (fdev) {
1674         stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
1675                                .unique = fdev->ident.unique,
1676                                .index = dev_uid(fdev) };
1677     }
1678
1679 done:
1680     return DO_STATUS(errno);
1681 }