lunaix-os/kernel/fs/vfs.c

/**
 * @file vfs.c
 * @author Lunaixsky (zelong56@gmail.com)
 * @brief Lunaix virtual file system - an abstraction layer for all file
 *        systems.
 * @version 0.1
 * @date 2022-07-24
 *
 * @copyright Copyright (c) 2022
 *
 */
  11
  12 // Welcome to The Mountain O'Shit! :)
  13
/*
 TODO vfs & device todos checklist

    It is overseen by Twilight Sparkle ;)

 1. Get inodes hooked into lru (CHECKED)
 2. Get dnodes hooked into lru (CHECKED)
 3. Get inodes properly hashed so they can be reused by the underlying fs
    (CHECKED)
 4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
        [good idea] or a constructor/destructor pattern in cake allocator ?
 5. (mount) Figure out a way to identify a busy mount point before unmount
            maybe a unified mount_point structure that maintains a reference
            counter on any dnodes within the subtree? Such a counter will only
            increment if a file is opened or a dnode is being used as working
            directory, and decrement conversely. (CHECKED)
 6. (mount) Ability to track all mount points (including sub-mounts)
            so we can be confident to clean up everything when we
            unmount. (CHECKED)
 7. (mount) Figure out a way to acquire the device represented by a dnode.
            so it can be used to mount. (e.g. we wish to get `struct device*`
            out of the dnode at /dev/sda)
            [tip] we should pay attention to twifs and add a private_data field
            under struct v_dnode? (CHECKED)
 8. (mount) Then, we should refactor the mount/unmount mechanism. (CHECKED)
 9. (mount) (future) Ability to mount anything? e.g. Linux can mount a disk
                    image file using a so-called "loopback" pseudo device. Maybe
                    we can do a similar thing in Lunaix? A block device
                    emulation above the regular file when we mount it.
 10. (device) device number (dev_t) allocation
            [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
*/
  45
  46 #include <klibc/string.h>
  47 #include <lunaix/foptions.h>
  48 #include <lunaix/fs.h>
  49 #include <lunaix/mm/cake.h>
  50 #include <lunaix/mm/valloc.h>
  51 #include <lunaix/process.h>
  52 #include <lunaix/spike.h>
  53 #include <lunaix/syscall.h>
  54 #include <lunaix/syscall_utils.h>
  55
  56 #include <lunaix/fs/twifs.h>
  57
  58 #include <usr/lunaix/dirent_defs.h>
  59
/* Dedicated cake piles, one per VFS object type; created in vfs_init(). */
static struct cake_pile* dnode_pile;
static struct cake_pile* inode_pile;
static struct cake_pile* file_pile;
static struct cake_pile* superblock_pile;
static struct cake_pile* fd_pile;

/* Root dnode of the tree; vfs_init() makes it its own parent. */
struct v_dnode* vfs_sysroot;
/* Bucket array of the dnode cache, indexed through __dcache_hash(). */
static struct hbucket* dnode_cache;

/* LRU zones that dnodes and inodes are registered with when allocated. */
struct lru_zone *dnode_lru, *inode_lru;

/* Pre-built names for "..", "." and the empty string. */
struct hstr vfs_ddot = HSTR("..", 2);
struct hstr vfs_dot = HSTR(".", 1);
struct hstr vfs_empty = HSTR("", 0);

/* LRU eviction callbacks; defined further down in this file. */
static int
__vfs_try_evict_dnode(struct lru_node* obj);

static int
__vfs_try_evict_inode(struct lru_node* obj);
  80
  81 void
  82 vfs_init()
  83 {
  84     // 为他们专门创建一个蛋糕堆，而不使用valloc，这样我们可以最小化内碎片的产生
  85     dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
  86     inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
  87     file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
  88     fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
  89     superblock_pile =
  90       cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
  91
  92     dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
  93
  94     dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
  95     inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
  96
  97     hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
  98     hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
  99
 100     // 创建一个根dnode。
 101     vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
 102     vfs_sysroot->parent = vfs_sysroot;
 103     atomic_fetch_add(&vfs_sysroot->ref_count, 1);
 104 }
 105
 106 static inline struct hbucket*
 107 __dcache_hash(struct v_dnode* parent, u32_t* hash)
 108 {
 109     u32_t _hash = *hash;
 110     // 确保低位更加随机
 111     _hash = _hash ^ (_hash >> VFS_HASHBITS);
 112     // 与parent的指针值做加法，来减小碰撞的可能性。
 113     _hash += (u32_t)__ptr(parent);
 114     *hash = _hash;
 115     return &dnode_cache[_hash & VFS_HASH_MASK];
 116 }
 117
 118 static inline int
 119 __sync_inode_nolock(struct v_inode* inode)
 120 {
 121     pcache_commit_all(inode);
 122
 123     int errno = ENOTSUP;
 124     if (inode->ops->sync) {
 125         errno = inode->ops->sync(inode);
 126     }
 127
 128     return errno;
 129 }
 130
 131 struct v_dnode*
 132 vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
 133 {
 134     if (!str->len || HSTR_EQ(str, &vfs_dot))
 135         return parent;
 136
 137     if (HSTR_EQ(str, &vfs_ddot)) {
 138         return parent->parent;
 139     }
 140
 141     u32_t hash = str->hash;
 142     struct hbucket* slot = __dcache_hash(parent, &hash);
 143
 144     struct v_dnode *pos, *n;
 145     hashtable_bucket_foreach(slot, pos, n, hash_list)
 146     {
 147         if (pos->name.hash == hash && pos->parent == parent) {
 148             return pos;
 149         }
 150     }
 151     return NULL;
 152 }
 153
 154 void
 155 vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
 156 {
 157     assert(parent);
 158
 159     atomic_fetch_add(&dnode->ref_count, 1);
 160     dnode->parent = parent;
 161     llist_append(&parent->children, &dnode->siblings);
 162
 163     struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
 164     hlist_add(&bucket->head, &dnode->hash_list);
 165 }
 166
 167 void
 168 vfs_dcache_remove(struct v_dnode* dnode)
 169 {
 170     assert(dnode);
 171     assert(dnode->ref_count == 1);
 172
 173     llist_delete(&dnode->siblings);
 174     llist_delete(&dnode->aka_list);
 175     hlist_delete(&dnode->hash_list);
 176
 177     dnode->parent = NULL;
 178     atomic_fetch_sub(&dnode->ref_count, 1);
 179 }
 180
 181 void
 182 vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
 183 {
 184     assert(new_parent);
 185
 186     hstr_rehash(&dnode->name, HSTR_FULL_HASH);
 187     vfs_dcache_remove(dnode);
 188     vfs_dcache_add(new_parent, dnode);
 189 }
 190
 191 int
 192 vfs_open(struct v_dnode* dnode, struct v_file** file)
 193 {
 194     if (!dnode->inode || !dnode->inode->ops->open) {
 195         return ENOTSUP;
 196     }
 197
 198     struct v_inode* inode = dnode->inode;
 199
 200     lock_inode(inode);
 201
 202     struct v_file* vfile = cake_grab(file_pile);
 203     memset(vfile, 0, sizeof(*vfile));
 204
 205     vfile->dnode = dnode;
 206     vfile->inode = inode;
 207     vfile->ref_count = ATOMIC_VAR_INIT(1);
 208     vfile->ops = inode->default_fops;
 209
 210     if (check_file_node(inode) && !inode->pg_cache) {
 211         struct pcache* pcache = vzalloc(sizeof(struct pcache));
 212         pcache_init(pcache);
 213         pcache->master = inode;
 214         inode->pg_cache = pcache;
 215     }
 216
 217     int errno = inode->ops->open(inode, vfile);
 218     if (errno) {
 219         cake_release(file_pile, vfile);
 220     } else {
 221         atomic_fetch_add(&dnode->ref_count, 1);
 222         inode->open_count++;
 223         mnt_mkbusy(dnode->mnt);
 224
 225         *file = vfile;
 226     }
 227
 228     unlock_inode(inode);
 229
 230     return errno;
 231 }
 232
 233 void
 234 vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
 235 {
 236     if (assign_to->inode) {
 237         llist_delete(&assign_to->aka_list);
 238         assign_to->inode->link_count--;
 239     }
 240
 241     llist_append(&inode->aka_dnodes, &assign_to->aka_list);
 242     assign_to->inode = inode;
 243     inode->link_count++;
 244 }
 245
 246 int
 247 vfs_link(struct v_dnode* to_link, struct v_dnode* name)
 248 {
 249     int errno;
 250
 251     if ((errno = vfs_check_writable(to_link))) {
 252         return errno;
 253     }
 254
 255     lock_inode(to_link->inode);
 256     if (to_link->super_block->root != name->super_block->root) {
 257         errno = EXDEV;
 258     } else if (!to_link->inode->ops->link) {
 259         errno = ENOTSUP;
 260     } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
 261         vfs_assign_inode(name, to_link->inode);
 262     }
 263     unlock_inode(to_link->inode);
 264
 265     return errno;
 266 }
 267
 268 int
 269 vfs_pclose(struct v_file* file, pid_t pid)
 270 {
 271     struct v_inode* inode;
 272     int errno = 0;
 273
 274     if (file->ref_count > 1) {
 275         atomic_fetch_sub(&file->ref_count, 1);
 276         return 0;
 277     }
 278
 279     inode = file->inode;
 280
 281     /*
 282      * Prevent dead lock.
 283      * This happened when process is terminated while blocking on read.
 284      * In that case, the process is still holding the inode lock and it
 285          will never get released.
 286      * The unlocking should also include ownership check.
 287      *
 288      * To see why, consider two process both open the same file both with
 289      * fd=x.
 290      *      Process A: busy on reading x
 291      *      Process B: do nothing with x
 292      * Assuming that, after a very short time, process B get terminated
 293      * while process A is still busy in it's reading business. By this
 294      * design, the inode lock of this file x is get released by B rather
 295      * than A. And this will cause a probable race condition on A if other
 296      * process is writing to this file later after B exit.
 297     */
 298
 299     if (mutex_on_hold(&inode->lock)) {
 300         mutex_unlock_for(&inode->lock, pid);
 301     }
 302
 303     lock_inode(inode);
 304
 305     pcache_commit_all(inode);
 306     if ((errno = file->ops->close(file))) {
 307         goto unlock;
 308     }
 309
 310     atomic_fetch_sub(&file->dnode->ref_count, 1);
 311     inode->open_count--;
 312
 313     if (!inode->open_count) {
 314         __sync_inode_nolock(inode);
 315     }
 316
 317     mnt_chillax(file->dnode->mnt);
 318     cake_release(file_pile, file);
 319
 320 unlock:
 321     unlock_inode(inode);
 322     return errno;
 323 }
 324
 325 int
 326 vfs_close(struct v_file* file)
 327 {
 328     return vfs_pclose(file, __current->pid);
 329 }
 330
/**
 * @brief Return a file descriptor object to the fd pile.
 *        Does not touch the v_file it refers to.
 */
void
vfs_free_fd(struct v_fd* fd)
{
    cake_release(fd_pile, fd);
}
 336
 337 int
 338 vfs_isync(struct v_inode* inode)
 339 {
 340     lock_inode(inode);
 341
 342     int errno = __sync_inode_nolock(inode);
 343
 344     unlock_inode(inode);
 345
 346     return errno;
 347 }
 348
 349 int
 350 vfs_fsync(struct v_file* file)
 351 {
 352     int errno;
 353     if ((errno = vfs_check_writable(file->dnode))) {
 354         return errno;
 355     }
 356
 357     return vfs_isync(file->inode);
 358 }
 359
 360 int
 361 vfs_alloc_fdslot(int* fd)
 362 {
 363     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 364         if (!__current->fdtable->fds[i]) {
 365             *fd = i;
 366             return 0;
 367         }
 368     }
 369     return EMFILE;
 370 }
 371
 372 struct v_superblock*
 373 vfs_sb_alloc()
 374 {
 375     struct v_superblock* sb = cake_grab(superblock_pile);
 376     memset(sb, 0, sizeof(*sb));
 377     llist_init_head(&sb->sb_list);
 378     sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 379     sb->ref_count = 1;
 380     return sb;
 381 }
 382
/**
 * @brief Take one more reference on the super block.
 *        Paired with vfs_sb_free(), which drops a reference.
 */
void
vfs_sb_ref(struct v_superblock* sb)
{
    sb->ref_count++;
}
 388
 389 void
 390 vfs_sb_free(struct v_superblock* sb)
 391 {
 392     assert(sb->ref_count);
 393
 394     sb->ref_count--;
 395     if (sb->ref_count) {
 396         return;
 397     }
 398
 399     if (sb->ops.release) {
 400         sb->ops.release(sb);
 401     }
 402
 403     vfree(sb->i_cache);
 404     cake_release(superblock_pile, sb);
 405 }
 406
 407 static int
 408 __vfs_try_evict_dnode(struct lru_node* obj)
 409 {
 410     struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
 411
 412     if (!dnode->ref_count) {
 413         vfs_d_free(dnode);
 414         return 1;
 415     }
 416     return 0;
 417 }
 418
 419 static int
 420 __vfs_try_evict_inode(struct lru_node* obj)
 421 {
 422     struct v_inode* inode = container_of(obj, struct v_inode, lru);
 423
 424     if (!inode->link_count && !inode->open_count) {
 425         vfs_i_free(inode);
 426         return 1;
 427     }
 428     return 0;
 429 }
 430
 431 struct v_dnode*
 432 vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
 433 {
 434     struct v_dnode* dnode = cake_grab(dnode_pile);
 435     if (!dnode) {
 436         lru_evict_half(dnode_lru);
 437
 438         if (!(dnode = cake_grab(dnode_pile))) {
 439             return NULL;
 440         }
 441     }
 442
 443     memset(dnode, 0, sizeof(*dnode));
 444     llist_init_head(&dnode->children);
 445     llist_init_head(&dnode->siblings);
 446     llist_init_head(&dnode->aka_list);
 447     mutex_init(&dnode->lock);
 448
 449     dnode->ref_count = ATOMIC_VAR_INIT(0);
 450     dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
 451
 452     hstrcpy(&dnode->name, name);
 453
 454     if (parent) {
 455         vfs_d_assign_sb(dnode, parent->super_block);
 456         dnode->mnt = parent->mnt;
 457     }
 458
 459     lru_use_one(dnode_lru, &dnode->lru);
 460
 461     return dnode;
 462 }
 463
 464 void
 465 vfs_d_free(struct v_dnode* dnode)
 466 {
 467     assert(dnode->ref_count == 1);
 468
 469     if (dnode->inode) {
 470         assert(dnode->inode->link_count > 0);
 471         dnode->inode->link_count--;
 472     }
 473
 474     vfs_dcache_remove(dnode);
 475     // Make sure the children de-referencing their parent.
 476     // With lru presented, the eviction will be propagated over the entire
 477     // detached subtree eventually
 478     struct v_dnode *pos, *n;
 479     llist_for_each(pos, n, &dnode->children, siblings)
 480     {
 481         vfs_dcache_remove(pos);
 482     }
 483
 484     if (dnode->destruct) {
 485         dnode->destruct(dnode);
 486     }
 487
 488     vfs_sb_free(dnode->super_block);
 489     vfree((void*)dnode->name.value);
 490     cake_release(dnode_pile, dnode);
 491 }
 492
 493 struct v_inode*
 494 vfs_i_find(struct v_superblock* sb, u32_t i_id)
 495 {
 496     struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
 497     struct v_inode *pos, *n;
 498     hashtable_bucket_foreach(slot, pos, n, hash_list)
 499     {
 500         if (pos->id == i_id) {
 501             lru_use_one(inode_lru, &pos->lru);
 502             return pos;
 503         }
 504     }
 505
 506     return NULL;
 507 }
 508
 509 void
 510 vfs_i_addhash(struct v_inode* inode)
 511 {
 512     struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
 513
 514     hlist_delete(&inode->hash_list);
 515     hlist_add(&slot->head, &inode->hash_list);
 516 }
 517
 518 struct v_inode*
 519 vfs_i_alloc(struct v_superblock* sb)
 520 {
 521     assert(sb->ops.init_inode);
 522
 523     struct v_inode* inode;
 524     if (!(inode = cake_grab(inode_pile))) {
 525         lru_evict_half(inode_lru);
 526         if (!(inode = cake_grab(inode_pile))) {
 527             return NULL;
 528         }
 529     }
 530
 531     memset(inode, 0, sizeof(*inode));
 532     mutex_init(&inode->lock);
 533     llist_init_head(&inode->xattrs);
 534     llist_init_head(&inode->aka_dnodes);
 535
 536     sb->ops.init_inode(sb, inode);
 537
 538     inode->ctime = clock_unixtime();
 539     inode->atime = inode->ctime;
 540     inode->mtime = inode->ctime;
 541
 542     vfs_i_assign_sb(inode, sb);
 543     lru_use_one(inode_lru, &inode->lru);
 544     return inode;
 545 }
 546
 547 void
 548 vfs_i_free(struct v_inode* inode)
 549 {
 550     if (inode->pg_cache) {
 551         pcache_release(inode->pg_cache);
 552         vfree(inode->pg_cache);
 553     }
 554     // we don't need to sync inode.
 555     // If an inode can be free, then it must be properly closed.
 556     // Hence it must be synced already!
 557     if (inode->destruct) {
 558         inode->destruct(inode);
 559     }
 560
 561     vfs_sb_free(inode->sb);
 562     hlist_delete(&inode->hash_list);
 563     cake_release(inode_pile, inode);
 564 }
 565
 566 /* ---- System call definition and support ---- */
 567
// File locator option: create the name if it does not exist yet.
#define FLOC_MAYBE_MKNAME 1

// File locator option: the name must not exist yet and is created
// (locating an existing name fails with EEXIST).
#define FLOC_MKNAME 2

// File locator option: do not follow symbolic links while walking.
#define FLOC_NOFOLLOW 4
 576
 577 int
 578 vfs_getfd(int fd, struct v_fd** fd_s)
 579 {
 580     if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
 581         return 0;
 582     }
 583     return EBADF;
 584 }
 585
 586 static int
 587 __vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
 588             unsigned int itype, dev_t* dev)
 589 {
 590     int errno;
 591
 592     errno = parent->ops->create(parent, dnode, itype);
 593     if (errno) {
 594         return errno;
 595     }
 596
 597     return 0;
 598 }
 599
/* Result of __vfs_try_locate_file(). */
struct file_locator {
    // directory dnode that contains (or will contain) the file
    struct v_dnode* dir;
    // dnode of the located or newly allocated file
    struct v_dnode* file;
    // true when `file` was newly allocated by the locator; in that case
    // `dir` is left locked until __floc_try_unlock() is called
    bool fresh;
};
 605
 606 /**
 607  * @brief unlock the file locator (floc) if possible.
 608  *        If the file to be located if not exists, and
 609  *        any FLOC_*MKNAME flag is set, then the parent
 610  *        dnode will be locked until the file has been properly
 611  *        finalised by subsequent logic.
 612  *
 613  * @param floc
 614  */
 615 static inline void
 616 __floc_try_unlock(struct file_locator* floc)
 617 {
 618     if (floc->fresh) {
 619         assert(floc->dir);
 620         unlock_dnode(floc->dir);
 621     }
 622 }
 623
 624 static int
 625 __vfs_try_locate_file(const char* path,
 626                       struct file_locator* floc,
 627                       int options)
 628 {
 629     char name_str[VFS_NAME_MAXLEN];
 630     struct v_dnode *fdir, *file;
 631     struct hstr name = HSTR(name_str, 0);
 632     int errno, woption = 0;
 633
 634     if ((options & FLOC_NOFOLLOW)) {
 635         woption |= VFS_WALK_NOFOLLOW;
 636         options &= ~FLOC_NOFOLLOW;
 637     }
 638
 639     floc->fresh = false;
 640     name_str[0] = 0;
 641     errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
 642     if (errno) {
 643         return errno;
 644     }
 645
 646     errno = vfs_walk(fdir, name.value, &file, NULL, woption);
 647
 648     if (errno && errno != ENOENT) {
 649         goto done;
 650     }
 651
 652     if (!errno) {
 653         if ((options & FLOC_MKNAME)) {
 654             errno = EEXIST;
 655         }
 656         goto done;
 657     }
 658
 659     // errno == ENOENT
 660     if (!options) {
 661         goto done;
 662     }
 663
 664     errno = vfs_check_writable(fdir);
 665     if (errno) {
 666         goto done;
 667     }
 668
 669     floc->fresh = true;
 670
 671     file = vfs_d_alloc(fdir, &name);
 672
 673     if (!file) {
 674         return ENOMEM;
 675     }
 676
 677     lock_dnode(fdir);
 678
 679     vfs_dcache_add(fdir, file);
 680
 681 done:
 682     floc->dir   = fdir;
 683     floc->file  = file;
 684
 685     return errno;
 686 }
 687
 688 int
 689 vfs_do_open(const char* path, int options)
 690 {
 691     int errno, fd, loptions = 0;
 692     struct v_dnode *dentry, *file;
 693     struct v_file* ofile = NULL;
 694     struct file_locator floc;
 695     struct v_inode* inode;
 696
 697     if ((options & FO_CREATE)) {
 698         loptions |= FLOC_MAYBE_MKNAME;
 699     } else if ((options & FO_NOFOLLOW)) {
 700         loptions |= FLOC_NOFOLLOW;
 701     }
 702
 703     errno = __vfs_try_locate_file(path, &floc, loptions);
 704
 705     if (errno || (errno = vfs_alloc_fdslot(&fd))) {
 706         return errno;
 707     }
 708
 709     file   = floc.file;
 710     dentry = floc.dir;
 711
 712     if (floc.fresh) {
 713         errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
 714         if (errno) {
 715             vfs_d_free(file);
 716             __floc_try_unlock(&floc);
 717             return errno;
 718         }
 719
 720         __floc_try_unlock(&floc);
 721     }
 722
 723
 724     if ((errno = vfs_open(file, &ofile))) {
 725         return errno;
 726     }
 727
 728     inode = ofile->inode;
 729     lock_inode(inode);
 730
 731     struct v_fd* fd_s = cake_grab(fd_pile);
 732     memset(fd_s, 0, sizeof(*fd_s));
 733
 734     if ((options & O_TRUNC)) {
 735         file->inode->fsize = 0;
 736     }
 737
 738     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 739         ofile->f_pos = 0;
 740     }
 741
 742     fd_s->file = ofile;
 743     fd_s->flags = options;
 744     __current->fdtable->fds[fd] = fd_s;
 745
 746     unlock_inode(inode);
 747
 748     return fd;
 749 }
 750
/* open(2): thin syscall wrapper around vfs_do_open(). */
__DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
{
    // vfs_do_open() yields either a descriptor number or an error code;
    // DO_STATUS_OR_RETURN turns that into the syscall result.
    int errno = vfs_do_open(path, options);
    return DO_STATUS_OR_RETURN(errno);
}
 756
 757 __DEFINE_LXSYSCALL1(int, close, int, fd)
 758 {
 759     struct v_fd* fd_s;
 760     int errno = 0;
 761     if ((errno = vfs_getfd(fd, &fd_s))) {
 762         goto done_err;
 763     }
 764
 765     if ((errno = vfs_close(fd_s->file))) {
 766         goto done_err;
 767     }
 768
 769     cake_release(fd_pile, fd_s);
 770     __current->fdtable->fds[fd] = 0;
 771
 772 done_err:
 773     return DO_STATUS(errno);
 774 }
 775
 776 void
 777 __vfs_readdir_callback(struct dir_context* dctx,
 778                        const char* name,
 779                        const int len,
 780                        const int dtype)
 781 {
 782     struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
 783     strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
 784     dent->d_nlen = len;
 785     dent->d_type = dtype;
 786 }
 787
 788 __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
 789 {
 790     struct v_fd* fd_s;
 791     int errno;
 792
 793     if ((errno = vfs_getfd(fd, &fd_s))) {
 794         goto done;
 795     }
 796
 797     struct v_inode* inode = fd_s->file->inode;
 798
 799     lock_inode(inode);
 800
 801     if (!check_directory_node(inode)) {
 802         errno = ENOTDIR;
 803         goto unlock;
 804     }
 805
 806     struct dir_context dctx = (struct dir_context) {
 807         .cb_data = dent,
 808         .read_complete_callback = __vfs_readdir_callback
 809     };
 810
 811     if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
 812         goto unlock;
 813     }
 814     dent->d_offset++;
 815     fd_s->file->f_pos++;
 816
 817 unlock:
 818     unlock_inode(inode);
 819
 820 done:
 821     return DO_STATUS_OR_RETURN(errno);
 822 }
 823
 824 __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
 825 {
 826     int errno = 0;
 827     struct v_fd* fd_s;
 828     if ((errno = vfs_getfd(fd, &fd_s))) {
 829         goto done;
 830     }
 831
 832     struct v_file* file = fd_s->file;
 833     if (check_directory_node(file->inode)) {
 834         errno = EISDIR;
 835         goto done;
 836     }
 837
 838     lock_inode(file->inode);
 839
 840     file->inode->atime = clock_unixtime();
 841
 842     if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) {
 843         errno = file->ops->read(file->inode, buf, count, file->f_pos);
 844     } else {
 845         errno = pcache_read(file->inode, buf, count, file->f_pos);
 846     }
 847
 848     if (errno > 0) {
 849         file->f_pos += errno;
 850         unlock_inode(file->inode);
 851         return errno;
 852     }
 853
 854     unlock_inode(file->inode);
 855
 856 done:
 857     return DO_STATUS(errno);
 858 }
 859
 860 __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
 861 {
 862     int errno = 0;
 863     struct v_fd* fd_s;
 864     if ((errno = vfs_getfd(fd, &fd_s))) {
 865         goto done;
 866     }
 867
 868     struct v_inode* inode;
 869     struct v_file* file = fd_s->file;
 870
 871     if ((errno = vfs_check_writable(file->dnode))) {
 872         goto done;
 873     }
 874
 875     if (check_directory_node(file->inode)) {
 876         errno = EISDIR;
 877         goto done;
 878     }
 879
 880     inode = file->inode;
 881     lock_inode(inode);
 882
 883     inode->mtime = clock_unixtime();
 884     if ((fd_s->flags & O_APPEND)) {
 885         file->f_pos = inode->fsize;
 886     }
 887
 888     if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
 889         errno = file->ops->write(inode, buf, count, file->f_pos);
 890     } else {
 891         errno = pcache_write(inode, buf, count, file->f_pos);
 892     }
 893
 894     if (errno > 0) {
 895         file->f_pos += errno;
 896         inode->fsize = MAX(inode->fsize, file->f_pos);
 897
 898         unlock_inode(inode);
 899         return errno;
 900     }
 901
 902     unlock_inode(inode);
 903
 904 done:
 905     return DO_STATUS(errno);
 906 }
 907
 908 __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
 909 {
 910     int errno = 0;
 911     struct v_fd* fd_s;
 912     if ((errno = vfs_getfd(fd, &fd_s))) {
 913         goto done;
 914     }
 915
 916     struct v_file* file = fd_s->file;
 917     struct v_inode* inode = file->inode;
 918
 919     if (!file->ops->seek) {
 920         errno = ENOTSUP;
 921         goto done;
 922     }
 923
 924     lock_inode(inode);
 925
 926     int overflow = 0;
 927     int fpos = file->f_pos;
 928
 929     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 930         options = (options != FSEEK_END) ? options : FSEEK_SET;
 931     }
 932
 933     switch (options) {
 934         case FSEEK_CUR:
 935             overflow = sadd_of((int)file->f_pos, offset, &fpos);
 936             break;
 937         case FSEEK_END:
 938             overflow = sadd_of((int)inode->fsize, offset, &fpos);
 939             break;
 940         case FSEEK_SET:
 941             fpos = offset;
 942             break;
 943     }
 944
 945     if (overflow) {
 946         errno = EOVERFLOW;
 947     }
 948     else {
 949         errno = file->ops->seek(file, fpos);
 950     }
 951
 952     unlock_inode(inode);
 953
 954 done:
 955     return DO_STATUS(errno);
 956 }
 957
 958 int
 959 vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
 960 {
 961     if (!dnode) {
 962         return 0;
 963     }
 964
 965     if (depth > 64) {
 966         return ENAMETOOLONG;
 967     }
 968
 969     size_t len = 0;
 970
 971     if (dnode->parent != dnode) {
 972         len = vfs_get_path(dnode->parent, buf, size, depth + 1);
 973     }
 974
 975     if (len >= size) {
 976         return len;
 977     }
 978
 979     if (!len || buf[len - 1] != VFS_PATH_DELIM) {
 980         buf[len++] = VFS_PATH_DELIM;
 981     }
 982
 983     size_t cpy_size = MIN(dnode->name.len, size - len);
 984     strncpy(buf + len, dnode->name.value, cpy_size);
 985     len += cpy_size;
 986
 987     return len;
 988 }
 989
 990 int
 991 vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
 992 {
 993     const char* link;
 994     struct v_inode* inode = dnode->inode;
 995
 996     if (!check_symlink_node(inode)) {
 997         return EINVAL;
 998     }
 999
1000     if (!inode->ops->read_symlink) {
1001         return ENOTSUP;
1002     }
1003
1004     lock_inode(inode);
1005
1006     int errno = inode->ops->read_symlink(inode, &link);
1007     if (errno >= 0) {
1008         strncpy(buf, link, MIN(size, (size_t)errno));
1009     }
1010
1011     unlock_inode(inode);
1012     return errno;
1013 }
1014
1015 int
1016 vfs_get_dtype(int itype)
1017 {
1018     int dtype = DT_FILE;
1019     if (check_itype(itype, VFS_IFSYMLINK)) {
1020         dtype |= DT_SYMLINK;
1021     }
1022
1023     if (check_itype(itype, VFS_IFDIR)) {
1024         dtype |= DT_DIR;
1025         return dtype;
1026     }
1027
1028     // TODO other types
1029
1030     return dtype;
1031 }
1032
1033 __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
1034 {
1035     int errno;
1036     struct v_fd* fd_s;
1037     if ((errno = vfs_getfd(fd, &fd_s))) {
1038         goto done;
1039     }
1040
1041     struct v_dnode* dnode;
1042     errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
1043
1044     if (errno >= 0) {
1045         return errno;
1046     }
1047
1048 done:
1049     return DO_STATUS(errno);
1050 }
1051
1052 __DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
1053 {
1054     int errno;
1055     struct v_dnode* dnode;
1056     if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1057         errno = vfs_readlink(dnode, buf, size);
1058     }
1059
1060     if (errno >= 0) {
1061         return errno;
1062     }
1063
1064     return DO_STATUS(errno);
1065 }
1066
1067 __DEFINE_LXSYSCALL4(
1068   int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1069 {
1070     int errno;
1071     struct v_fd* fd_s;
1072     if ((errno = vfs_getfd(dirfd, &fd_s))) {
1073         goto done;
1074     }
1075
1076     pathname = pathname ? pathname : "";
1077
1078     struct v_dnode* dnode;
1079     if (!(errno = vfs_walk(
1080             fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1081         errno = vfs_readlink(fd_s->file->dnode, buf, size);
1082     }
1083
1084     if (errno >= 0) {
1085         return errno;
1086     }
1087
1088 done:
1089     return DO_STATUS(errno);
1090 }
1091
/*
    NOTE
    When we perform an operation that could affect the layout of a
    directory (i.e., rename, mkdir, rmdir), we must lock the parent dir
    whenever possible. This blocks any ongoing path walk from reaching
    it and hence avoids exposing any partial state.
*/
1099
1100 __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
1101 {
1102     int errno;
1103     struct v_dnode* dnode;
1104     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1105         return DO_STATUS(errno);
1106     }
1107
1108     lock_dnode(dnode);
1109
1110     if ((errno = vfs_check_writable(dnode))) {
1111         goto done;
1112     }
1113
1114     if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
1115         errno = EROFS;
1116         goto done;
1117     }
1118
1119     if (dnode->ref_count > 1 || dnode->inode->open_count) {
1120         errno = EBUSY;
1121         goto done;
1122     }
1123
1124     if (!llist_empty(&dnode->children)) {
1125         errno = ENOTEMPTY;
1126         goto done;
1127     }
1128
1129     struct v_dnode* parent = dnode->parent;
1130
1131     if (!parent) {
1132         errno = EINVAL;
1133         goto done;
1134     }
1135
1136     lock_dnode(parent);
1137     lock_inode(parent->inode);
1138
1139     if (check_directory_node(dnode->inode)) {
1140         errno = parent->inode->ops->rmdir(parent->inode, dnode);
1141         if (!errno) {
1142             vfs_dcache_remove(dnode);
1143         }
1144     } else {
1145         errno = ENOTDIR;
1146     }
1147
1148     unlock_inode(parent->inode);
1149     unlock_dnode(parent);
1150
1151 done:
1152     unlock_dnode(dnode);
1153     return DO_STATUS(errno);
1154 }
1155
1156 __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
1157 {
1158     int errno = 0;
1159     struct v_dnode *parent, *dir;
1160     char name_value[VFS_NAME_MAXLEN];
1161     struct hstr name = HHSTR(name_value, 0, 0);
1162
1163     if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
1164         goto done;
1165     }
1166
1167     if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {
1168         errno = EEXIST;
1169         goto done;
1170     }
1171
1172     if ((errno = vfs_check_writable(parent))) {
1173         goto done;
1174     }
1175
1176     if (!(dir = vfs_d_alloc(parent, &name))) {
1177         errno = ENOMEM;
1178         goto done;
1179     }
1180
1181     struct v_inode* inode = parent->inode;
1182
1183     lock_dnode(parent);
1184     lock_inode(inode);
1185
1186     if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
1187         errno = ENOTSUP;
1188     } else if (!inode->ops->mkdir) {
1189         errno = ENOTSUP;
1190     } else if (!check_directory_node(inode)) {
1191         errno = ENOTDIR;
1192     } else if (!(errno = inode->ops->mkdir(inode, dir))) {
1193         vfs_dcache_add(parent, dir);
1194         goto cleanup;
1195     }
1196
1197     vfs_d_free(dir);
1198
1199 cleanup:
1200     unlock_inode(inode);
1201     unlock_dnode(parent);
1202 done:
1203     return DO_STATUS(errno);
1204 }
1205
1206 int
1207 __vfs_do_unlink(struct v_dnode* dnode)
1208 {
1209     int errno;
1210     struct v_inode* inode = dnode->inode;
1211
1212     if (dnode->ref_count > 1) {
1213         return EBUSY;
1214     }
1215
1216     if ((errno = vfs_check_writable(dnode))) {
1217         return errno;
1218     }
1219
1220     lock_inode(inode);
1221
1222     if (inode->open_count) {
1223         errno = EBUSY;
1224     } else if (!check_directory_node(inode)) {
1225         errno = inode->ops->unlink(inode, dnode);
1226         if (!errno) {
1227             vfs_d_free(dnode);
1228         }
1229     } else {
1230         errno = EISDIR;
1231     }
1232
1233     unlock_inode(inode);
1234
1235     return errno;
1236 }
1237
1238 __DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
1239 {
1240     int errno;
1241     struct v_dnode* dnode;
1242     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1243         goto done;
1244     }
1245
1246     errno = __vfs_do_unlink(dnode);
1247
1248 done:
1249     return DO_STATUS(errno);
1250 }
1251
1252 __DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
1253 {
1254     int errno;
1255     struct v_fd* fd_s;
1256     if ((errno = vfs_getfd(fd, &fd_s))) {
1257         goto done;
1258     }
1259
1260     struct v_dnode* dnode;
1261     if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
1262         errno = __vfs_do_unlink(dnode);
1263     }
1264
1265 done:
1266     return DO_STATUS(errno);
1267 }
1268
1269 __DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
1270 {
1271     int errno;
1272     struct file_locator floc;
1273     struct v_dnode *to_link, *name_file;
1274
1275     errno = __vfs_try_locate_file(oldpath, &floc, 0);
1276     if (errno) {
1277         goto done;
1278     }
1279
1280     __floc_try_unlock(&floc);
1281
1282     to_link = floc.file;
1283     errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
1284     if (!errno) {
1285         goto done;
1286     }
1287
1288     name_file = floc.file;
1289     errno = vfs_link(to_link, name_file);
1290     if (errno) {
1291         vfs_d_free(name_file);
1292     }
1293
1294 done:
1295     __floc_try_unlock(&floc);
1296     return DO_STATUS(errno);
1297 }
1298
1299 __DEFINE_LXSYSCALL1(int, fsync, int, fildes)
1300 {
1301     int errno;
1302     struct v_fd* fd_s;
1303
1304     if (!(errno = vfs_getfd(fildes, &fd_s))) {
1305         errno = vfs_fsync(fd_s->file);
1306     }
1307
1308     return DO_STATUS(errno);
1309 }
1310
1311 int
1312 vfs_dup_fd(struct v_fd* old, struct v_fd** new)
1313 {
1314     int errno = 0;
1315     struct v_fd* copied = cake_grab(fd_pile);
1316
1317     memcpy(copied, old, sizeof(struct v_fd));
1318
1319     atomic_fetch_add(&old->file->ref_count, 1);
1320
1321     *new = copied;
1322
1323     return errno;
1324 }
1325
1326 int
1327 vfs_dup2(int oldfd, int newfd)
1328 {
1329     if (newfd == oldfd) {
1330         return newfd;
1331     }
1332
1333     int errno;
1334     struct v_fd *oldfd_s, *newfd_s;
1335     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1336         goto done;
1337     }
1338
1339     if (!TEST_FD(newfd)) {
1340         errno = EBADF;
1341         goto done;
1342     }
1343
1344     newfd_s = __current->fdtable->fds[newfd];
1345     if (newfd_s && (errno = vfs_close(newfd_s->file))) {
1346         goto done;
1347     }
1348
1349     if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1350         __current->fdtable->fds[newfd] = newfd_s;
1351         return newfd;
1352     }
1353
1354 done:
1355     return DO_STATUS(errno);
1356 }
1357
1358 __DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
1359 {
1360     return vfs_dup2(oldfd, newfd);
1361 }
1362
1363 __DEFINE_LXSYSCALL1(int, dup, int, oldfd)
1364 {
1365     int errno, newfd;
1366     struct v_fd *oldfd_s, *newfd_s;
1367     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1368         goto done;
1369     }
1370
1371     if (!(errno = vfs_alloc_fdslot(&newfd)) &&
1372         !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1373         __current->fdtable->fds[newfd] = newfd_s;
1374         return newfd;
1375     }
1376
1377 done:
1378     return DO_STATUS(errno);
1379 }
1380
1381 __DEFINE_LXSYSCALL2(
1382   int, symlink, const char*, pathname, const char*, link_target)
1383 {
1384     int errno;
1385     struct file_locator floc;
1386     struct v_dnode *file;
1387     struct v_inode *f_ino;
1388
1389     errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
1390     if (errno) {
1391         goto done;
1392     }
1393
1394     file = floc.file;
1395     errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
1396     if (errno) {
1397         vfs_d_free(file);
1398         goto done;
1399     }
1400
1401     f_ino = file->inode;
1402
1403     assert(f_ino);
1404
1405     errno = vfs_check_writable(file);
1406     if (errno) {
1407         goto done;
1408     }
1409
1410     if (!f_ino->ops->set_symlink) {
1411         errno = ENOTSUP;
1412         goto done;
1413     }
1414
1415     lock_inode(f_ino);
1416
1417     errno = f_ino->ops->set_symlink(f_ino, link_target);
1418
1419     unlock_inode(f_ino);
1420
1421 done:
1422     __floc_try_unlock(&floc);
1423     return DO_STATUS(errno);
1424 }
1425
1426 void
1427 vfs_ref_file(struct v_file* file)
1428 {
1429     atomic_fetch_add(&file->ref_count, 1);
1430 }
1431
1432 void
1433 vfs_ref_dnode(struct v_dnode* dnode)
1434 {
1435     atomic_fetch_add(&dnode->ref_count, 1);
1436
1437     if (dnode->mnt) {
1438         mnt_mkbusy(dnode->mnt);
1439     }
1440 }
1441
1442 void
1443 vfs_unref_dnode(struct v_dnode* dnode)
1444 {
1445     atomic_fetch_sub(&dnode->ref_count, 1);
1446     if (dnode->mnt) {
1447         mnt_chillax(dnode->mnt);
1448     }
1449 }
1450
1451 int
1452 vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
1453 {
1454     int errno = 0;
1455
1456     lock_dnode(dnode);
1457
1458     if (!check_directory_node(dnode->inode)) {
1459         errno = ENOTDIR;
1460         goto done;
1461     }
1462
1463     if (proc->cwd) {
1464         vfs_unref_dnode(proc->cwd);
1465     }
1466
1467     vfs_ref_dnode(dnode);
1468     proc->cwd = dnode;
1469
1470     unlock_dnode(dnode);
1471
1472 done:
1473     return errno;
1474 }
1475
1476 __DEFINE_LXSYSCALL1(int, chdir, const char*, path)
1477 {
1478     struct v_dnode* dnode;
1479     int errno = 0;
1480
1481     if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1482         goto done;
1483     }
1484
1485     errno = vfs_do_chdir((struct proc_info*)__current, dnode);
1486
1487 done:
1488     return DO_STATUS(errno);
1489 }
1490
1491 __DEFINE_LXSYSCALL1(int, fchdir, int, fd)
1492 {
1493     struct v_fd* fd_s;
1494     int errno = 0;
1495
1496     if ((errno = vfs_getfd(fd, &fd_s))) {
1497         goto done;
1498     }
1499
1500     errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);
1501
1502 done:
1503     return DO_STATUS(errno);
1504 }
1505
1506 __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
1507 {
1508     int errno = 0;
1509     char* ret_ptr = 0;
1510     if (size < 2) {
1511         errno = ERANGE;
1512         goto done;
1513     }
1514
1515     size_t len = 0;
1516
1517     if (!__current->cwd) {
1518         *buf = VFS_PATH_DELIM;
1519         len = 1;
1520     } else {
1521         len = vfs_get_path(__current->cwd, buf, size, 0);
1522         if (len == size) {
1523             errno = ERANGE;
1524             goto done;
1525         }
1526     }
1527
1528     buf[len] = '\0';
1529
1530     ret_ptr = buf;
1531
1532 done:
1533     syscall_result(errno);
1534     return ret_ptr;
1535 }
1536
1537 int
1538 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1539 {
1540     int errno = 0;
1541     if (current->inode->id == target->inode->id) {
1542         // hard link
1543         return 0;
1544     }
1545
1546     if ((errno = vfs_check_writable(current))) {
1547         return errno;
1548     }
1549
1550     if (current->ref_count > 1 || target->ref_count > 1) {
1551         return EBUSY;
1552     }
1553
1554     if (current->super_block != target->super_block) {
1555         return EXDEV;
1556     }
1557
1558     struct v_dnode* oldparent = current->parent;
1559     struct v_dnode* newparent = target->parent;
1560
1561     lock_dnode(current);
1562     lock_dnode(target);
1563     if (oldparent)
1564         lock_dnode(oldparent);
1565     if (newparent)
1566         lock_dnode(newparent);
1567
1568     if (!llist_empty(&target->children)) {
1569         errno = ENOTEMPTY;
1570         unlock_dnode(target);
1571         goto cleanup;
1572     }
1573
1574     if ((errno =
1575            current->inode->ops->rename(current->inode, current, target))) {
1576         unlock_dnode(target);
1577         goto cleanup;
1578     }
1579
1580     // re-position current
1581     hstrcpy(&current->name, &target->name);
1582     vfs_dcache_rehash(newparent, current);
1583
1584     // detach target
1585     vfs_d_free(target);
1586
1587     unlock_dnode(target);
1588
1589 cleanup:
1590     unlock_dnode(current);
1591     if (oldparent)
1592         unlock_dnode(oldparent);
1593     if (newparent)
1594         unlock_dnode(newparent);
1595
1596     return errno;
1597 }
1598
1599 __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
1600 {
1601     struct v_dnode *cur, *target_parent, *target;
1602     struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);
1603     int errno = 0;
1604
1605     if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {
1606         goto done;
1607     }
1608
1609     if ((errno = vfs_walk(
1610            __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {
1611         goto done;
1612     }
1613
1614     errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
1615     if (errno == ENOENT) {
1616         target = vfs_d_alloc(target_parent, &name);
1617         vfs_dcache_add(target_parent, target);
1618     } else if (errno) {
1619         goto done;
1620     }
1621
1622     if (!target) {
1623         errno = ENOMEM;
1624         goto done;
1625     }
1626
1627     errno = vfs_do_rename(cur, target);
1628
1629 done:
1630     vfree((void*)name.value);
1631     return DO_STATUS(errno);
1632 }
1633
1634 __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
1635 {
1636     int errno = 0;
1637     struct v_fd* fds;
1638
1639     if ((errno = vfs_getfd(fd, &fds))) {
1640         goto done;
1641     }
1642
1643     struct v_inode* vino = fds->file->inode;
1644     struct device* fdev = vino->sb->dev;
1645
1646     *stat = (struct file_stat){.st_ino = vino->id,
1647                                .st_blocks = vino->lb_usage,
1648                                .st_size = vino->fsize,
1649                                .mode = vino->itype,
1650                                .st_ioblksize = PAGE_SIZE,
1651                                .st_blksize = vino->sb->blksize};
1652
1653     if (check_device_node(vino)) {
1654         struct device* rdev = resolve_device(vino->data);
1655         if (!rdev || rdev->magic != DEV_STRUCT_MAGIC) {
1656             errno = EINVAL;
1657             goto done;
1658         }
1659
1660         stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
1661                                 .unique = rdev->ident.unique,
1662                                 .index = rdev->dev_uid};
1663     }
1664
1665     if (fdev) {
1666         stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
1667                                .unique = fdev->ident.unique,
1668                                .index = fdev->dev_uid};
1669     }
1670
1671 done:
1672     return DO_STATUS(errno);
1673 }