lunaix-os/kernel/fs/vfs.c

   1 /**
   2  * @file vfs.c
   3  * @author Lunaixsky (zelong56@gmail.com)
   4  * @brief Lunaix virtual file system - an abstraction layer for all file system.
   5  * @version 0.1
   6  * @date 2022-07-24
   7  *
   8  * @copyright Copyright (c) 2022
   9  *
  10  */
  11
  12 // Welcome to The Mountain O'Shit! :)
  13
  14 /*
  15  TODO vfs & device todos checklist
  16
  17     It is overseen by Twilight Sparkle ;)
  18
  19  1. Get inodes hooked into lru (CHECKED)
  20  2. Get dnodes hooked into lru (CHECKED)
 3. Get inodes properly hashed so they can be reused by underlying fs (CHECKED)
  22  4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
  23         [good idea] or a constructor/destructor pattern in cake allocator ?
  24  5. (mount) Figure out a way to identify a busy mount point before unmount
  25             maybe a unified mount_point structure that maintain a referencing
  26             counter on any dnodes within the subtree? Such a counter will only
            increment if a file is opened or a dnode is being used as working
            directory and decrementing conversely. (CHECKED)
  29  6. (mount) Ability to track all mount points (including sub-mounts)
  30             so we can be confident to clean up everything when we
  31             unmount. (CHECKED)
  32  7. (mount) Figure out a way to acquire the device represented by a dnode.
  33             so it can be used to mount. (e.g. we wish to get `struct device*`
  34             out of the dnode at /dev/sda)
  35             [tip] we should pay attention at twifs and add a private_data field
  36             under struct v_dnode? (CHECKED)
  37  8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
  38  9. (mount) (future) Ability to mount any thing? e.g. Linux can mount a disk
  39                     image file using a so called "loopback" pseudo device. Maybe
  40                     we can do similar thing in Lunaix? A block device emulation
  41                     above the regular file when we mount it on.
  42  10. (device) device number (dev_t) allocation
  43             [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
  44 */
  45
  46 #include <klibc/string.h>
  47 #include <lunaix/foptions.h>
  48 #include <lunaix/fs.h>
  49 #include <lunaix/mm/cake.h>
  50 #include <lunaix/mm/valloc.h>
  51 #include <lunaix/process.h>
  52 #include <lunaix/spike.h>
  53 #include <lunaix/syscall.h>
  54 #include <lunaix/syscall_utils.h>
  55
  56 #include <lunaix/fs/twifs.h>
  57
  58 #include <usr/lunaix/dirent_defs.h>
  59
/* Dedicated allocation piles, one per VFS object type; created in vfs_init(). */
static struct cake_pile* dnode_pile;
static struct cake_pile* inode_pile;
static struct cake_pile* file_pile;
static struct cake_pile* superblock_pile;
static struct cake_pile* fd_pile;

/* Root dnode of the VFS tree; allocated in vfs_init() and made its own parent. */
struct v_dnode* vfs_sysroot;

/* LRU zones that track dnodes and inodes; created in vfs_init(). */
struct lru_zone *dnode_lru, *inode_lru;

/*
 * Well-known path components. vfs_ddot and vfs_dot are rehashed in
 * vfs_init() before they are compared against in vfs_dcache_lookup().
 */
struct hstr vfs_ddot = HSTR("..", 2);
struct hstr vfs_dot = HSTR(".", 1);
struct hstr vfs_empty = HSTR("", 0);

/* Eviction callback registered for the dnode LRU zone (defined below). */
static int
__vfs_try_evict_dnode(struct lru_node* obj);

/* Eviction callback registered for the inode LRU zone (defined below). */
static int
__vfs_try_evict_inode(struct lru_node* obj);
  79
  80 void
  81 vfs_init()
  82 {
  83     // 为他们专门创建一个蛋糕堆，而不使用valloc，这样我们可以最小化内碎片的产生
  84     dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
  85     inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
  86     file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
  87     fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
  88     superblock_pile =
  89       cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
  90
  91     dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
  92     inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
  93
  94     hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
  95     hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
  96
  97     // 创建一个根dnode。
  98     vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
  99     vfs_sysroot->parent = vfs_sysroot;
 100
 101     vfs_ref_dnode(vfs_sysroot);
 102 }
 103
 104 static inline struct hbucket*
 105 __dcache_hash(struct v_dnode* parent, u32_t* hash)
 106 {
 107     struct hbucket* d_cache;
 108     u32_t _hash;
 109
 110     d_cache = parent->super_block->d_cache;
 111     _hash = *hash;
 112     _hash = _hash ^ (_hash >> VFS_HASHBITS);
 113     _hash += (u32_t)__ptr(parent);
 114
 115     *hash = _hash;
 116     return &d_cache[_hash & VFS_HASH_MASK];
 117 }
 118
 119 static inline int
 120 __sync_inode_nolock(struct v_inode* inode)
 121 {
 122     pcache_commit_all(inode);
 123
 124     int errno = ENOTSUP;
 125     if (inode->ops->sync) {
 126         errno = inode->ops->sync(inode);
 127     }
 128
 129     return errno;
 130 }
 131
 132 struct v_dnode*
 133 vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
 134 {
 135     if (!str->len || HSTR_EQ(str, &vfs_dot))
 136         return parent;
 137
 138     if (HSTR_EQ(str, &vfs_ddot)) {
 139         return parent->parent;
 140     }
 141
 142     u32_t hash = str->hash;
 143     struct hbucket* slot = __dcache_hash(parent, &hash);
 144
 145     struct v_dnode *pos, *n;
 146     hashtable_bucket_foreach(slot, pos, n, hash_list)
 147     {
 148         if (pos->name.hash == hash && pos->parent == parent) {
 149             return pos;
 150         }
 151     }
 152     return NULL;
 153 }
 154
 155 void
 156 vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
 157 {
 158     assert(parent);
 159
 160     dnode->ref_count = 1;
 161     dnode->parent = parent;
 162     llist_append(&parent->children, &dnode->siblings);
 163
 164     struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
 165     hlist_add(&bucket->head, &dnode->hash_list);
 166 }
 167
 168 void
 169 vfs_dcache_remove(struct v_dnode* dnode)
 170 {
 171     assert(dnode);
 172     assert(dnode->ref_count == 1);
 173
 174     llist_delete(&dnode->siblings);
 175     llist_delete(&dnode->aka_list);
 176     hlist_delete(&dnode->hash_list);
 177
 178     dnode->parent = NULL;
 179     dnode->ref_count = 0;
 180 }
 181
 182 void
 183 vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
 184 {
 185     assert(new_parent);
 186
 187     hstr_rehash(&dnode->name, HSTR_FULL_HASH);
 188     vfs_dcache_remove(dnode);
 189     vfs_dcache_add(new_parent, dnode);
 190 }
 191
 192 int
 193 vfs_open(struct v_dnode* dnode, struct v_file** file)
 194 {
 195     struct v_inode* inode = dnode->inode;
 196
 197     if (!inode || !inode->ops->open) {
 198         return ENOTSUP;
 199     }
 200
 201     lock_inode(inode);
 202
 203     struct v_file* vfile = cake_grab(file_pile);
 204     memset(vfile, 0, sizeof(*vfile));
 205
 206     vfile->dnode = dnode;
 207     vfile->inode = inode;
 208     vfile->ref_count = 1;
 209     vfile->ops = inode->default_fops;
 210
 211     if (check_regfile_node(inode) && !inode->pg_cache) {
 212         struct pcache* pcache = vzalloc(sizeof(struct pcache));
 213         pcache_init(pcache);
 214         pcache->master = inode;
 215         inode->pg_cache = pcache;
 216     }
 217
 218     int errno = inode->ops->open(inode, vfile);
 219     if (errno) {
 220         cake_release(file_pile, vfile);
 221     } else {
 222         vfs_ref_dnode(dnode);
 223         inode->open_count++;
 224
 225         *file = vfile;
 226     }
 227
 228     unlock_inode(inode);
 229
 230     return errno;
 231 }
 232
 233 void
 234 vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
 235 {
 236     if (assign_to->inode) {
 237         llist_delete(&assign_to->aka_list);
 238         assign_to->inode->link_count--;
 239     }
 240
 241     llist_append(&inode->aka_dnodes, &assign_to->aka_list);
 242     assign_to->inode = inode;
 243     inode->link_count++;
 244 }
 245
 246 int
 247 vfs_link(struct v_dnode* to_link, struct v_dnode* name)
 248 {
 249     int errno;
 250
 251     if ((errno = vfs_check_writable(to_link))) {
 252         return errno;
 253     }
 254
 255     lock_inode(to_link->inode);
 256     if (to_link->super_block->root != name->super_block->root) {
 257         errno = EXDEV;
 258     } else if (!to_link->inode->ops->link) {
 259         errno = ENOTSUP;
 260     } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
 261         vfs_assign_inode(name, to_link->inode);
 262     }
 263     unlock_inode(to_link->inode);
 264
 265     return errno;
 266 }
 267
 268 int
 269 vfs_pclose(struct v_file* file, pid_t pid)
 270 {
 271     struct v_inode* inode;
 272     int errno = 0;
 273
 274     inode = file->inode;
 275
 276     /*
 277      * Prevent dead lock.
 278      * This happened when process is terminated while blocking on read.
 279      * In that case, the process is still holding the inode lock and it
 280          will never get released.
 281      * The unlocking should also include ownership check.
 282      *
 283      * To see why, consider two process both open the same file both with
 284      * fd=x.
 285      *      Process A: busy on reading x
 286      *      Process B: do nothing with x
 287      * Assuming that, after a very short time, process B get terminated
 288      * while process A is still busy in it's reading business. By this
 289      * design, the inode lock of this file x is get released by B rather
 290      * than A. And this will cause a probable race condition on A if other
 291      * process is writing to this file later after B exit.
 292     */
 293
 294     mutex_unlock_for(&inode->lock, pid);
 295
 296     if (vfs_check_duped_file(file)) {
 297         vfs_unref_file(file);
 298         return 0;
 299     }
 300
 301     if ((errno = file->ops->close(file))) {
 302         goto done;
 303     }
 304
 305     vfs_unref_dnode(file->dnode);
 306     cake_release(file_pile, file);
 307
 308     /*
 309         if the current inode is not being locked by other
 310         threads that does not share same open context,
 311         then we can try to do sync opportunistically
 312     */
 313     if (mutex_on_hold(&inode->lock)) {
 314         goto done;
 315     }
 316
 317     lock_inode(inode);
 318
 319     pcache_commit_all(inode);
 320     inode->open_count--;
 321
 322     if (!inode->open_count) {
 323         __sync_inode_nolock(inode);
 324     }
 325
 326     unlock_inode(inode);
 327
 328 done:
 329     return errno;
 330 }
 331
 332 int
 333 vfs_close(struct v_file* file)
 334 {
 335     return vfs_pclose(file, __current->pid);
 336 }
 337
 338 void
 339 vfs_free_fd(struct v_fd* fd)
 340 {
 341     cake_release(fd_pile, fd);
 342 }
 343
 344 int
 345 vfs_isync(struct v_inode* inode)
 346 {
 347     lock_inode(inode);
 348
 349     int errno = __sync_inode_nolock(inode);
 350
 351     unlock_inode(inode);
 352
 353     return errno;
 354 }
 355
 356 int
 357 vfs_fsync(struct v_file* file)
 358 {
 359     int errno;
 360     if ((errno = vfs_check_writable(file->dnode))) {
 361         return errno;
 362     }
 363
 364     return vfs_isync(file->inode);
 365 }
 366
 367 int
 368 vfs_alloc_fdslot(int* fd)
 369 {
 370     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 371         if (!__current->fdtable->fds[i]) {
 372             *fd = i;
 373             return 0;
 374         }
 375     }
 376     return EMFILE;
 377 }
 378
 379 struct v_superblock*
 380 vfs_sb_alloc()
 381 {
 382     struct v_superblock* sb = cake_grab(superblock_pile);
 383     memset(sb, 0, sizeof(*sb));
 384     llist_init_head(&sb->sb_list);
 385
 386     sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 387     sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 388
 389     sb->ref_count = 1;
 390     return sb;
 391 }
 392
 393 void
 394 vfs_sb_ref(struct v_superblock* sb)
 395 {
 396     sb->ref_count++;
 397 }
 398
 399 void
 400 vfs_sb_unref(struct v_superblock* sb)
 401 {
 402     assert(sb->ref_count);
 403
 404     sb->ref_count--;
 405     if (likely(sb->ref_count)) {
 406         return;
 407     }
 408
 409     if (sb->ops.release) {
 410         sb->ops.release(sb);
 411     }
 412
 413     vfree(sb->i_cache);
 414     vfree(sb->d_cache);
 415
 416     cake_release(superblock_pile, sb);
 417 }
 418
 419 static int
 420 __vfs_try_evict_dnode(struct lru_node* obj)
 421 {
 422     struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
 423
 424     if (!dnode->ref_count) {
 425         vfs_d_free(dnode);
 426         return 1;
 427     }
 428     return 0;
 429 }
 430
 431 static int
 432 __vfs_try_evict_inode(struct lru_node* obj)
 433 {
 434     struct v_inode* inode = container_of(obj, struct v_inode, lru);
 435
 436     if (!inode->link_count && !inode->open_count) {
 437         vfs_i_free(inode);
 438         return 1;
 439     }
 440     return 0;
 441 }
 442
 443 struct v_dnode*
 444 vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
 445 {
 446     struct v_dnode* dnode = cake_grab(dnode_pile);
 447     if (!dnode) {
 448         lru_evict_half(dnode_lru);
 449
 450         if (!(dnode = cake_grab(dnode_pile))) {
 451             return NULL;
 452         }
 453     }
 454
 455     memset(dnode, 0, sizeof(*dnode));
 456     llist_init_head(&dnode->children);
 457     llist_init_head(&dnode->siblings);
 458     llist_init_head(&dnode->aka_list);
 459     mutex_init(&dnode->lock);
 460
 461     dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
 462
 463     hstrcpy(&dnode->name, name);
 464
 465     if (parent) {
 466         vfs_d_assign_sb(dnode, parent->super_block);
 467         dnode->mnt = parent->mnt;
 468     }
 469
 470     lru_use_one(dnode_lru, &dnode->lru);
 471
 472     return dnode;
 473 }
 474
 475 void
 476 vfs_d_free(struct v_dnode* dnode)
 477 {
 478     assert(dnode->ref_count == 1);
 479
 480     if (dnode->inode) {
 481         assert(dnode->inode->link_count > 0);
 482         dnode->inode->link_count--;
 483     }
 484
 485     vfs_dcache_remove(dnode);
 486     // Make sure the children de-referencing their parent.
 487     // With lru presented, the eviction will be propagated over the entire
 488     // detached subtree eventually
 489     struct v_dnode *pos, *n;
 490     llist_for_each(pos, n, &dnode->children, siblings)
 491     {
 492         vfs_dcache_remove(pos);
 493     }
 494
 495     if (dnode->destruct) {
 496         dnode->destruct(dnode);
 497     }
 498
 499     vfs_sb_unref(dnode->super_block);
 500     vfree((void*)dnode->name.value);
 501     cake_release(dnode_pile, dnode);
 502 }
 503
 504 struct v_inode*
 505 vfs_i_find(struct v_superblock* sb, u32_t i_id)
 506 {
 507     struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
 508     struct v_inode *pos, *n;
 509     hashtable_bucket_foreach(slot, pos, n, hash_list)
 510     {
 511         if (pos->id == i_id) {
 512             lru_use_one(inode_lru, &pos->lru);
 513             return pos;
 514         }
 515     }
 516
 517     return NULL;
 518 }
 519
 520 void
 521 vfs_i_addhash(struct v_inode* inode)
 522 {
 523     struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
 524
 525     hlist_delete(&inode->hash_list);
 526     hlist_add(&slot->head, &inode->hash_list);
 527 }
 528
 529 struct v_inode*
 530 vfs_i_alloc(struct v_superblock* sb)
 531 {
 532     assert(sb->ops.init_inode);
 533
 534     struct v_inode* inode;
 535     if (!(inode = cake_grab(inode_pile))) {
 536         lru_evict_half(inode_lru);
 537         if (!(inode = cake_grab(inode_pile))) {
 538             return NULL;
 539         }
 540     }
 541
 542     memset(inode, 0, sizeof(*inode));
 543     mutex_init(&inode->lock);
 544     llist_init_head(&inode->xattrs);
 545     llist_init_head(&inode->aka_dnodes);
 546
 547     sb->ops.init_inode(sb, inode);
 548
 549     inode->ctime = clock_unixtime();
 550     inode->atime = inode->ctime;
 551     inode->mtime = inode->ctime;
 552
 553     vfs_i_assign_sb(inode, sb);
 554     lru_use_one(inode_lru, &inode->lru);
 555     return inode;
 556 }
 557
 558 void
 559 vfs_i_free(struct v_inode* inode)
 560 {
 561     if (inode->pg_cache) {
 562         pcache_release(inode->pg_cache);
 563         vfree(inode->pg_cache);
 564     }
 565     // we don't need to sync inode.
 566     // If an inode can be free, then it must be properly closed.
 567     // Hence it must be synced already!
 568     if (inode->destruct) {
 569         inode->destruct(inode);
 570     }
 571
 572     vfs_sb_unref(inode->sb);
 573     hlist_delete(&inode->hash_list);
 574     cake_release(inode_pile, inode);
 575 }
 576
 577 /* ---- System call definition and support ---- */
 578
 579 // make a new name when not exists
 580 #define FLOC_MAYBE_MKNAME 1
 581
 582 // name must be non-exist and made.
 583 #define FLOC_MKNAME 2
 584
 585 // no follow symlink
 586 #define FLOC_NOFOLLOW 4
 587
 588 int
 589 vfs_getfd(int fd, struct v_fd** fd_s)
 590 {
 591     if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
 592         return 0;
 593     }
 594     return EBADF;
 595 }
 596
 597 static int
 598 __vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
 599             unsigned int itype, dev_t* dev)
 600 {
 601     int errno;
 602
 603     errno = parent->ops->create(parent, dnode, itype);
 604     if (errno) {
 605         return errno;
 606     }
 607
 608     return 0;
 609 }
 610
/* Result of __vfs_try_locate_file(). */
struct file_locator {
    // dnode of the directory the final path component was looked up in
    struct v_dnode* dir;
    // dnode of the located file, or the newly allocated one when fresh
    struct v_dnode* file;
    // true when `file` was newly allocated; `dir` is then left locked
    bool fresh;
};
 616
 617 /**
 618  * @brief unlock the file locator (floc) if possible.
 619  *        If the file to be located if not exists, and
 620  *        any FLOC_*MKNAME flag is set, then the parent
 621  *        dnode will be locked until the file has been properly
 622  *        finalised by subsequent logic.
 623  *
 624  * @param floc
 625  */
 626 static inline void
 627 __floc_try_unlock(struct file_locator* floc)
 628 {
 629     if (floc->fresh) {
 630         assert(floc->dir);
 631         unlock_dnode(floc->dir);
 632     }
 633 }
 634
 635 static int
 636 __vfs_try_locate_file(const char* path,
 637                       struct file_locator* floc,
 638                       int options)
 639 {
 640     char name_str[VFS_NAME_MAXLEN];
 641     struct v_dnode *fdir, *file;
 642     struct hstr name = HSTR(name_str, 0);
 643     int errno, woption = 0;
 644
 645     if ((options & FLOC_NOFOLLOW)) {
 646         woption |= VFS_WALK_NOFOLLOW;
 647         options &= ~FLOC_NOFOLLOW;
 648     }
 649
 650     floc->fresh = false;
 651     name_str[0] = 0;
 652     errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
 653     if (errno) {
 654         return errno;
 655     }
 656
 657     errno = vfs_walk(fdir, name.value, &file, NULL, woption);
 658
 659     if (errno && errno != ENOENT) {
 660         goto done;
 661     }
 662
 663     if (!errno) {
 664         if ((options & FLOC_MKNAME)) {
 665             errno = EEXIST;
 666         }
 667         goto done;
 668     }
 669
 670     // errno == ENOENT
 671     if (!options) {
 672         goto done;
 673     }
 674
 675     errno = vfs_check_writable(fdir);
 676     if (errno) {
 677         goto done;
 678     }
 679
 680     floc->fresh = true;
 681
 682     file = vfs_d_alloc(fdir, &name);
 683
 684     if (!file) {
 685         return ENOMEM;
 686     }
 687
 688     lock_dnode(fdir);
 689
 690     vfs_dcache_add(fdir, file);
 691
 692 done:
 693     floc->dir   = fdir;
 694     floc->file  = file;
 695
 696     return errno;
 697 }
 698
 699 int
 700 vfs_do_open(const char* path, int options)
 701 {
 702     int errno, fd, loptions = 0;
 703     struct v_dnode *dentry, *file;
 704     struct v_file* ofile = NULL;
 705     struct file_locator floc;
 706     struct v_inode* inode;
 707
 708     if ((options & FO_CREATE)) {
 709         loptions |= FLOC_MAYBE_MKNAME;
 710     } else if ((options & FO_NOFOLLOW)) {
 711         loptions |= FLOC_NOFOLLOW;
 712     }
 713
 714     errno = __vfs_try_locate_file(path, &floc, loptions);
 715
 716     if (errno || (errno = vfs_alloc_fdslot(&fd))) {
 717         return errno;
 718     }
 719
 720     file   = floc.file;
 721     dentry = floc.dir;
 722
 723     if (floc.fresh) {
 724         errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
 725         if (errno) {
 726             vfs_d_free(file);
 727             __floc_try_unlock(&floc);
 728             return errno;
 729         }
 730
 731         __floc_try_unlock(&floc);
 732     }
 733
 734
 735     if ((errno = vfs_open(file, &ofile))) {
 736         return errno;
 737     }
 738
 739     inode = ofile->inode;
 740     lock_inode(inode);
 741
 742     struct v_fd* fd_s = cake_grab(fd_pile);
 743     memset(fd_s, 0, sizeof(*fd_s));
 744
 745     if ((options & O_TRUNC)) {
 746         file->inode->fsize = 0;
 747     }
 748
 749     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 750         ofile->f_pos = 0;
 751     }
 752
 753     fd_s->file = ofile;
 754     fd_s->flags = options;
 755     __current->fdtable->fds[fd] = fd_s;
 756
 757     unlock_inode(inode);
 758
 759     return fd;
 760 }
 761
 762 __DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
 763 {
 764     int errno = vfs_do_open(path, options);
 765     return DO_STATUS_OR_RETURN(errno);
 766 }
 767
 768 __DEFINE_LXSYSCALL1(int, close, int, fd)
 769 {
 770     struct v_fd* fd_s;
 771     int errno = 0;
 772     if ((errno = vfs_getfd(fd, &fd_s))) {
 773         goto done_err;
 774     }
 775
 776     if ((errno = vfs_close(fd_s->file))) {
 777         goto done_err;
 778     }
 779
 780     cake_release(fd_pile, fd_s);
 781     __current->fdtable->fds[fd] = 0;
 782
 783 done_err:
 784     return DO_STATUS(errno);
 785 }
 786
 787 void
 788 __vfs_readdir_callback(struct dir_context* dctx,
 789                        const char* name,
 790                        const int len,
 791                        const int dtype)
 792 {
 793     struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
 794     strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
 795     dent->d_nlen = len;
 796     dent->d_type = dtype;
 797 }
 798
 799 __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
 800 {
 801     struct v_fd* fd_s;
 802     int errno;
 803
 804     if ((errno = vfs_getfd(fd, &fd_s))) {
 805         goto done;
 806     }
 807
 808     struct v_inode* inode = fd_s->file->inode;
 809
 810     lock_inode(inode);
 811
 812     if (!check_directory_node(inode)) {
 813         errno = ENOTDIR;
 814         goto unlock;
 815     }
 816
 817     struct dir_context dctx = (struct dir_context) {
 818         .cb_data = dent,
 819         .read_complete_callback = __vfs_readdir_callback
 820     };
 821
 822     if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
 823         goto unlock;
 824     }
 825     dent->d_offset++;
 826     fd_s->file->f_pos++;
 827
 828 unlock:
 829     unlock_inode(inode);
 830
 831 done:
 832     return DO_STATUS_OR_RETURN(errno);
 833 }
 834
 835 __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
 836 {
 837     int errno = 0;
 838     struct v_fd* fd_s;
 839     if ((errno = vfs_getfd(fd, &fd_s))) {
 840         goto done;
 841     }
 842
 843     struct v_file* file = fd_s->file;
 844     if (check_directory_node(file->inode)) {
 845         errno = EISDIR;
 846         goto done;
 847     }
 848
 849     if (!check_allow_read(file->inode)) {
 850         errno = EPERM;
 851         goto done;
 852     }
 853
 854     lock_inode(file->inode);
 855
 856     file->inode->atime = clock_unixtime();
 857
 858     if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) {
 859         errno = file->ops->read(file->inode, buf, count, file->f_pos);
 860     } else {
 861         errno = pcache_read(file->inode, buf, count, file->f_pos);
 862     }
 863
 864     if (errno > 0) {
 865         file->f_pos += errno;
 866         unlock_inode(file->inode);
 867         return errno;
 868     }
 869
 870     unlock_inode(file->inode);
 871
 872 done:
 873     return DO_STATUS(errno);
 874 }
 875
 876 __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
 877 {
 878     int errno = 0;
 879     struct v_fd* fd_s;
 880     if ((errno = vfs_getfd(fd, &fd_s))) {
 881         goto done;
 882     }
 883
 884     struct v_inode* inode;
 885     struct v_file* file = fd_s->file;
 886
 887     if ((errno = vfs_check_writable(file->dnode))) {
 888         goto done;
 889     }
 890
 891     if (check_directory_node(file->inode)) {
 892         errno = EISDIR;
 893         goto done;
 894     }
 895
 896     inode = file->inode;
 897     lock_inode(inode);
 898
 899     inode->mtime = clock_unixtime();
 900     if ((fd_s->flags & O_APPEND)) {
 901         file->f_pos = inode->fsize;
 902     }
 903
 904     if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
 905         errno = file->ops->write(inode, buf, count, file->f_pos);
 906     } else {
 907         errno = pcache_write(inode, buf, count, file->f_pos);
 908     }
 909
 910     if (errno > 0) {
 911         file->f_pos += errno;
 912         inode->fsize = MAX(inode->fsize, file->f_pos);
 913
 914         unlock_inode(inode);
 915         return errno;
 916     }
 917
 918     unlock_inode(inode);
 919
 920 done:
 921     return DO_STATUS(errno);
 922 }
 923
 924 __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
 925 {
 926     int errno = 0;
 927     struct v_fd* fd_s;
 928     if ((errno = vfs_getfd(fd, &fd_s))) {
 929         goto done;
 930     }
 931
 932     struct v_file* file = fd_s->file;
 933     struct v_inode* inode = file->inode;
 934
 935     if (!file->ops->seek) {
 936         errno = ENOTSUP;
 937         goto done;
 938     }
 939
 940     if (!check_allow_read(inode)) {
 941         errno = EPERM;
 942         goto done;
 943     }
 944
 945     lock_inode(inode);
 946
 947     int overflow = 0;
 948     int fpos = file->f_pos;
 949
 950     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 951         options = (options != FSEEK_END) ? options : FSEEK_SET;
 952     }
 953
 954     switch (options) {
 955         case FSEEK_CUR:
 956             overflow = sadd_of((int)file->f_pos, offset, &fpos);
 957             break;
 958         case FSEEK_END:
 959             overflow = sadd_of((int)inode->fsize, offset, &fpos);
 960             break;
 961         case FSEEK_SET:
 962             fpos = offset;
 963             break;
 964     }
 965
 966     if (overflow) {
 967         errno = EOVERFLOW;
 968     }
 969     else {
 970         errno = file->ops->seek(file, fpos);
 971     }
 972
 973     unlock_inode(inode);
 974
 975 done:
 976     return DO_STATUS(errno);
 977 }
 978
 979 int
 980 vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
 981 {
 982     if (!dnode) {
 983         return 0;
 984     }
 985
 986     if (depth > 64) {
 987         return ENAMETOOLONG;
 988     }
 989
 990     size_t len = 0;
 991
 992     if (dnode->parent != dnode) {
 993         len = vfs_get_path(dnode->parent, buf, size, depth + 1);
 994     }
 995
 996     if (len >= size) {
 997         return len;
 998     }
 999
1000     if (!len || buf[len - 1] != VFS_PATH_DELIM) {
1001         buf[len++] = VFS_PATH_DELIM;
1002     }
1003
1004     size_t cpy_size = MIN(dnode->name.len, size - len);
1005     strncpy(buf + len, dnode->name.value, cpy_size);
1006     len += cpy_size;
1007
1008     return len;
1009 }
1010
1011 int
1012 vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
1013 {
1014     const char* link;
1015     struct v_inode* inode = dnode->inode;
1016
1017     if (!check_symlink_node(inode)) {
1018         return EINVAL;
1019     }
1020
1021     if (!inode->ops->read_symlink) {
1022         return ENOTSUP;
1023     }
1024
1025     if (!check_allow_read(inode)) {
1026         return EPERM;
1027     }
1028
1029     lock_inode(inode);
1030
1031     int errno = inode->ops->read_symlink(inode, &link);
1032     if (errno >= 0) {
1033         strncpy(buf, link, MIN(size, (size_t)errno));
1034     }
1035
1036     unlock_inode(inode);
1037     return errno;
1038 }
1039
1040 int
1041 vfs_get_dtype(int itype)
1042 {
1043     int dtype = DT_FILE;
1044     if (check_itype(itype, VFS_IFSYMLINK)) {
1045         dtype |= DT_SYMLINK;
1046     }
1047
1048     if (check_itype(itype, VFS_IFDIR)) {
1049         dtype |= DT_DIR;
1050         return dtype;
1051     }
1052
1053     // TODO other types
1054
1055     return dtype;
1056 }
1057
1058 __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
1059 {
1060     int errno;
1061     struct v_fd* fd_s;
1062     if ((errno = vfs_getfd(fd, &fd_s))) {
1063         goto done;
1064     }
1065
1066     struct v_dnode* dnode;
1067     errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
1068
1069     if (errno >= 0) {
1070         return errno;
1071     }
1072
1073 done:
1074     return DO_STATUS(errno);
1075 }
1076
1077 __DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
1078 {
1079     int errno;
1080     struct v_dnode* dnode;
1081     if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1082         errno = vfs_readlink(dnode, buf, size);
1083     }
1084
1085     if (errno >= 0) {
1086         return errno;
1087     }
1088
1089     return DO_STATUS(errno);
1090 }
1091
1092 __DEFINE_LXSYSCALL4(
1093   int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1094 {
1095     int errno;
1096     struct v_fd* fd_s;
1097     if ((errno = vfs_getfd(dirfd, &fd_s))) {
1098         goto done;
1099     }
1100
1101     pathname = pathname ? pathname : "";
1102
1103     struct v_dnode* dnode;
1104     if (!(errno = vfs_walk(
1105             fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1106         errno = vfs_readlink(fd_s->file->dnode, buf, size);
1107     }
1108
1109     if (errno >= 0) {
1110         return errno;
1111     }
1112
1113 done:
1114     return DO_STATUS(errno);
1115 }
1116
/*
    NOTE
    When we perform an operation that could affect the layout of a
    directory (i.e., rename, mkdir, rmdir), we must lock the parent dir
    whenever possible. This blocks any ongoing path walk from reaching
    it, and hence avoids exposing any partial state.
*/
1124
1125 __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
1126 {
1127     int errno;
1128     struct v_dnode* dnode;
1129     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1130         return DO_STATUS(errno);
1131     }
1132
1133     lock_dnode(dnode);
1134
1135     if ((errno = vfs_check_writable(dnode))) {
1136         goto done;
1137     }
1138
1139     if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
1140         errno = EROFS;
1141         goto done;
1142     }
1143
1144     if (dnode->ref_count > 1 || dnode->inode->open_count) {
1145         errno = EBUSY;
1146         goto done;
1147     }
1148
1149     if (!llist_empty(&dnode->children)) {
1150         errno = ENOTEMPTY;
1151         goto done;
1152     }
1153
1154     struct v_dnode* parent = dnode->parent;
1155
1156     if (!parent) {
1157         errno = EINVAL;
1158         goto done;
1159     }
1160
1161     lock_dnode(parent);
1162     lock_inode(parent->inode);
1163
1164     if (check_directory_node(dnode->inode)) {
1165         errno = parent->inode->ops->rmdir(parent->inode, dnode);
1166         if (!errno) {
1167             vfs_dcache_remove(dnode);
1168         }
1169     } else {
1170         errno = ENOTDIR;
1171     }
1172
1173     unlock_inode(parent->inode);
1174     unlock_dnode(parent);
1175
1176 done:
1177     unlock_dnode(dnode);
1178     return DO_STATUS(errno);
1179 }
1180
1181 __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
1182 {
1183     int errno = 0;
1184     struct v_dnode *parent, *dir;
1185     char name_value[VFS_NAME_MAXLEN];
1186     struct hstr name = HHSTR(name_value, 0, 0);
1187
1188     if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
1189         goto done;
1190     }
1191
1192     if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {
1193         errno = EEXIST;
1194         goto done;
1195     }
1196
1197     if ((errno = vfs_check_writable(parent))) {
1198         goto done;
1199     }
1200
1201     if (!(dir = vfs_d_alloc(parent, &name))) {
1202         errno = ENOMEM;
1203         goto done;
1204     }
1205
1206     struct v_inode* inode = parent->inode;
1207
1208     lock_dnode(parent);
1209     lock_inode(inode);
1210
1211     if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
1212         errno = ENOTSUP;
1213     } else if (!inode->ops->mkdir) {
1214         errno = ENOTSUP;
1215     } else if (!check_directory_node(inode)) {
1216         errno = ENOTDIR;
1217     } else if (!(errno = inode->ops->mkdir(inode, dir))) {
1218         vfs_dcache_add(parent, dir);
1219         goto cleanup;
1220     }
1221
1222     vfs_d_free(dir);
1223
1224 cleanup:
1225     unlock_inode(inode);
1226     unlock_dnode(parent);
1227 done:
1228     return DO_STATUS(errno);
1229 }
1230
1231 int
1232 __vfs_do_unlink(struct v_dnode* dnode)
1233 {
1234     int errno;
1235     struct v_inode* inode = dnode->inode;
1236
1237     if (dnode->ref_count > 1) {
1238         return EBUSY;
1239     }
1240
1241     if ((errno = vfs_check_writable(dnode))) {
1242         return errno;
1243     }
1244
1245     lock_inode(inode);
1246
1247     if (inode->open_count) {
1248         errno = EBUSY;
1249     } else if (!check_directory_node(inode)) {
1250         errno = inode->ops->unlink(inode, dnode);
1251         if (!errno) {
1252             vfs_d_free(dnode);
1253         }
1254     } else {
1255         errno = EISDIR;
1256     }
1257
1258     unlock_inode(inode);
1259
1260     return errno;
1261 }
1262
1263 __DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
1264 {
1265     int errno;
1266     struct v_dnode* dnode;
1267     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1268         goto done;
1269     }
1270
1271     errno = __vfs_do_unlink(dnode);
1272
1273 done:
1274     return DO_STATUS(errno);
1275 }
1276
1277 __DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
1278 {
1279     int errno;
1280     struct v_fd* fd_s;
1281     if ((errno = vfs_getfd(fd, &fd_s))) {
1282         goto done;
1283     }
1284
1285     struct v_dnode* dnode;
1286     if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
1287         errno = __vfs_do_unlink(dnode);
1288     }
1289
1290 done:
1291     return DO_STATUS(errno);
1292 }
1293
1294 __DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
1295 {
1296     int errno;
1297     struct file_locator floc;
1298     struct v_dnode *to_link, *name_file;
1299
1300     errno = __vfs_try_locate_file(oldpath, &floc, 0);
1301     if (errno) {
1302         goto done;
1303     }
1304
1305     __floc_try_unlock(&floc);
1306
1307     to_link = floc.file;
1308     errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
1309     if (!errno) {
1310         goto done;
1311     }
1312
1313     name_file = floc.file;
1314     errno = vfs_link(to_link, name_file);
1315     if (errno) {
1316         vfs_d_free(name_file);
1317     }
1318
1319 done:
1320     __floc_try_unlock(&floc);
1321     return DO_STATUS(errno);
1322 }
1323
1324 __DEFINE_LXSYSCALL1(int, fsync, int, fildes)
1325 {
1326     int errno;
1327     struct v_fd* fd_s;
1328
1329     if (!(errno = vfs_getfd(fildes, &fd_s))) {
1330         errno = vfs_fsync(fd_s->file);
1331     }
1332
1333     return DO_STATUS(errno);
1334 }
1335
1336 int
1337 vfs_dup_fd(struct v_fd* old, struct v_fd** new)
1338 {
1339     int errno = 0;
1340     struct v_fd* copied = cake_grab(fd_pile);
1341
1342     memcpy(copied, old, sizeof(struct v_fd));
1343
1344     vfs_ref_file(old->file);
1345
1346     *new = copied;
1347
1348     return errno;
1349 }
1350
1351 int
1352 vfs_dup2(int oldfd, int newfd)
1353 {
1354     if (newfd == oldfd) {
1355         return newfd;
1356     }
1357
1358     int errno;
1359     struct v_fd *oldfd_s, *newfd_s;
1360     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1361         goto done;
1362     }
1363
1364     if (!TEST_FD(newfd)) {
1365         errno = EBADF;
1366         goto done;
1367     }
1368
1369     newfd_s = __current->fdtable->fds[newfd];
1370     if (newfd_s && (errno = vfs_close(newfd_s->file))) {
1371         goto done;
1372     }
1373
1374     if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1375         __current->fdtable->fds[newfd] = newfd_s;
1376         return newfd;
1377     }
1378
1379 done:
1380     return DO_STATUS(errno);
1381 }
1382
1383 __DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
1384 {
1385     return vfs_dup2(oldfd, newfd);
1386 }
1387
1388 __DEFINE_LXSYSCALL1(int, dup, int, oldfd)
1389 {
1390     int errno, newfd;
1391     struct v_fd *oldfd_s, *newfd_s;
1392     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1393         goto done;
1394     }
1395
1396     if (!(errno = vfs_alloc_fdslot(&newfd)) &&
1397         !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1398         __current->fdtable->fds[newfd] = newfd_s;
1399         return newfd;
1400     }
1401
1402 done:
1403     return DO_STATUS(errno);
1404 }
1405
1406 __DEFINE_LXSYSCALL2(
1407   int, symlink, const char*, pathname, const char*, link_target)
1408 {
1409     int errno;
1410     struct file_locator floc;
1411     struct v_dnode *file;
1412     struct v_inode *f_ino;
1413
1414     errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
1415     if (errno) {
1416         goto done;
1417     }
1418
1419     file = floc.file;
1420     errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
1421     if (errno) {
1422         vfs_d_free(file);
1423         goto done;
1424     }
1425
1426     f_ino = file->inode;
1427
1428     assert(f_ino);
1429
1430     errno = vfs_check_writable(file);
1431     if (errno) {
1432         goto done;
1433     }
1434
1435     if (!f_ino->ops->set_symlink) {
1436         errno = ENOTSUP;
1437         goto done;
1438     }
1439
1440     lock_inode(f_ino);
1441
1442     errno = f_ino->ops->set_symlink(f_ino, link_target);
1443
1444     unlock_inode(f_ino);
1445
1446 done:
1447     __floc_try_unlock(&floc);
1448     return DO_STATUS(errno);
1449 }
1450
1451 int
1452 vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
1453 {
1454     int errno = 0;
1455
1456     lock_dnode(dnode);
1457
1458     if (!check_directory_node(dnode->inode)) {
1459         errno = ENOTDIR;
1460         goto done;
1461     }
1462
1463     if (proc->cwd) {
1464         vfs_unref_dnode(proc->cwd);
1465     }
1466
1467     vfs_ref_dnode(dnode);
1468     proc->cwd = dnode;
1469
1470     unlock_dnode(dnode);
1471
1472 done:
1473     return errno;
1474 }
1475
1476 __DEFINE_LXSYSCALL1(int, chdir, const char*, path)
1477 {
1478     struct v_dnode* dnode;
1479     int errno = 0;
1480
1481     if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1482         goto done;
1483     }
1484
1485     errno = vfs_do_chdir((struct proc_info*)__current, dnode);
1486
1487 done:
1488     return DO_STATUS(errno);
1489 }
1490
1491 __DEFINE_LXSYSCALL1(int, fchdir, int, fd)
1492 {
1493     struct v_fd* fd_s;
1494     int errno = 0;
1495
1496     if ((errno = vfs_getfd(fd, &fd_s))) {
1497         goto done;
1498     }
1499
1500     errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);
1501
1502 done:
1503     return DO_STATUS(errno);
1504 }
1505
1506 __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
1507 {
1508     int errno = 0;
1509     char* ret_ptr = 0;
1510     if (size < 2) {
1511         errno = ERANGE;
1512         goto done;
1513     }
1514
1515     size_t len = 0;
1516
1517     if (!__current->cwd) {
1518         *buf = VFS_PATH_DELIM;
1519         len = 1;
1520     } else {
1521         len = vfs_get_path(__current->cwd, buf, size, 0);
1522         if (len == size) {
1523             errno = ERANGE;
1524             goto done;
1525         }
1526     }
1527
1528     buf[len] = '\0';
1529
1530     ret_ptr = buf;
1531
1532 done:
1533     syscall_result(errno);
1534     return ret_ptr;
1535 }
1536
1537 int
1538 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1539 {
1540     int errno = 0;
1541     if (current->inode->id == target->inode->id) {
1542         // hard link
1543         return 0;
1544     }
1545
1546     if ((errno = vfs_check_writable(current))) {
1547         return errno;
1548     }
1549
1550     if (current->ref_count > 1 || target->ref_count > 1) {
1551         return EBUSY;
1552     }
1553
1554     if (current->super_block != target->super_block) {
1555         return EXDEV;
1556     }
1557
1558     struct v_dnode* oldparent = current->parent;
1559     struct v_dnode* newparent = target->parent;
1560
1561     lock_dnode(current);
1562     lock_dnode(target);
1563     if (oldparent)
1564         lock_dnode(oldparent);
1565     if (newparent)
1566         lock_dnode(newparent);
1567
1568     if (!llist_empty(&target->children)) {
1569         errno = ENOTEMPTY;
1570         unlock_dnode(target);
1571         goto cleanup;
1572     }
1573
1574     if ((errno =
1575            current->inode->ops->rename(current->inode, current, target))) {
1576         unlock_dnode(target);
1577         goto cleanup;
1578     }
1579
1580     // re-position current
1581     hstrcpy(&current->name, &target->name);
1582     vfs_dcache_rehash(newparent, current);
1583
1584     // detach target
1585     vfs_d_free(target);
1586
1587     unlock_dnode(target);
1588
1589 cleanup:
1590     unlock_dnode(current);
1591     if (oldparent)
1592         unlock_dnode(oldparent);
1593     if (newparent)
1594         unlock_dnode(newparent);
1595
1596     return errno;
1597 }
1598
1599 __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
1600 {
1601     struct v_dnode *cur, *target_parent, *target;
1602     struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);
1603     int errno = 0;
1604
1605     if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {
1606         goto done;
1607     }
1608
1609     if ((errno = vfs_walk(
1610            __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {
1611         goto done;
1612     }
1613
1614     errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
1615     if (errno == ENOENT) {
1616         target = vfs_d_alloc(target_parent, &name);
1617         vfs_dcache_add(target_parent, target);
1618     } else if (errno) {
1619         goto done;
1620     }
1621
1622     if (!target) {
1623         errno = ENOMEM;
1624         goto done;
1625     }
1626
1627     errno = vfs_do_rename(cur, target);
1628
1629 done:
1630     vfree((void*)name.value);
1631     return DO_STATUS(errno);
1632 }
1633
1634 __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
1635 {
1636     int errno = 0;
1637     struct v_fd* fds;
1638
1639     if ((errno = vfs_getfd(fd, &fds))) {
1640         goto done;
1641     }
1642
1643     struct v_inode* vino = fds->file->inode;
1644     struct device* fdev = vino->sb->dev;
1645
1646     *stat = (struct file_stat){.st_ino = vino->id,
1647                                .st_blocks = vino->lb_usage,
1648                                .st_size = vino->fsize,
1649                                .mode = vino->itype,
1650                                .st_ioblksize = PAGE_SIZE,
1651                                .st_blksize = vino->sb->blksize};
1652
1653     if (check_device_node(vino)) {
1654         struct device* rdev = resolve_device(vino->data);
1655         if (!rdev) {
1656             errno = EINVAL;
1657             goto done;
1658         }
1659
1660         stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
1661                                 .unique = rdev->ident.unique,
1662                                 .index = dev_uid(rdev) };
1663     }
1664
1665     if (fdev) {
1666         stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
1667                                .unique = fdev->ident.unique,
1668                                .index = dev_uid(fdev) };
1669     }
1670
1671 done:
1672     return DO_STATUS(errno);
1673 }