lunaix-os/kernel/fs/vfs.c

   1 /**
   2  * @file vfs.c
   3  * @author Lunaixsky (zelong56@gmail.com)
   4  * @brief Lunaix virtual file system - an abstraction layer for all file system.
   5  * @version 0.1
   6  * @date 2022-07-24
   7  *
   8  * @copyright Copyright (c) 2022
   9  *
  10  */
  11
  12 // Welcome to The Mountain O'Shit! :)
  13
  14 /*
  15  TODO vfs & device todos checklist
  16
  17     It is overseen by Twilight Sparkle ;)
  18
  19  1. Get inodes hooked into lru (CHECKED)
  20  2. Get dnodes hooked into lru (CHECKED)
  21  3. Get inodes properly hashed so they can be reused by underlying fs (CHECKED)
  22  4. (lru) Add a callback function (or destructor) for eviction. (CHECKED)
  23         [good idea] or a constructor/destructor pattern in cake allocator ?
  24  5. (mount) Figure out a way to identify a busy mount point before unmount
  25             maybe a unified mount_point structure that maintains a reference
  26             counter on any dnodes within the subtree? Such a counter will only
  27             increment if a file is opened or a dnode is being used as working
  28             directory, and decrement conversely. (CHECKED)
  29  6. (mount) Ability to track all mount points (including sub-mounts)
  30             so we can be confident to clean up everything when we
  31             unmount. (CHECKED)
  32  7. (mount) Figure out a way to acquire the device represented by a dnode.
  33             so it can be used to mount. (e.g. we wish to get `struct device*`
  34             out of the dnode at /dev/sda)
  35             [tip] we should pay attention at twifs and add a private_data field
  36             under struct v_dnode? (CHECKED)
  37  8. (mount) Then, we should refactor on mount/unmount mechanism. (CHECKED)
  38  9. (mount) (future) Ability to mount any thing? e.g. Linux can mount a disk
  39                     image file using a so called "loopback" pseudo device. Maybe
  40                     we can do similar thing in Lunaix? A block device emulation
  41                     above the regular file when we mount it on.
  42  10. (device) device number (dev_t) allocation
  43             [good idea] <class>:<subclass>:<uniq_id> composition (CHECKED)
  44 */
  45
  46 #include <klibc/string.h>
  47 #include <lunaix/foptions.h>
  48 #include <lunaix/fs.h>
  49 #include <lunaix/mm/cake.h>
  50 #include <lunaix/mm/valloc.h>
  51 #include <lunaix/process.h>
  52 #include <lunaix/spike.h>
  53 #include <lunaix/syscall.h>
  54 #include <lunaix/syscall_utils.h>
  55
  56 #include <lunaix/fs/twifs.h>
  57
  58 #include <usr/lunaix/dirent_defs.h>
  59
  60 static struct cake_pile* dnode_pile;
  61 static struct cake_pile* inode_pile;
  62 static struct cake_pile* file_pile;
  63 static struct cake_pile* superblock_pile;
  64 static struct cake_pile* fd_pile;
  65
  66 struct v_dnode* vfs_sysroot;
  67
  68 struct lru_zone *dnode_lru, *inode_lru;
  69
  70 struct hstr vfs_ddot = HSTR("..", 2);
  71 struct hstr vfs_dot = HSTR(".", 1);
  72 struct hstr vfs_empty = HSTR("", 0);
  73
  74 static int
  75 __vfs_try_evict_dnode(struct lru_node* obj);
  76
  77 static int
  78 __vfs_try_evict_inode(struct lru_node* obj);
  79
  80 void
  81 vfs_init()
  82 {
  83     // 为他们专门创建一个蛋糕堆，而不使用valloc，这样我们可以最小化内碎片的产生
  84     dnode_pile = cake_new_pile("dnode_cache", sizeof(struct v_dnode), 1, 0);
  85     inode_pile = cake_new_pile("inode_cache", sizeof(struct v_inode), 1, 0);
  86     file_pile = cake_new_pile("file_cache", sizeof(struct v_file), 1, 0);
  87     fd_pile = cake_new_pile("fd_cache", sizeof(struct v_fd), 1, 0);
  88     superblock_pile =
  89       cake_new_pile("sb_cache", sizeof(struct v_superblock), 1, 0);
  90
  91     dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
  92     inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
  93
  94     hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
  95     hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
  96
  97     // 创建一个根dnode。
  98     vfs_sysroot = vfs_d_alloc(NULL, &vfs_empty);
  99     vfs_sysroot->parent = vfs_sysroot;
 100
 101     vfs_ref_dnode(vfs_sysroot);
 102 }
 103
 104 static inline struct hbucket*
 105 __dcache_hash(struct v_dnode* parent, u32_t* hash)
 106 {
 107     struct hbucket* d_cache;
 108     u32_t _hash;
 109
 110     d_cache = parent->super_block->d_cache;
 111     _hash = *hash;
 112     _hash = _hash ^ (_hash >> VFS_HASHBITS);
 113     _hash += (u32_t)__ptr(parent);
 114
 115     *hash = _hash;
 116     return &d_cache[_hash & VFS_HASH_MASK];
 117 }
 118
 119 static inline int
 120 __sync_inode_nolock(struct v_inode* inode)
 121 {
 122     pcache_commit_all(inode);
 123
 124     int errno = ENOTSUP;
 125     if (inode->ops->sync) {
 126         errno = inode->ops->sync(inode);
 127     }
 128
 129     return errno;
 130 }
 131
 132 struct v_dnode*
 133 vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
 134 {
 135     if (!str->len || HSTR_EQ(str, &vfs_dot))
 136         return parent;
 137
 138     if (HSTR_EQ(str, &vfs_ddot)) {
 139         return parent->parent;
 140     }
 141
 142     u32_t hash = str->hash;
 143     struct hbucket* slot = __dcache_hash(parent, &hash);
 144
 145     struct v_dnode *pos, *n;
 146     hashtable_bucket_foreach(slot, pos, n, hash_list)
 147     {
 148         if (pos->name.hash == hash && pos->parent == parent) {
 149             return pos;
 150         }
 151     }
 152     return NULL;
 153 }
 154
 155 void
 156 vfs_dcache_add(struct v_dnode* parent, struct v_dnode* dnode)
 157 {
 158     assert(parent);
 159
 160     dnode->ref_count = 1;
 161     dnode->parent = parent;
 162     llist_append(&parent->children, &dnode->siblings);
 163
 164     struct hbucket* bucket = __dcache_hash(parent, &dnode->name.hash);
 165     hlist_add(&bucket->head, &dnode->hash_list);
 166 }
 167
 168 void
 169 vfs_dcache_remove(struct v_dnode* dnode)
 170 {
 171     assert(dnode);
 172     assert(dnode->ref_count == 1);
 173
 174     llist_delete(&dnode->siblings);
 175     llist_delete(&dnode->aka_list);
 176     hlist_delete(&dnode->hash_list);
 177
 178     dnode->parent = NULL;
 179     dnode->ref_count = 0;
 180 }
 181
 182 void
 183 vfs_dcache_rehash(struct v_dnode* new_parent, struct v_dnode* dnode)
 184 {
 185     assert(new_parent);
 186
 187     hstr_rehash(&dnode->name, HSTR_FULL_HASH);
 188     vfs_dcache_remove(dnode);
 189     vfs_dcache_add(new_parent, dnode);
 190 }
 191
 192 int
 193 vfs_open(struct v_dnode* dnode, struct v_file** file)
 194 {
 195     if (!dnode->inode || !dnode->inode->ops->open) {
 196         return ENOTSUP;
 197     }
 198
 199     struct v_inode* inode = dnode->inode;
 200
 201     lock_inode(inode);
 202
 203     struct v_file* vfile = cake_grab(file_pile);
 204     memset(vfile, 0, sizeof(*vfile));
 205
 206     vfile->dnode = dnode;
 207     vfile->inode = inode;
 208     vfile->ref_count = 1;
 209     vfile->ops = inode->default_fops;
 210
 211     if (check_regfile_node(inode) && !inode->pg_cache) {
 212         struct pcache* pcache = vzalloc(sizeof(struct pcache));
 213         pcache_init(pcache);
 214         pcache->master = inode;
 215         inode->pg_cache = pcache;
 216     }
 217
 218     int errno = inode->ops->open(inode, vfile);
 219     if (errno) {
 220         cake_release(file_pile, vfile);
 221     } else {
 222         vfs_ref_dnode(dnode);
 223         inode->open_count++;
 224
 225         *file = vfile;
 226     }
 227
 228     unlock_inode(inode);
 229
 230     return errno;
 231 }
 232
 233 void
 234 vfs_assign_inode(struct v_dnode* assign_to, struct v_inode* inode)
 235 {
 236     if (assign_to->inode) {
 237         llist_delete(&assign_to->aka_list);
 238         assign_to->inode->link_count--;
 239     }
 240
 241     llist_append(&inode->aka_dnodes, &assign_to->aka_list);
 242     assign_to->inode = inode;
 243     inode->link_count++;
 244 }
 245
 246 int
 247 vfs_link(struct v_dnode* to_link, struct v_dnode* name)
 248 {
 249     int errno;
 250
 251     if ((errno = vfs_check_writable(to_link))) {
 252         return errno;
 253     }
 254
 255     lock_inode(to_link->inode);
 256     if (to_link->super_block->root != name->super_block->root) {
 257         errno = EXDEV;
 258     } else if (!to_link->inode->ops->link) {
 259         errno = ENOTSUP;
 260     } else if (!(errno = to_link->inode->ops->link(to_link->inode, name))) {
 261         vfs_assign_inode(name, to_link->inode);
 262     }
 263     unlock_inode(to_link->inode);
 264
 265     return errno;
 266 }
 267
 268 int
 269 vfs_pclose(struct v_file* file, pid_t pid)
 270 {
 271     struct v_inode* inode;
 272     int errno = 0;
 273
 274     inode = file->inode;
 275
 276     /*
 277      * Prevent dead lock.
 278      * This happened when process is terminated while blocking on read.
 279      * In that case, the process is still holding the inode lock and it
 280          will never get released.
 281      * The unlocking should also include ownership check.
 282      *
 283      * To see why, consider two process both open the same file both with
 284      * fd=x.
 285      *      Process A: busy on reading x
 286      *      Process B: do nothing with x
 287      * Assuming that, after a very short time, process B get terminated
 288      * while process A is still busy in it's reading business. By this
 289      * design, the inode lock of this file x is get released by B rather
 290      * than A. And this will cause a probable race condition on A if other
 291      * process is writing to this file later after B exit.
 292     */
 293
 294     mutex_unlock_for(&inode->lock, pid);
 295
 296     if (vfs_check_duped_file(file)) {
 297         vfs_unref_file(file);
 298         return 0;
 299     }
 300
 301     if ((errno = file->ops->close(file))) {
 302         goto done;
 303     }
 304
 305     vfs_unref_dnode(file->dnode);
 306     cake_release(file_pile, file);
 307
 308     /*
 309         if the current inode is not being locked by other
 310         threads that does not share same open context,
 311         then we can try to do sync opportunistically
 312     */
 313     if (mutex_on_hold(&inode->lock)) {
 314         goto done;
 315     }
 316
 317     lock_inode(inode);
 318
 319     pcache_commit_all(inode);
 320     inode->open_count--;
 321
 322     if (!inode->open_count) {
 323         __sync_inode_nolock(inode);
 324     }
 325
 326     unlock_inode(inode);
 327
 328 done:
 329     return errno;
 330 }
 331
 332 int
 333 vfs_close(struct v_file* file)
 334 {
 335     return vfs_pclose(file, __current->pid);
 336 }
 337
 338 void
 339 vfs_free_fd(struct v_fd* fd)
 340 {
 341     cake_release(fd_pile, fd);
 342 }
 343
 344 int
 345 vfs_isync(struct v_inode* inode)
 346 {
 347     lock_inode(inode);
 348
 349     int errno = __sync_inode_nolock(inode);
 350
 351     unlock_inode(inode);
 352
 353     return errno;
 354 }
 355
 356 int
 357 vfs_fsync(struct v_file* file)
 358 {
 359     int errno;
 360     if ((errno = vfs_check_writable(file->dnode))) {
 361         return errno;
 362     }
 363
 364     return vfs_isync(file->inode);
 365 }
 366
 367 int
 368 vfs_alloc_fdslot(int* fd)
 369 {
 370     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 371         if (!__current->fdtable->fds[i]) {
 372             *fd = i;
 373             return 0;
 374         }
 375     }
 376     return EMFILE;
 377 }
 378
 379 struct v_superblock*
 380 vfs_sb_alloc()
 381 {
 382     struct v_superblock* sb = cake_grab(superblock_pile);
 383     memset(sb, 0, sizeof(*sb));
 384     llist_init_head(&sb->sb_list);
 385
 386     sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 387     sb->d_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
 388
 389     sb->ref_count = 1;
 390     return sb;
 391 }
 392
 393 void
 394 vfs_sb_ref(struct v_superblock* sb)
 395 {
 396     sb->ref_count++;
 397 }
 398
 399 void
 400 vfs_sb_unref(struct v_superblock* sb)
 401 {
 402     assert(sb->ref_count);
 403
 404     sb->ref_count--;
 405     if (likely(sb->ref_count)) {
 406         return;
 407     }
 408
 409     if (sb->ops.release) {
 410         sb->ops.release(sb);
 411     }
 412
 413     vfree(sb->i_cache);
 414     vfree(sb->d_cache);
 415
 416     cake_release(superblock_pile, sb);
 417 }
 418
 419 static int
 420 __vfs_try_evict_dnode(struct lru_node* obj)
 421 {
 422     struct v_dnode* dnode = container_of(obj, struct v_dnode, lru);
 423
 424     if (!dnode->ref_count) {
 425         vfs_d_free(dnode);
 426         return 1;
 427     }
 428     return 0;
 429 }
 430
 431 static int
 432 __vfs_try_evict_inode(struct lru_node* obj)
 433 {
 434     struct v_inode* inode = container_of(obj, struct v_inode, lru);
 435
 436     if (!inode->link_count && !inode->open_count) {
 437         vfs_i_free(inode);
 438         return 1;
 439     }
 440     return 0;
 441 }
 442
 443 struct v_dnode*
 444 vfs_d_alloc(struct v_dnode* parent, struct hstr* name)
 445 {
 446     struct v_dnode* dnode = cake_grab(dnode_pile);
 447     if (!dnode) {
 448         lru_evict_half(dnode_lru);
 449
 450         if (!(dnode = cake_grab(dnode_pile))) {
 451             return NULL;
 452         }
 453     }
 454
 455     memset(dnode, 0, sizeof(*dnode));
 456     llist_init_head(&dnode->children);
 457     llist_init_head(&dnode->siblings);
 458     llist_init_head(&dnode->aka_list);
 459     mutex_init(&dnode->lock);
 460
 461     dnode->name = HHSTR(vzalloc(VFS_NAME_MAXLEN), 0, 0);
 462
 463     hstrcpy(&dnode->name, name);
 464
 465     if (parent) {
 466         vfs_d_assign_sb(dnode, parent->super_block);
 467         dnode->mnt = parent->mnt;
 468     }
 469
 470     lru_use_one(dnode_lru, &dnode->lru);
 471
 472     return dnode;
 473 }
 474
 475 void
 476 vfs_d_free(struct v_dnode* dnode)
 477 {
 478     assert(dnode->ref_count == 1);
 479
 480     if (dnode->inode) {
 481         assert(dnode->inode->link_count > 0);
 482         dnode->inode->link_count--;
 483     }
 484
 485     vfs_dcache_remove(dnode);
 486     // Make sure the children de-referencing their parent.
 487     // With lru presented, the eviction will be propagated over the entire
 488     // detached subtree eventually
 489     struct v_dnode *pos, *n;
 490     llist_for_each(pos, n, &dnode->children, siblings)
 491     {
 492         vfs_dcache_remove(pos);
 493     }
 494
 495     if (dnode->destruct) {
 496         dnode->destruct(dnode);
 497     }
 498
 499     vfs_sb_unref(dnode->super_block);
 500     vfree((void*)dnode->name.value);
 501     cake_release(dnode_pile, dnode);
 502 }
 503
 504 struct v_inode*
 505 vfs_i_find(struct v_superblock* sb, u32_t i_id)
 506 {
 507     struct hbucket* slot = &sb->i_cache[i_id & VFS_HASH_MASK];
 508     struct v_inode *pos, *n;
 509     hashtable_bucket_foreach(slot, pos, n, hash_list)
 510     {
 511         if (pos->id == i_id) {
 512             lru_use_one(inode_lru, &pos->lru);
 513             return pos;
 514         }
 515     }
 516
 517     return NULL;
 518 }
 519
 520 void
 521 vfs_i_addhash(struct v_inode* inode)
 522 {
 523     struct hbucket* slot = &inode->sb->i_cache[inode->id & VFS_HASH_MASK];
 524
 525     hlist_delete(&inode->hash_list);
 526     hlist_add(&slot->head, &inode->hash_list);
 527 }
 528
 529 struct v_inode*
 530 vfs_i_alloc(struct v_superblock* sb)
 531 {
 532     assert(sb->ops.init_inode);
 533
 534     struct v_inode* inode;
 535     if (!(inode = cake_grab(inode_pile))) {
 536         lru_evict_half(inode_lru);
 537         if (!(inode = cake_grab(inode_pile))) {
 538             return NULL;
 539         }
 540     }
 541
 542     memset(inode, 0, sizeof(*inode));
 543     mutex_init(&inode->lock);
 544     llist_init_head(&inode->xattrs);
 545     llist_init_head(&inode->aka_dnodes);
 546
 547     sb->ops.init_inode(sb, inode);
 548
 549     inode->ctime = clock_unixtime();
 550     inode->atime = inode->ctime;
 551     inode->mtime = inode->ctime;
 552
 553     vfs_i_assign_sb(inode, sb);
 554     lru_use_one(inode_lru, &inode->lru);
 555     return inode;
 556 }
 557
 558 void
 559 vfs_i_free(struct v_inode* inode)
 560 {
 561     if (inode->pg_cache) {
 562         pcache_release(inode->pg_cache);
 563         vfree(inode->pg_cache);
 564     }
 565     // we don't need to sync inode.
 566     // If an inode can be free, then it must be properly closed.
 567     // Hence it must be synced already!
 568     if (inode->destruct) {
 569         inode->destruct(inode);
 570     }
 571
 572     vfs_sb_unref(inode->sb);
 573     hlist_delete(&inode->hash_list);
 574     cake_release(inode_pile, inode);
 575 }
 576
 577 /* ---- System call definition and support ---- */
 578
// File locator option: if the name does not exist, allocate a new dnode
// for it instead of failing.
#define FLOC_MAYBE_MKNAME 1

// File locator option: the name must not exist yet and is to be made;
// locating a name that already exists fails with EEXIST.
#define FLOC_MKNAME 2

// File locator option: do not follow symlinks while walking the path.
#define FLOC_NOFOLLOW 4
 587
 588 int
 589 vfs_getfd(int fd, struct v_fd** fd_s)
 590 {
 591     if (TEST_FD(fd) && (*fd_s = __current->fdtable->fds[fd])) {
 592         return 0;
 593     }
 594     return EBADF;
 595 }
 596
 597 static int
 598 __vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
 599             unsigned int itype, dev_t* dev)
 600 {
 601     int errno;
 602
 603     errno = parent->ops->create(parent, dnode, itype);
 604     if (errno) {
 605         return errno;
 606     }
 607
 608     return 0;
 609 }
 610
/* Result of __vfs_try_locate_file(). */
struct file_locator {
    // dnode of the directory the path's last component lives in
    struct v_dnode* dir;
    // dnode of the located file, or of the freshly allocated one
    struct v_dnode* file;
    // true when `file` was newly allocated by the locator; in that case
    // `dir` is left locked until __floc_try_unlock() is called
    bool fresh;
};
 616
 617 /**
 618  * @brief unlock the file locator (floc) if possible.
 619  *        If the file to be located if not exists, and
 620  *        any FLOC_*MKNAME flag is set, then the parent
 621  *        dnode will be locked until the file has been properly
 622  *        finalised by subsequent logic.
 623  *
 624  * @param floc
 625  */
 626 static inline void
 627 __floc_try_unlock(struct file_locator* floc)
 628 {
 629     if (floc->fresh) {
 630         assert(floc->dir);
 631         unlock_dnode(floc->dir);
 632     }
 633 }
 634
 635 static int
 636 __vfs_try_locate_file(const char* path,
 637                       struct file_locator* floc,
 638                       int options)
 639 {
 640     char name_str[VFS_NAME_MAXLEN];
 641     struct v_dnode *fdir, *file;
 642     struct hstr name = HSTR(name_str, 0);
 643     int errno, woption = 0;
 644
 645     if ((options & FLOC_NOFOLLOW)) {
 646         woption |= VFS_WALK_NOFOLLOW;
 647         options &= ~FLOC_NOFOLLOW;
 648     }
 649
 650     floc->fresh = false;
 651     name_str[0] = 0;
 652     errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
 653     if (errno) {
 654         return errno;
 655     }
 656
 657     errno = vfs_walk(fdir, name.value, &file, NULL, woption);
 658
 659     if (errno && errno != ENOENT) {
 660         goto done;
 661     }
 662
 663     if (!errno) {
 664         if ((options & FLOC_MKNAME)) {
 665             errno = EEXIST;
 666         }
 667         goto done;
 668     }
 669
 670     // errno == ENOENT
 671     if (!options) {
 672         goto done;
 673     }
 674
 675     errno = vfs_check_writable(fdir);
 676     if (errno) {
 677         goto done;
 678     }
 679
 680     floc->fresh = true;
 681
 682     file = vfs_d_alloc(fdir, &name);
 683
 684     if (!file) {
 685         return ENOMEM;
 686     }
 687
 688     lock_dnode(fdir);
 689
 690     vfs_dcache_add(fdir, file);
 691
 692 done:
 693     floc->dir   = fdir;
 694     floc->file  = file;
 695
 696     return errno;
 697 }
 698
 699 int
 700 vfs_do_open(const char* path, int options)
 701 {
 702     int errno, fd, loptions = 0;
 703     struct v_dnode *dentry, *file;
 704     struct v_file* ofile = NULL;
 705     struct file_locator floc;
 706     struct v_inode* inode;
 707
 708     if ((options & FO_CREATE)) {
 709         loptions |= FLOC_MAYBE_MKNAME;
 710     } else if ((options & FO_NOFOLLOW)) {
 711         loptions |= FLOC_NOFOLLOW;
 712     }
 713
 714     errno = __vfs_try_locate_file(path, &floc, loptions);
 715
 716     if (errno || (errno = vfs_alloc_fdslot(&fd))) {
 717         return errno;
 718     }
 719
 720     file   = floc.file;
 721     dentry = floc.dir;
 722
 723     if (floc.fresh) {
 724         errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
 725         if (errno) {
 726             vfs_d_free(file);
 727             __floc_try_unlock(&floc);
 728             return errno;
 729         }
 730
 731         __floc_try_unlock(&floc);
 732     }
 733
 734
 735     if ((errno = vfs_open(file, &ofile))) {
 736         return errno;
 737     }
 738
 739     inode = ofile->inode;
 740     lock_inode(inode);
 741
 742     struct v_fd* fd_s = cake_grab(fd_pile);
 743     memset(fd_s, 0, sizeof(*fd_s));
 744
 745     if ((options & O_TRUNC)) {
 746         file->inode->fsize = 0;
 747     }
 748
 749     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 750         ofile->f_pos = 0;
 751     }
 752
 753     fd_s->file = ofile;
 754     fd_s->flags = options;
 755     __current->fdtable->fds[fd] = fd_s;
 756
 757     unlock_inode(inode);
 758
 759     return fd;
 760 }
 761
 762 __DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
 763 {
 764     int errno = vfs_do_open(path, options);
 765     return DO_STATUS_OR_RETURN(errno);
 766 }
 767
 768 __DEFINE_LXSYSCALL1(int, close, int, fd)
 769 {
 770     struct v_fd* fd_s;
 771     int errno = 0;
 772     if ((errno = vfs_getfd(fd, &fd_s))) {
 773         goto done_err;
 774     }
 775
 776     if ((errno = vfs_close(fd_s->file))) {
 777         goto done_err;
 778     }
 779
 780     cake_release(fd_pile, fd_s);
 781     __current->fdtable->fds[fd] = 0;
 782
 783 done_err:
 784     return DO_STATUS(errno);
 785 }
 786
 787 void
 788 __vfs_readdir_callback(struct dir_context* dctx,
 789                        const char* name,
 790                        const int len,
 791                        const int dtype)
 792 {
 793     struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
 794     strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
 795     dent->d_nlen = len;
 796     dent->d_type = dtype;
 797 }
 798
 799 __DEFINE_LXSYSCALL2(int, sys_readdir, int, fd, struct lx_dirent*, dent)
 800 {
 801     struct v_fd* fd_s;
 802     int errno;
 803
 804     if ((errno = vfs_getfd(fd, &fd_s))) {
 805         goto done;
 806     }
 807
 808     struct v_inode* inode = fd_s->file->inode;
 809
 810     lock_inode(inode);
 811
 812     if (!check_directory_node(inode)) {
 813         errno = ENOTDIR;
 814         goto unlock;
 815     }
 816
 817     struct dir_context dctx = (struct dir_context) {
 818         .cb_data = dent,
 819         .read_complete_callback = __vfs_readdir_callback
 820     };
 821
 822     if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
 823         goto unlock;
 824     }
 825     dent->d_offset++;
 826     fd_s->file->f_pos++;
 827
 828 unlock:
 829     unlock_inode(inode);
 830
 831 done:
 832     return DO_STATUS_OR_RETURN(errno);
 833 }
 834
 835 __DEFINE_LXSYSCALL3(int, read, int, fd, void*, buf, size_t, count)
 836 {
 837     int errno = 0;
 838     struct v_fd* fd_s;
 839     if ((errno = vfs_getfd(fd, &fd_s))) {
 840         goto done;
 841     }
 842
 843     struct v_file* file = fd_s->file;
 844     if (check_directory_node(file->inode)) {
 845         errno = EISDIR;
 846         goto done;
 847     }
 848
 849     lock_inode(file->inode);
 850
 851     file->inode->atime = clock_unixtime();
 852
 853     if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) {
 854         errno = file->ops->read(file->inode, buf, count, file->f_pos);
 855     } else {
 856         errno = pcache_read(file->inode, buf, count, file->f_pos);
 857     }
 858
 859     if (errno > 0) {
 860         file->f_pos += errno;
 861         unlock_inode(file->inode);
 862         return errno;
 863     }
 864
 865     unlock_inode(file->inode);
 866
 867 done:
 868     return DO_STATUS(errno);
 869 }
 870
 871 __DEFINE_LXSYSCALL3(int, write, int, fd, void*, buf, size_t, count)
 872 {
 873     int errno = 0;
 874     struct v_fd* fd_s;
 875     if ((errno = vfs_getfd(fd, &fd_s))) {
 876         goto done;
 877     }
 878
 879     struct v_inode* inode;
 880     struct v_file* file = fd_s->file;
 881
 882     if ((errno = vfs_check_writable(file->dnode))) {
 883         goto done;
 884     }
 885
 886     if (check_directory_node(file->inode)) {
 887         errno = EISDIR;
 888         goto done;
 889     }
 890
 891     inode = file->inode;
 892     lock_inode(inode);
 893
 894     inode->mtime = clock_unixtime();
 895     if ((fd_s->flags & O_APPEND)) {
 896         file->f_pos = inode->fsize;
 897     }
 898
 899     if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
 900         errno = file->ops->write(inode, buf, count, file->f_pos);
 901     } else {
 902         errno = pcache_write(inode, buf, count, file->f_pos);
 903     }
 904
 905     if (errno > 0) {
 906         file->f_pos += errno;
 907         inode->fsize = MAX(inode->fsize, file->f_pos);
 908
 909         unlock_inode(inode);
 910         return errno;
 911     }
 912
 913     unlock_inode(inode);
 914
 915 done:
 916     return DO_STATUS(errno);
 917 }
 918
 919 __DEFINE_LXSYSCALL3(int, lseek, int, fd, int, offset, int, options)
 920 {
 921     int errno = 0;
 922     struct v_fd* fd_s;
 923     if ((errno = vfs_getfd(fd, &fd_s))) {
 924         goto done;
 925     }
 926
 927     struct v_file* file = fd_s->file;
 928     struct v_inode* inode = file->inode;
 929
 930     if (!file->ops->seek) {
 931         errno = ENOTSUP;
 932         goto done;
 933     }
 934
 935     lock_inode(inode);
 936
 937     int overflow = 0;
 938     int fpos = file->f_pos;
 939
 940     if (vfs_get_dtype(inode->itype) == DT_DIR) {
 941         options = (options != FSEEK_END) ? options : FSEEK_SET;
 942     }
 943
 944     switch (options) {
 945         case FSEEK_CUR:
 946             overflow = sadd_of((int)file->f_pos, offset, &fpos);
 947             break;
 948         case FSEEK_END:
 949             overflow = sadd_of((int)inode->fsize, offset, &fpos);
 950             break;
 951         case FSEEK_SET:
 952             fpos = offset;
 953             break;
 954     }
 955
 956     if (overflow) {
 957         errno = EOVERFLOW;
 958     }
 959     else {
 960         errno = file->ops->seek(file, fpos);
 961     }
 962
 963     unlock_inode(inode);
 964
 965 done:
 966     return DO_STATUS(errno);
 967 }
 968
 969 int
 970 vfs_get_path(struct v_dnode* dnode, char* buf, size_t size, int depth)
 971 {
 972     if (!dnode) {
 973         return 0;
 974     }
 975
 976     if (depth > 64) {
 977         return ENAMETOOLONG;
 978     }
 979
 980     size_t len = 0;
 981
 982     if (dnode->parent != dnode) {
 983         len = vfs_get_path(dnode->parent, buf, size, depth + 1);
 984     }
 985
 986     if (len >= size) {
 987         return len;
 988     }
 989
 990     if (!len || buf[len - 1] != VFS_PATH_DELIM) {
 991         buf[len++] = VFS_PATH_DELIM;
 992     }
 993
 994     size_t cpy_size = MIN(dnode->name.len, size - len);
 995     strncpy(buf + len, dnode->name.value, cpy_size);
 996     len += cpy_size;
 997
 998     return len;
 999 }
1000
1001 int
1002 vfs_readlink(struct v_dnode* dnode, char* buf, size_t size)
1003 {
1004     const char* link;
1005     struct v_inode* inode = dnode->inode;
1006
1007     if (!check_symlink_node(inode)) {
1008         return EINVAL;
1009     }
1010
1011     if (!inode->ops->read_symlink) {
1012         return ENOTSUP;
1013     }
1014
1015     lock_inode(inode);
1016
1017     int errno = inode->ops->read_symlink(inode, &link);
1018     if (errno >= 0) {
1019         strncpy(buf, link, MIN(size, (size_t)errno));
1020     }
1021
1022     unlock_inode(inode);
1023     return errno;
1024 }
1025
1026 int
1027 vfs_get_dtype(int itype)
1028 {
1029     int dtype = DT_FILE;
1030     if (check_itype(itype, VFS_IFSYMLINK)) {
1031         dtype |= DT_SYMLINK;
1032     }
1033
1034     if (check_itype(itype, VFS_IFDIR)) {
1035         dtype |= DT_DIR;
1036         return dtype;
1037     }
1038
1039     // TODO other types
1040
1041     return dtype;
1042 }
1043
1044 __DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
1045 {
1046     int errno;
1047     struct v_fd* fd_s;
1048     if ((errno = vfs_getfd(fd, &fd_s))) {
1049         goto done;
1050     }
1051
1052     struct v_dnode* dnode;
1053     errno = vfs_get_path(fd_s->file->dnode, buf, size, 0);
1054
1055     if (errno >= 0) {
1056         return errno;
1057     }
1058
1059 done:
1060     return DO_STATUS(errno);
1061 }
1062
1063 __DEFINE_LXSYSCALL3(int, readlink, const char*, path, char*, buf, size_t, size)
1064 {
1065     int errno;
1066     struct v_dnode* dnode;
1067     if (!(errno = vfs_walk_proc(path, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1068         errno = vfs_readlink(dnode, buf, size);
1069     }
1070
1071     if (errno >= 0) {
1072         return errno;
1073     }
1074
1075     return DO_STATUS(errno);
1076 }
1077
1078 __DEFINE_LXSYSCALL4(
1079   int, readlinkat, int, dirfd, const char*, pathname, char*, buf, size_t, size)
1080 {
1081     int errno;
1082     struct v_fd* fd_s;
1083     if ((errno = vfs_getfd(dirfd, &fd_s))) {
1084         goto done;
1085     }
1086
1087     pathname = pathname ? pathname : "";
1088
1089     struct v_dnode* dnode;
1090     if (!(errno = vfs_walk(
1091             fd_s->file->dnode, pathname, &dnode, NULL, VFS_WALK_NOFOLLOW))) {
1092         errno = vfs_readlink(fd_s->file->dnode, buf, size);
1093     }
1094
1095     if (errno >= 0) {
1096         return errno;
1097     }
1098
1099 done:
1100     return DO_STATUS(errno);
1101 }
1102
1103 /*
1104     NOTE
 1105     When we perform an operation that could affect the layout of a
 1106     directory (e.g., rename, mkdir, rmdir), we must lock the parent dir
 1107     whenever possible. This blocks any ongoing path walk from reaching
 1108     it, and hence avoids exposing any partial state.
1109 */
1110
1111 __DEFINE_LXSYSCALL1(int, rmdir, const char*, pathname)
1112 {
1113     int errno;
1114     struct v_dnode* dnode;
1115     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1116         return DO_STATUS(errno);
1117     }
1118
1119     lock_dnode(dnode);
1120
1121     if ((errno = vfs_check_writable(dnode))) {
1122         goto done;
1123     }
1124
1125     if ((dnode->super_block->fs->types & FSTYPE_ROFS)) {
1126         errno = EROFS;
1127         goto done;
1128     }
1129
1130     if (dnode->ref_count > 1 || dnode->inode->open_count) {
1131         errno = EBUSY;
1132         goto done;
1133     }
1134
1135     if (!llist_empty(&dnode->children)) {
1136         errno = ENOTEMPTY;
1137         goto done;
1138     }
1139
1140     struct v_dnode* parent = dnode->parent;
1141
1142     if (!parent) {
1143         errno = EINVAL;
1144         goto done;
1145     }
1146
1147     lock_dnode(parent);
1148     lock_inode(parent->inode);
1149
1150     if (check_directory_node(dnode->inode)) {
1151         errno = parent->inode->ops->rmdir(parent->inode, dnode);
1152         if (!errno) {
1153             vfs_dcache_remove(dnode);
1154         }
1155     } else {
1156         errno = ENOTDIR;
1157     }
1158
1159     unlock_inode(parent->inode);
1160     unlock_dnode(parent);
1161
1162 done:
1163     unlock_dnode(dnode);
1164     return DO_STATUS(errno);
1165 }
1166
1167 __DEFINE_LXSYSCALL1(int, mkdir, const char*, path)
1168 {
1169     int errno = 0;
1170     struct v_dnode *parent, *dir;
1171     char name_value[VFS_NAME_MAXLEN];
1172     struct hstr name = HHSTR(name_value, 0, 0);
1173
1174     if ((errno = vfs_walk_proc(path, &parent, &name, VFS_WALK_PARENT))) {
1175         goto done;
1176     }
1177
1178     if (!(errno = vfs_walk(parent, name_value, &dir, NULL, 0))) {
1179         errno = EEXIST;
1180         goto done;
1181     }
1182
1183     if ((errno = vfs_check_writable(parent))) {
1184         goto done;
1185     }
1186
1187     if (!(dir = vfs_d_alloc(parent, &name))) {
1188         errno = ENOMEM;
1189         goto done;
1190     }
1191
1192     struct v_inode* inode = parent->inode;
1193
1194     lock_dnode(parent);
1195     lock_inode(inode);
1196
1197     if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
1198         errno = ENOTSUP;
1199     } else if (!inode->ops->mkdir) {
1200         errno = ENOTSUP;
1201     } else if (!check_directory_node(inode)) {
1202         errno = ENOTDIR;
1203     } else if (!(errno = inode->ops->mkdir(inode, dir))) {
1204         vfs_dcache_add(parent, dir);
1205         goto cleanup;
1206     }
1207
1208     vfs_d_free(dir);
1209
1210 cleanup:
1211     unlock_inode(inode);
1212     unlock_dnode(parent);
1213 done:
1214     return DO_STATUS(errno);
1215 }
1216
1217 int
1218 __vfs_do_unlink(struct v_dnode* dnode)
1219 {
1220     int errno;
1221     struct v_inode* inode = dnode->inode;
1222
1223     if (dnode->ref_count > 1) {
1224         return EBUSY;
1225     }
1226
1227     if ((errno = vfs_check_writable(dnode))) {
1228         return errno;
1229     }
1230
1231     lock_inode(inode);
1232
1233     if (inode->open_count) {
1234         errno = EBUSY;
1235     } else if (!check_directory_node(inode)) {
1236         errno = inode->ops->unlink(inode, dnode);
1237         if (!errno) {
1238             vfs_d_free(dnode);
1239         }
1240     } else {
1241         errno = EISDIR;
1242     }
1243
1244     unlock_inode(inode);
1245
1246     return errno;
1247 }
1248
1249 __DEFINE_LXSYSCALL1(int, unlink, const char*, pathname)
1250 {
1251     int errno;
1252     struct v_dnode* dnode;
1253     if ((errno = vfs_walk_proc(pathname, &dnode, NULL, 0))) {
1254         goto done;
1255     }
1256
1257     errno = __vfs_do_unlink(dnode);
1258
1259 done:
1260     return DO_STATUS(errno);
1261 }
1262
1263 __DEFINE_LXSYSCALL2(int, unlinkat, int, fd, const char*, pathname)
1264 {
1265     int errno;
1266     struct v_fd* fd_s;
1267     if ((errno = vfs_getfd(fd, &fd_s))) {
1268         goto done;
1269     }
1270
1271     struct v_dnode* dnode;
1272     if (!(errno = vfs_walk(fd_s->file->dnode, pathname, &dnode, NULL, 0))) {
1273         errno = __vfs_do_unlink(dnode);
1274     }
1275
1276 done:
1277     return DO_STATUS(errno);
1278 }
1279
1280 __DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
1281 {
1282     int errno;
1283     struct file_locator floc;
1284     struct v_dnode *to_link, *name_file;
1285
1286     errno = __vfs_try_locate_file(oldpath, &floc, 0);
1287     if (errno) {
1288         goto done;
1289     }
1290
1291     __floc_try_unlock(&floc);
1292
1293     to_link = floc.file;
1294     errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
1295     if (!errno) {
1296         goto done;
1297     }
1298
1299     name_file = floc.file;
1300     errno = vfs_link(to_link, name_file);
1301     if (errno) {
1302         vfs_d_free(name_file);
1303     }
1304
1305 done:
1306     __floc_try_unlock(&floc);
1307     return DO_STATUS(errno);
1308 }
1309
1310 __DEFINE_LXSYSCALL1(int, fsync, int, fildes)
1311 {
1312     int errno;
1313     struct v_fd* fd_s;
1314
1315     if (!(errno = vfs_getfd(fildes, &fd_s))) {
1316         errno = vfs_fsync(fd_s->file);
1317     }
1318
1319     return DO_STATUS(errno);
1320 }
1321
1322 int
1323 vfs_dup_fd(struct v_fd* old, struct v_fd** new)
1324 {
1325     int errno = 0;
1326     struct v_fd* copied = cake_grab(fd_pile);
1327
1328     memcpy(copied, old, sizeof(struct v_fd));
1329
1330     vfs_ref_file(old->file);
1331
1332     *new = copied;
1333
1334     return errno;
1335 }
1336
1337 int
1338 vfs_dup2(int oldfd, int newfd)
1339 {
1340     if (newfd == oldfd) {
1341         return newfd;
1342     }
1343
1344     int errno;
1345     struct v_fd *oldfd_s, *newfd_s;
1346     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1347         goto done;
1348     }
1349
1350     if (!TEST_FD(newfd)) {
1351         errno = EBADF;
1352         goto done;
1353     }
1354
1355     newfd_s = __current->fdtable->fds[newfd];
1356     if (newfd_s && (errno = vfs_close(newfd_s->file))) {
1357         goto done;
1358     }
1359
1360     if (!(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1361         __current->fdtable->fds[newfd] = newfd_s;
1362         return newfd;
1363     }
1364
1365 done:
1366     return DO_STATUS(errno);
1367 }
1368
1369 __DEFINE_LXSYSCALL2(int, dup2, int, oldfd, int, newfd)
1370 {
1371     return vfs_dup2(oldfd, newfd);
1372 }
1373
1374 __DEFINE_LXSYSCALL1(int, dup, int, oldfd)
1375 {
1376     int errno, newfd;
1377     struct v_fd *oldfd_s, *newfd_s;
1378     if ((errno = vfs_getfd(oldfd, &oldfd_s))) {
1379         goto done;
1380     }
1381
1382     if (!(errno = vfs_alloc_fdslot(&newfd)) &&
1383         !(errno = vfs_dup_fd(oldfd_s, &newfd_s))) {
1384         __current->fdtable->fds[newfd] = newfd_s;
1385         return newfd;
1386     }
1387
1388 done:
1389     return DO_STATUS(errno);
1390 }
1391
1392 __DEFINE_LXSYSCALL2(
1393   int, symlink, const char*, pathname, const char*, link_target)
1394 {
1395     int errno;
1396     struct file_locator floc;
1397     struct v_dnode *file;
1398     struct v_inode *f_ino;
1399
1400     errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
1401     if (errno) {
1402         goto done;
1403     }
1404
1405     file = floc.file;
1406     errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
1407     if (errno) {
1408         vfs_d_free(file);
1409         goto done;
1410     }
1411
1412     f_ino = file->inode;
1413
1414     assert(f_ino);
1415
1416     errno = vfs_check_writable(file);
1417     if (errno) {
1418         goto done;
1419     }
1420
1421     if (!f_ino->ops->set_symlink) {
1422         errno = ENOTSUP;
1423         goto done;
1424     }
1425
1426     lock_inode(f_ino);
1427
1428     errno = f_ino->ops->set_symlink(f_ino, link_target);
1429
1430     unlock_inode(f_ino);
1431
1432 done:
1433     __floc_try_unlock(&floc);
1434     return DO_STATUS(errno);
1435 }
1436
1437 int
1438 vfs_do_chdir(struct proc_info* proc, struct v_dnode* dnode)
1439 {
1440     int errno = 0;
1441
1442     lock_dnode(dnode);
1443
1444     if (!check_directory_node(dnode->inode)) {
1445         errno = ENOTDIR;
1446         goto done;
1447     }
1448
1449     if (proc->cwd) {
1450         vfs_unref_dnode(proc->cwd);
1451     }
1452
1453     vfs_ref_dnode(dnode);
1454     proc->cwd = dnode;
1455
1456     unlock_dnode(dnode);
1457
1458 done:
1459     return errno;
1460 }
1461
1462 __DEFINE_LXSYSCALL1(int, chdir, const char*, path)
1463 {
1464     struct v_dnode* dnode;
1465     int errno = 0;
1466
1467     if ((errno = vfs_walk_proc(path, &dnode, NULL, 0))) {
1468         goto done;
1469     }
1470
1471     errno = vfs_do_chdir((struct proc_info*)__current, dnode);
1472
1473 done:
1474     return DO_STATUS(errno);
1475 }
1476
1477 __DEFINE_LXSYSCALL1(int, fchdir, int, fd)
1478 {
1479     struct v_fd* fd_s;
1480     int errno = 0;
1481
1482     if ((errno = vfs_getfd(fd, &fd_s))) {
1483         goto done;
1484     }
1485
1486     errno = vfs_do_chdir((struct proc_info*)__current, fd_s->file->dnode);
1487
1488 done:
1489     return DO_STATUS(errno);
1490 }
1491
1492 __DEFINE_LXSYSCALL2(char*, getcwd, char*, buf, size_t, size)
1493 {
1494     int errno = 0;
1495     char* ret_ptr = 0;
1496     if (size < 2) {
1497         errno = ERANGE;
1498         goto done;
1499     }
1500
1501     size_t len = 0;
1502
1503     if (!__current->cwd) {
1504         *buf = VFS_PATH_DELIM;
1505         len = 1;
1506     } else {
1507         len = vfs_get_path(__current->cwd, buf, size, 0);
1508         if (len == size) {
1509             errno = ERANGE;
1510             goto done;
1511         }
1512     }
1513
1514     buf[len] = '\0';
1515
1516     ret_ptr = buf;
1517
1518 done:
1519     syscall_result(errno);
1520     return ret_ptr;
1521 }
1522
1523 int
1524 vfs_do_rename(struct v_dnode* current, struct v_dnode* target)
1525 {
1526     int errno = 0;
1527     if (current->inode->id == target->inode->id) {
1528         // hard link
1529         return 0;
1530     }
1531
1532     if ((errno = vfs_check_writable(current))) {
1533         return errno;
1534     }
1535
1536     if (current->ref_count > 1 || target->ref_count > 1) {
1537         return EBUSY;
1538     }
1539
1540     if (current->super_block != target->super_block) {
1541         return EXDEV;
1542     }
1543
1544     struct v_dnode* oldparent = current->parent;
1545     struct v_dnode* newparent = target->parent;
1546
1547     lock_dnode(current);
1548     lock_dnode(target);
1549     if (oldparent)
1550         lock_dnode(oldparent);
1551     if (newparent)
1552         lock_dnode(newparent);
1553
1554     if (!llist_empty(&target->children)) {
1555         errno = ENOTEMPTY;
1556         unlock_dnode(target);
1557         goto cleanup;
1558     }
1559
1560     if ((errno =
1561            current->inode->ops->rename(current->inode, current, target))) {
1562         unlock_dnode(target);
1563         goto cleanup;
1564     }
1565
1566     // re-position current
1567     hstrcpy(&current->name, &target->name);
1568     vfs_dcache_rehash(newparent, current);
1569
1570     // detach target
1571     vfs_d_free(target);
1572
1573     unlock_dnode(target);
1574
1575 cleanup:
1576     unlock_dnode(current);
1577     if (oldparent)
1578         unlock_dnode(oldparent);
1579     if (newparent)
1580         unlock_dnode(newparent);
1581
1582     return errno;
1583 }
1584
1585 __DEFINE_LXSYSCALL2(int, rename, const char*, oldpath, const char*, newpath)
1586 {
1587     struct v_dnode *cur, *target_parent, *target;
1588     struct hstr name = HSTR(valloc(VFS_NAME_MAXLEN), 0);
1589     int errno = 0;
1590
1591     if ((errno = vfs_walk_proc(oldpath, &cur, NULL, 0))) {
1592         goto done;
1593     }
1594
1595     if ((errno = vfs_walk(
1596            __current->cwd, newpath, &target_parent, &name, VFS_WALK_PARENT))) {
1597         goto done;
1598     }
1599
1600     errno = vfs_walk(target_parent, name.value, &target, NULL, 0);
1601     if (errno == ENOENT) {
1602         target = vfs_d_alloc(target_parent, &name);
1603         vfs_dcache_add(target_parent, target);
1604     } else if (errno) {
1605         goto done;
1606     }
1607
1608     if (!target) {
1609         errno = ENOMEM;
1610         goto done;
1611     }
1612
1613     errno = vfs_do_rename(cur, target);
1614
1615 done:
1616     vfree((void*)name.value);
1617     return DO_STATUS(errno);
1618 }
1619
1620 __DEFINE_LXSYSCALL2(int, fstat, int, fd, struct file_stat*, stat)
1621 {
1622     int errno = 0;
1623     struct v_fd* fds;
1624
1625     if ((errno = vfs_getfd(fd, &fds))) {
1626         goto done;
1627     }
1628
1629     struct v_inode* vino = fds->file->inode;
1630     struct device* fdev = vino->sb->dev;
1631
1632     *stat = (struct file_stat){.st_ino = vino->id,
1633                                .st_blocks = vino->lb_usage,
1634                                .st_size = vino->fsize,
1635                                .mode = vino->itype,
1636                                .st_ioblksize = PAGE_SIZE,
1637                                .st_blksize = vino->sb->blksize};
1638
1639     if (check_device_node(vino)) {
1640         struct device* rdev = resolve_device(vino->data);
1641         if (!rdev) {
1642             errno = EINVAL;
1643             goto done;
1644         }
1645
1646         stat->st_rdev = (dev_t){.meta = rdev->ident.fn_grp,
1647                                 .unique = rdev->ident.unique,
1648                                 .index = dev_uid(rdev) };
1649     }
1650
1651     if (fdev) {
1652         stat->st_dev = (dev_t){.meta = fdev->ident.fn_grp,
1653                                .unique = fdev->ident.unique,
1654                                .index = dev_uid(fdev) };
1655     }
1656
1657 done:
1658     return DO_STATUS(errno);
1659 }