Second Extended Filesystem (ext2) and other improvements (#33)
[lunaix-os.git] / lunaix-os / kernel / fs / ext2 / inode.c
diff --git a/lunaix-os/kernel/fs/ext2/inode.c b/lunaix-os/kernel/fs/ext2/inode.c
new file mode 100644 (file)
index 0000000..389bdea
--- /dev/null
@@ -0,0 +1,1088 @@
+#include <lunaix/fs/api.h>
+#include <lunaix/mm/valloc.h>
+
+#include <klibc/string.h>
+
+#include "ext2.h"
+
+/* Inode operation table installed on every ext2-backed v_inode. */
+static struct v_inode_ops ext2_inode_ops = {
+    /* open / write-back */
+    .open         = ext2_open_inode,
+    .sync         = ext2_sync_inode,
+
+    /* directory handling */
+    .dir_lookup   = ext2dr_lookup,
+    .mkdir        = ext2_mkdir,
+    .rmdir        = ext2_rmdir,
+
+    /* name management */
+    .create       = ext2_create,
+    .link         = ext2_link,
+    .unlink       = ext2_unlink,
+    .rename       = ext2_rename,
+
+    /* symbolic links */
+    .read_symlink = ext2_get_symlink,
+    .set_symlink  = ext2_set_symlink,
+};
+
+/* File operation table installed on every ext2-backed v_inode. */
+static struct v_file_ops ext2_file_ops = {
+    /* read/write entry points */
+    .read       = ext2_inode_read,
+    .write      = ext2_inode_write,
+
+    /* page variants of the above */
+    .read_page  = ext2_inode_read_page,
+    .write_page = ext2_inode_write_page,
+
+    /* positioning and directory enumeration */
+    .seek       = ext2_seek_inode,
+    .readdir    = ext2dr_read,
+
+    /* write-back / teardown */
+    .sync       = ext2_file_sync,
+    .close      = ext2_close_inode,
+};
+
+/*
+ * bTLB tag helpers.
+ *
+ * to_tag()    drops the low `inds_lgents` bits of `val` and sets bit
+ *             `msbiti` as the "entry valid" marker.
+ * valid_tag() tests that marker.
+ *
+ * The marker is built from an unsigned constant: `msbiti` is defined outside
+ * this file (presumably the index of an int's most significant bit -- confirm),
+ * and shifting a signed 1 into the sign bit is undefined behaviour.
+ */
+#define to_tag(e_ino, val)        \
+        (((val) >> (e_ino)->inds_lgents) | (1U << msbiti))
+#define valid_tag(tag)      ((tag) & (1U << msbiti))
+
+/*
+ * Remember `buf` in the inode's bTLB under the tag derived from `blkid`.
+ *
+ * A `blkid` of zero is ignored. The first entry without a valid tag is used
+ * when there is one; otherwise a victim chosen by hashing `blkid` is put and
+ * overwritten. The entry holds its own reference on `buf` (fsblock_take).
+ */
+static void
+__btlb_insert(struct ext2_inode* e_inode, unsigned int blkid, bbuf_t buf)
+{
+    struct ext2_btlb* cache;
+    struct ext2_btlb_entry* slot;
+    unsigned int victim;
+    int i;
+
+    if (unlikely(!blkid)) {
+        return;
+    }
+
+    cache = e_inode->btlb;
+    slot  = NULL;
+
+    /* look for an entry that currently caches nothing */
+    for (i = 0; i < BTLB_SETS && !slot; i++) {
+        if (!valid_tag(cache->buffer[i].tag)) {
+            slot = &cache->buffer[i];
+        }
+    }
+
+    if (!slot) {
+        /*
+            Capacity miss. File access is mostly linear with strong
+            locality, so we bet on that and skip the bookkeeping cost
+            of an LRU policy: a trivial hash-selected eviction does
+            the job well enough.
+        */
+        victim = hash_32(blkid, ilog2(BTLB_SETS));
+        slot   = &cache->buffer[victim];
+
+        fsblock_put(slot->block);
+    }
+
+    slot->tag   = to_tag(e_inode, blkid);
+    slot->block = fsblock_take(buf);
+}
+
+/*
+ * Look up the bTLB entry whose tag matches `blkid`.
+ *
+ * Returns a freshly taken reference to the cached buffer on a hit, or NULL
+ * on a miss. A matching entry whose buffer reports a reference count of
+ * zero is invalidated and treated as a miss.
+ */
+static bbuf_t
+__btlb_hit(struct ext2_inode* e_inode, unsigned int blkid)
+{
+    struct ext2_btlb_entry* entries = e_inode->btlb->buffer;
+    unsigned int wanted = to_tag(e_inode, blkid);
+    unsigned int refs;
+    int i = 0;
+
+    /* advance to the first entry carrying the wanted tag */
+    while (i < BTLB_SETS && entries[i].tag != wanted) {
+        i++;
+    }
+
+    if (i >= BTLB_SETS) {
+        return NULL;
+    }
+
+    refs = blkbuf_refcounts(entries[i].block);
+    if (!refs) {
+        entries[i].tag   = 0;
+        entries[i].block = bbuf_null;
+        return NULL;
+    }
+
+    return fsblock_take(entries[i].block);
+}
+
+/*
+ * Invalidate every valid bTLB entry of `e_inode`, putting the buffer
+ * reference each of them holds.
+ */
+static void
+__btlb_flushall(struct ext2_inode* e_inode)
+{
+    struct ext2_btlb_entry* cur = e_inode->btlb->buffer;
+    struct ext2_btlb_entry* end = cur + BTLB_SETS;
+
+    for (; cur < end; cur++) {
+        if (valid_tag(cur->tag)) {
+            cur->tag = 0;
+            fsblock_put(cur->block);
+        }
+    }
+}
+
+/*
+ * Initialise `iter` for walking the data blocks of `inode`.
+ *
+ * Every field not listed below is zeroed. `end_pos` is the inode size
+ * expressed in blocks, computed with ICEIL.
+ */
+void
+ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode)
+{
+    struct ext2_iterator fresh = {
+        .inode   = inode,
+        .blksz   = inode->sb->blksize,
+        .end_pos = ICEIL(EXT2_INO(inode)->isize, inode->sb->blksize),
+    };
+
+    *iter = fresh;
+}
+
+/*
+ * Rewind `iter` to position 0, releasing the currently selected buffer
+ * if there is one.
+ */
+void
+ext2db_itreset(struct ext2_iterator* iter)
+{
+    bbuf_t held = iter->sel_buf;
+
+    iter->pos = 0;
+
+    if (unlikely(!held)) {
+        return;
+    }
+
+    fsblock_put(held);
+    iter->sel_buf = NULL;
+}
+
+/*
+ * Advance the iterator position by `count` blocks without fetching
+ * anything. Returns `count` unchanged.
+ */
+int
+ext2db_itffw(struct ext2_iterator* iter, int count)
+{
+    iter->pos = iter->pos + count;
+
+    return count;
+}
+
+/*
+ * Finish an iteration: release the currently selected buffer, if any.
+ * The position is left as it is.
+ */
+void
+ext2db_itend(struct ext2_iterator* iter)
+{
+    bbuf_t held = iter->sel_buf;
+
+    if (unlikely(!held)) {
+        return;
+    }
+
+    fsblock_put(held);
+    iter->sel_buf = NULL;
+}
+
+/*
+ * Step the iterator to the next data block.
+ *
+ * Releases the previously selected buffer, fetches the block at the current
+ * position and, when that succeeds, exposes its data through `iter->data`
+ * and advances the position.
+ *
+ * Returns false when the iterator is in error state, when the position has
+ * run past `end_pos`, when no block exists at the position, or when
+ * ext2_itcheckbuf() rejects the fetched buffer.
+ *
+ * NOTE(review): the end check uses `>` although `end_pos` is a block count,
+ * so position == end_pos is still fetched -- confirm this is intended.
+ */
+bool
+ext2db_itnext(struct ext2_iterator* iter)
+{
+    bbuf_t fetched;
+
+    if (unlikely(iter->has_error) || unlikely(iter->pos > iter->end_pos)) {
+        return false;
+    }
+
+    /* drop the block handed out by the previous step */
+    if (likely(iter->sel_buf)) {
+        fsblock_put(iter->sel_buf);
+    }
+
+    fetched = ext2db_get(iter->inode, iter->pos);
+    iter->sel_buf = fetched;
+
+    if (!fetched) {
+        return false;
+    }
+
+    if (!ext2_itcheckbuf(iter)) {
+        return false;
+    }
+
+    iter->data = blkbuf_data(fetched);
+    iter->pos++;
+
+    return true;
+}
+
+/*
+ * Per-inode initialisation hook. ext2 has nothing to do here; the
+ * function exists only so the superblock has a callback to install.
+ */
+void
+ext2ino_init(struct v_superblock* vsb, struct v_inode* inode)
+{
+    (void)vsb;
+    (void)inode;
+}
+
+/*
+ * Release everything owned by an in-memory ext2 inode: cached indirect
+ * tables, the inode-table buffer, the first-order indirect buffer, the
+ * block-group reference, the symlink string, the bTLB and finally the
+ * inode structure itself. `e_inode` must not be used afterwards.
+ */
+static void
+__destruct_ext2_inode(struct ext2_inode* e_inode)
+{
+    /* the bTLB must still be allocated while it is being flushed */
+    __btlb_flushall(e_inode);
+
+    /* buffer references */
+    fsblock_put(e_inode->buf);
+    fsblock_put(e_inode->ind_ord1);
+
+    /* block group reference */
+    ext2gd_put(e_inode->blk_grp);
+
+    /* heap memory; the symlink string may be absent, hence the safe variant */
+    vfree_safe(e_inode->symlink);
+    vfree(e_inode->btlb);
+    vfree(e_inode);
+}
+
+/*
+ * Destructor installed on ext2 v_inodes: tears down the ext2-private
+ * inode attached to `inode`. The private inode must be present.
+ */
+static void
+ext2_destruct_inode(struct v_inode* inode)
+{
+    struct ext2_inode* priv = EXT2_INO(inode);
+
+    assert(priv);
+    __destruct_ext2_inode(priv);
+}
+
+/*
+ * Apply the settings shared by every ext2 v_inode: its id, the ext2
+ * inode/file operation tables and the ext2 destructor.
+ */
+static inline void
+__ext2ino_fill_common(struct v_inode* inode, ino_t ino_id)
+{
+    fsapi_inode_setid(inode, ino_id, ino_id);
+
+    fsapi_inode_setops(inode, &ext2_inode_ops);
+    fsapi_inode_setfops(inode, &ext2_file_ops);
+
+    fsapi_inode_setdector(inode, ext2_destruct_inode);
+}
+
+
+/*
+ * Map a VFS inode type onto an ext2 `i_mode` value.
+ *
+ * Anything that is not a directory, sequential device or volume device is
+ * stored as a regular file. Directories additionally get IMODE_UEX, and the
+ * symlink bits are OR-ed in on top when the VFS type carries them. The read
+ * permission bits for user, group and others are always set.
+ */
+static unsigned int
+__translate_vfs_itype(unsigned int v_itype)
+{
+    unsigned int mode;
+
+    if (v_itype == VFS_IFFILE) {
+        mode = IMODE_IFREG;
+    }
+    else if (check_itype(v_itype, VFS_IFDIR)) {
+        mode = IMODE_IFDIR | IMODE_UEX;
+    }
+    else if (check_itype(v_itype, VFS_IFSEQDEV)) {
+        mode = IMODE_IFCHR;
+    }
+    else if (check_itype(v_itype, VFS_IFVOLDEV)) {
+        mode = IMODE_IFBLK;
+    }
+    else {
+        mode = IMODE_IFREG;
+    }
+
+    if (check_itype(v_itype, VFS_IFSYMLINK)) {
+        mode |= IMODE_IFLNK;
+    }
+
+    // FIXME we keep this until we have our own user manager
+    return mode | (IMODE_URD | IMODE_GRD | IMODE_ORD);
+}
+
+/**
+ * @brief Populate a VFS inode from the on-disk ext2 inode `ino_id`.
+ *
+ * Loads the ext2 inode, copies size and timestamps into `inode`, installs
+ * the common ext2 operations, derives the VFS type from `i_mode` and attaches
+ * the ext2 inode as private data.
+ *
+ * @param inode   VFS inode to fill; its superblock must already be set
+ * @param ino_id  ext2 inode number to load
+ * @return 0 on success, otherwise the error reported by ext2ino_get()
+ */
+int
+ext2ino_fill(struct v_inode* inode, ino_t ino_id)
+{
+    struct ext2_inode* e_ino;
+    struct v_superblock* vsb;
+    struct ext2b_inode* b_ino;
+    unsigned int type = VFS_IFFILE;
+    int errno = 0;
+
+    vsb = inode->sb;
+
+    if ((errno = ext2ino_get(vsb, ino_id, &e_ino))) {
+        return errno;
+    }
+
+    b_ino = e_ino->ino;
+    ino_id = e_ino->ino_id;
+
+    fsapi_inode_setsize(inode, e_ino->isize);
+
+    fsapi_inode_settime(inode, b_ino->i_ctime,
+                               b_ino->i_mtime,
+                               b_ino->i_atime);
+
+    __ext2ino_fill_common(inode, ino_id);
+
+    /*
+     * Order matters: in the ext2 mode encoding the block-device pattern
+     * contains both the directory and the character-device patterns, so
+     * it has to be tested before either of them. Otherwise a check that
+     * only tests "all bits of the pattern are set" would report block
+     * devices as directories.
+     */
+    if (check_itype(b_ino->i_mode, IMODE_IFLNK)) {
+        type = VFS_IFSYMLINK;
+    }
+    else if (check_itype(b_ino->i_mode, IMODE_IFBLK)) {
+        type = VFS_IFVOLDEV;
+    }
+    else if (check_itype(b_ino->i_mode, IMODE_IFDIR)) {
+        type = VFS_IFDIR;
+    }
+    else if (check_itype(b_ino->i_mode, IMODE_IFCHR)) {
+        type = VFS_IFSEQDEV;
+    }
+
+    fsapi_inode_settype(inode, type);
+
+    fsapi_inode_complete(inode, e_ino);
+
+    return 0;
+}
+
+/*
+ * Take the descriptor of the block group that inode number `ino` lives in.
+ * The group index is the converted inode number divided by the
+ * inodes-per-group count from the superblock. Returns whatever
+ * ext2gd_take() returns.
+ */
+static int
+__get_group_desc(struct v_superblock* vsb, int ino,
+                 struct ext2_gdesc** gd_out)
+{
+    struct ext2_sbinfo* sbi = EXT2_SB(vsb);
+    unsigned int grp = to_fsblock_id(ino) / sbi->raw->s_ino_per_grp;
+
+    return ext2gd_take(vsb, grp, gd_out);
+}
+
+/*
+ * Locate the on-disk inode at `ino_index` (relative to block group `gd`)
+ * inside the group's inode table.
+ *
+ * On success the inode-table block containing the inode is stored in
+ * `*buf_out` (the caller owns that reference) and a pointer into that
+ * block's data is returned. Returns NULL, leaving `*buf_out` untouched,
+ * when the block cannot be read or the superblock's inode size is unusable.
+ */
+static struct ext2b_inode*
+__get_raw_inode(struct v_superblock* vsb, struct ext2_gdesc* gd,
+                bbuf_t* buf_out, int ino_index)
+{
+    bbuf_t ino_tab;
+    struct ext2_sbinfo* sb;
+    struct ext2b_inode* b_inode;
+    unsigned int ino_size, ino_tab_sel, ino_tab_off, tab_partlen;
+
+    assert(buf_out);
+
+    sb = gd->sb;
+    ino_size = sb->raw->s_ino_size;
+
+    /* guard the divisions below against a corrupted superblock */
+    if (!ino_size || ino_size > sb->block_size) {
+        return NULL;
+    }
+
+    tab_partlen = sb->block_size / ino_size;
+    ino_tab_sel = ino_index / tab_partlen;
+    ino_tab_off = ino_index % tab_partlen;
+
+    ino_tab = fsblock_get(vsb, gd->info->bg_ino_tab + ino_tab_sel);
+    if (blkbuf_errbuf(ino_tab)) {
+        return NULL;
+    }
+
+    /*
+     * Step through the table in units of the on-disk inode size, the same
+     * unit used to compute tab_partlen above. Indexing with
+     * sizeof(struct ext2b_inode) would pick the wrong record whenever the
+     * file system was created with a larger inode size.
+     */
+    b_inode = (struct ext2b_inode*)
+              ((char*)blkbuf_data(ino_tab) + ino_tab_off * ino_size);
+
+    *buf_out = ino_tab;
+
+    return b_inode;
+}
+
+/*
+ * Build the in-memory representation of the inode at `ino_index` within
+ * block group `gd`.
+ *
+ * The returned inode keeps the inode-table buffer and stores `gd` as its
+ * block group; both are released by __destruct_ext2_inode(). No additional
+ * reference on `gd` is taken here.
+ *
+ * Returns NULL if the raw inode cannot be read or memory runs out; in that
+ * case nothing acquired by this function stays held.
+ */
+static struct ext2_inode*
+__create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
+{
+    bbuf_t ino_tab;
+    struct ext2_sbinfo* sb;
+    struct ext2b_inode* b_inode;
+    struct ext2_inode* inode;
+    unsigned int ind_ents;
+    size_t inds_blks;
+
+    sb = gd->sb;
+    b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_index);
+    if (!b_inode) {
+        return NULL;
+    }
+
+    inode = vzalloc(sizeof(*inode));
+    if (!inode) {
+        fsblock_put(ino_tab);
+        return NULL;
+    }
+
+    inode->btlb = vzalloc(sizeof(struct ext2_btlb));
+    if (!inode->btlb) {
+        vfree(inode);
+        fsblock_put(ino_tab);
+        return NULL;
+    }
+
+    inode->buf       = ino_tab;
+    inode->ino       = b_inode;
+    inode->blk_grp   = gd;
+    inode->isize     = b_inode->i_size;
+
+    if (ext2_feature(vsb, FEAT_LARGE_FILE)) {
+        inode->isize |= (size_t)((u64_t)(b_inode->i_size_h32) << 32);
+    }
+
+    /*
+     * i_blocks minus the 512-byte units needed for the file content is
+     * attributed to indirect tables, then converted to fs blocks.
+     */
+    if (b_inode->i_blocks) {
+        inds_blks  = (size_t)b_inode->i_blocks;
+        inds_blks -= ICEIL(inode->isize, 512);
+        inds_blks /= (sb->block_size / 512);
+
+        inode->indirect_blocks = inds_blks;
+    }
+
+    /* number of block ids one indirect table holds; must be a power of 2 */
+    ind_ents = sb->block_size / sizeof(int);
+    assert(is_pot(ind_ents));
+
+    inode->inds_lgents = ilog2(ind_ents);
+    inode->ino_id = gd->ino_base + to_ext2ino_id(ino_index);
+
+    return inode;
+}
+
+/**
+ * @brief Fetch only the raw on-disk inode `ino`, without building the
+ *        full in-memory ext2 inode.
+ *
+ * On success `fast_ino->buf` holds the inode-table block (reference owned
+ * by the caller) and `fast_ino->ino` points at the raw inode inside it.
+ *
+ * @return 0 on success, the error from the group descriptor lookup, or EIO
+ *         when the inode table cannot be read. `fast_ino` is left untouched
+ *         on failure.
+ */
+int
+ext2ino_get_fast(struct v_superblock* vsb,
+                 unsigned int ino, struct ext2_fast_inode* fast_ino)
+{
+    int errno;
+    bbuf_t ino_tab;
+    struct ext2_gdesc* gd;
+    struct ext2_sbinfo* sb;
+    struct ext2b_inode* b_inode;
+    unsigned int ino_rel_id;
+
+    sb = EXT2_SB(vsb);
+    errno = __get_group_desc(vsb, ino, &gd);
+    if (errno) {
+        return errno;
+    }
+
+    ino_rel_id  = to_fsblock_id(ino) % sb->raw->s_ino_per_grp;
+    b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_rel_id);
+
+    /*
+     * The descriptor was only needed to find the inode table. Nothing in
+     * fast_ino refers to it, so drop the reference taken above.
+     */
+    ext2gd_put(gd);
+
+    if (!b_inode) {
+        /* ino_tab was not written by __get_raw_inode(); do not hand it out */
+        return EIO;
+    }
+
+    fast_ino->buf = ino_tab;
+    fast_ino->ino = b_inode;
+
+    return 0;
+}
+
+/**
+ * @brief Load inode number `ino` into a newly allocated in-memory inode.
+ *
+ * Takes the block group descriptor, builds the inode and, when the inode
+ * has a first-order indirect block, pins that block in `ind_ord1`.
+ *
+ * @param vsb  superblock of the mounted ext2 volume
+ * @param ino  ext2 inode number
+ * @param out  receives the inode on success
+ * @return 0 on success; the group lookup error, or EIO when the inode or
+ *         its first indirect block cannot be read. On failure every
+ *         resource acquired here is released again.
+ */
+int
+ext2ino_get(struct v_superblock* vsb,
+            unsigned int ino, struct ext2_inode** out)
+{
+    struct ext2_sbinfo* sb;
+    struct ext2_inode* inode;
+    struct ext2_gdesc* gd;
+    unsigned int ino_rel_id, prima_ind;
+    int errno = 0;
+
+    sb = EXT2_SB(vsb);
+
+    if ((errno = __get_group_desc(vsb, ino, &gd))) {
+        return errno;
+    }
+
+    ino_rel_id  = to_fsblock_id(ino) % sb->raw->s_ino_per_grp;
+    inode = __create_inode(vsb, gd, ino_rel_id);
+    if (!inode) {
+        /* nobody adopted the descriptor reference; give it back */
+        ext2gd_put(gd);
+        return EIO;
+    }
+
+    prima_ind = inode->ino->i_block.ind1;
+    if (!prima_ind) {
+        *out = inode;
+        return 0;
+    }
+
+    inode->ind_ord1 = fsblock_get(vsb, prima_ind);
+    if (blkbuf_errbuf(inode->ind_ord1)) {
+        /*
+         * Undo __create_inode() by hand: ind_ord1 is an error buffer here,
+         * so the regular destructor (which puts it) is not used.
+         */
+        fsblock_put(inode->buf);
+        ext2gd_put(gd);
+        vfree(inode->btlb);
+        vfree(inode);
+        *out = NULL;
+        return EIO;
+    }
+
+    *out = inode;
+    return 0;
+}
+
+/**
+ * @brief Allocate a free on-disk inode and return its zeroed in-memory form.
+ *
+ * When `hint` is given, its block group is tried first; if that yields no
+ * slot the whole file system is searched.
+ *
+ * NOTE(review): with a hint, the new inode stores `hint->blk_grp` without a
+ * visible extra reference being taken, while the destructor puts it --
+ * confirm the reference accounting of ext2gd_alloc_inode().
+ *
+ * @return 0 on success, EDQUOT when no inode slot is free, EIO when the
+ *         inode cannot be set up (the slot is released again).
+ */
+int
+ext2ino_alloc(struct v_superblock* vsb,
+                 struct ext2_inode* hint, struct ext2_inode** out)
+{
+    struct ext2_gdesc* gd;
+    struct ext2_inode* fresh;
+    int slot = ALLOC_FAIL;
+
+    if (hint) {
+        gd   = hint->blk_grp;
+        slot = ext2gd_alloc_inode(gd);
+    }
+
+    if (!valid_bmp_slot(slot)) {
+        /* the hinted group had nothing to offer; search the entire fs */
+        slot = ext2ino_alloc_slot(vsb, &gd);
+
+        if (!valid_bmp_slot(slot)) {
+            return EDQUOT;
+        }
+    }
+
+    fresh = __create_inode(vsb, gd, slot);
+    if (!fresh) {
+        // what a shame!
+        ext2gd_free_inode(gd, slot);
+        return EIO;
+    }
+
+    /* start from a blank on-disk inode and schedule it for write-back */
+    memset(fresh->ino, 0, sizeof(*fresh->ino));
+    fsblock_dirty(fresh->buf);
+
+    *out = fresh;
+    return 0;
+}
+
+/*
+ * Mark the block `block_pos` as free in the bitmap of the block group it
+ * belongs to. A position of zero means "no block" and is a no-op.
+ * Returns 0 on success or the error from taking the group descriptor.
+ */
+static inline int
+__free_block_at(struct v_superblock *vsb, unsigned int block_pos)
+{
+    struct ext2_gdesc* gd;
+    struct ext2_sbinfo* sb;
+    int gd_index, errno;
+
+    if (block_pos == 0) {
+        return 0;
+    }
+
+    sb = EXT2_SB(vsb);
+
+    block_pos = ext2_datablock(vsb, block_pos);
+    gd_index  = block_pos / sb->raw->s_blk_per_grp;
+
+    errno = ext2gd_take(vsb, gd_index, &gd);
+    if (errno) {
+        return errno;
+    }
+
+    /* the bitmap is indexed relative to the first block of the group */
+    assert(block_pos >= gd->base);
+    ext2gd_free_block(gd, block_pos - gd->base);
+
+    ext2gd_put(gd);
+    return 0;
+}
+
+/*
+ * Free the blocks referenced from the table recorded at `depth` of `stack`,
+ * starting at the index recorded for that depth and continuing to the end
+ * of the table. Each visited slot is descended into (while depth allows),
+ * released in the block bitmap and cleared.
+ *
+ * Depth 0 is the block array embedded in the inode itself (15 slots);
+ * deeper levels are indirect tables with 2^inds_lgents slots.
+ *
+ * Returns 0 on success, EIO when a table cannot be read, or the error from
+ * a deeper level.
+ *
+ * NOTE(review): `tables[depth]` (not depth + 1) is overwritten before
+ * recursing, so the recursion seems to depend on what the walk left in
+ * `tables[depth + 1]` -- confirm the intended stack protocol.
+ * NOTE(review): max_len has 4 entries; confirm MAX_INDS_DEPTH never lets
+ * `depth` reach 4.
+ */
+static int
+__free_recurisve_from(struct v_superblock *vsb, struct ext2_inode* inode,
+                      struct walk_stack* stack, int depth)
+{
+    bbuf_t tab;
+    int idx, len, errno;
+    u32_t* db_tab;
+
+    int ind_entries = 1 << inode->inds_lgents;
+    int max_len[] = { 15, ind_entries, ind_entries, ind_entries };
+
+    u32_t* tables  = stack->tables;
+    u32_t* indices = stack->indices;
+
+    if (depth > MAX_INDS_DEPTH || !tables[depth]) {
+        return 0;
+    }
+
+    idx = indices[depth];
+    len = max_len[depth];
+    tab = fsblock_get(vsb, ext2_datablock(vsb, tables[depth]));
+
+    if (blkbuf_errbuf(tab)) {
+        return EIO;
+    }
+
+    db_tab = blkbuf_data(tab);
+    if (depth == 0) {
+        /*
+         * Level 0 is the inode's own block array. The table block holds
+         * several inodes, so offsetting from the start of the block data
+         * would address the first inode stored in that block rather than
+         * this one. Go through the inode's raw record instead.
+         */
+        db_tab = (u32_t*)inode->ino->i_block_arr;
+    }
+
+    errno = 0;
+    indices[depth] = 0;
+
+    for (; idx < len; idx++)
+    {
+        u32_t db_id = db_tab[idx];
+
+        if (!db_id) {
+            continue;
+        }
+
+        if (depth >= MAX_INDS_DEPTH) {
+            goto cont;
+        }
+
+        tables[depth] = db_id;
+        errno = __free_recurisve_from(vsb, inode, stack, depth + 1);
+        if (errno) {
+            break;
+        }
+
+cont:
+        __free_block_at(vsb, db_id);
+        db_tab[idx] = 0;
+    }
+
+    if (depth == 0) {
+        /* the slots cleared above live in the inode's table buffer */
+        fsblock_dirty(inode->buf);
+    }
+
+    fsblock_dirty(tab);
+    fsblock_put(tab);
+    return errno;
+}
+
+/**
+ * @brief Drop one link from `inode`; when the last link is gone, release
+ *        its data blocks, its inode slot and its in-memory state.
+ *
+ * After the inode has been released `inode->data` is NULL.
+ *
+ * @return 0 on success or while links remain; otherwise the error reported
+ *         while truncating the inode (the inode is still released then).
+ */
+int
+ext2ino_free(struct v_inode* inode)
+{
+    int errno;
+    unsigned int ino_slot;
+    struct ext2_inode*  e_ino;
+    struct ext2_gdesc*  e_gd;
+    struct ext2b_inode* b_ino;
+
+    e_ino = EXT2_INO(inode);
+    b_ino = e_ino->ino;
+    e_gd  = e_ino->blk_grp;
+
+    assert_fs(b_ino->i_lnk_cnt > 0);
+    fsblock_dirty(e_ino->buf);
+
+    b_ino->i_lnk_cnt--;
+    if (b_ino->i_lnk_cnt >= 1) {
+        return 0;
+    }
+
+    /* last link gone: give back the data blocks first */
+    errno = ext2ino_resizing(inode, 0);
+
+    /*
+     * Reverse of `ino_id = gd->ino_base + to_ext2ino_id(index)` from
+     * __create_inode(): the inode base of the group has to be used here,
+     * not the group's block base.
+     */
+    ino_slot = to_fsblock_id(e_ino->ino_id - e_gd->ino_base);
+    ext2gd_free_inode(e_gd, ino_slot);
+
+    __destruct_ext2_inode(e_ino);
+
+    inode->data = NULL;
+
+    return errno;
+}
+
+/*
+ * Copy the access, modification and change timestamps of the VFS inode
+ * into the raw on-disk inode. The caller marks the buffer dirty.
+ */
+static void
+__update_inode_access_metadata(struct ext2b_inode* b_ino,
+                        struct v_inode* inode)
+{
+    b_ino->i_atime = inode->atime;
+    b_ino->i_mtime = inode->mtime;
+    b_ino->i_ctime = inode->ctime;
+}
+
+/*
+ * Record `size` as the new file size, both in the in-memory inode and in
+ * the raw on-disk inode, and recompute `i_blocks`.
+ *
+ * `i_blocks` is kept in 512-byte units: the units covering the file content
+ * plus those used by indirect tables. `indirect_blocks` counts file system
+ * blocks (see __create_inode(), which divides by block_size / 512 when
+ * loading), so it is scaled back here.
+ */
+static inline void
+__update_inode_size(struct v_inode* inode, size_t size)
+{
+    struct ext2b_inode* b_ino;
+    struct ext2_inode*  e_ino;
+    size_t units_per_blk;
+
+    e_ino = EXT2_INO(inode);
+    b_ino = e_ino->ino;
+
+    e_ino->isize = size;
+
+    if (ext2_feature(inode->sb, FEAT_LARGE_FILE)) {
+        b_ino->i_size_l32 = (unsigned int)size;
+        b_ino->i_size_h32 = (unsigned int)((u64_t)size >> 32);
+    }
+    else {
+        b_ino->i_size  = size;
+    }
+
+    units_per_blk = fsapi_block_size(inode->sb) / 512;
+
+    b_ino->i_blocks = ICEIL(size, 512);
+    b_ino->i_blocks += e_ino->indirect_blocks * units_per_blk;
+}
+
+/**
+ * @brief Create a brand-new inode of VFS type `itype`.
+ *
+ * Allocates the on-disk inode (near `hint` when given), allocates a VFS
+ * inode for it, wires up the ext2 operations, stamps the timestamps and
+ * mode into the raw inode and attaches the ext2 inode as private data.
+ *
+ * @return 0 with `*out` set on success, or the error from ext2ino_alloc()
+ */
+int
+ext2ino_make(struct v_superblock* vsb, unsigned int itype,
+             struct ext2_inode* hint, struct v_inode** out)
+{
+    struct ext2_inode* e_ino;
+    struct v_inode* vnode;
+    int errno;
+
+    errno = ext2ino_alloc(vsb, hint, &e_ino);
+    if (errno) {
+        return errno;
+    }
+
+    vnode = vfs_i_alloc(vsb);
+
+    __ext2ino_fill_common(vnode, e_ino->ino_id);
+
+    /* seed the raw inode from the freshly allocated VFS inode */
+    __update_inode_access_metadata(e_ino->ino, vnode);
+    e_ino->ino->i_mode = __translate_vfs_itype(itype);
+
+    fsapi_inode_settype(vnode, itype);
+    fsapi_inode_complete(vnode, e_ino);
+
+    *out = vnode;
+    return 0;
+}
+
+/**
+ * @brief Create a new inode of type `itype` and link it under `dnode`.
+ *
+ * The parent inode `this` serves as the locality hint for the allocation.
+ *
+ * @return 0 on success, otherwise the error from creating or linking
+ */
+int
+ext2_create(struct v_inode* this, struct v_dnode* dnode, unsigned int itype)
+{
+    struct v_inode* fresh;
+    int errno;
+
+    errno = ext2ino_make(this->sb, itype, EXT2_INO(this), &fresh);
+
+    return errno ? errno : ext2_link(fresh, dnode);
+}
+
+/**
+ * @brief Add the name `new_name` for inode `this` to the directory that is
+ *        the parent of `new_name`.
+ *
+ * Builds a directory entry, registers the link on the inode, inserts the
+ * entry and, on success, binds the resulting ext2 dnode and the inode to
+ * `new_name`.
+ *
+ * @return 0 on success, otherwise the error from ext2dr_insert()
+ */
+int
+ext2_link(struct v_inode* this, struct v_dnode* new_name)
+{
+    struct ext2b_dirent entry;
+    struct ext2_dnode* e_dno;
+    struct v_inode* dir;
+    int errno;
+
+    dir = fsapi_dnode_parent(new_name);
+
+    ext2dr_setup_dirent(&entry, this, &new_name->name);
+    ext2ino_linkto(EXT2_INO(this), &entry);
+
+    errno = ext2dr_insert(dir, &entry, &e_dno);
+    if (!errno) {
+        new_name->data = e_dno;
+        vfs_assign_inode(new_name, this);
+    }
+
+    return errno;
+}
+
+/**
+ * @brief Remove the directory entry `name` and drop the link it held on
+ *        inode `this`.
+ *
+ * The dnode must carry ext2 private data whose entry refers to `this`.
+ *
+ * @return 0 on success, otherwise the error from removing the entry or
+ *         from ext2ino_free()
+ */
+int
+ext2_unlink(struct v_inode* this, struct v_dnode* name)
+{
+    struct ext2_inode* e_ino = EXT2_INO(this);
+    struct ext2_dnode* e_dno = EXT2_DNO(name);
+    int errno;
+
+    assert_fs(e_dno);
+    assert_fs(e_dno->self.dirent->inode == e_ino->ino_id);
+
+    errno = ext2dr_remove(e_dno);
+
+    return errno ? errno : ext2ino_free(this);
+}
+
+/*
+ * Push the VFS inode's timestamps into the raw on-disk inode and mark the
+ * inode-table buffer dirty.
+ */
+void
+ext2ino_update(struct v_inode* inode)
+{
+    struct ext2_inode* priv = EXT2_INO(inode);
+
+    __update_inode_access_metadata(priv->ino, inode);
+    fsblock_dirty(priv->buf);
+}
+
+/* ******************* Data Blocks ******************* */
+
+/*
+ * Record, for level `depth` of the walk, the block id of table `tab` and
+ * the slot `index` that was selected inside it.
+ */
+static inline void
+__walkstate_set_stack(struct walk_state* state, int depth,
+                      bbuf_t tab, unsigned int index)
+{
+    struct walk_stack* stack = &state->stack;
+
+    stack->indices[depth] = index;
+    stack->tables[depth]  = fsblock_id(tab);
+}
+
+/**
+ * @brief Walk the indirection chain given the position of a data block
+ *        relative to the inode. On success, walk_state is populated with
+ *        the result. On error, the result fields of walk_state (slot_ref,
+ *        table, level, indirections) are left untouched, but the per-level
+ *        stack may already have been partially updated.
+ *
+ *        Note, the result is always one level above the stopping level.
+ *        That means, if `pos` points directly to a file-content block
+ *        (i.e., a leaf block), then the state describes the table (inode
+ *        block array or indirect block) containing the ID of that leaf.
+ *        The returned state holds a reference on that table.
+ *
+ *        If `resolve` is set, any absence encountered during the walk is
+ *        resolved by allocating and chaining a zero-filled indirect block.
+ *        This is silently disabled when the file system is read-only; the
+ *        walk then stops at the first empty slot.
+ *
+ * @param inode     inode to walk
+ * @param pos       flattened data block position to be located
+ * @param state     receives the walk result
+ * @param resolve   whether to auto allocate the indirection structure during
+ *                  the walk if `pos` does not exist.
+ * @param full_walk when set, the bTLB is not consulted and every level is
+ *                  visited, so the whole per-level stack gets recorded.
+ * @return int      0 on success, EIO when a table cannot be read, or the
+ *                  error from allocating an indirect block
+ */
+static int
+__walk_indirects(struct v_inode* inode, unsigned int pos,
+                 struct walk_state* state, bool resolve, bool full_walk)
+{
+    int errno;
+    int inds, stride, shifts, level;
+    unsigned int *slotref, index, next, mask;
+    struct ext2_inode* e_inode;
+    struct ext2b_inode* b_inode;
+    struct v_superblock* vsb;
+    bbuf_t table, next_table;
+
+    e_inode = EXT2_INO(inode);
+    b_inode = e_inode->ino;
+    vsb = inode->sb;
+    level = 0;
+    resolve = resolve && !EXT2_SB(vsb)->read_only;
+
+    /* the first 12 positions are direct slots inside the inode itself */
+    if (pos < 12) {
+        index = pos;
+        slotref = &b_inode->i_block_arr[pos];
+        table = fsblock_take(e_inode->buf);
+        inds = 0;
+        goto _return;
+    }
+
+    /* pick the number of indirections needed to reach `pos` */
+    pos -= 12;
+    stride = e_inode->inds_lgents;
+    if (!(pos >> stride)) {
+        inds = 1;
+    }
+    else if (!(pos >> (stride * 2))) {
+        inds = 2;
+    }
+    else if (!(pos >> (stride * 3))) {
+        inds = 3;
+    }
+    else {
+        fail("unrealistic block pos");
+    }
+
+    // bTLB cache the last level indirect block
+    if (!full_walk && (table = __btlb_hit(e_inode, pos))) {
+        level = inds;
+        index = pos & ((1 << stride) - 1);
+        slotref = &block_buffer(table, u32_t)[index];
+        goto _return;
+    }
+
+    /* `mask`/`shifts` select the index bits consumed at the current level */
+    shifts = stride * (inds - 1);
+    mask = ((1 << stride) - 1) << shifts;
+
+    /* start from the inode slot that roots the chosen indirection chain */
+    index   = 12 + inds - 1;
+    slotref = &b_inode->i_block.inds[inds - 1];
+    table   = fsblock_take(e_inode->buf);
+
+    for (; level < inds; level++)
+    {
+        __walkstate_set_stack(state, level, table, index);
+
+        next = *slotref;
+        if (!next) {
+            if (!resolve) {
+                goto _return;
+            }
+
+            if ((errno = ext2db_alloc(inode, &next_table))) {
+                fsblock_put(table);
+                return errno;
+            }
+
+            /*
+             * The block comes back with whatever it last held on disk.
+             * Every slot of an indirect table is interpreted as a block
+             * id, so it must start out empty before it is chained in.
+             */
+            memset(blkbuf_data(next_table), 0, fsapi_block_size(vsb));
+            fsblock_dirty(next_table);
+
+            e_inode->indirect_blocks++;
+            *slotref = fsblock_id(next_table);
+            fsblock_dirty(table);
+        }
+        else {
+            next_table = fsblock_get(vsb, next);
+        }
+
+        fsblock_put(table);
+        table = next_table;
+
+        if (blkbuf_errbuf(table)) {
+            return EIO;
+        }
+
+        assert(shifts >= 0);
+
+        index = (pos & mask) >> shifts;
+
+        slotref = &block_buffer(table, u32_t)[index];
+
+        shifts -= stride;
+        mask  = mask >> stride;
+    }
+
+    /* remember the last-level table for subsequent nearby positions */
+    __btlb_insert(e_inode, pos, table);
+
+_return:
+    assert(blkbuf_refcounts(table) >= 1);
+    assert_fs(table);
+    assert_fs(slotref);
+
+    state->slot_ref = slotref;
+    state->table = table;
+    state->level = level;
+    state->indirections = inds;
+
+    __walkstate_set_stack(state, level, table, index);
+
+    return 0;
+}
+
+/**
+ * @brief Fetch the data block at position `data_pos` of `inode` without
+ *        allocating anything.
+ *
+ * @return the block buffer; NULL when no block is mapped at that position;
+ *         INVL_BUFFER when walking the indirection chain failed
+ */
+bbuf_t
+ext2db_get(struct v_inode* inode, unsigned int data_pos)
+{
+    struct walk_state state;
+    unsigned int blkid;
+
+    ext2walk_init_state(&state);
+
+    if (__walk_indirects(inode, data_pos, &state, false, false)) {
+        return (bbuf_t)INVL_BUFFER;
+    }
+
+    /* read the slot before the walk state (and its table) is released */
+    blkid = *state.slot_ref;
+    ext2walk_free_state(&state);
+
+    if (blkid) {
+        return fsblock_get(inode->sb, blkid);
+    }
+
+    return NULL;
+}
+
+/**
+ * @brief Get the data block at position `data_pos` of `inode`, allocating
+ *        it (and any missing indirect tables) when it does not exist yet.
+ *
+ * @param out  receives the block buffer on success
+ * @return 0 on success; the walk or allocation error; EIO when the existing
+ *         block cannot be read
+ */
+int
+ext2db_acquire(struct v_inode* inode, unsigned int data_pos, bbuf_t* out)
+{
+    struct walk_state state;
+    unsigned int block_id;
+    bbuf_t buf;
+    int errno;
+
+    ext2walk_init_state(&state);
+
+    errno = __walk_indirects(inode, data_pos, &state, true, false);
+    if (errno) {
+        return errno;
+    }
+
+    block_id = *state.slot_ref;
+    if (!block_id) {
+        /* nothing mapped yet: allocate and hook it into the slot */
+        errno = ext2db_alloc(inode, &buf);
+        if (errno) {
+            ext2walk_free_state(&state);
+            return errno;
+        }
+
+        *state.slot_ref = fsblock_id(buf);
+        fsblock_dirty(state.table);
+    }
+    else {
+        buf = fsblock_get(inode->sb, block_id);
+    }
+
+    ext2walk_free_state(&state);
+
+    if (blkbuf_errbuf(buf)) {
+        return EIO;
+    }
+
+    *out = buf;
+    return 0;
+}
+
+/**
+ * @brief Allocate one data block for `inode` and return its buffer.
+ *
+ * The inode's own block group is tried first; when it has no free block the
+ * whole file system is searched. The block contents are returned as read
+ * from disk; callers that need a clean block must clear it themselves.
+ *
+ * @param out  receives the block buffer on success
+ * @return 0 on success, EDQUOT when no block is free, EIO when the block
+ *         cannot be read (the bitmap slot is released again)
+ */
+int
+ext2db_alloc(struct v_inode* inode, bbuf_t* out)
+{
+    int slot, blk_id;
+    struct ext2_gdesc* gd;
+    struct v_superblock* vsb;
+    bbuf_t buf;
+
+    vsb = inode->sb;
+    gd  = EXT2_INO(inode)->blk_grp;
+
+    slot = ext2gd_alloc_block(gd);
+
+    // locality alloc failed, try entire fs
+    if (!valid_bmp_slot(slot)) {
+        slot = ext2db_alloc_slot(vsb, &gd);
+    }
+
+    if (!valid_bmp_slot(slot)) {
+        return EDQUOT;
+    }
+
+    /* turn the group-relative slot into the id used to fetch the block */
+    blk_id = slot + gd->base;
+    blk_id = ext2_datablock(vsb, blk_id);
+    blk_id = to_ext2ino_id(blk_id);
+
+    buf = fsblock_get(vsb, blk_id);
+    if (blkbuf_errbuf(buf)) {
+        /* do not leave the slot marked as used for a block nobody owns */
+        ext2gd_free_block(gd, slot);
+        return EIO;
+    }
+
+    *out = buf;
+    return 0;
+}
+
+/*
+ * Release block `block_pos` in the bitmap of the inode's own block group.
+ * The position must not lie below the base of that group.
+ *
+ * NOTE(review): the block is assumed to belong to the inode's group, while
+ * ext2db_alloc() may fall back to other groups -- confirm callers.
+ */
+void
+ext2db_free_pos(struct v_inode* inode, unsigned int block_pos)
+{
+    struct ext2_gdesc* gd = EXT2_INO(inode)->blk_grp;
+
+    assert(block_pos >= gd->base);
+
+    ext2gd_free_block(gd, block_pos - gd->base);
+}
+
+/*
+ * Free the data block behind `buf` and drop the caller's reference on the
+ * buffer. The buffer must not be shared. Always returns 0.
+ */
+int
+ext2db_free(struct v_inode* inode, bbuf_t buf)
+{
+    unsigned int blk;
+
+    assert(blkbuf_not_shared(buf));
+
+    blk = blkbuf_id(buf);
+    ext2db_free_pos(inode, blk);
+
+    fsblock_put(buf);
+
+    return 0;
+}
+
+/**
+ * @brief Change the recorded size of `inode` to `new_size`.
+ *
+ * The size is updated in memory and in the raw inode, and the inode buffer
+ * is marked dirty. Growing, and any resize of a symlink, stops there.
+ * Shrinking additionally frees every block that lies entirely beyond the
+ * new end of file.
+ *
+ * @return 0 on success, otherwise the error from walking the indirection
+ *         chain or from freeing blocks
+ */
+int
+ext2ino_resizing(struct v_inode* inode, size_t new_size)
+{
+    int errno;
+    unsigned int pos;
+    size_t oldsize, blksz;
+    struct walk_state state;
+    struct ext2_inode* e_ino;
+
+    e_ino = EXT2_INO(inode);
+    oldsize = e_ino->isize;
+
+    if (oldsize == new_size) {
+        return 0;
+    }
+
+    __update_inode_size(inode, new_size);
+    fsblock_dirty(e_ino->buf);
+
+    if (check_symlink_node(inode)) {
+        return 0;
+    }
+
+    if (oldsize < new_size) {
+        return 0;
+    }
+
+    ext2walk_init_state(&state);
+
+    /*
+     * First block that no longer holds file content. This must round up:
+     * with a plain division the block containing the last
+     * (new_size % blksz) bytes of the file would be freed as well.
+     */
+    blksz = fsapi_block_size(inode->sb);
+    pos   = ICEIL(new_size, blksz);
+
+    errno = __walk_indirects(inode, pos, &state, false, true);
+    if (errno) {
+        return errno;
+    }
+
+    errno = __free_recurisve_from(inode->sb, e_ino, &state.stack, 0);
+
+    ext2walk_free_state(&state);
+
+    /* cached indirect tables may refer to blocks that were just freed */
+    __btlb_flushall(e_ino);
+
+    return errno;
+}
\ No newline at end of file