# The LunaixOS Project
-LunaixOS - a simple, detailed, POSIX-compliant (hopefully!) operating system with a strong personal flavour. The development process is presented as a video tutorial series on Bilibili: [《从零开始自制操作系统系列》](https://space.bilibili.com/12995787/channel/collectiondetail?sid=196337).
+LunaixOS - a simple, detailed, POSIX-compliant (hopefully!) operating system with a strong personal flavour, powered by the Lunaix kernel. The development process is presented as a video tutorial series on Bilibili: [《从零开始自制操作系统系列》](https://space.bilibili.com/12995787/channel/collectiondetail?sid=196337).
## 1. Useful Resources
-If you intend to study the kernel code of LunaixOS and its design, or wish to begin your own journey of OS development, the following resources may be useful.
+If you intend to study the Lunaix kernel code and its design, or wish to begin your own journey of OS development, the following resources may be useful.
-+ [The latest LunaixOS source code analysis tutorial](docs/tutorial/0-教程介绍和环境搭建.md)
++ [LunaixOS source code analysis tutorial](docs/tutorial/0-教程介绍和环境搭建.md)
+ Detailed layout of the kernel virtual memory
+ [x86_32](docs/img/lunaix-mem-map/lunaix-mem-x86_32.png)
+ [x86_64](docs/img/lunaix-mem-map/lunaix-mem-x86_64.png)
+ Signal mechanism
+ PCI 3.0
+ PCIe 1.1 (WIP)
-+ Block device drivers
++ Block device IO and drivers
+ + Generic block IO cache pool
+ Serial ATA AHCI
+ ATA devices
+ SCSI protocol encapsulated in ATAPI
+ ISO9660
+ ECMA-119
+ IEEE P1282 (Rock Ridge extension)
+ + ext2
+ + Revision 0
+ + Revision 1 (optional features unsupported)
+ Remote GDB debugging over serial (COM1@9600Bd)
+ Loading and execution of user programs
+ Generic device abstraction layer
+ Reference: `lunaix-os/hal/term`
+ Threading model
+ User thread support (the pthread family)
- + Kernel thread support (preemptive kernel design)
+ + Kernel thread support
++ Preemptive kernel design
+ + Kernel-space context switching
+ + Automatic detection of stalls/deadlocks in kernel space
## 3. Directory Structure
### 4.1 Using the GNU CC Toolchain
-Like most OSes, LunaixOS is a product of mixing C and assembly. This means you will need a standard C compiler to build Lunaix. Here, I recommend building with the GNU CC toolchain, as Lunaix is written with heavy use of GNU CC compiler attribute annotations (`__attribute__`). Should you use another toolchain, such as LLVM, I can make no guarantees.
+Like most kernels, Lunaix is a product of mixing C and assembly. This means you will need a standard C compiler to build Lunaix. Here, I recommend building with the GNU CC toolchain, as Lunaix is written with heavy use of GNU CC compiler attribute annotations (`__attribute__`). Should you use another toolchain, such as LLVM, I can make no guarantees.
If you are on an x86-based Linux system, whether 64-bit or 32-bit, **its native gcc is sufficient to compile Lunaix**. Of course, if your platform is non-x86, you can also nominate an x86_32-targeting gcc suite for cross-compilation: pass the prefix of the gcc suite via the `CX_PREFIX` variable at `make` time. As shown in the example below, we can cross-compile on any platform, such as risc-v, with a standalone gcc that targets x86_32:
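
For example, a hypothetical invocation (the actual prefix depends on how your cross gcc is named; an `i686-elf-gcc` binary implies the prefix `i686-elf-`):

```sh
make CX_PREFIX=i686-elf-
```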
Running this script requires the `ARCH=<isa>` environment variable, whose value must match the one specified at build time.
+For example:
+
+```sh
+ARCH=x86_64 ./live_debug.sh
+```
+
## 5. Running, Branches and Issues
### 5.1 Code Stability
2. `readdir(2)`
2. `readlink(2)`
2. `readlinkat(2)`
-2. `rmdir(2)`※
-2. `unlink(2)`※
-2. `unlinkat(2)`※
+2. `rmdir(2)`
+2. `unlink(2)`
+2. `unlinkat(2)`
2. `link(2)`※
-2. `fsync(2)`※
+2. `fsync(2)`
2. `dup(2)`
2. `dup2(2)`
2. `symlink(2)`
extremely unstable
"""
type(bool)
- default(False)
\ No newline at end of file
+ default(False)
+
+ @Term("Report on stalled thread")
+ def check_stall():
+ """
+        Check and report on any thread that spends too much time in the kernel.
+ """
+
+ type(bool)
+ default(True)
+
+ @Term("Max kernel time allowance")
+ def stall_timeout():
+ """
+        Set the maximum time (in seconds) spent in the kernel before the
+        thread is considered stalled.
+ """
+
+ type(int)
+ default(10)
+
+ return v(check_stall)
+
+ @Term("Max number of preemptions")
+ def stall_max_preempts():
+ """
+ Set the maximum number of preemptions that a task can take
+        before it is considered to be stuck in a loop.
+
+        Setting it to 0 disables this check.
+ """
+
+ type(int)
+ default(0)
+
+ return v(check_stall)
\ No newline at end of file
.section .bss
.align 16
lo_tmp_stack:
- .skip 256
+ .skip 1024
tmp_stack:
/*
# is required to prevent corrupting the existing stack
movl $tmp_stack, %esp
- call signal_dispatch # kernel/signal.c
+ call switch_signposting # kernel/process/switch.c
movl current_thread, %ebx
test %eax, %eax # do we have signal to handle?
tmp_store:
.skip 8
lo_tmp_stack:
- .skip 256
+ .skip 1024
tmp_stack:
# is required to prevent corrupting the existing stack
movq $tmp_stack, %rsp
- call signal_dispatch # kernel/signal.c
+ call switch_signposting # kernel/process/switch.c
movq current_thread, %rbx
test %rax, %rax # do we have signal to handle?
{
#ifdef CONFIG_ARCH_X86_64
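+    /*
+     * %1 starts at the length rounded down to the word size and doubles
+     * as the store offset, filling the buffer from the top down.
+     */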
asm volatile("1:\n"
+ "subq $8, %1\n"
"rdrand %%rax\n"
- "movq %%rax, (%0)\n"
+ "movq %%rax, (%0, %1, 1)\n"
"addq $8, %%rax\n"
- "subq $8, %1\n"
+ "testq %1, %1\n"
"jnz 1b"
::
"r"((ptr_t)data),
"r"((len & ~0x7))
:
- "%eax");
+ "rax");
#else
asm volatile("1:\n"
+ "subl $4, %1\n"
"rdrand %%eax\n"
- "movl %%eax, (%0)\n"
+ "movl %%eax, (%0, %1, 1)\n"
"addl $4, %%eax\n"
- "subl $4, %1\n"
+ "testl %1, %1\n"
"jnz 1b"
::
"r"((ptr_t)data),
extern int failsafe_stack_top[];
#ifdef CONFIG_ARCH_X86_64
asm (
- "movq %%rsp, %%rax\n"
- "movq %%rbp, %%rbx\n"
+ "movq %%rsp, %%rdi\n"
+ "movq %%rbp, %%rsi\n"
"movq %0, %%rsp\n"
-
- "pushq %%rax\n"
- "pushq %%rbx\n"
"call do_failsafe_unrecoverable\n"
::"r"(failsafe_stack_top)
__base = (base); \
if (__builtin_constant_p(__base) && is_pot(__base)) { \
__mod = n & (__base - 1); \
- n >>= ILOG2(__base); \
+ n >>= ilog2(__base); \
} else { \
asm("" : "=a"(__low), "=d"(__high) : "A"(n)); \
__upper = __high; \
pushl 12(%ebx) /* edx - #3 arg */
pushl 8(%ebx) /* ecx - #2 arg */
pushl 4(%ebx) /* ebx - #1 arg */
+ pushl (%eax)
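+    /* pushed last: the handler pointer becomes the first argument of dispatch_syscall */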
- call *(%eax)
+ call dispatch_syscall
- addl $20, %esp /* remove the parameters from stack */
+ addl $24, %esp /* remove the parameters from stack */
popl %ebx
movl %eax, (%ebx) /* save the return value */
ret
1:
-
- movq irbx(%rbx), %rdi /* rbx -> rdi #1 arg */
- movq ircx(%rbx), %rsi /* rcx -> rsi #2 arg */
- movq irdx(%rbx), %rdx /* rdx -> rdx #3 arg */
- movq irdi(%rbx), %rcx /* rdi -> rcx #4 arg */
- movq irsi(%rbx), %r8 /* rsi -> r8 #5 arg */
+ movq (%rax), %rdi
+ movq irbx(%rbx), %rsi /* rbx -> rsi #1 arg */
+ movq ircx(%rbx), %rdx /* rcx -> rdx #2 arg */
+ movq irdx(%rbx), %rcx /* rdx -> rcx #3 arg */
+ movq irdi(%rbx), %r8 /* rdi -> r8 #4 arg */
+ movq irsi(%rbx), %r9 /* rsi -> r9 #5 arg */
- call *(%rax)
+ call dispatch_syscall
movq %rax, irax(%rbx) /* save the return value */
void
trace_print_transistion_short(struct hart_state* hstate)
{
- trace_log(" trigger: iv=%d, ecause=%p",
+ trace_log(" trigger: iv=%d, ecause=%p, frame=%p",
hart_vector_stamp(hstate),
- hart_ecause(hstate));
+ hart_ecause(hstate),
+ hart_stack_frame(hstate));
}
#ifdef CONFIG_ARCH_X86_64
trace_log(" rdi=0x%016lx, rsi=0x%016lx",
rh->rdi, rh->rsi);
- trace_log(" r8=0x%016lx, r9=0x%016lx",
+ trace_log(" r08=0x%016lx, r09=0x%016lx",
rh->r8, rh->r9);
trace_log(" r10=0x%016lx, r11=0x%016lx",
rh->r10, rh->r11);
do {
assert_msg(i < HBA_MAX_PRDTE, "HBA: Too many PRDTEs");
assert_msg(pos->buf.size <= 0x400000U, "HBA: Buffer too big");
+ assert_msg(pos->buf.size, "HBA: expect a non-zero buffer size");
cmdt->entries[i++] =
(struct hba_prdte){ .data_base = vmm_v2p((ptr_t)pos->buf.buffer),
ahci_identify_device(struct hba_device* device)
{
// used to re-identify the device (e.g., in the case of hot-plugging)
+    // FIXME: use-after-free: `device` is freed before `device->port` is read
vfree(device);
return ahci_init_device(device->port);
}
if (!hba->base[HBA_RIS])
return;
- u32_t port_num = 31 - clz(hba->base[HBA_RIS]);
+ u32_t port_num = msbiti - clz(hba->base[HBA_RIS]);
struct hba_port* port = hba->ports[port_num];
struct hba_cmd_context* cmdctx = &port->cmdctx;
u32_t processed = port->regs[HBA_RPxCI] ^ cmdctx->tracked_ci;
goto done;
}
- u32_t slot = 31 - clz(processed);
+ u32_t slot = msbiti - clz(processed);
struct hba_cmd_state* cmdstate = cmdctx->issued[slot];
if (!cmdstate) {
#include <hal/term.h>
#include <lunaix/clock.h>
-#include <lunaix/sched.h>
+#include <lunaix/kpreempt.h>
#include <usr/lunaix/term.h>
min = MIN(min, (size_t)line_in->sz_hlf);
while (sz <= min && dt <= expr) {
// XXX should we hold the device lock while we are waiting?
- sched_pass();
+ yield_current();
dt = clock_systime() - t;
t += dt;
* @param val
* @return u32_t
*/
-inline u32_t
+static inline u32_t
hash_32(const u32_t val, u32_t truncate_to)
{
return (val * 0x61C88647u) >> (HASH_SIZE_BITS - truncate_to);
int
streq(const char* a, const char* b);
+int
+strneq(const char* a, const char* b, unsigned long n);
+
void
strrtrim(char* str);
--- /dev/null
+#ifndef __LUNAIX_BCACHE_H
+#define __LUNAIX_BCACHE_H
+
+#include <lunaix/ds/btrie.h>
+#include <lunaix/ds/lru.h>
+#include <lunaix/ds/spinlock.h>
+#include <lunaix/ds/llist.h>
+#include <lunaix/spike.h>
+
+/*
+    Block cache. A cache built on top of a
+    sparse array (trie tree), allowing caching of
+    any blocks that have a spatial structure
+    attached to them. The intention is to unify
+    all the existing caching constructs, as
+    well as potential future use cases.
+
+    NB. a block is not necessarily
+    equivalent to a disk sector nor a filesystem
+    logical block. a block can be anything
+    discrete.
+
+    NB2. not to be confused with the page cache
+    (pcache), which is a special case of
+    bcache.
+*/
+
+struct bcache;
+struct bcache_ops
+{
+ void (*release_on_evict)(struct bcache*, void* data);
+ void (*sync_cached)(struct bcache*, unsigned long tag, void* data);
+};
+
+struct bcache
+{
+ struct {
+ unsigned int blksz;
+ };
+
+ struct btrie root;
+ struct lru_zone* lru;
+ struct bcache_ops ops;
+ struct llist_header objs;
+ struct spinlock lock;
+}; // block cache
+
+struct bcache_node
+{
+ void* data;
+
+ unsigned long tag;
+
+ struct bcache* holder;
+ unsigned int refs;
+ struct lru_node lru_node;
+ struct llist_header objs;
+};
+
+typedef void * bcobj_t;
+typedef struct lru_zone* bcache_zone_t;
+
+static inline void*
+bcached_data(bcobj_t obj)
+{
+ return ((struct bcache_node*)obj)->data;
+}
+
+#define to_bcache_node(cobj) \
+ ((struct bcache_node*)(cobj))
+
+#define bcache_holder_embed(cobj, type, member) \
+ container_of(to_bcache_node(cobj)->holder, type, member)
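+
+/*
+    A minimal usage sketch (names are from this header; `consume` and
+    `fresh_block` are hypothetical caller-side pieces):
+
+        bcobj_t obj;
+        if (bcache_tryget(cache, tag, &obj)) {
+            consume(bcached_data(obj));
+            bcache_return(obj);
+        } else {
+            bcache_put(cache, tag, fresh_block);
+        }
+*/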
+
+/**
+ * @brief Create a block cache backed by a shared bcache zone
+ *
+ * @param cache to be initialized
+ * @param zone shared zone that backs this cache
+ * @param log_ways log2 of the way-associativity of this cache
+ * @param cap capacity of this cache, -1 for 'infinity' cache
+ * @param blk_size size of each cached object
+ * @param ops block cache operation
+ */
+void
+bcache_init_zone(struct bcache* cache, bcache_zone_t zone,
+ unsigned int log_ways, int cap,
+ unsigned int blk_size, struct bcache_ops* ops);
+
+bcache_zone_t
+bcache_create_zone(char* name);
+
+bcobj_t
+bcache_put_and_ref(struct bcache* cache, unsigned long tag, void* block);
+
+/**
+ * @brief Try to look for a hit and return a reference to the block.
+ * Note that this creates an unmanaged pointer that could end up
+ * anywhere and is unsafe to evict. One should call `bcache_return`
+ * when the reference is no longer needed.
+ *
+ * @param cache
+ * @param tag
+ * @param result receives the referenced cache object on a hit
+ * @return true on a hit, false otherwise
+ */
+bool
+bcache_tryget(struct bcache* cache, unsigned long tag, bcobj_t* result);
+
+/**
+ * @brief Unreference a cached block that was returned
+ * by `bcache_tryget` or `bcache_put_and_ref`
+ *
+ * @param obj cache object to unreference
+ */
+void
+bcache_return(bcobj_t obj);
+
+static inline void
+bcache_refonce(bcobj_t obj)
+{
+ struct bcache_node* b_node;
+ b_node = to_bcache_node(obj);
+
+ assert(b_node->refs);
+ b_node->refs++;
+}
+
+void
+bcache_promote(bcobj_t obj);
+
+void
+bcache_evict(struct bcache* cache, unsigned long tag);
+
+static inline void
+bcache_evict_one(struct bcache* cache)
+{
+ lru_evict_one(cache->lru);
+}
+
+void
+bcache_flush(struct bcache* cache);
+
+void
+bcache_free(struct bcache* cache);
+
+void
+bcache_zone_free(bcache_zone_t zone);
+
+/**
+ * @brief Create a block cache
+ *
+ * @param cache to be initialized
+ * @param name name of this cache
+ * @param log_ways ways-associative of this cache
+ * @param cap capacity of this cache, -1 for 'infinity' cache
+ * @param blk_size size of each cached object
+ * @param ops block cache operation
+ */
+static inline void
+bcache_init(struct bcache* cache, char* name, unsigned int log_ways,
+ int cap, unsigned int blk_size, struct bcache_ops* ops)
+{
+ bcache_init_zone(cache, bcache_create_zone(name),
+ log_ways, cap, blk_size, ops);
+}
+
+static inline void
+bcache_put(struct bcache* cache, unsigned long tag, void* block)
+{
+ bcache_return(bcache_put_and_ref(cache, tag, block));
+}
+
+#endif /* __LUNAIX_BCACHE_H */
--- /dev/null
+#ifndef __LUNAIX_BLKBUF_H
+#define __LUNAIX_BLKBUF_H
+
+#include <lunaix/blkio.h>
+#include <lunaix/bcache.h>
+#include <lunaix/block.h>
+#include <lunaix/ds/mutex.h>
+
+struct blkbuf_cache
+{
+ union {
+ struct {
+ unsigned int blksize;
+ };
+ struct bcache cached;
+ };
+ struct llist_header dirty;
+ struct block_dev* blkdev;
+ mutex_t lock;
+};
+
+struct blk_buf {
+ void* raw;
+ bcobj_t cobj;
+ struct llist_header dirty;
+ struct blkio_req* breq;
+};
+
+typedef void* bbuf_t;
+
+#define BLOCK_BUFFER(type, name) \
+ union { \
+ type* name; \
+ bbuf_t bb_##name; \
+ }
+
+#define INVL_BUFFER 0xdeadc0de
+
+#define bbuf_null ((bbuf_t)0)
+
+static inline bool
+blkbuf_errbuf(bbuf_t buf) {
+ return (ptr_t)buf == INVL_BUFFER;
+}
+
+static inline bool
+blkbuf_nullbuf(bbuf_t buf) {
+ return buf == bbuf_null;
+}
+
+static inline unsigned int
+blkbuf_id(bbuf_t buf)
+{
+ return to_bcache_node(((struct blk_buf*)buf)->cobj)->tag;
+}
+
+static inline unsigned int
+blkbuf_refcounts(bbuf_t buf)
+{
+ return to_bcache_node(((struct blk_buf*)buf)->cobj)->refs;
+}
+
+static inline bool
+blkbuf_not_shared(bbuf_t buf)
+{
+ return blkbuf_refcounts(buf) == 1;
+}
+
+
+struct blkbuf_cache*
+blkbuf_create(struct block_dev* blkdev, unsigned int blk_size);
+
+bbuf_t
+blkbuf_take(struct blkbuf_cache* bc, unsigned int block_id);
+
+static inline bbuf_t
+blkbuf_refonce(bbuf_t buf)
+{
+ if (likely(buf && !blkbuf_errbuf(buf))) {
+ bcache_refonce(((struct blk_buf*)buf)->cobj);
+ }
+
+ return buf;
+}
+
+static inline void*
+blkbuf_data(bbuf_t buf)
+{
+ assert(!blkbuf_errbuf(buf));
+ return ((struct blk_buf*)buf)->raw;
+}
+#define block_buffer(buf, type) \
+ ((type*)blkbuf_data(buf))
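+
+/*
+    A sketch of the intended take/put lifecycle (assuming `bc` is the
+    struct blkbuf_cache* of the device and `src`/`len` are caller data):
+
+        bbuf_t buf = blkbuf_take(bc, block_id);
+        if (!blkbuf_nullbuf(buf) && !blkbuf_errbuf(buf)) {
+            memcpy(blkbuf_data(buf), src, len);  // mutate the cached block
+            blkbuf_dirty(buf);                   // queue it for sync
+            blkbuf_put(buf);                     // drop our reference
+        }
+*/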
+
+void
+blkbuf_dirty(bbuf_t buf);
+
+void
+blkbuf_schedule_sync(bbuf_t buf);
+
+void
+blkbuf_release(struct blkbuf_cache* bc);
+
+void
+blkbuf_put(bbuf_t buf);
+
+bool
+blkbuf_syncall(struct blkbuf_cache* bc, bool async);
+
+#endif /* __LUNAIX_BLKBUF_H */
#ifndef __LUNAIX_BLKIO_H
#define __LUNAIX_BLKIO_H
#include <lunaix/buffer.h>
#include <lunaix/ds/llist.h>
#include <lunaix/ds/waitq.h>
+#include <lunaix/ds/mutex.h>
#include <lunaix/types.h>
#define BLKIO_WRITE 0x1
#define BLKIO_BUSY 0x4
#define BLKIO_PENDING 0x8
+// Free on complete
+#define BLKIO_FOC 0x10
+#define BLKIO_SHOULD_WAIT 0x20
#define BLKIO_WAIT 0x1
+#define BLKIO_NOWAIT 0
#define BLKIO_NOASYNC 0x2
-// Free on complete
-#define BLKIO_FOC 0x10
-
#define BLKIO_SCHED_IDEL 0x1
struct blkio_req;
struct blkio_context
{
struct llist_header queue;
+
struct
{
u32_t seektime;
u32_t rotdelay;
} metrics;
+
req_handler handle_one;
u32_t state;
u32_t busy;
void* driver;
+
+ mutex_t lock;
};
+static inline void
+blkio_lock(struct blkio_context* context)
+{
+    mutex_lock(&context->lock);
+}
+
+static inline void
+blkio_unlock(struct blkio_context* context)
+{
+    mutex_unlock(&context->lock);
+}
+
+static inline bool
+blkio_stalled(struct blkio_context* context)
+{
+    return !context->busy;
+}
+
void
blkio_init();
void* evt_args,
u32_t options);
+/**
+ * @brief Vectorized request (no write/read preference)
+ *
+ * @param vbuf
+ * @param start_lba
+ * @param completed
+ * @param evt_args
+ * @param options
+ * @return struct blkio_req*
+ */
+static inline struct blkio_req*
+blkio_vreq(struct vecbuf* buffer,
+ u64_t start_lba,
+ blkio_cb completed,
+ void* evt_args,
+ u32_t options) {
+ /*
+ This is currently aliased to blkio_vrd. Although `no preference`
+        does essentially mean `default read`, blkio_vreq is just used
+        to enhance readability.
+ */
+ return blkio_vrd(buffer, start_lba, completed, evt_args, options);
+}
+
+
+/**
+ * @brief Bind a block IO context to request
+ *
+ * @param ctx
+ * @param req
+ */
+static inline void
+blkio_bindctx(struct blkio_req* req, struct blkio_context* ctx)
+{
+ req->io_ctx = ctx;
+}
+
+/**
+ * @brief Set block IO request to read
+ *
+ * @param ctx
+ * @param req
+ */
+static inline void
+blkio_setread(struct blkio_req* req)
+{
+ if ((req->flags & BLKIO_PENDING)) {
+ return;
+ }
+
+ req->flags &= ~BLKIO_WRITE;
+}
+
+/**
+ * @brief Set block IO request to write
+ *
+ * @param ctx
+ * @param req
+ */
+static inline void
+blkio_setwrite(struct blkio_req* req)
+{
+ if ((req->flags & BLKIO_PENDING)) {
+ return;
+ }
+
+ req->flags |= BLKIO_WRITE;
+}
+
+/**
+ * @brief Set callback when request complete
+ *
+ * @param req
+ * @param on_completed
+ */
+static inline void
+blkio_when_completed(struct blkio_req* req, blkio_cb on_completed)
+{
+ req->completed = on_completed;
+}
+
+static inline bool
+blkio_is_pending(struct blkio_req* req)
+{
+ return (req->flags & BLKIO_PENDING);
+}
+
+/**
+ * @brief Mark request to be freed-on-completion (FOC)
+ *
+ * @param req
+ */
+static inline void
+blkio_mark_foc(struct blkio_req* req)
+{
+ req->flags |= BLKIO_FOC;
+}
+
+/**
+ * @brief Mark request to be not-freed-on-completion (nFOC)
+ *
+ * @param req
+ */
+static inline void
+blkio_mark_nfoc(struct blkio_req* req)
+{
+ req->flags &= ~BLKIO_FOC;
+}
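+
+/*
+    A typical fire-and-forget write, as a sketch built from the helpers
+    above (`vbuf`, `lba` and `ctx` are assumed to be prepared by the caller):
+
+        struct blkio_req* req = blkio_vreq(vbuf, lba, NULL, NULL, 0);
+        blkio_bindctx(req, ctx);
+        blkio_setwrite(req);
+        blkio_mark_foc(req);              // request frees itself on completion
+        blkio_commit(req, BLKIO_NOWAIT);
+*/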
+
+int
+blkio_read_aligned(struct blkio_context* ctx,
+ unsigned long lba, void* block, size_t n_blk);
+
+int
+blkio_read(struct blkio_context* ctx,
+ unsigned long offset, void* block, size_t len);
+
+int
+blkio_write_aligned(struct blkio_context* ctx,
+ unsigned long lba, void* block, size_t n_blk);
+
+int
+blkio_write(struct blkio_context* ctx,
+ unsigned long offset, void* block, size_t len);
+
void
blkio_free_req(struct blkio_req* req);
* @param req
*/
void
-blkio_commit(struct blkio_context* ctx, struct blkio_req* req, int options);
+blkio_commit(struct blkio_req* req, int options);
+
/**
* @brief Schedule an IO request to be handled.
typedef u32_t bdev_t;
typedef void (*devfs_exporter)(struct block_dev* bdev, void* fsnode);
+static inline struct block_dev*
+block_dev(struct device* dev)
+{
+ return (struct block_dev*)dev->underlay;
+}
+
void
block_init();
#define defualt weak
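+/* bit index of the most significant bit of an int (31 for a 32-bit int) */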
+#define msbiti (sizeof(int) * 8 - 1)
#define clz(bits) __builtin_clz(bits)
-#define sadd_overflow(a, b, of) __builtin_sadd_overflow(a, b, of)
-#define umul_overflow(a, b, of) __builtin_umul_overflow(a, b, of)
+
+#ifdef CONFIG_ARCH_BITS_64
+#define msbitl (sizeof(long) * 8 - 1)
+#define clzl(bits) __builtin_clzl(bits)
+#else
+#define msbitl msbiti
+#define clzl(bits) clz(bits)
+#endif
+
+#define sadd_of(a, b, of) __builtin_sadd_overflow(a, b, of)
+#define saddl_of(a, b, of) __builtin_saddl_overflow(a, b, of)
+#define umul_of(a, b, of) __builtin_umul_overflow(a, b, of)
+#define umull_of(a, b, of) __builtin_umull_overflow(a, b, of)
#define offsetof(f, m) __builtin_offsetof(f, m)
#define prefetch_rd(ptr, ll) __builtin_prefetch((ptr), 0, ll)
struct btrie
{
struct btrie_node* btrie_root;
- int truncated;
+ unsigned int order;
};
struct btrie_node
struct llist_header siblings;
struct llist_header nodes;
struct btrie_node* parent;
- u32_t index;
+ unsigned long index;
void* data;
};
void
-btrie_init(struct btrie* btrie, u32_t trunc_bits);
+btrie_init(struct btrie* btrie, unsigned int order);
void*
-btrie_get(struct btrie* root, u32_t index);
+btrie_get(struct btrie* root, unsigned long index);
void
-btrie_set(struct btrie* root, u32_t index, void* data);
+btrie_set(struct btrie* root, unsigned long index, void* data);
void*
-btrie_remove(struct btrie* root, u32_t index);
+btrie_remove(struct btrie* root, unsigned long index);
void
btrie_release(struct btrie* tree);
#define HSTR_EQ(str1, str2) ((str1)->hash == (str2)->hash)
+#define HSTR_VAL(hstr) ((hstr).value)
+#define HSTR_LEN(hstr) ((hstr).len)
+#define HSTR_HASH(hstr) ((hstr).hash)
+
inline void
hstr_rehash(struct hstr* hash_str, u32_t truncate_to)
{
#define __LUNAIX_LRU_H
#include <lunaix/ds/llist.h>
+#include <lunaix/ds/spinlock.h>
#include <lunaix/types.h>
struct lru_node
{
struct llist_header lead_node;
struct llist_header zones;
- u32_t objects;
+ char name[32];
evict_cb try_evict;
+ spinlock_t lock;
+
+ unsigned int objects;
+ unsigned int hotness;
+ struct {
+ unsigned int n_single;
+ unsigned int n_half;
+ unsigned int n_full;
+ } evict_stats;
+
+ union {
+ struct {
+ bool delayed_free:1;
+ unsigned char attempts;
+ };
+ unsigned int flags;
+ };
};
struct lru_zone*
-lru_new_zone(evict_cb try_evict_cb);
+lru_new_zone(const char* name, evict_cb try_evict_cb);
void
lru_use_one(struct lru_zone* zone, struct lru_node* node);
void
lru_evict_half(struct lru_zone* zone);
+void
+lru_evict_all(struct lru_zone* zone);
+
+void
+lru_free_zone(struct lru_zone* zone);
+
#endif /* __LUNAIX_LRU_H */
void
mutex_unlock(mutex_t* mutex);
+void
+mutex_lock_nested(mutex_t* mutex);
+
+void
+mutex_unlock_nested(mutex_t* mutex);
+
void
mutex_unlock_for(mutex_t* mutex, pid_t pid);
--- /dev/null
+#ifndef __LUNAIX_SPIN_H
+#define __LUNAIX_SPIN_H
+
+#include <lunaix/types.h>
+
+struct spinlock
+{
+ volatile bool flag;
+};
+
+typedef struct spinlock spinlock_t;
+
+/*
+    TODO we might use our own construct for atomic ops.
+    But we will do it later; currently this whole
+    kernel is on a single long thread of fate, so
+    there won't be any hardware concurrent access
+    happening here.
+*/
+
+static inline void
+spinlock_init(spinlock_t* lock)
+{
+ lock->flag = false;
+}
+
+static inline bool spinlock_try_acquire(spinlock_t* lock)
+{
+ if (lock->flag){
+ return false;
+ }
+
+ return (lock->flag = true);
+}
+
+static inline void spinlock_acquire(spinlock_t* lock)
+{
+ while (lock->flag);
+ lock->flag = true;
+}
+
+static inline void spinlock_release(spinlock_t* lock)
+{
+ lock->flag = false;
+}
+
+#define DEFINE_SPINLOCK_OPS(type, lock_accessor) \
+ static inline void lock(type obj) { spinlock_acquire(&obj->lock_accessor); } \
+ static inline void unlock(type obj) { spinlock_release(&obj->lock_accessor); }
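+
+/*
+    Example (a sketch): generate lock()/unlock() helpers for a struct
+    that embeds a spinlock member named `lock`:
+
+        DEFINE_SPINLOCK_OPS(struct bcache*, lock)
+*/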
+
+#endif /* __LUNAIX_SPIN_H */
llist_delete(&waitq->waiters);
}
+void
+prepare_to_wait(waitq_t* waitq);
+
+void
+try_wait();
+
+void
+try_wait_check_stall();
+
void
pwait(waitq_t* queue);
+void
+pwait_check_stall(waitq_t* queue);
+
void
pwake_one(waitq_t* queue);
#include <lunaix/ds/lru.h>
#include <lunaix/ds/mutex.h>
#include <lunaix/status.h>
+#include <lunaix/spike.h>
+#include <lunaix/bcache.h>
#include <stdatomic.h>
#define VFS_NAME_MAXLEN 128
#define VFS_MAX_FD 32
-#define VFS_IFDIR F_DIR
-#define VFS_IFFILE F_FILE
-#define VFS_IFDEV (F_DEV | F_FILE)
-#define VFS_IFSEQDEV (F_SEQDEV | F_FILE)
-#define VFS_IFVOLDEV (F_VOLDEV | F_FILE)
-#define VFS_IFSYMLINK (F_SYMLINK | F_FILE)
-
-#define VFS_DEVFILE(type) ((type) & F_DEV)
-#define VFS_DEVTYPE(type) ((type) & ((F_SEQDEV | F_VOLDEV) ^ F_DEV))
+#define VFS_IFFILE F_FILE
+#define VFS_IFDIR (F_FILE | F_DIR )
+#define VFS_IFDEV (F_FILE | F_DEV )
+#define VFS_IFSYMLINK (F_FILE | F_SYMLINK)
+#define VFS_IFVOLDEV (F_FILE | F_SVDEV )
+#define VFS_IFSEQDEV VFS_IFDEV
// Walk, mkdir if component encountered is non-exists.
#define VFS_WALK_MKPARENT 0x1
#define VFS_PATH_DELIM '/'
-#define FSTYPE_ROFS 0x1
+#define FSTYPE_ROFS 0b00000001
+#define FSTYPE_PSEUDO 0b00000010
#define TEST_FD(fd) (fd >= 0 && fd < VFS_MAX_FD)
lru_use_one(dnode_lru, &dnode->lru); \
})
+#define assert_fs(cond) assert_p(cond, "FS")
+#define fail_fs(msg) fail_p(msg, "FS")
+
typedef u32_t inode_t;
struct v_dnode;
extern struct hstr vfs_dot;
extern struct v_dnode* vfs_sysroot;
+typedef int (*mntops_mnt)(struct v_superblock* vsb, struct v_dnode* mount_point);
+typedef int (*mntops_umnt)(struct v_superblock* vsb);
+
struct filesystem
{
struct llist_header fs_flat;
struct hstr fs_name;
u32_t types;
int fs_id;
- int (*mount)(struct v_superblock* vsb, struct v_dnode* mount_point);
- int (*unmount)(struct v_superblock* vsb);
+ mntops_mnt mount;
+ mntops_umnt unmount;
};
struct v_superblock
struct device* dev;
struct v_dnode* root;
struct filesystem* fs;
+ struct blkbuf_cache* blks;
struct hbucket* i_cache;
void* data;
+ unsigned int ref_count;
size_t blksize;
struct
{
- u32_t (*read_capacity)(struct v_superblock* vsb);
- u32_t (*read_usage)(struct v_superblock* vsb);
+ size_t (*read_capacity)(struct v_superblock* vsb);
+ size_t (*read_usage)(struct v_superblock* vsb);
void (*init_inode)(struct v_superblock* vsb, struct v_inode* inode);
+ void (*release)(struct v_superblock* vsb);
} ops;
};
struct dir_context
{
- int index;
void* cb_data;
void (*read_complete_callback)(struct dir_context* dctx,
const char* name,
int (*read_page)(struct v_inode* inode, void* pg, size_t fpos);
int (*readdir)(struct v_file* file, struct dir_context* dctx);
- int (*seek)(struct v_inode* inode, size_t offset); // optional
+ int (*seek)(struct v_file* file, size_t offset);
int (*close)(struct v_file* file);
int (*sync)(struct v_file* file);
};
struct v_inode_ops
{
- int (*create)(struct v_inode* this, struct v_dnode* dnode);
+ int (*create)(struct v_inode* this, struct v_dnode* dnode,
+ unsigned int itype);
+
int (*open)(struct v_inode* this, struct v_file* file);
int (*sync)(struct v_inode* this);
+
int (*mkdir)(struct v_inode* this, struct v_dnode* dnode);
- int (*rmdir)(struct v_inode* this, struct v_dnode* dir);
- int (*unlink)(struct v_inode* this);
+ int (*rmdir)(struct v_inode* this, struct v_dnode* dnode);
+ int (*unlink)(struct v_inode* this, struct v_dnode* name);
int (*link)(struct v_inode* this, struct v_dnode* new_name);
+
int (*read_symlink)(struct v_inode* this, const char** path_out);
int (*set_symlink)(struct v_inode* this, const char* target);
+
int (*dir_lookup)(struct v_inode* this, struct v_dnode* dnode);
+
int (*rename)(struct v_inode* from_inode,
struct v_dnode* from_dnode,
struct v_dnode* to_dnode);
+
int (*getxattr)(struct v_inode* this,
struct v_xattr_entry* entry); // optional
int (*setxattr)(struct v_inode* this,
struct llist_header* f_list;
u32_t f_pos;
atomic_ulong ref_count;
+ void* data;
struct v_file_ops* ops; // for caching
};
atomic_ulong ref_count;
void* data;
+
+ void (*destruct)(struct v_dnode* dnode);
};
struct v_fdtable
struct pcache
{
struct v_inode* master;
- struct btrie tree;
- struct llist_header pages;
+ struct bcache cache;
struct llist_header dirty;
u32_t n_dirty;
u32_t n_pages;
struct pcache_pg
{
- struct llist_header pg_list;
struct llist_header dirty_list;
- struct lru_node lru;
- struct pcache* holder;
- void* pg;
- u32_t flags;
- u32_t fpos;
- u32_t len;
+
+ union {
+ struct {
+ bool dirty:1;
+ };
+ u32_t flags;
+ };
+
+ void* data;
+ unsigned int index;
};
static inline bool
void
vfs_sb_free(struct v_superblock* sb);
+void
+vfs_sb_ref(struct v_superblock* sb);
+
+#define vfs_assign_sb(sb_accessor, sb) \
+ ({ \
+ if (sb_accessor) { \
+ vfs_sb_free(sb_accessor); \
+ } \
+ vfs_sb_ref(((sb_accessor) = (sb))); \
+ })
+
+static inline void
+vfs_i_assign_sb(struct v_inode* inode, struct v_superblock* sb)
+{
+ vfs_assign_sb(inode->sb, sb);
+}
+
+static inline void
+vfs_d_assign_sb(struct v_dnode* dnode, struct v_superblock* sb)
+{
+ vfs_assign_sb(dnode->super_block, sb);
+}
+
+static inline void
+vfs_vmnt_assign_sb(struct v_mount* vmnt, struct v_superblock* sb)
+{
+ vfs_assign_sb(vmnt->super_block, sb);
+}
+
struct v_dnode*
vfs_d_alloc();
void
pcache_init(struct pcache* pcache);
-void
-pcache_release_page(struct pcache* pcache, struct pcache_pg* page);
-
-struct pcache_pg*
-pcache_new_page(struct pcache* pcache, u32_t index);
-
-void
-pcache_set_dirty(struct pcache* pcache, struct pcache_pg* pg);
-
-int
-pcache_get_page(struct pcache* pcache,
- u32_t index,
- u32_t* offset,
- struct pcache_pg** page);
-
int
pcache_write(struct v_inode* inode, void* data, u32_t len, u32_t fpos);
default_file_close(struct v_file* file);
int
-default_file_seek(struct v_inode* inode, size_t offset);
+default_file_seek(struct v_file* file, size_t offset);
int
default_inode_open(struct v_inode* this, struct v_file* file);
void
xattr_addcache(struct v_inode* inode, struct v_xattr_entry* xattr);
+
+/* --- misc stuff --- */
+
+#define check_itype(to_check, itype) \
+ (((to_check) & (itype)) == (itype))
+
+/**
+ * @brief Check if node represent a regular file (nothing but a file)
+ *
+ * @param inode
+ * @return true
+ * @return false
+ */
+static inline bool
+check_regfile_node(struct v_inode* inode)
+{
+ return inode->itype == VFS_IFFILE;
+}
+
+/**
+ * @brief Check if node represent a file.
+ * This is basically everything within file system (dir, dev, etc.)
+ *
+ * @param inode
+ * @return true
+ * @return false
+ */
+static inline bool
+check_file_node(struct v_inode* inode)
+{
+ return check_itype(inode->itype, VFS_IFFILE);
+}
+
+static inline bool
+check_directory_node(struct v_inode* inode)
+{
+ return check_itype(inode->itype, VFS_IFDIR);
+}
+
+static inline bool
+check_device_node(struct v_inode* inode)
+{
+ return check_itype(inode->itype, VFS_IFDEV);
+}
+
+static inline bool
+check_seqdev_node(struct v_inode* inode)
+{
+ return check_device_node(inode);
+}
+
+static inline bool
+check_voldev_node(struct v_inode* inode)
+{
+ return check_itype(inode->itype, VFS_IFVOLDEV);
+}
+
+static inline bool
+check_symlink_node(struct v_inode* inode)
+{
+ return check_itype(inode->itype, VFS_IFSYMLINK);
+}
+
#endif /* __LUNAIX_VFS_H */
--- /dev/null
+#ifndef __LUNAIX_FSAPI_H
+#define __LUNAIX_FSAPI_H
+
+#include <lunaix/fs.h>
+#include <lunaix/fcntl_defs.h>
+#include <lunaix/blkbuf.h>
+#include <klibc/string.h>
+
+#include <usr/lunaix/dirent_defs.h>
+
+struct fsapi_vsb_ops
+{
+ size_t (*read_capacity)(struct v_superblock* vsb);
+ size_t (*read_usage)(struct v_superblock* vsb);
+ void (*init_inode)(struct v_superblock* vsb, struct v_inode* inode);
+ void (*release)(struct v_superblock* vsb);
+};
+
+static inline struct device*
+fsapi_blockdev(struct v_superblock* vsb)
+{
+ if (!(vsb->fs->types & FSTYPE_PSEUDO)) {
+ assert_fs(vsb->dev);
+ }
+
+ return vsb->dev;
+}
+
+typedef void (*inode_init)(struct v_superblock* vsb, struct v_inode* inode);
+typedef void (*inode_free)(struct v_inode* inode);
+typedef void (*dnode_free)(struct v_dnode* dnode);
+
+static inline void
+fsapi_set_inode_initiator(struct v_superblock* vsb, inode_init inode_initiator)
+{
+ vsb->ops.init_inode = inode_initiator;
+}
+
+static inline size_t
+fsapi_block_size(struct v_superblock* vsb)
+{
+ return vsb->blksize;
+}
+
+static inline void
+fsapi_set_vsb_ops(struct v_superblock* vsb, struct fsapi_vsb_ops* basic_ops)
+{
+ vsb->ops.read_capacity = basic_ops->read_capacity;
+ vsb->ops.read_usage = basic_ops->read_usage;
+ vsb->ops.release = basic_ops->release;
+ vsb->ops.init_inode = basic_ops->init_inode;
+}
+
+static inline void
+fsapi_complete_vsb_setup(struct v_superblock* vsb, void* cfs_sb)
+{
+ assert_fs(vsb->ops.init_inode);
+ assert_fs(vsb->ops.read_capacity);
+ assert_fs(vsb->blksize);
+ assert_fs(vsb->blks);
+
+ vsb->data = cfs_sb;
+}
+
+static inline void
+fsapi_begin_vsb_setup(struct v_superblock* vsb, size_t blksz)
+{
+ assert(!vsb->blks);
+ assert(blksz);
+
+ vsb->blksize = blksz;
+ vsb->blks = blkbuf_create(block_dev(vsb->dev), blksz);
+}
+
+static inline void
+fsapi_reset_vsb(struct v_superblock* vsb)
+{
+ assert(vsb->blks);
+ blkbuf_release(vsb->blks);
+
+ vsb->blks = NULL;
+ vsb->data = NULL;
+ vsb->blksize = 0;
+ vsb->root->mnt->flags = 0;
+ memset(&vsb->ops, 0, sizeof(vsb->ops));
+}
+
+static inline bool
+fsapi_readonly_mount(struct v_superblock* vsb)
+{
+ return (vsb->root->mnt->flags & MNT_RO);
+}
+
+static inline void
+fsapi_set_readonly_mount(struct v_superblock* vsb)
+{
+ vsb->root->mnt->flags |= MNT_RO;
+}
+
+#define fsapi_impl_data(vfs_obj, type) ((type*)((vfs_obj)->data))
+
+static inline void
+fsapi_inode_setid(struct v_inode* inode,
+ inode_t i_id, unsigned int blk_addr)
+{
+ inode->id = i_id;
+ inode->lb_addr = blk_addr;
+}
+
+static inline void
+fsapi_inode_settype(struct v_inode* inode, unsigned int type)
+{
+ inode->itype = type;
+}
+
+static inline void
+fsapi_inode_setsize(struct v_inode* inode, unsigned int fsize)
+{
+ inode->lb_usage = ICEIL(fsize, inode->sb->blksize);
+ inode->fsize = fsize;
+}
+
+static inline void
+fsapi_inode_setops(struct v_inode* inode,
+ struct v_inode_ops* ops)
+{
+ inode->ops = ops;
+}
+
+static inline void
+fsapi_inode_setfops(struct v_inode* inode,
+ struct v_file_ops* fops)
+{
+ inode->default_fops = fops;
+}
+
+static inline void
+fsapi_inode_setdector(struct v_inode* inode,
+ inode_free free_cb)
+{
+ inode->destruct = free_cb;
+}
+
+static inline void
+fsapi_inode_complete(struct v_inode* inode, void* data)
+{
+ assert_fs(inode->ops);
+ assert_fs(inode->default_fops);
+
+ inode->data = data;
+}
+
+static inline void
+fsapi_inode_settime(struct v_inode* inode,
+ time_t ctime, time_t mtime, time_t atime)
+{
+ inode->ctime = ctime;
+ inode->mtime = mtime;
+ inode->atime = atime;
+}
+
+static inline void
+fsapi_dnode_setdector(struct v_dnode* dnode,
+ dnode_free free_cb)
+{
+ dnode->destruct = free_cb;
+}
+
+static inline struct v_inode*
+fsapi_dnode_parent(struct v_dnode* dnode)
+{
+ assert(dnode->parent);
+ return dnode->parent->inode;
+}
+
+static inline void
+fsapi_dir_report(struct dir_context *dctx,
+ const char *name, const int len, const int dtype)
+{
+ dctx->read_complete_callback(dctx, name, len, dtype);
+}
+
+/**
+ * @brief Get a block of the file-system-defined block size
+ * from the underlying storage medium at the given block id
+ * (block address). Depending on the device attributes,
+ * it may or may not go through the block cache layer.
+ *
+ * @param vsb super-block
+ * @param block_id block address
+ * @return bbuf_t
+ */
+static inline bbuf_t
+fsblock_get(struct v_superblock* vsb, unsigned int block_id)
+{
+ return blkbuf_take(vsb->blks, block_id);
+}
+
+/**
+ * @brief Put the block back into the cache; must be paired with
+ * fsblock_get, otherwise memory will leak.
+ *
+ * @param blkbuf
+ */
+static inline void
+fsblock_put(bbuf_t blkbuf)
+{
+ return blkbuf_put(blkbuf);
+}
+
+
+static inline bbuf_t
+fsblock_take(bbuf_t blk)
+{
+ return blkbuf_refonce(blk);
+}
+
+static inline unsigned int
+fsblock_id(bbuf_t blkbuf)
+{
+ return blkbuf_id(blkbuf);
+}
+
+/**
+ * @brief Mark the block dirty and require scheduling a device
+ * write request to sync it with the underlying medium. Lunaix
+ * will do the scheduling when it sees fit.
+ *
+ * @param blkbuf
+ */
+static inline void
+fsblock_dirty(bbuf_t blkbuf)
+{
+ return blkbuf_dirty(blkbuf);
+}
+
+/**
+ * @brief Manually trigger a sync cycle, regardless of the
+ * dirty property.
+ *
+ * @param blkbuf
+ */
+static inline void
+fsblock_sync(bbuf_t blkbuf)
+{
+    /*
+        XXX delay the sync for better write aggregation:
+        a scheduled sync event may happen immediately (i.e., when the blkio
+        queue is empty or nearly empty), so any subsequent write to the same
+        blkbuf must schedule another write, which could thrash the disk IO
+        under intensive workloads.
+    */
+ return blkbuf_schedule_sync(blkbuf);
+}
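+
+/*
+    A read-modify-write of a single fs block, as a sketch (`modify` is a
+    hypothetical in-place update):
+
+        bbuf_t b = fsblock_get(vsb, blk_id);
+        if (!blkbuf_errbuf(b)) {
+            modify(blkbuf_data(b));
+            fsblock_dirty(b);
+            fsblock_put(b);
+        }
+*/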
+
+static inline bool
+fsapi_handle_pseudo_dirent(struct v_file* file, struct dir_context* dctx)
+{
+ if (file->f_pos == 0) {
+ fsapi_dir_report(dctx, ".", 1, vfs_get_dtype(VFS_IFDIR));
+ return true;
+ }
+
+ if (file->f_pos == 1) {
+ fsapi_dir_report(dctx, "..", 2, vfs_get_dtype(VFS_IFDIR));
+ return true;
+ }
+
+ return false;
+}
+
+static inline struct filesystem*
+fsapi_fs_declare(const char* name, unsigned int type)
+{
+ struct filesystem* fs;
+
+ fs = fsm_new_fs(name, -1);
+ assert_fs(fs);
+
+ fs->types = type;
+ return fs;
+}
+
+static inline void
+fsapi_fs_set_mntops(struct filesystem* fs,
+ mntops_mnt mnt, mntops_umnt umnt)
+{
+ fs->mount = mnt;
+ fs->unmount = umnt;
+}
+
+static inline void
+fsapi_fs_finalise(struct filesystem* fs)
+{
+ assert_fs(fs->mount);
+ assert_fs(fs->unmount);
+ fsm_register(fs);
+}
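+
+/*
+    Putting the three helpers above together, a registration sketch
+    (myfs_mount and myfs_umnt are hypothetical mount operations):
+
+        struct filesystem* fs;
+        fs = fsapi_fs_declare("myfs", FSTYPE_ROFS);
+        fsapi_fs_set_mntops(fs, myfs_mount, myfs_umnt);
+        fsapi_fs_finalise(fs);
+*/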
+
+#endif /* __LUNAIX_FSAPI_H */
struct ram_inode
{
u32_t flags;
+ size_t size;
char* symlink;
};
#define RAM_INODE(data) ((struct ram_inode*)(data))
-void
-ramfs_init();
-
#endif /* __LUNAIX_RAMFS_H */
#ifndef __LUNAIX_TASKFS_H
#define __LUNAIX_TASKFS_H
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
#include <lunaix/fs/twimap.h>
struct task_attribute
#define __LUNAIX_KPREEMPT_H
#include <sys/abi.h>
+#include <sys/cpu.h>
+#include <lunaix/process.h>
#define _preemptible \
__attribute__((section(".kf.preempt"))) no_inline
"caller must be kernel preemptible"); \
} while(0)
+static inline void
+set_preemption()
+{
+ cpu_enable_interrupt();
+}
+
+static inline void
+no_preemption()
+{
+ cpu_disable_interrupt();
+}
+
+static inline void
+__schedule_away()
+{
+ current_thread->stats.last_reentry = clock_systime();
+
+ cpu_trap_sched();
+ set_preemption();
+}
+
+/**
+ * @brief preempt the current thread, and yield the remaining
+ * time slice to other threads.
+ *
+ * The current thread is marked as if it is being
+ * preempted involuntarily by kernel.
+ *
+ */
+static inline void
+preempt_current()
+{
+ no_preemption();
+ thread_stats_update_kpreempt();
+ __schedule_away();
+}
+
+/**
+ * @brief yield the remaining time slice to other threads.
+ *
+ * The current thread is marked as if it is being
+ * preempted voluntarily by itself.
+ *
+ */
+static inline void
+yield_current()
+{
+ no_preemption();
+ __schedule_away();
+}
+
+bool
+preempt_check_stalled(struct thread* th);
+
#endif /* __LUNAIX_KPREEMPT_H */
pte_t
alloc_kpage_at(pte_t* ptep, pte_t pte, int order);
+static inline void*
+vmalloc_page(int order)
+{
+    struct leaflet* leaf = alloc_leaflet(order);
+ if (!leaf) {
+ return NULL;
+ }
+
+ return (void*)vmap(leaf, KERNEL_DATA);
+}
+
+static inline void
+vmfree(void* ptr)
+{
+ struct leaflet* leaf = ppfn_leaflet(pfn((ptr_t)ptr));
+ leaflet_return(leaf);
+}
+
#endif /* __LUNAIX_PAGE_H */
static inline unsigned int
count_order(size_t page_count) {
- unsigned int po = ILOG2(page_count);
+ unsigned int po = ilog2(page_count);
assert(!(page_count % (1 << po)));
return po;
}
#include <lunaix/ds/ldga.h>
+/**
+ * @brief stage where only basic memory management service
+ * is present
+ */
#define on_earlyboot c_earlyboot
+
+/**
+ * @brief stage where most kernel service is ready, non-preempt
+ * kernel.
+ *
+ * boot-stage initialization is about to conclude.
+ */
#define on_boot c_boot
+
+/**
+ * @brief stage where all services started, kernel is in preempt
+ * state
+ */
#define on_postboot c_postboot
#define owloysius_fetch_init(func, call_stage) \
#define proc_runnable(proc) (!(proc)->state || !(((proc)->state) & ~PS_Rn))
-#define TH_DETACHED 0b0001
+#define TH_DETACHED 0b00000001
+#define TH_STALLED 0b00000010
#define thread_detached(th) ((th)->flags & TH_DETACHED)
#define detach_thread(th) ((th)->flags |= TH_DETACHED)
+#define thread_flags_set(th, flag) ((th)->flags |= (flag))
+#define thread_flags_clear(th, flag) ((th)->flags &= ~(flag))
+#define thread_flags_test(th, flag) ((th)->flags & (flag))
+#define thread_flags_test_all(th, flag) (((th)->flags & (flag)) == (flag))
+
struct proc_sig
{
int sig_num;
time_t alarm_time;
};
+struct thread_stats
+{
+ // number of times the thread entering kernel space involuntarily
+ unsigned long entry_count_invol;
+ // number of times the thread entering kernel space voluntarily
+ unsigned long entry_count_vol;
+
+    // number of times the thread is preempted in kernel space
+ unsigned long kpreempt_count;
+
+ // timestamp of last time kernel entry
+ time_t last_entry;
+ // timestamp of last time kernel reentry
+ time_t last_reentry;
+
+ // timestamp of last time kernel leave
+ time_t last_leave;
+ // timestamp of last time the thread is resumed
+ time_t last_resume;
+
+ union {
+ struct {
+ bool at_user;
+ };
+ int flags;
+ };
+};
+
struct thread
{
/*
struct mm_region* ustack; // process local user stack (NULL for kernel thread)
};
+ struct thread_stats stats;
+
struct haybed sleep;
struct proc_info* process;
void
thread_setsignal(struct thread* thread, signum_t signum);
+void
+thread_stats_update(bool inbound, bool voluntary);
+
+static inline void
+thread_stats_update_entering(bool voluntary)
+{
+ thread_stats_update(true, voluntary);
+}
+
+static inline void
+thread_stats_update_leaving()
+{
+ thread_stats_update(false, true);
+}
+
+static inline void
+thread_stats_update_kpreempt()
+{
+ current_thread->stats.kpreempt_count++;
+}
+
+static inline void
+thread_stats_reset_kpreempt()
+{
+ current_thread->stats.kpreempt_count = 0;
+}
+
+static inline ticks_t
+thread_stats_kernel_elapse(struct thread* thread)
+{
+ return clock_systime() - thread->stats.last_reentry;
+}
+
+static inline ticks_t
+thread_stats_user_elapse(struct thread* thread)
+{
+ struct thread_stats* stats;
+ stats = &thread->stats;
+
+ return stats->last_entry - stats->last_leave;
+}
+
#endif /* __LUNAIX_PROCESS_H */
void noret
schedule();
-void
-sched_pass();
-
void noret
run(struct thread* thread);
* https://elixir.bootlin.com/linux/v4.4/source/include/linux/log2.h#L85
*
*/
-#define ILOG2(x) \
+#define ilog2(x) \
(__builtin_constant_p(x) ? ((x) == 0 ? 0 \
: ((x) & (1ul << 31)) ? 31 \
: ((x) & (1ul << 30)) ? 30 \
: ((x) & (1ul << 2)) ? 2 \
: ((x) & (1ul << 1)) ? 1 \
: 0) \
- : (31 - clz(x)))
+ : (msbiti - clz(x)))
+
+#define llog2(x) (msbitl - clzl(x))
#ifndef CONFIG_NO_ASSERT
#define assert(cond) \
do { \
- if (unlikely(!(cond))) { \
+ if (unlikely(!(cond))) { \
__assert_fail(#cond, __FILE__, __LINE__); \
} \
} while(0)
+#define assert_p(cond, prefix) \
+ do { \
+ if (unlikely(!(cond))) { \
+ __assert_fail(prefix ": " #cond, __FILE__, __LINE__); \
+ } \
+ } while(0)
+
#define assert_msg(cond, msg) \
do { \
- if (unlikely(!(cond))) { \
+ if (unlikely(!(cond))) { \
__assert_fail(msg, __FILE__, __LINE__); \
} \
} while(0)
+#define assert_msg_p(cond, prefix, msg) \
+ do { \
+ if (unlikely(!(cond))) { \
+ __assert_fail(prefix ":" msg, __FILE__, __LINE__); \
+ } \
+ } while(0)
+
#define must_success(statement) \
do { \
int err = (statement); \
} while(0)
#define fail(msg) __assert_fail(msg, __FILE__, __LINE__);
+#define fail_p(msg, prefix) fail(prefix ":" msg)
void
__assert_fail(const char* expr, const char* file, unsigned int line)
--- /dev/null
+#ifndef __LUNAIX_SWITCH_H
+#define __LUNAIX_SWITCH_H
+
+#define SWITCH_MODE_NORMAL 0
+#define SWITCH_MODE_FAST 1
+#define SWITCH_MODE_GIVEUP 2
+
+#ifndef __ASM__
+
+#include <lunaix/types.h>
+
+struct signpost_result
+{
+ int mode;
+ ptr_t stack;
+};
+
+/**
+ * @brief Decide how the current thread should perform the context switch
+ * back to its previously saved context.
+ *
+ * Returns a user stack pointer to perform a temporary fast redirected
+ * context switch.
+ * No redirection is required if that pointer is null.
+ *
+ * This function might never return if the decision is made to give up
+ * this switch.
+ *
+ * NOTE: This function might have side effects; it can only be
+ * called within the twilight zone of context restore (after entering
+ * do_switch and before returning from the exception).
+ *
+ * @return ptr_t
+ */
+ptr_t
+switch_signposting();
+
+static inline void
+redirect_switch(struct signpost_result* res, ptr_t stack)
+{
+ res->mode = SWITCH_MODE_FAST;
+ res->stack = stack;
+}
+
+static inline void
+continue_switch(struct signpost_result* res)
+{
+ res->mode = SWITCH_MODE_NORMAL;
+ res->stack = 0;
+}
+
+static inline void
+giveup_switch(struct signpost_result* res)
+{
+ res->mode = SWITCH_MODE_GIVEUP;
+ res->stack = 0;
+}
+
+#endif
+#endif /* __LUNAIX_SWITCH_H */
va_end(args); \
}
+#define printk(fmt, ...) kprintf_v(__FILE__, fmt, ##__VA_ARGS__)
+
#define DEBUG(fmt, ...) kprintf(KDEBUG fmt, ##__VA_ARGS__)
#define INFO(fmt, ...) kprintf(KINFO fmt, ##__VA_ARGS__)
#define WARN(fmt, ...) kprintf(KWARN fmt, ##__VA_ARGS__)
void
kprintf_m(const char* component, const char* fmt, va_list args);
+
+void
+kprintf_v(const char* component, const char* fmt, ...);
#endif /* __LUNAIX_SYSLOG_H */
u8_t second;
} datetime_t;
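+
+/* these helpers assume a 1ms system tick: ticks_msecs() is the identity */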
+static inline ticks_t
+ticks_seconds(unsigned int seconds)
+{
+ return seconds * 1000;
+}
+
+static inline ticks_t
+ticks_minutes(unsigned int min)
+{
+ return ticks_seconds(min * 60);
+}
+
+static inline ticks_t
+ticks_msecs(unsigned int ms)
+{
+ return ms;
+}
+
static inline time_t
datetime_tounix(datetime_t* dt)
{
#define container_of(ptr, type, member) \
({ \
const typeof(((type*)0)->member)* __mptr = (ptr); \
- (ptr) ? (type*)((char*)__mptr - offsetof(type, member)) : 0; \
+        ((ptr_t)(ptr) != 0UL) ? (type*)((char*)__mptr - offsetof(type, member)) : 0; \
})
+#define offset(data, off) \
+ ((void*)(__ptr(data) + (off)))
+
#define __ptr(val) ((ptr_t)(val))
typedef va_list* sc_va_list;
#include "fstypes.h"
#include "types.h"
-#define FO_CREATE 0x1
-#define FO_APPEND 0x2
-#define FO_DIRECT 0x4
-#define FO_WRONLY 0x8
-#define FO_RDONLY 0x10
-#define FO_RDWR 0x20
+#define FO_CREATE 0x1
+#define FO_APPEND 0x2
+#define FO_DIRECT 0x4
+#define FO_WRONLY 0x8
+#define FO_RDONLY 0x10
+#define FO_RDWR 0x20
+#define FO_TRUNC 0x40
#define FO_NOFOLLOW 0x10000
#define O_WRONLY FO_WRONLY
#define O_RDONLY FO_RDONLY
#define O_RDWR FO_RDWR
+#define O_TRUNC FO_TRUNC
-#define MNT_RO 0x1
+/* Mount with read-only flag */
+#define MNT_RO (1 << 0)
+
+/* Mount with block-cache-disabled flag */
+#define MNT_NC (1 << 1)
struct file_stat
{
#ifndef __LUNAIX_FSTYPES_H
#define __LUNAIX_FSTYPES_H
-#define F_DIR 0x0
-#define F_FILE 0x1
-#define F_DEV 0x2
-#define F_SEQDEV 0x6
-#define F_VOLDEV 0xa
-#define F_SYMLINK 0x10
+/*
+      7  6  5  4  3  2  1  0
+      *  *  s  P SV  D  d  f
+            |  |  |  |  |  |_ file
+            |  |  |  |  |____ directory
+            |  |  |  |_______ device
+            |  |  |__________ seq/vol device (0: seq; 1: vol)
+            |  |_____________ pipe
+            |________________ symlink
+
+*/
-#define F_MFILE 0b00001
-#define F_MDEV 0b01110
-#define F_MSLNK 0b10000
+#define F_FILE 0b00000001
+#define F_DIR 0b00000010
+#define F_DEV 0b00000100
+#define F_SVDEV 0b00001000
+#define F_PIPE 0b00010000
+#define F_SYMLINK 0b00100000
#endif /* __LUNAIX_FSTYPES_H */
#define ELIBBAD -29
#define EAGAIN -30
#define EDEADLK -31
+#define EDQUOT -32
#endif /* __LUNAIX_STATUS_H */
"kinit.c",
"lunad.c",
"spike.c",
+ "lrud.c",
+ "bcache.c",
+ "syscall.c",
"kprint/kp_records.c",
"kprint/kprintf.c",
"time/clock.c",
- "time/timer.c"
+ "time/timer.c",
])
\ No newline at end of file
""" Config kernel features """
pass
-include("mm")
\ No newline at end of file
+include("fs")
+include("mm")
--- /dev/null
+#include <lunaix/bcache.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/spike.h>
+
+static struct lru_zone* bcache_global_lru;
+
+#define lock(bc) spinlock_acquire(&((bc)->lock))
+#define unlock(bc) spinlock_release(&((bc)->lock))
+
+static void
+__evict_internal_locked(struct bcache_node* node)
+{
+ struct bcache* cache;
+
+ cache = node->holder;
+ cache->ops.sync_cached(cache, node->tag, node->data);
+
+ cache->ops.release_on_evict(cache, node->data);
+}
+
+static int
+__try_evict_bcache(struct lru_node* node)
+{
+ struct bcache_node* bnode;
+ struct bcache* cache;
+
+ bnode = container_of(node, struct bcache_node, lru_node);
+ cache = bnode->holder;
+
+ lock(cache);
+
+ if (bnode->refs) {
+ unlock(cache);
+ return false;
+ }
+
+ __evict_internal_locked(bnode);
+ btrie_remove(&cache->root, bnode->tag);
+ llist_delete(&bnode->objs);
+
+ vfree(bnode);
+
+ unlock(cache);
+
+ return true;
+}
+
+bcache_zone_t
+bcache_create_zone(char* name)
+{
+ return lru_new_zone(name, __try_evict_bcache);
+}
+
+void
+bcache_init_zone(struct bcache* cache, bcache_zone_t lru, unsigned int log_ways,
+ int cap, unsigned int blk_size, struct bcache_ops* ops)
+{
+ // TODO handle cap
+
+ *cache = (struct bcache) {
+ .lru = lru,
+ .ops = *ops,
+ .blksz = blk_size
+ };
+
+ btrie_init(&cache->root, log_ways);
+ llist_init_head(&cache->objs);
+ spinlock_init(&cache->lock);
+}
+
+bcobj_t
+bcache_put_and_ref(struct bcache* cache, unsigned long tag, void* block)
+{
+ struct bcache_node* node;
+
+ lock(cache);
+
+ node = (struct bcache_node*)btrie_get(&cache->root, tag);
+
+ if (node != NULL) {
+ assert(!node->refs);
+ __evict_internal_locked(node);
+ // Now the node is ready to be reused.
+ }
+ else {
+ node = vzalloc(sizeof(*node));
+ btrie_set(&cache->root, tag, node);
+ }
+
+ *node = (struct bcache_node) {
+ .data = block,
+ .holder = cache,
+ .tag = tag,
+ .refs = 1
+ };
+
+ lru_use_one(cache->lru, &node->lru_node);
+ llist_append(&cache->objs, &node->objs);
+
+ unlock(cache);
+
+ return (bcobj_t)node;
+}
+
+bool
+bcache_tryget(struct bcache* cache, unsigned long tag, bcobj_t* result)
+{
+ struct bcache_node* node;
+
+ lock(cache);
+
+ node = (struct bcache_node*)btrie_get(&cache->root, tag);
+ if (!node) {
+ unlock(cache);
+ *result = NULL;
+
+ return false;
+ }
+
+ node->refs++;
+
+ *result = (bcobj_t)node;
+
+ unlock(cache);
+
+ return true;
+}
+
+void
+bcache_return(bcobj_t obj)
+{
+ struct bcache_node* node = (struct bcache_node*) obj;
+
+ assert(node->refs);
+
+    // no need to lock the cache, because the lru has its own lock.
+ lru_use_one(node->holder->lru, &node->lru_node);
+ node->refs--;
+}
+
+void
+bcache_promote(bcobj_t obj)
+{
+ struct bcache_node* node;
+
+ node = (struct bcache_node*) obj;
+ assert(node->refs);
+ lru_use_one(node->holder->lru, &node->lru_node);
+}
+
+void
+bcache_evict(struct bcache* cache, unsigned long tag)
+{
+ struct bcache_node* node;
+
+ lock(cache);
+
+ node = (struct bcache_node*)btrie_get(&cache->root, tag);
+
+ if (!node || node->refs) {
+ unlock(cache);
+ return;
+ }
+
+ __evict_internal_locked(node);
+
+ btrie_remove(&cache->root, tag);
+ lru_remove(cache->lru, &node->lru_node);
+ llist_delete(&node->objs);
+
+ vfree(node);
+
+ unlock(cache);
+}
+
+static void
+bcache_flush_locked(struct bcache* cache)
+{
+ struct bcache_node *pos, *n;
+ llist_for_each(pos, n, &cache->objs, objs) {
+ __evict_internal_locked(pos);
+ btrie_remove(&cache->root, pos->tag);
+ lru_remove(cache->lru, &pos->lru_node);
+ llist_delete(&pos->objs);
+ }
+}
+
+void
+bcache_flush(struct bcache* cache)
+{
+ lock(cache);
+
+ bcache_flush_locked(cache);
+
+ unlock(cache);
+}
+
+void
+bcache_free(struct bcache* cache)
+{
+ lock(cache);
+
+ bcache_flush_locked(cache);
+ btrie_release(&cache->root);
+
+ unlock(cache);
+
+ vfree(cache);
+}
+
+void
+bcache_zone_free(bcache_zone_t zone)
+{
+ lru_free_zone(zone);
+}
\ No newline at end of file
"blkpart_gpt.c",
"blk_mapping.c",
"blkio.c",
- "block.c"
+ "block.c",
+ "blkbuf.c"
])
\ No newline at end of file
--- /dev/null
+#include <lunaix/blkbuf.h>
+#include <lunaix/mm/cake.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/owloysius.h>
+#include <lunaix/syslog.h>
+
+LOG_MODULE("blkbuf")
+
+#define bb_cache_obj(bcache) \
+ container_of(bcache, struct blkbuf_cache, cached)
+
+#define to_blkbuf(bbuf) ((struct blk_buf*)(bbuf))
+
+static bcache_zone_t bb_zone;
+static struct cake_pile* bb_pile;
+
+static inline u64_t
+__tolba(struct blkbuf_cache* cache, unsigned int blk_id)
+{
+ return ((u64_t)cache->blksize * (u64_t)blk_id) / cache->blkdev->blk_size;
+}
+
+static void
+__blkbuf_do_sync(struct bcache* bc, unsigned long tag, void* data)
+{
+ return;
+}
+
+static void
+__blkbuf_sync_callback(struct blkio_req* req)
+{
+ struct blk_buf* buf;
+
+ buf = (struct blk_buf*)req->evt_args;
+
+ if (req->errcode) {
+ ERROR("sync failed: io error, 0x%x", req->errcode);
+ return;
+ }
+}
+
+static void
+__blkbuf_evict_callback(struct blkio_req* req)
+{
+ struct blk_buf* buf;
+
+ buf = (struct blk_buf*)req->evt_args;
+
+ if (req->errcode) {
+ ERROR("sync on evict failed (io error, 0x%x)", req->errcode);
+ }
+
+ vfree(buf->raw);
+ vbuf_free(req->vbuf);
+ cake_release(bb_pile, buf);
+}
+
+static void
+__blkbuf_do_try_release(struct bcache* bc, void* data)
+{
+ struct blkio_req* req;
+ struct blk_buf* buf;
+
+ buf = (struct blk_buf*)data;
+ req = buf->breq;
+
+ if (llist_empty(&buf->dirty)) {
+ __blkbuf_evict_callback(req);
+ blkio_free_req(req);
+ return;
+ }
+
+    // since we are evicting, don't care if the sync fails
+ llist_delete(&buf->dirty);
+
+ blkio_when_completed(req, __blkbuf_evict_callback);
+ blkio_mark_foc(req);
+ blkio_commit(req, 0);
+}
+
+static struct bcache_ops cache_ops = {
+ .release_on_evict = __blkbuf_do_try_release,
+ .sync_cached = __blkbuf_do_sync
+};
+
+static bbuf_t
+__blkbuf_take_slow_lockness(struct blkbuf_cache* bc, unsigned int block_id)
+{
+ struct blk_buf* buf;
+ struct blkio_req* req;
+ struct vecbuf* vbuf;
+ void* data;
+ u64_t lba;
+
+ data = valloc(bc->blksize);
+
+ vbuf = NULL;
+ vbuf_alloc(&vbuf, data, bc->blksize);
+
+ lba = __tolba(bc, block_id);
+ buf = (struct blk_buf*)cake_grab(bb_pile);
+ req = blkio_vreq(vbuf, lba, __blkbuf_sync_callback, buf, 0);
+
+    // give dirty a known state
+ llist_init_head(&buf->dirty);
+
+ blkio_setread(req);
+ blkio_bindctx(req, bc->blkdev->blkio);
+ blkio_commit(req, BLKIO_WAIT);
+
+ if (req->errcode) {
+ ERROR("block io error (0x%x)", req->errcode);
+ cake_release(bb_pile, buf);
+ return (bbuf_t)INVL_BUFFER;
+ }
+
+ buf->raw = data;
+ buf->cobj = bcache_put_and_ref(&bc->cached, block_id, buf);
+ buf->breq = req;
+
+ return buf;
+}
+
+struct blkbuf_cache*
+blkbuf_create(struct block_dev* blkdev, unsigned int blk_size)
+{
+ struct blkbuf_cache* bb_cache;
+
+ assert(is_pot(blk_size));
+
+ bb_cache = valloc(sizeof(*bb_cache));
+ bb_cache->blkdev = blkdev;
+
+ bcache_init_zone(&bb_cache->cached, bb_zone, 3, -1, blk_size, &cache_ops);
+ llist_init_head(&bb_cache->dirty);
+ mutex_init(&bb_cache->lock);
+
+ return bb_cache;
+}
+
+bbuf_t
+blkbuf_take(struct blkbuf_cache* bc, unsigned int block_id)
+{
+ bcobj_t cobj;
+ mutex_lock(&bc->lock);
+ if (bcache_tryget(&bc->cached, block_id, &cobj)) {
+ mutex_unlock(&bc->lock);
+ return (bbuf_t)bcached_data(cobj);
+ }
+
+ bbuf_t buf = __blkbuf_take_slow_lockness(bc, block_id);
+
+ mutex_unlock(&bc->lock);
+ return buf;
+}
+
+void
+blkbuf_put(bbuf_t buf)
+{
+ if (unlikely(!buf || blkbuf_errbuf(buf))) {
+ return;
+ }
+
+ struct blk_buf* bbuf;
+ bbuf = to_blkbuf(buf);
+
+ bcache_return(bbuf->cobj);
+}
+
+void
+blkbuf_dirty(bbuf_t buf)
+{
+ assert(buf && !blkbuf_errbuf(buf));
+
+ struct blk_buf* bbuf;
+ struct blkbuf_cache* bc;
+
+ bbuf = ((struct blk_buf*)buf);
+ bc = bcache_holder_embed(bbuf->cobj, struct blkbuf_cache, cached);
+
+ mutex_lock(&bc->lock);
+
+ if (llist_empty(&bbuf->dirty)) {
+ llist_append(&bc->dirty, &bbuf->dirty);
+ }
+
+ mutex_unlock(&bc->lock);
+}
+
+static inline void
+__schedule_sync_event(struct blk_buf* bbuf, bool wait)
+{
+ struct blkio_req* blkio;
+
+ blkio = bbuf->breq;
+
+ blkio_setwrite(blkio);
+ blkio_commit(blkio, wait ? BLKIO_WAIT : BLKIO_NOWAIT);
+
+ llist_delete(&bbuf->dirty);
+}
+
+void
+blkbuf_schedule_sync(bbuf_t buf)
+{
+ struct blk_buf* bbuf;
+ bbuf = to_blkbuf(buf);
+
+ __schedule_sync_event(bbuf, false);
+}
+
+bool
+blkbuf_syncall(struct blkbuf_cache* bc, bool async)
+{
+ struct blk_buf *pos, *n;
+
+ mutex_lock(&bc->lock);
+
+ llist_for_each(pos, n, &bc->dirty, dirty) {
+ __schedule_sync_event(pos, !async);
+ }
+
+ mutex_unlock(&bc->lock);
+
+ if (async) {
+ return true;
+ }
+
+ return llist_empty(&bc->dirty);
+}
+
+void
+blkbuf_release(struct blkbuf_cache* bc)
+{
+ bcache_free(&bc->cached);
+ vfree(bc);
+}
+
+static void
+__init_blkbuf()
+{
+ bb_zone = bcache_create_zone("blk_buf");
+ bb_pile = cake_new_pile("blk_buf", sizeof(struct blk_buf), 1, 0);
+}
+owloysius_fetch_init(__init_blkbuf, on_earlyboot)
\ No newline at end of file
#include <lunaix/blkio.h>
+#include <lunaix/syslog.h>
#include <lunaix/mm/cake.h>
#include <lunaix/mm/valloc.h>
static struct cake_pile* blkio_reqpile;
+LOG_MODULE("blkio")
+
void
blkio_init()
{
ctx->handle_one = handler;
llist_init_head(&ctx->queue);
+ mutex_init(&ctx->lock);
return ctx;
}
void
-blkio_commit(struct blkio_context* ctx, struct blkio_req* req, int options)
+blkio_commit(struct blkio_req* req, int options)
{
+ struct blkio_context* ctx;
+
+ if (blkio_is_pending(req)) {
+        // prevent double submission
+ return;
+ }
+
+ assert(req->io_ctx);
+
req->flags |= BLKIO_PENDING;
- req->io_ctx = ctx;
- llist_append(&ctx->queue, &req->reqs);
+
+ if ((options & BLKIO_WAIT)) {
+ req->flags |= BLKIO_SHOULD_WAIT;
+ prepare_to_wait(&req->wait);
+ }
+ else {
+ req->flags &= ~BLKIO_SHOULD_WAIT;
+ }
+ ctx = req->io_ctx;
+
+ blkio_lock(ctx);
+
+ llist_append(&ctx->queue, &req->reqs);
+
+ blkio_unlock(ctx);
// if the pipeline is not running (e.g., stalling). Then we should schedule
// one immediately and kick it started.
// NOTE: Possible race condition between blkio_commit and pwait.
// As we don't want to overwhelming the interrupt context and also keep the
// request RTT as small as possible, hence #1 is preferred.
- if (!ctx->busy) {
+    /*
+        FIXME
+        Potential race here: it happens when blkio is committed at high
+        volume while the block device has very little latency.
+        This is particularly serious for non-async blkio, which could
+        complete before we do pwait, causing the thread to hang indefinitely.
+    */
+
+ if (blkio_stalled(ctx)) {
if ((options & BLKIO_WAIT)) {
- cpu_disable_interrupt();
blkio_schedule(ctx);
- pwait(&req->wait);
+ try_wait_check_stall();
return;
}
blkio_schedule(ctx);
} else if ((options & BLKIO_WAIT)) {
- pwait(&req->wait);
+ try_wait_check_stall();
}
}
void
blkio_schedule(struct blkio_context* ctx)
{
+    // stall the pipeline if ctx is locked by others.
+    // we must not try to hold the lock in this case, as
+    // blkio_schedule will be in irq context most of the
+    // time, and we can't afford to wait there.
+    if (mutex_on_hold(&ctx->lock)) {
+        return;
+    }
+
+    // will always succeed when in irq context
+ blkio_lock(ctx);
+
if (llist_empty(&ctx->queue)) {
+ blkio_unlock(ctx);
return;
}
llist_delete(&head->reqs);
head->flags |= BLKIO_BUSY;
- head->io_ctx->busy++;
+ ctx->busy++;
+
+ blkio_unlock(ctx);
ctx->handle_one(head);
}
void
blkio_complete(struct blkio_req* req)
{
+ struct blkio_context* ctx;
+
+ ctx = req->io_ctx;
req->flags &= ~(BLKIO_BUSY | BLKIO_PENDING);
+ // Wake all blocked processes on completion,
+    // albeit there should be no more than one process in every case (by design)
+ if ((req->flags & BLKIO_SHOULD_WAIT)) {
+ assert(!waitq_empty(&req->wait));
+ pwake_all(&req->wait);
+ }
+
+ if (req->errcode) {
+ WARN("request completed with error. (errno=0x%x, ctx=%p)",
+ req->errcode, (ptr_t)ctx);
+ }
+
if (req->completed) {
req->completed(req);
}
- // Wake all blocked processes on completion,
- // albeit should be no more than one process in everycase (by design)
- pwake_all(&req->wait);
-
if ((req->flags & BLKIO_FOC)) {
blkio_free_req(req);
}
- req->io_ctx->busy--;
+ ctx->busy--;
}
\ No newline at end of file
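
With this change a request carries its own context (`req->io_ctx`), so submission is a two-step affair: bind, then commit. The hunk below (`__block_commit`) is the canonical in-tree caller; as a standalone illustration, a synchronous submission would look roughly like this (sketch only, assuming the request was already populated):

```c
/* Sketch: synchronous one-shot submission under the new API. */
static int submit_sync(struct blkio_context* ctx, struct blkio_req* req)
{
    blkio_bindctx(req, ctx);        /* must precede commit: it asserts io_ctx */
    blkio_commit(req, BLKIO_WAIT);  /* enqueue, kick the pipeline, sleep */
    return -req->errcode;           /* set by blkio_complete on the irq path */
}
```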
__block_commit(struct blkio_context* blkio, struct blkio_req* req, int flags)
{
int errno;
- blkio_commit(blkio, req, flags);
+
+ blkio_bindctx(req, blkio);
+ blkio_mark_nfoc(req);
+ blkio_commit(req, flags);
if ((errno = req->errcode)) {
errno = -errno;
__block_read(struct device* dev, void* buf, size_t offset, size_t len)
{
int errno;
- struct block_dev* bdev = (struct block_dev*)dev->underlay;
- size_t bsize = bdev->blk_size, rd_block = offset / bsize + bdev->start_lba,
+ struct block_dev* bdev = block_dev(dev);
+ size_t bsize = bdev->blk_size,
+ rd_block = offset / bsize + bdev->start_lba,
r = offset % bsize, rd_size = 0;
if (!(len = MIN(len, ((size_t)bdev->end_lba - rd_block + 1) * bsize))) {
int
__block_write(struct device* dev, void* buf, size_t offset, size_t len)
{
- struct block_dev* bdev = (struct block_dev*)dev->underlay;
+ struct block_dev* bdev = block_dev(dev);
size_t bsize = bdev->blk_size, wr_block = offset / bsize + bdev->start_lba,
r = offset % bsize, wr_size = 0;
__block_read_page(struct device* dev, void* buf, size_t offset)
{
struct vecbuf* vbuf = NULL;
- struct block_dev* bdev = (struct block_dev*)dev->underlay;
+ struct block_dev* bdev = block_dev(dev);
u32_t lba = offset / bdev->blk_size + bdev->start_lba;
u32_t rd_lba = MIN(lba + PAGE_SIZE / bdev->blk_size, bdev->end_lba);
__block_write_page(struct device* dev, void* buf, size_t offset)
{
struct vecbuf* vbuf = NULL;
- struct block_dev* bdev = (struct block_dev*)dev->underlay;
+ struct block_dev* bdev = block_dev(dev);
u32_t lba = offset / bdev->blk_size + bdev->start_lba;
u32_t wr_lba = MIN(lba + PAGE_SIZE / bdev->blk_size, bdev->end_lba);
#include <lunaix/device.h>
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
#include <lunaix/fs/devfs.h>
#include <lunaix/spike.h>
int
devfs_get_itype(struct device_meta* dm)
{
- int itype = VFS_IFFILE;
+ int itype = VFS_IFDEV;
if (valid_device_subtype_ref(dm, DEV_CAT)) {
return VFS_IFDIR;
if (dev_if == DEV_IFVOL) {
itype |= VFS_IFVOLDEV;
- } else if (dev_if == DEV_IFSEQ) {
- itype |= VFS_IFSEQDEV;
- } else {
- itype |= VFS_IFDEV;
}
+
+    // otherwise, the mapping is considered to be a generic seq dev.
return itype;
}
return ENOTDIR;
}
+ if (fsapi_handle_pseudo_dirent(file, dctx)) {
+ return 1;
+ }
+
+    // f_pos 0 and 1 are the pseudo dirents ("." and "..") emitted above,
+    // hence the offset by 2
    struct device_meta* dev =
-        device_getbyoffset(rootdev, dctx->index);
+        device_getbyoffset(rootdev, file->f_pos - 2);
if (!dev) {
return 0;
void
devfs_init()
{
- struct filesystem* fs = fsm_new_fs("devfs", 5);
- fsm_register(fs);
- fs->mount = devfs_mount;
- fs->unmount = devfs_unmount;
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("devfs", FSTYPE_PSEUDO);
+
+ fsapi_fs_set_mntops(fs, devfs_mount, devfs_unmount);
+ fsapi_fs_finalise(fs);
}
EXPORT_FILE_SYSTEM(devfs, devfs_init);
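
`devfs_init` above demonstrates the new three-step `fsapi` registration (declare, configure, finalise) that replaces direct field assignment on `struct filesystem`. A hedged sketch of another pseudo filesystem registering the same way, where `examplefs_mount`/`examplefs_unmount` are hypothetical callbacks with the same signatures as their devfs counterparts:

```c
/* Sketch only: registering a hypothetical "examplefs". */
static void
examplefs_init()
{
    struct filesystem* fs;

    fs = fsapi_fs_declare("examplefs", FSTYPE_PSEUDO);
    fsapi_fs_set_mntops(fs, examplefs_mount, examplefs_unmount);
    fsapi_fs_finalise(fs);
}
EXPORT_FILE_SYSTEM(examplefs, examplefs_init);
```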
#include <lunaix/spike.h>
#include <lunaix/syscall.h>
#include <lunaix/syscall_utils.h>
+#include <lunaix/kpreempt.h>
#define MAX_POLLER_COUNT 16
__wait_until_event()
{
block_current_thread();
- sched_pass();
+ yield_current();
}
void
sources([
"waitq.c",
"buffer.c",
- "lru.c",
"rbuffer.c",
"btrie.c",
"semaphore.c",
#define BTRIE_INSERT 1
struct btrie_node*
-__btrie_traversal(struct btrie* root, u32_t index, int options)
+__btrie_traversal(struct btrie* root, unsigned long index, int options)
{
- index = index >> root->truncated;
- u32_t lz = index ? ROUNDDOWN(31 - clz(index), BTRIE_BITS) : 0;
- u32_t bitmask = ((1 << BTRIE_BITS) - 1) << lz;
- u32_t i = 0;
+ unsigned long lz;
+ unsigned long bitmask;
+ unsigned long i = 0;
struct btrie_node* tree = root->btrie_root;
+ lz = index ? ICEIL(msbitl - clzl(index), root->order) : 0;
+ lz = lz * root->order;
+    bitmask = ((1ul << root->order) - 1) << lz;
+
// Time complexity: O(log_2(log_2(N))) where N is the index to lookup
while (bitmask && tree) {
i = (index & bitmask) >> lz;
} else {
tree = subtree;
}
- bitmask = bitmask >> BTRIE_BITS;
- lz -= BTRIE_BITS;
+ bitmask = bitmask >> root->order;
+ lz -= root->order;
}
return tree;
}
void
-btrie_init(struct btrie* btrie, u32_t trunc_bits)
+btrie_init(struct btrie* btrie, unsigned int order)
{
btrie->btrie_root = vzalloc(sizeof(struct btrie_node));
llist_init_head(&btrie->btrie_root->nodes);
llist_init_head(&btrie->btrie_root->children);
- btrie->truncated = trunc_bits;
+ btrie->order = order;
}
void*
-btrie_get(struct btrie* root, u32_t index)
+btrie_get(struct btrie* root, unsigned long index)
{
struct btrie_node* node = __btrie_traversal(root, index, 0);
if (!node) {
}
void
-btrie_set(struct btrie* root, u32_t index, void* data)
+btrie_set(struct btrie* root, unsigned long index, void* data)
{
struct btrie_node* node = __btrie_traversal(root, index, BTRIE_INSERT);
node->data = data;
}
void*
-btrie_remove(struct btrie* root, u32_t index)
+btrie_remove(struct btrie* root, unsigned long index)
{
struct btrie_node* node = __btrie_traversal(root, index, 0);
if (!node) {
+++ /dev/null
-#include <lunaix/ds/lru.h>
-#include <lunaix/mm/valloc.h>
-
-struct llist_header zone_lead = { .next = &zone_lead, .prev = &zone_lead };
-
-struct lru_zone*
-lru_new_zone(evict_cb try_evict_cb)
-{
- struct lru_zone* zone = vzalloc(sizeof(struct lru_zone));
- if (!zone) {
- return NULL;
- }
-
- zone->try_evict = try_evict_cb;
-
- llist_init_head(&zone->lead_node);
- llist_append(&zone_lead, &zone->zones);
-
- return zone;
-}
-
-void
-lru_use_one(struct lru_zone* zone, struct lru_node* node)
-{
- if (node->lru_nodes.next && node->lru_nodes.prev) {
- llist_delete(&node->lru_nodes);
- }
-
- llist_prepend(&zone->lead_node, &node->lru_nodes);
- zone->objects++;
-}
-
-static void
-__do_evict(struct lru_zone* zone, struct llist_header* elem)
-{
- llist_delete(elem);
- if (!zone->try_evict(container_of(elem, struct lru_node, lru_nodes))) {
- llist_append(&zone->lead_node, elem);
- } else {
- zone->objects--;
- }
-}
-
-void
-lru_evict_one(struct lru_zone* zone)
-{
- struct llist_header* tail = zone->lead_node.prev;
- if (tail == &zone->lead_node) {
- return;
- }
-
- __do_evict(zone, tail);
-}
-
-void
-lru_evict_half(struct lru_zone* zone)
-{
- int target = (int)(zone->objects / 2);
- struct llist_header* tail = zone->lead_node.prev;
- while (tail != &zone->lead_node && target > 0) {
- __do_evict(zone, tail);
- tail = tail->prev;
- target--;
- }
-}
-
-void
-lru_remove(struct lru_zone* zone, struct lru_node* node)
-{
- if (node->lru_nodes.next && node->lru_nodes.prev) {
- llist_delete(&node->lru_nodes);
- }
- zone->objects--;
-}
\ No newline at end of file
#include <lunaix/ds/mutex.h>
#include <lunaix/process.h>
-#include <lunaix/sched.h>
+#include <lunaix/kpreempt.h>
-void
-mutex_lock(mutex_t* mutex)
+static inline bool must_inline
+__mutex_check_owner(mutex_t* mutex)
{
- if (atomic_load(&mutex->lk) && mutex->owner == __current->pid) {
- atomic_fetch_add(&mutex->lk, 1);
- return;
- }
+ return mutex->owner == __current->pid;
+}
+static inline void must_inline
+__mutex_lock(mutex_t* mutex)
+{
while (atomic_load(&mutex->lk)) {
- sched_pass();
+ preempt_current();
}
atomic_fetch_add(&mutex->lk, 1);
mutex->owner = __current->pid;
}
+static inline void must_inline
+__mutex_unlock(mutex_t* mutex)
+{
+ if (__mutex_check_owner(mutex))
+ atomic_fetch_sub(&mutex->lk, 1);
+}
+
+void
+mutex_lock(mutex_t* mutex)
+{
+    __mutex_lock(mutex);
+}
+
void
mutex_unlock(mutex_t* mutex)
{
- mutex_unlock_for(mutex, __current->pid);
+    __mutex_unlock(mutex);
}
void
if (mutex->owner != pid || !atomic_load(&mutex->lk)) {
return;
}
- atomic_fetch_sub(&mutex->lk, 1);
+    __mutex_unlock(mutex);
+}
+
+void
+mutex_lock_nested(mutex_t* mutex)
+{
+ if (atomic_load(&mutex->lk) && __mutex_check_owner(mutex)) {
+ atomic_fetch_add(&mutex->lk, 1);
+ return;
+ }
+
+    __mutex_lock(mutex);
+}
+
+void
+mutex_unlock_nested(mutex_t* mutex)
+{
+ mutex_unlock_for(mutex, __current->pid);
}
\ No newline at end of file
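
`mutex_lock`/`mutex_unlock` are now strictly non-recursive; re-entrant acquisition lives in the `_nested` variants, which only bump the counter when the caller already owns the lock. A sketch of the intended usage (assuming the mutex was initialised elsewhere):

```c
/* Sketch: recursive acquisition must go through the _nested variants. */
static mutex_t lk;   /* assume mutex_init(&lk) ran during setup */

static void inner(void)
{
    mutex_lock_nested(&lk);    /* owner re-enters: counter goes to 2 */
    /* ... */
    mutex_unlock_nested(&lk);  /* counter back to 1, still owned */
}

static void outer(void)
{
    mutex_lock(&lk);           /* first acquisition */
    inner();
    mutex_unlock(&lk);         /* fully released */
}
```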
#include <lunaix/ds/semaphore.h>
-#include <lunaix/sched.h>
+#include <lunaix/kpreempt.h>
void
sem_init(struct sem_t* sem, unsigned int initial)
{
while (!atomic_load(&sem->counter)) {
        // FIXME: use something better, like a wait queue
- sched_pass();
+ preempt_current();
}
atomic_fetch_sub(&sem->counter, 1);
}
#include <lunaix/process.h>
#include <lunaix/sched.h>
#include <lunaix/spike.h>
+#include <lunaix/kpreempt.h>
-void
-pwait(waitq_t* queue)
+static inline void must_inline
+__try_wait(bool check_stall)
{
- assert(current_thread);
- // prevent race condition.
- cpu_disable_interrupt();
-
waitq_t* current_wq = ¤t_thread->waitqueue;
- assert(llist_empty(¤t_wq->waiters));
-
- llist_append(&queue->waiters, ¤t_wq->waiters);
-
+ if (waitq_empty(current_wq)) {
+ return;
+ }
+
block_current_thread();
- sched_pass();
+
+ if (!check_stall) {
+ // if we are not checking stall, we give up voluntarily
+ yield_current();
+ } else {
+        // otherwise, treat it as being preempted by the kernel
+ preempt_current();
+ }
// In case of SIGINT-forced awaken
llist_delete(¤t_wq->waiters);
- cpu_enable_interrupt();
+}
+
+static inline void must_inline
+__pwait(waitq_t* queue, bool check_stall)
+{
+ // prevent race condition.
+ no_preemption();
+
+ prepare_to_wait(queue);
+ __try_wait(check_stall);
+
+ set_preemption();
+}
+
+void
+pwait(waitq_t* queue)
+{
+ __pwait(queue, false);
+}
+
+void
+pwait_check_stall(waitq_t* queue)
+{
+ __pwait(queue, true);
}
void
{
thread = container_of(pos, struct thread, waitqueue);
- assert(thread->state == PS_BLOCKED);
- thread->state = PS_READY;
+ if (thread->state == PS_BLOCKED) {
+ thread->state = PS_READY;
+ }
+
+    // already awakened or killed by another event, just remove it
llist_delete(&pos->waiters);
}
-}
\ No newline at end of file
+}
+
+void
+prepare_to_wait(waitq_t* queue)
+{
+ assert(current_thread);
+
+ waitq_t* current_wq = ¤t_thread->waitqueue;
+ assert(llist_empty(¤t_wq->waiters));
+
+ llist_append(&queue->waiters, ¤t_wq->waiters);
+}
+
+void
+try_wait()
+{
+ __try_wait(false);
+}
+
+void
+try_wait_check_stall()
+{
+ __try_wait(true);
+}
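
Splitting `pwait` into `prepare_to_wait` plus `try_wait` is what lets `blkio_commit` enqueue itself on the wait queue *before* publishing the request, closing the lost-wakeup window flagged in the blkio FIXME. A sketch of the two-phase protocol (`publish_work()` is a hypothetical stand-in for, e.g., appending to a request queue):

```c
/* Sketch: two-phase wait, as used by blkio_commit. */
static waitq_t wq;   /* assume the waitq was initialised elsewhere */

static void requester(void)
{
    prepare_to_wait(&wq);  /* enqueue ourselves first... */
    publish_work();        /* ...then make completion possible... */
    try_wait();            /* ...and only now block; a wakeup racing
                            * in between is not lost */
}

static void completer(void)
{
    pwake_all(&wq);        /* readies every thread parked on wq */
}
```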
use("twifs")
use("ramfs")
-use("iso9660")
+
+if configured("fs_iso9660"):
+ use("iso9660")
+
+if configured("fs_ext2"):
+ use("ext2")
sources([
"twimap.c",
"fsm.c",
"fs_export.c",
"probe_boot.c"
-])
\ No newline at end of file
+])
+
--- /dev/null
+
+@Collection
+def file_system():
+ """ Config feature related to file system supports """
+
+ add_to_collection(kernel_feature)
+
+ @Term
+ def fs_ext2():
+ """ Enable ext2 file system support """
+
+ type(bool)
+ default(True)
+
+ @Term
+ def fs_iso9660():
+ """ Enable iso9660 file system support """
+
+ type(bool)
+ default(True)
+
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
int
default_file_close(struct v_file* file)
}
int
-default_file_seek(struct v_inode* inode, size_t offset)
+default_file_seek(struct v_file* file, size_t offset)
{
+ file->f_pos = offset;
return 0;
}
int
default_file_readdir(struct v_file* file, struct dir_context* dctx)
{
- int i = 0;
+ unsigned int i = 0;
struct v_dnode *pos, *n;
+
+ if (fsapi_handle_pseudo_dirent(file, dctx)) {
+ return 1;
+ }
+
llist_for_each(pos, n, &file->dnode->children, siblings)
{
- if (i < dctx->index) {
+ if (i < file->f_pos) {
i++;
continue;
}
dctx->read_complete_callback(dctx, pos->name.value, pos->name.len, 0);
- break;
+ return 1;
}
- return i;
+ return 0;
}
int
--- /dev/null
+sources([
+ "alloc.c",
+ "dir.c",
+ "file.c",
+ "group.c",
+ "inode.c",
+ "mount.c"
+])
\ No newline at end of file
--- /dev/null
+#include "ext2.h"
+
+static inline unsigned int
+__ext2_global_slot_alloc(struct v_superblock* vsb, int type_sel,
+ struct ext2_gdesc** gd_out)
+{
+ struct ext2_sbinfo* sb;
+ struct ext2_gdesc *pos;
+ struct llist_header *header;
+
+ sb = EXT2_SB(vsb);
+ header = &sb->free_list_sel[type_sel];
+
+ if (type_sel == GDESC_INO_SEL) {
+ pos = list_entry(header->next, struct ext2_gdesc, free_grps_ino);
+ }
+ else {
+ pos = list_entry(header->next, struct ext2_gdesc, free_grps_blk);
+ }
+
+ int alloc = ext2gd_alloc_slot(pos, type_sel);
+
+ if (valid_bmp_slot(alloc)) {
+ *gd_out = pos;
+ }
+
+ return alloc;
+}
+
+int
+ext2ino_alloc_slot(struct v_superblock* vsb, struct ext2_gdesc** gd_out)
+{
+ return __ext2_global_slot_alloc(vsb, GDESC_INO_SEL, gd_out);
+}
+
+int
+ext2db_alloc_slot(struct v_superblock* vsb, struct ext2_gdesc** gd_out)
+{
+ return __ext2_global_slot_alloc(vsb, GDESC_BLK_SEL, gd_out);
+}
+
+int
+ext2gd_alloc_slot(struct ext2_gdesc* gd, int type_sel)
+{
+ struct ext2_bmp* bmp;
+ struct ext2_sbinfo *sb;
+ int alloc;
+
+ sb = gd->sb;
+ bmp = &gd->bmps[type_sel];
+ alloc = ext2bmp_alloc_one(bmp);
+
+ if (alloc < 0) {
+ return alloc;
+ }
+
+ if (!ext2bmp_check_free(bmp)) {
+ llist_delete(&gd->free_list_sel[type_sel]);
+ }
+
+ if (type_sel == GDESC_INO_SEL) {
+ gd->info->bg_free_ino_cnt--;
+ sb->raw->s_free_ino_cnt--;
+ } else {
+ gd->info->bg_free_blk_cnt--;
+ sb->raw->s_free_blk_cnt--;
+ }
+
+ fsblock_dirty(gd->buf);
+ fsblock_dirty(sb->buf);
+ return alloc;
+}
+
+void
+ext2gd_free_slot(struct ext2_gdesc* gd, int type_sel, int slot)
+{
+ struct llist_header *free_ent, *free_list;
+ struct ext2_sbinfo *sb;
+
+ ext2bmp_free_one(&gd->bmps[type_sel], slot);
+
+ sb = gd->sb;
+    // select the free list by bitmap type, not by slot number
+    free_ent = &gd->free_list_sel[type_sel];
+    free_list = &gd->sb->free_list_sel[type_sel];
+ if (llist_empty(free_ent)) {
+ llist_append(free_list, free_ent);
+ }
+
+ if (type_sel == GDESC_INO_SEL) {
+ gd->info->bg_free_ino_cnt++;
+ sb->raw->s_free_ino_cnt++;
+ } else {
+ gd->info->bg_free_blk_cnt++;
+ sb->raw->s_free_blk_cnt++;
+ }
+
+ fsblock_dirty(gd->buf);
+ fsblock_dirty(sb->buf);
+}
\ No newline at end of file
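
Allocation here is two-level: the superblock keeps one free list per resource kind (inodes, blocks), and each group descriptor owns the actual bitmap. A worked round trip for one inode slot, using only the helpers from this file and `ext2.h` (sketch; error handling elided):

```c
/* Sketch: allocate one inode slot, then return it. */
static int ino_slot_roundtrip(struct v_superblock* vsb)
{
    struct ext2_gdesc* gd;
    int slot;

    /* picks the first group on the free-inode list and marks a bit */
    slot = ext2ino_alloc_slot(vsb, &gd);
    if (!valid_bmp_slot(slot)) {
        return ALLOC_FAIL;     /* no group has free inodes */
    }

    /* ... slot + gd->ino_base identifies the inode within the fs ... */

    ext2gd_free_slot(gd, GDESC_INO_SEL, slot);
    return slot;
}
```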
--- /dev/null
+#include <lunaix/mm/valloc.h>
+#include <lunaix/spike.h>
+#include <klibc/string.h>
+
+#include "ext2.h"
+
+static inline bool
+aligned_reclen(struct ext2b_dirent* dirent)
+{
+ return !(dirent->rec_len % 4);
+}
+
+static int
+__find_dirent_byname(struct v_inode* inode, struct hstr* name,
+ struct ext2_dnode* e_dnode_out)
+{
+ int errno = 0;
+ struct ext2_iterator iter;
+ struct ext2b_dirent *dir = NULL, *prev = NULL;
+ bbuf_t prev_buf = NULL;
+
+ ext2dr_itbegin(&iter, inode);
+
+ while (ext2dr_itnext(&iter)) {
+ dir = iter.dirent;
+
+ if (dir->name_len != name->len) {
+ goto cont;
+ }
+
+ if (strneq(dir->name, name->value, name->len)) {
+ goto done;
+ }
+
+cont:
+ prev = dir;
+ if (prev_buf) {
+ fsblock_put(prev_buf);
+ }
+ prev_buf = fsblock_take(iter.sel_buf);
+ }
+
+ errno = ENOENT;
+ goto _ret;
+
+done:
+ e_dnode_out->self = (struct ext2_dnode_sub) {
+ .buf = fsblock_take(iter.sel_buf),
+ .dirent = dir
+ };
+
+ e_dnode_out->prev = (struct ext2_dnode_sub) {
+ .buf = fsblock_take(prev_buf),
+ .dirent = prev
+ };
+
+_ret:
+ fsblock_put(prev_buf);
+ ext2dr_itend(&iter);
+ return itstate_sel(&iter, errno);
+}
+
+static size_t
+__dirent_realsize(struct ext2b_dirent* dirent)
+{
+ return sizeof(*dirent) - sizeof(dirent->name) + dirent->name_len;
+}
+
+#define DIRENT_SLOT_MID 0
+#define DIRENT_SLOT_LAST 1
+#define DIRENT_SLOT_EMPTY 2
+
+static int
+__find_free_dirent_slot(struct v_inode* inode, size_t size,
+ struct ext2_dnode* e_dnode_out, size_t *reclen)
+{
+ struct ext2_iterator iter;
+ struct ext2b_dirent *dir = NULL;
+ bbuf_t prev_buf = bbuf_null;
+ bool found = false;
+
+ ext2db_itbegin(&iter, inode);
+
+ size_t sz = 0;
+ unsigned int rec = 0, total_rec = 0;
+
+ while (!found && ext2db_itnext(&iter))
+ {
+ rec = 0;
+ do {
+ dir = (struct ext2b_dirent*)offset(iter.data, rec);
+
+ sz = dir->rec_len - __dirent_realsize(dir);
+ sz = ROUNDDOWN(sz, 4);
+ if (sz >= size) {
+ found = true;
+ break;
+ }
+
+ rec += dir->rec_len;
+ total_rec += dir->rec_len;
+ } while(rec < iter.blksz);
+
+ if (likely(prev_buf)) {
+ fsblock_put(prev_buf);
+ }
+
+ prev_buf = fsblock_take(iter.sel_buf);
+ }
+
+ if (blkbuf_nullbuf(prev_buf)) {
+ // this dir is brand new
+ return DIRENT_SLOT_EMPTY;
+ }
+
+ e_dnode_out->prev = (struct ext2_dnode_sub) {
+ .buf = fsblock_take(prev_buf),
+ .dirent = dir
+ };
+
+ if (!found) {
+        // prev is the last entry, and there is no more space left behind.
+ assert_fs(rec == iter.blksz);
+
+ e_dnode_out->self.buf = bbuf_null;
+ ext2db_itend(&iter);
+ return itstate_sel(&iter, DIRENT_SLOT_LAST);
+ }
+
+ unsigned int dir_size;
+
+ dir_size = ROUNDUP(__dirent_realsize(dir), 4);
+ *reclen = dir_size;
+
+ rec = total_rec + dir_size;
+ dir = (struct ext2b_dirent*)offset(iter.data, rec);
+
+ e_dnode_out->self = (struct ext2_dnode_sub) {
+ .buf = fsblock_take(iter.sel_buf),
+ .dirent = dir
+ };
+
+ ext2db_itend(&iter);
+ return DIRENT_SLOT_MID;
+}
+
+static inline void
+__destruct_ext2_dnode(struct ext2_dnode* e_dno)
+{
+ fsblock_put(e_dno->prev.buf);
+ fsblock_put(e_dno->self.buf);
+ vfree(e_dno);
+}
+
+static inline bool
+__check_special(struct v_dnode* dnode)
+{
+ return HSTR_EQ(&dnode->name, &vfs_dot)
+ || HSTR_EQ(&dnode->name, &vfs_ddot);
+}
+
+static bool
+__check_empty_dir(struct v_inode* dir_ino)
+{
+ struct ext2_iterator iter;
+ struct ext2b_dirent* dir;
+
+ ext2dr_itbegin(&iter, dir_ino);
+ while (ext2dr_itnext(&iter))
+ {
+ dir = iter.dirent;
+ if (strneq(dir->name, vfs_dot.value, 1)) {
+ continue;
+ }
+
+ if (strneq(dir->name, vfs_ddot.value, 2)) {
+ continue;
+ }
+
+ ext2dr_itend(&iter);
+ return false;
+ }
+
+ ext2dr_itend(&iter);
+ return true;
+}
+
+void
+ext2dr_itbegin(struct ext2_iterator* iter, struct v_inode* inode)
+{
+ *iter = (struct ext2_iterator){
+ .pos = 0,
+ .inode = inode,
+ .blksz = inode->sb->blksize
+ };
+
+ iter->sel_buf = ext2db_get(inode, 0);
+ ext2_itcheckbuf(iter);
+}
+
+void
+ext2dr_itreset(struct ext2_iterator* iter)
+{
+ fsblock_put(iter->sel_buf);
+ iter->sel_buf = ext2db_get(iter->inode, 0);
+ ext2_itcheckbuf(iter);
+
+ iter->pos = 0;
+}
+
+int
+ext2dr_itffw(struct ext2_iterator* iter, int count)
+{
+ int i = 0;
+ while (i < count && ext2dr_itnext(iter)) {
+ i++;
+ }
+
+ return i;
+}
+
+void
+ext2dr_itend(struct ext2_iterator* iter)
+{
+ if (iter->sel_buf) {
+ fsblock_put(iter->sel_buf);
+ }
+}
+
+bool
+ext2dr_itnext(struct ext2_iterator* iter)
+{
+ struct ext2b_dirent* d;
+ unsigned int blkpos, db_index;
+ bbuf_t buf;
+
+ buf = iter->sel_buf;
+
+ if (iter->has_error) {
+ return false;
+ }
+
+ if (likely(iter->dirent)) {
+ d = iter->dirent;
+
+ assert_fs(!(d->rec_len % 4));
+ iter->pos += d->rec_len;
+
+ if (!d->rec_len || !d->inode) {
+ return false;
+ }
+ }
+
+ blkpos = iter->pos % iter->blksz;
+ db_index = iter->pos / iter->blksz;
+
+ if (unlikely(iter->pos >= iter->blksz)) {
+ fsblock_put(buf);
+
+ buf = ext2db_get(iter->inode, db_index);
+ iter->sel_buf = buf;
+
+ if (!buf || !ext2_itcheckbuf(iter)) {
+ return false;
+ }
+ }
+
+ d = (struct ext2b_dirent*)offset(blkbuf_data(buf), blkpos);
+ iter->dirent = d;
+
+ return true;
+}
+
+int
+ext2dr_open(struct v_inode* this, struct v_file* file)
+{
+ struct ext2_file* e_file;
+
+ e_file = EXT2_FILE(file);
+
+ ext2dr_itbegin(&e_file->iter, this);
+
+ return itstate_sel(&e_file->iter, 0);
+}
+
+int
+ext2dr_close(struct v_inode* this, struct v_file* file)
+{
+ struct ext2_file* e_file;
+
+ e_file = EXT2_FILE(file);
+
+ ext2dr_itend(&e_file->iter);
+
+ return 0;
+}
+
+int
+ext2dr_lookup(struct v_inode* inode, struct v_dnode* dnode)
+{
+ int errno;
+ struct ext2b_dirent* dir;
+ struct ext2_dnode* e_dnode;
+ struct v_inode* dir_inode;
+
+ e_dnode = valloc(sizeof(struct ext2_dnode));
+ errno = __find_dirent_byname(inode, &dnode->name, e_dnode);
+ if (errno) {
+ vfree(e_dnode);
+ return errno;
+ }
+
+ dir = e_dnode->self.dirent;
+ if (!(dir_inode = vfs_i_find(inode->sb, dir->inode))) {
+ dir_inode = vfs_i_alloc(inode->sb);
+ ext2ino_fill(dir_inode, dir->inode);
+ }
+
+ dnode->data = e_dnode;
+ vfs_assign_inode(dnode, dir_inode);
+
+ return 0;
+}
+
+#define FT_NUL 0
+#define FT_REG 1
+#define FT_DIR 2
+#define FT_CHR 3
+#define FT_BLK 4
+#define FT_SYM 7
+#define check_imode(val, imode) (((val) & (imode)) == (imode))
+
+static inline unsigned int
+__imode_to_filetype(unsigned int imode)
+{
+ if (check_imode(imode, IMODE_IFLNK)) {
+ return FT_SYM;
+ }
+
+ if (check_imode(imode, IMODE_IFBLK)) {
+ return FT_BLK;
+ }
+
+ if (check_imode(imode, IMODE_IFCHR)) {
+ return FT_CHR;
+ }
+
+ if (check_imode(imode, IMODE_IFDIR)) {
+ return FT_DIR;
+ }
+
+ if (check_imode(imode, IMODE_IFREG)) {
+ return FT_REG;
+ }
+
+ return FT_NUL;
+}
+
+static int
+__dir_filetype(struct v_superblock* vsb, struct ext2b_dirent* dir)
+{
+ int errno;
+ unsigned int type;
+
+ if (ext2_feature(vsb, FEAT_FILETYPE)) {
+ type = dir->file_type;
+ }
+ else {
+ struct ext2_fast_inode e_fino;
+
+ errno = ext2ino_get_fast(vsb, dir->inode, &e_fino);
+ if (errno) {
+ return errno;
+ }
+
+ type = __imode_to_filetype(e_fino.ino->i_mode);
+
+ fsblock_put(e_fino.buf);
+ }
+
+ if (type == FT_DIR) {
+ return DT_DIR;
+ }
+
+ if (type == FT_SYM) {
+ return DT_SYMLINK;
+ }
+
+ return DT_FILE;
+}
+
+int
+ext2dr_read(struct v_file *file, struct dir_context *dctx)
+{
+ struct ext2_file* e_file;
+ struct ext2b_dirent* dir;
+ struct ext2_iterator* iter;
+ struct v_superblock* vsb;
+ int dirtype;
+
+ e_file = EXT2_FILE(file);
+ vsb = file->inode->sb;
+ iter = &e_file->iter;
+
+ if (!ext2dr_itnext(&e_file->iter)) {
+ return itstate_sel(iter, 0);
+ }
+
+ dir = e_file->iter.dirent;
+ dirtype = __dir_filetype(vsb, dir);
+ if (dirtype < 0) {
+ return dirtype;
+ }
+
+ fsapi_dir_report(dctx, dir->name, dir->name_len, dirtype);
+
+ return 1;
+}
+
+int
+ext2dr_seek(struct v_file* file, size_t offset)
+{
+ struct ext2_file* e_file;
+ struct ext2_iterator* iter;
+ unsigned int fpos;
+
+ e_file = EXT2_FILE(file);
+ iter = &e_file->iter;
+ fpos = file->f_pos;
+
+ if (offset == fpos) {
+ return 0;
+ }
+
+    if (offset > fpos) {
+        // fast-forward by the (positive) distance to the target
+        fpos = ext2dr_itffw(iter, offset - fpos);
+        return 0;
+    }
+
+ if (!offset || offset < fpos) {
+ ext2dr_itreset(iter);
+ }
+
+ fpos = ext2dr_itffw(iter, offset);
+
+ return itstate_sel(iter, 0);
+}
+
+int
+ext2dr_insert(struct v_inode* this, struct ext2b_dirent* dirent,
+ struct ext2_dnode** e_dno_out)
+{
+ int errno;
+ size_t size, new_reclen, old_reclen;
+ struct ext2_inode* e_self;
+ struct ext2_dnode* e_dno;
+ struct ext2b_dirent* prev_dirent;
+ bbuf_t buf;
+
+ e_self = EXT2_INO(this);
+ e_dno = vzalloc(sizeof(*e_dno));
+
+ size = __dirent_realsize(dirent);
+ errno = __find_free_dirent_slot(this, size, e_dno, &new_reclen);
+ if (errno < 0) {
+ goto failed;
+ }
+
+ if (errno == DIRENT_SLOT_EMPTY) {
+ if ((errno = ext2db_acquire(this, 0, &buf))) {
+ goto failed;
+ }
+
+ this->fsize += fsapi_block_size(this->sb);
+ ext2ino_update(this);
+
+ old_reclen = fsapi_block_size(this->sb);
+ e_dno->self.buf = buf;
+ e_dno->self.dirent = blkbuf_data(buf);
+
+ goto place_dir;
+ }
+
+ prev_dirent = e_dno->prev.dirent;
+ old_reclen = prev_dirent->rec_len;
+
+ if (errno == DIRENT_SLOT_LAST) {
+ // prev is last record
+ if ((errno = ext2db_alloc(this, &buf))) {
+ goto failed;
+ }
+
+ this->fsize += fsapi_block_size(this->sb);
+ ext2ino_update(this);
+
+ new_reclen = __dirent_realsize(prev_dirent);
+ new_reclen = ROUNDUP(new_reclen, sizeof(int));
+ e_dno->self = (struct ext2_dnode_sub) {
+ .buf = buf,
+ .dirent = block_buffer(buf, struct ext2b_dirent)
+ };
+ }
+
+ /*
+ --- +--------+ ---
+ ^ | prev | |
+ | +--------+ |
+ | | new_reclen
+ | |
+ | v
+ | +--------+ --- -
+ | | dirent | | | size
+ old_reclen | +--------+ | -
+ | | dirent.reclen
+ | |
+ v v
+ --- +--------+ ---
+ | next |
+ +--------+
+ */
+
+ old_reclen -= new_reclen;
+ prev_dirent->rec_len = new_reclen;
+ fsblock_dirty(e_dno->prev.buf);
+
+place_dir:
+ dirent->rec_len = ROUNDUP(old_reclen, sizeof(int));
+ memcpy(e_dno->self.dirent, dirent, size);
+ fsblock_dirty(e_dno->self.buf);
+
+ if (!e_dno_out) {
+ __destruct_ext2_dnode(e_dno);
+ }
+ else {
+ *e_dno_out = e_dno;
+ }
+
+ return errno;
+
+failed:
+ __destruct_ext2_dnode(e_dno);
+ return errno;
+}
+
+int
+ext2dr_remove(struct ext2_dnode* e_dno)
+{
+ struct ext2_dnode_sub *dir_prev, *dir;
+ assert(e_dno->prev.dirent);
+
+ dir_prev = &e_dno->prev;
+ dir = &e_dno->self;
+
+ dir_prev->dirent->rec_len += dir->dirent->rec_len;
+ dir->dirent->rec_len = 0;
+ dir->dirent->inode = 0;
+
+ fsblock_dirty(dir_prev->buf);
+ fsblock_dirty(dir->buf);
+
+ __destruct_ext2_dnode(e_dno);
+
+ return 0;
+}
+
+int
+ext2_rmdir(struct v_inode* this, struct v_dnode* dnode)
+{
+ int errno;
+ struct v_inode* self;
+ struct ext2_dnode* e_dno;
+
+ self = dnode->inode;
+ e_dno = EXT2_DNO(dnode);
+
+ if (__check_special(dnode)) {
+ return EINVAL;
+ }
+
+ if (!__check_empty_dir(self)) {
+ return ENOTEMPTY;
+ }
+
+ if ((errno = ext2ino_free(self))) {
+ return errno;
+ }
+
+ return ext2dr_remove(e_dno);
+}
+
+static int
+__d_insert(struct v_inode* parent, struct v_inode* self,
+ struct ext2b_dirent* dirent,
+ struct hstr* name, struct ext2_dnode** e_dno_out)
+{
+ ext2dr_setup_dirent(dirent, self, name);
+
+ dirent->inode = self->id;
+ return ext2dr_insert(parent, dirent, e_dno_out);
+}
+
+int
+ext2_mkdir(struct v_inode* this, struct v_dnode* dnode)
+{
+ int errno;
+ struct ext2_inode *e_contain, *e_created;
+ struct v_inode* i_created;
+ struct ext2_dnode* e_dno = NULL;
+ struct ext2b_dirent dirent;
+
+ e_contain = EXT2_INO(this);
+
+ errno = ext2ino_make(this->sb, VFS_IFDIR, e_contain, &i_created);
+ if (errno) {
+ return errno;
+ }
+
+ e_created = EXT2_INO(i_created);
+
+ if ((errno = __d_insert(this, i_created, &dirent, &dnode->name, &e_dno))) {
+ goto cleanup1;
+ }
+
+ // link the created dir inode to dirent
+ ext2ino_linkto(e_created, &dirent);
+ dnode->data = e_dno;
+
+ // insert . and ..
+ // we don't need ext2ino_linkto here.
+
+ if ((errno = __d_insert(i_created, i_created, &dirent, &vfs_dot, NULL))) {
+ goto cleanup;
+ }
+
+ if ((errno = __d_insert(i_created, this, &dirent, &vfs_ddot, NULL))) {
+ goto cleanup;
+ }
+
+ vfs_assign_inode(dnode, i_created);
+ return 0;
+
+cleanup:
+ __destruct_ext2_dnode(e_dno);
+
+cleanup1:
+ dnode->data = NULL;
+ ext2ino_free(i_created);
+ vfs_i_free(i_created);
+
+ return errno;
+}
+
+void
+ext2dr_setup_dirent(struct ext2b_dirent* dirent,
+ struct v_inode* inode, struct hstr* name)
+{
+ unsigned int imode;
+
+ imode = EXT2_INO(inode)->ino->i_mode;
+ *dirent = (struct ext2b_dirent){
+ .name_len = name->len
+ };
+
+ strncpy(dirent->name, name->value, name->len);
+
+ if (ext2_feature(inode->sb, FEAT_FILETYPE)) {
+ dirent->file_type = __imode_to_filetype(imode);
+ }
+}
+
+int
+ext2_rename(struct v_inode* from_inode, struct v_dnode* from_dnode,
+ struct v_dnode* to_dnode)
+{
+ int errno;
+ struct v_inode* to_parent;
+
+ if (EXT2_DNO(to_dnode)) {
+ errno = ext2_unlink(to_dnode->inode, to_dnode);
+ if (errno) {
+ return errno;
+ }
+ }
+
+ errno = ext2_link(from_inode, to_dnode);
+ if (errno) {
+ return errno;
+ }
+
+ return ext2_unlink(from_inode, from_dnode);
+}
\ No newline at end of file
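
To make the `rec_len` arithmetic in `ext2dr_insert` concrete, here is a worked mid-block split with assumed numbers (the `DIRENT_SLOT_MID` path):

```c
/* Worked example (numbers assumed, 1024-byte block):
 *
 *   prev: name_len = 5
 *     realsize     = 8 + 5 = 13        (__dirent_realsize)
 *     new_reclen   = ROUNDUP(13, 4) = 16
 *     prev->rec_len was 1024           (last entry, owns the block tail)
 *
 * after the split:
 *
 *   prev->rec_len   = 16               (shrunk to its real footprint)
 *   dirent->rec_len = 1024 - 16 = 1008 (inherits the remaining space)
 */
```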
--- /dev/null
+#ifndef __LUNAIX_EXT2_H
+#define __LUNAIX_EXT2_H
+
+#include <lunaix/fs/api.h>
+#include <lunaix/types.h>
+#include <lunaix/ds/llist.h>
+#include <lunaix/ds/hashtable.h>
+#include <lunaix/ds/lru.h>
+
+#define FEAT_COMPRESSION 0b00000001
+#define FEAT_RESIZE_INO 0b00000010
+#define FEAT_FILETYPE 0b00000100
+#define FEAT_SPARSE_SB 0b00001000
+#define FEAT_LARGE_FILE 0b00010000
+
+#define IMODE_IFSOCK 0xC000
+#define IMODE_IFLNK 0xA000
+#define IMODE_IFREG 0x8000
+#define IMODE_IFBLK 0x6000
+#define IMODE_IFDIR 0x4000
+#define IMODE_IFCHR 0x2000
+#define IMODE_IFFIFO 0x1000
+
+#define IMODE_URD 0x0100
+#define IMODE_UWR 0x0080
+#define IMODE_UEX 0x0040
+#define IMODE_GRD 0x0020
+#define IMODE_GWR 0x0010
+#define IMODE_GEX 0x0008
+#define IMODE_ORD 0x0004
+#define IMODE_OWR 0x0002
+#define IMODE_OEX 0x0001
+
+#define ext2_aligned compact align(4)
+#define to_ext2ino_id(fsblock_id) ((fsblock_id) + 1)
+#define to_fsblock_id(ext2_ino) ((ext2_ino) - 1)
+
+extern bcache_zone_t gdesc_bcache_zone;
+
+struct ext2b_super {
+ u32_t s_ino_cnt;
+ u32_t s_blk_cnt;
+ u32_t s_r_blk_cnt;
+ u32_t s_free_blk_cnt;
+ u32_t s_free_ino_cnt;
+ u32_t s_first_data_cnt;
+
+ u32_t s_log_blk_size;
+ u32_t s_log_frg_size;
+
+ u32_t s_blk_per_grp;
+ u32_t s_frg_per_grp;
+ u32_t s_ino_per_grp;
+
+ u32_t s_mtime;
+ u32_t s_wtime;
+
+ u16_t s_mnt_cnt;
+ u16_t s_max_mnt_cnt;
+ u16_t s_magic;
+ u16_t s_state;
+ u16_t s_error;
+ u16_t s_minor_rev;
+
+ u32_t s_last_check;
+ u32_t s_checkinterval;
+ u32_t s_creator_os;
+ u32_t s_rev;
+
+ u16_t s_def_resuid;
+ u16_t s_def_resgid;
+
+ // EXT2_DYNAMIC_REV
+
+ struct {
+ u32_t s_first_ino;
+ u16_t s_ino_size;
+ u16_t s_blkgrp_nr;
+ u32_t s_optional_feat;
+ u32_t s_required_feat;
+ u32_t s_ro_feat;
+ u8_t s_uuid[16];
+ u8_t s_volname[16];
+ u8_t s_last_mnt[64];
+ u32_t s_algo_bmp;
+ } compact;
+
+} ext2_aligned;
+
+struct ext2b_gdesc
+{
+ u32_t bg_blk_map;
+ u32_t bg_ino_map;
+ u32_t bg_ino_tab;
+
+ u16_t bg_free_blk_cnt;
+ u16_t bg_free_ino_cnt;
+ u16_t bg_used_dir_cnt;
+ u16_t bg_pad;
+} align(32) compact;
+
+struct ext2b_inode
+{
+ u16_t i_mode;
+ u16_t i_uid;
+ union {
+ u32_t i_size;
+ u32_t i_size_l32;
+ };
+
+ u32_t i_atime;
+ u32_t i_ctime;
+ u32_t i_mtime;
+ u32_t i_dtime;
+
+ u16_t i_gid;
+ u16_t i_lnk_cnt;
+
+ u32_t i_blocks;
+ u32_t i_flags;
+ u32_t i_osd1;
+
+ union {
+ struct
+ {
+ u32_t directs[12]; // directum
+ union {
+ struct {
+ u32_t ind1; // prima indirecta
+ u32_t ind23[2]; // secunda et tertia indirecta
+ } ext2_aligned;
+ u32_t inds[3];
+ };
+ } ext2_aligned i_block;
+ u32_t i_block_arr[15];
+ };
+
+ u32_t i_gen;
+ u32_t i_file_acl;
+ union {
+ u32_t i_dir_acl;
+ u32_t i_size_h32;
+ };
+ u32_t i_faddr;
+
+ u8_t i_osd2[12];
+} ext2_aligned;
+
+struct ext2b_dirent
+{
+ u32_t inode;
+ u16_t rec_len;
+ u8_t name_len;
+ u8_t file_type;
+ char name[256];
+} ext2_aligned;
+#define EXT2_DRE(v_dnode) (fsapi_impl_data(v_dnode, struct ext2b_dirent))
+
+
+#define GDESC_INO_SEL 0
+#define GDESC_BLK_SEL 1
+
+#define GDESC_FREE_LISTS \
+ union { \
+ struct { \
+ struct llist_header free_grps_ino; \
+ struct llist_header free_grps_blk; \
+ }; \
+ struct llist_header free_list_sel[2]; \
+ }
+
+#define check_gdesc_type_sel(sel) \
+ assert_msg(sel == GDESC_INO_SEL || sel == GDESC_BLK_SEL, \
+ "invalid type_sel");
+
+struct ext2_sbinfo
+{
+ /**
+ * @brief
+     * offset to the inode table (in terms of blocks) within each block
+     * group, to account for the difference in backup presence between
+     * rev 0/1
+ */
+ int ino_tab_len;
+
+ bool read_only;
+ unsigned int block_size;
+ unsigned int nr_gdesc_pb;
+ unsigned int nr_gdesc;
+ unsigned int all_feature;
+
+ struct device* bdev;
+ struct v_superblock* vsb;
+
+ struct ext2b_super* raw;
+ bbuf_t* gdt_frag;
+ struct bcache gd_caches;
+
+ bbuf_t buf;
+
+ struct {
+ struct llist_header gds;
+ GDESC_FREE_LISTS;
+ };
+};
+#define EXT2_SB(vsb) (fsapi_impl_data(vsb, struct ext2_sbinfo))
+
+
+struct ext2_bmp
+{
+ bbuf_t raw;
+ u8_t* bmp;
+ unsigned int nr_bytes;
+ int next_free;
+};
+
+struct ext2_gdesc
+{
+ struct llist_header groups;
+ GDESC_FREE_LISTS;
+
+ union {
+ struct {
+ struct ext2_bmp ino_bmp;
+ struct ext2_bmp blk_bmp;
+ };
+ struct ext2_bmp bmps[2];
+ };
+
+ unsigned int base;
+ unsigned int ino_base;
+
+ struct ext2b_gdesc* info;
+ struct ext2_sbinfo* sb;
+ bbuf_t buf;
+ bcobj_t cache_ref;
+};
+
+/*
+    Indirection Block Translation Look-aside Buffer
+
+    Provides a look-aside buffer for all last-level indirect blocks
+    that are at least two indirections away.
+
+ For 4KiB block size:
+ 16 sets, 256 ways, capacity 4096 blocks
+*/
+
+struct ext2_btlb_entry
+{
+ unsigned int tag;
+ bbuf_t block;
+};
+
+#define BTLB_SETS 16
+struct ext2_btlb
+{
+ struct ext2_btlb_entry buffer[BTLB_SETS];
+};
+
+struct ext2_fast_inode
+{
+ struct ext2b_inode* ino;
+ bbuf_t buf;
+};
+
+struct ext2_inode
+{
+ bbuf_t buf; // partial inotab that holds this inode
+    unsigned int inds_lgents;   // log2(# of blocks in an indirection level)
+ unsigned int ino_id;
+ size_t indirect_blocks;
+ size_t isize;
+
+ struct ext2b_inode* ino; // raw ext2 inode
+ struct ext2_btlb* btlb; // block-TLB
+ struct ext2_gdesc* blk_grp; // block group
+
+ union {
+ struct {
+ /*
+ (future)
+ dirent fragmentation degree, we will perform
+ full reconstruction on dirent table when this goes too high.
+ */
+ unsigned int dir_fragdeg;
+ };
+ };
+
+ // prefetched block for 1st order of indirection
+ bbuf_t ind_ord1;
+ char* symlink;
+};
+#define EXT2_INO(v_inode) (fsapi_impl_data(v_inode, struct ext2_inode))
+
+struct ext2_dnode_sub
+{
+ bbuf_t buf;
+ struct ext2b_dirent* dirent;
+};
+
+struct ext2_dnode
+{
+ struct ext2_dnode_sub self;
+ struct ext2_dnode_sub prev;
+};
+#define EXT2_DNO(v_dnode) (fsapi_impl_data(v_dnode, struct ext2_dnode))
+
+
+/**
+ * @brief General purpose iterator for ext2 objects
+ *
+ */
+struct ext2_iterator
+{
+ struct v_inode* inode;
+
+ union {
+ struct ext2b_dirent* dirent;
+ void* data;
+ };
+
+ union {
+ struct {
+ bool has_error:1;
+ };
+ unsigned int flags;
+ };
+
+ size_t pos;
+ unsigned int blksz;
+ size_t end_pos;
+ bbuf_t sel_buf;
+};
+
+struct ext2_file
+{
+ struct ext2_iterator iter;
+ struct ext2_inode* b_ino;
+};
+#define EXT2_FILE(v_file) (fsapi_impl_data(v_file, struct ext2_file))
+
+
+#define MAX_INDS_DEPTH 4
+
+struct walk_stack
+{
+ unsigned int tables[MAX_INDS_DEPTH];
+ unsigned int indices[MAX_INDS_DEPTH];
+};
+
+struct walk_state
+{
+ unsigned int* slot_ref;
+ bbuf_t table;
+ int indirections;
+ int level;
+
+ struct walk_stack stack;
+};
+
+static inline unsigned int
+ext2_datablock(struct v_superblock* vsb, unsigned int id)
+{
+ return EXT2_SB(vsb)->raw->s_first_data_cnt + id;
+}
+
+static inline bool
+ext2_feature(struct v_superblock* vsb, unsigned int feat)
+{
+ return !!(EXT2_SB(vsb)->all_feature & feat);
+}
+
+/* ************ Inodes ************ */
+
+void
+ext2ino_init(struct v_superblock* vsb, struct v_inode* inode);
+
+int
+ext2ino_get(struct v_superblock* vsb,
+ unsigned int ino, struct ext2_inode** out);
+
+int
+ext2ino_get_fast(struct v_superblock* vsb,
+ unsigned int ino, struct ext2_fast_inode* fast_ino);
+
+int
+ext2ino_fill(struct v_inode* inode, ino_t ino_id);
+
+int
+ext2ino_make(struct v_superblock* vsb, unsigned int itype,
+ struct ext2_inode* hint, struct v_inode** out);
+
+void
+ext2ino_update(struct v_inode* inode);
+
+int
+ext2ino_resizing(struct v_inode* inode, size_t new_size);
+
+static inline void
+ext2ino_linkto(struct ext2_inode* e_ino, struct ext2b_dirent* dirent)
+{
+ dirent->inode = e_ino->ino_id;
+ e_ino->ino->i_lnk_cnt++;
+ fsblock_dirty(e_ino->buf);
+}
+
+void
+ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode);
+
+void
+ext2db_itend(struct ext2_iterator* iter);
+
+bool
+ext2db_itnext(struct ext2_iterator* iter);
+
+int
+ext2db_itffw(struct ext2_iterator* iter, int count);
+
+void
+ext2db_itreset(struct ext2_iterator* iter);
+
+
+/**
+ * @brief Get the data block at given data pos associated with the
+ * inode, return NULL if not present
+ *
+ * @param inode
+ * @param data_pos
+ * @return bbuf_t
+ */
+bbuf_t
+ext2db_get(struct v_inode* inode, unsigned int data_pos);
+
+/**
+ * @brief Get the data block at given data pos associated with the
+ * inode, allocate one if not present.
+ *
+ * @param inode
+ * @param data_pos
+ * @param out
+ * @return int
+ */
+int
+ext2db_acquire(struct v_inode* inode, unsigned int data_pos, bbuf_t* out);
+
+void
+ext2db_free_pos(struct v_inode* inode, unsigned int block_pos);
+
+/* ************* Walker ************* */
+
+static inline void
+ext2walk_init_state(struct walk_state* state)
+{
+ *state = (struct walk_state) { };
+}
+
+static inline void
+ext2walk_free_state(struct walk_state* state)
+{
+ fsblock_put(state->table);
+}
+
+/* ************* Iterator ************* */
+
+static inline bool
+ext2_iterror(struct ext2_iterator* iter) {
+ return iter->has_error;
+}
+
+static inline bool
+ext2_itcheckbuf(struct ext2_iterator* iter) {
+ return !(iter->has_error = blkbuf_errbuf(iter->sel_buf));
+}
+
+#define itstate_sel(iter, value) \
+ (ext2_iterror(iter) ? EIO : (int)(value))
+
+
+/* ************ Block Group ************ */
+
+void
+ext2gd_prepare_gdt(struct v_superblock* vsb);
+
+void
+ext2gd_release_gdt(struct v_superblock* vsb);
+
+int
+ext2gd_take(struct v_superblock* vsb,
+ unsigned int index, struct ext2_gdesc** out);
+
+static inline void
+ext2gd_put(struct ext2_gdesc* gd) {
+ bcache_return(gd->cache_ref);
+}
+
+
+/* ************ Directory ************ */
+
+int
+ext2dr_lookup(struct v_inode* this, struct v_dnode* dnode);
+
+int
+ext2dr_read(struct v_file *file, struct dir_context *dctx);
+
+void
+ext2dr_itbegin(struct ext2_iterator* iter, struct v_inode* inode);
+
+void
+ext2dr_itend(struct ext2_iterator* iter);
+
+static inline bool
+ext2dr_itdrain(struct ext2_iterator* iter)
+{
+ return iter->pos > iter->end_pos;
+}
+
+bool
+ext2dr_itnext(struct ext2_iterator* iter);
+
+int
+ext2dr_itffw(struct ext2_iterator* iter, int count);
+
+void
+ext2dr_itreset(struct ext2_iterator* iter);
+
+int
+ext2dr_open(struct v_inode* this, struct v_file* file);
+
+int
+ext2dr_close(struct v_inode* this, struct v_file* file);
+
+int
+ext2dr_seek(struct v_file* file, size_t offset);
+
+int
+ext2dr_insert(struct v_inode* this, struct ext2b_dirent* dirent,
+ struct ext2_dnode** e_dno_out);
+
+int
+ext2dr_remove(struct ext2_dnode* e_dno);
+
+int
+ext2_rmdir(struct v_inode* parent, struct v_dnode* dnode);
+
+int
+ext2_mkdir(struct v_inode* parent, struct v_dnode* dnode);
+
+int
+ext2_rename(struct v_inode* from_inode, struct v_dnode* from_dnode,
+ struct v_dnode* to_dnode);
+
+void
+ext2dr_setup_dirent(struct ext2b_dirent* dirent,
+ struct v_inode* inode, struct hstr* name);
+
+
+/* ************ Files ************ */
+
+int
+ext2_open_inode(struct v_inode* this, struct v_file* file);
+
+int
+ext2_close_inode(struct v_file* file);
+
+int
+ext2_sync_inode(struct v_inode* inode);
+
+int
+ext2_file_sync(struct v_file* file);
+
+int
+ext2_inode_read(struct v_inode *inode, void *buffer, size_t len, size_t fpos);
+
+int
+ext2_inode_read_page(struct v_inode *inode, void *buffer, size_t fpos);
+
+int
+ext2_inode_write(struct v_inode *inode, void *buffer, size_t len, size_t fpos);
+
+int
+ext2_inode_write_page(struct v_inode *inode, void *buffer, size_t fpos);
+
+int
+ext2_seek_inode(struct v_file* file, size_t offset);
+
+int
+ext2_create(struct v_inode* this, struct v_dnode* dnode, unsigned int itype);
+
+int
+ext2_link(struct v_inode* this, struct v_dnode* new_name);
+
+int
+ext2_unlink(struct v_inode* this, struct v_dnode* name);
+
+int
+ext2_get_symlink(struct v_inode *this, const char **path_out);
+
+int
+ext2_set_symlink(struct v_inode *this, const char *target);
+
+/* *********** Bitmap *********** */
+
+void
+ext2bmp_init(struct ext2_bmp* e_bmp, bbuf_t bmp_buf, unsigned int nr_bits);
+
+bool
+ext2bmp_check_free(struct ext2_bmp* e_bmp);
+
+int
+ext2bmp_alloc_one(struct ext2_bmp* e_bmp);
+
+void
+ext2bmp_free_one(struct ext2_bmp* e_bmp, unsigned int pos);
+
+void
+ext2bmp_discard(struct ext2_bmp* e_bmp);
+
+/* *********** Allocations *********** */
+
+#define ALLOC_FAIL -1
+
+static inline bool
+valid_bmp_slot(int slot)
+{
+ return slot != ALLOC_FAIL;
+}
+
+int
+ext2gd_alloc_slot(struct ext2_gdesc* gd, int type_sel);
+
+void
+ext2gd_free_slot(struct ext2_gdesc* gd, int type_sel, int slot);
+
+static inline int
+ext2gd_alloc_inode(struct ext2_gdesc* gd)
+{
+ return ext2gd_alloc_slot(gd, GDESC_INO_SEL);
+}
+
+static inline int
+ext2gd_alloc_block(struct ext2_gdesc* gd)
+{
+ return ext2gd_alloc_slot(gd, GDESC_BLK_SEL);
+}
+
+static inline void
+ext2gd_free_inode(struct ext2_gdesc* gd, int slot)
+{
+ ext2gd_free_slot(gd, GDESC_INO_SEL, slot);
+}
+
+static inline void
+ext2gd_free_block(struct ext2_gdesc* gd, int slot)
+{
+ ext2gd_free_slot(gd, GDESC_BLK_SEL, slot);
+}
+
+
+/**
+ * @brief Allocate a free inode
+ *
+ * @param vsb
+ * @param hint locality hint
+ * @param out
+ * @return int
+ */
+int
+ext2ino_alloc(struct v_superblock* vsb,
+ struct ext2_inode* hint, struct ext2_inode** out);
+
+/**
+ * @brief Allocate a free data block
+ *
+ * @param inode inode where the data block goes, also used as locality hint
+ * @return bbuf_t
+ */
+int
+ext2db_alloc(struct v_inode* inode, bbuf_t* out);
+
+/**
+ * @brief free an inode
+ *
+ * @param vsb
+ * @param hint locality hint
+ * @param out
+ * @return int
+ */
+int
+ext2ino_free(struct v_inode* inode);
+
+/**
+ * @brief Free a data block
+ *
+ * @param inode inode where the data block goes, also used as locality hint
+ * @return bbuf_t
+ */
+int
+ext2db_free(struct v_inode* inode, bbuf_t buf);
+
+int
+ext2ino_alloc_slot(struct v_superblock* vsb, struct ext2_gdesc** gd_out);
+
+int
+ext2db_alloc_slot(struct v_superblock* vsb, struct ext2_gdesc** gd_out);
+
+
+#endif /* __LUNAIX_EXT2_H */
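
The iterator API declared above is the backbone of most directory and data-block routines in this patch (`__check_empty_dir`, `ext2dr_read`, the file read path). A minimal sketch of walking a directory with it:

```c
/* Sketch: enumerate every dirent of a directory inode. */
static void walk_dir(struct v_inode* dir)
{
    struct ext2_iterator iter;
    struct ext2b_dirent* d;

    ext2dr_itbegin(&iter, dir);
    while (ext2dr_itnext(&iter)) {
        d = iter.dirent;
        /* d->name[0 .. d->name_len) is the entry name,
         * d->inode its on-disk inode id */
    }
    ext2dr_itend(&iter);
}
```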
--- /dev/null
+#include <lunaix/mm/valloc.h>
+#include <lunaix/mm/page.h>
+#include "ext2.h"
+
+#define blkpos(e_sb, fpos) ((fpos) / (e_sb)->block_size)
+#define blkoff(e_sb, fpos) ((fpos) % (e_sb)->block_size)
+
+int
+ext2_open_inode(struct v_inode* inode, struct v_file* file)
+{
+ int errno = 0;
+ struct ext2_file* e_file;
+
+ e_file = valloc(sizeof(*e_file));
+ e_file->b_ino = EXT2_INO(inode);
+
+ file->data = e_file;
+
+ if (check_directory_node(inode)) {
+ errno = ext2dr_open(inode, file);
+ goto done;
+ }
+
+ // XXX anything for regular file?
+
+done:
+ if (!errno) {
+ return 0;
+ }
+
+ vfree(e_file);
+ file->data = NULL;
+ return errno;
+}
+
+int
+ext2_close_inode(struct v_file* file)
+{
+ ext2ino_update(file->inode);
+
+ if (check_directory_node(file->inode)) {
+ ext2dr_close(file->inode, file);
+ }
+
+ vfree(file->data);
+ file->data = NULL;
+ return 0;
+}
+
+int
+ext2_sync_inode(struct v_inode* inode)
+{
+ // TODO
+    // a modification to an inode may involve multiple blkbufs
+    // scattered among different groups.
+    // For now, we just sync everything, until we figure out
+    // a way to track each dirtied blkbuf w.r.t. the inode
+ ext2ino_resizing(inode, inode->fsize);
+ blkbuf_syncall(inode->sb->blks, false);
+
+ return 0;
+}
+
+int
+ext2_file_sync(struct v_file* file)
+{
+ return ext2_sync_inode(file->inode);
+}
+
+int
+ext2_seek_inode(struct v_file* file, size_t offset)
+{
+ if (check_directory_node(file->inode)) {
+ return ext2dr_seek(file, offset);
+ }
+
+ // nothing to do, seek on file pos handled by vfs
+ return 0;
+}
+
+int
+ext2_inode_read(struct v_inode *inode,
+ void *buffer, size_t len, size_t fpos)
+{
+ struct ext2_sbinfo* e_sb;
+ struct ext2_iterator iter;
+ struct ext2b_inode* b_ino;
+ struct ext2_inode* e_ino;
+ unsigned int off;
+ unsigned int end;
+ unsigned int sz = 0, blksz, movsz;
+
+ e_sb = EXT2_SB(inode->sb);
+ e_ino = EXT2_INO(inode);
+ b_ino = e_ino->ino;
+ blksz = e_sb->block_size;
+ end = fpos + len;
+
+ ext2db_itbegin(&iter, inode);
+ ext2db_itffw(&iter, fpos / blksz);
+
+ while (fpos < end && ext2db_itnext(&iter)) {
+ off = fpos % blksz;
+ movsz = MIN(end - fpos, blksz - off);
+
+ memcpy(buffer, offset(iter.data, off), movsz);
+
+ buffer = offset(buffer, movsz);
+ fpos += movsz;
+ sz += movsz;
+ }
+
+ ext2db_itend(&iter);
+ return itstate_sel(&iter, MIN(sz, e_ino->isize));
+}
+
+int
+ext2_inode_read_page(struct v_inode *inode, void *buffer, size_t fpos)
+{
+ struct ext2_sbinfo* e_sb;
+ struct ext2_iterator iter;
+ struct ext2_inode* e_ino;
+ struct ext2b_inode* b_ino;
+ unsigned int blk_start, n,
+ transfer_sz, total_sz = 0;
+
+ assert(!va_offset(fpos));
+
+ e_sb = EXT2_SB(inode->sb);
+ e_ino = EXT2_INO(inode);
+ b_ino = e_ino->ino;
+
+ blk_start = fpos / e_sb->block_size;
+ n = PAGE_SIZE / e_sb->block_size;
+ transfer_sz = MIN(PAGE_SIZE, e_sb->block_size);
+
+ ext2db_itbegin(&iter, inode);
+ ext2db_itffw(&iter, blk_start);
+
+ while (n-- && ext2db_itnext(&iter))
+ {
+ memcpy(buffer, iter.data, transfer_sz);
+ buffer = offset(buffer, transfer_sz);
+ total_sz += transfer_sz;
+ }
+
+ ext2db_itend(&iter);
+ return itstate_sel(&iter, MIN(total_sz, e_ino->isize));
+}
+
+int
+ext2_inode_write(struct v_inode *inode,
+ void *buffer, size_t len, size_t fpos)
+{
+ int errno;
+ unsigned int acc, blk_off, end, size;
+ struct ext2_sbinfo* e_sb;
+ bbuf_t buf;
+
+ e_sb = EXT2_SB(inode->sb);
+
+ acc = 0;
+ end = fpos + len;
+ while (fpos < end) {
+ errno = ext2db_acquire(inode, blkpos(e_sb, fpos), &buf);
+ if (errno) {
+ return errno;
+ }
+
+        blk_off = blkoff(e_sb, fpos);
+        // clamp to the request, mirroring the read path's movsz
+        size = MIN(end - fpos, e_sb->block_size - blk_off);
+
+ memcpy(offset(blkbuf_data(buf), blk_off), buffer, size);
+ buffer = offset(buffer, size);
+
+ fsblock_dirty(buf);
+ fsblock_put(buf);
+
+ fpos += size;
+ acc += size;
+ }
+
+ return (int)acc;
+}
+
+int
+ext2_inode_write_page(struct v_inode *inode, void *buffer, size_t fpos)
+{
+ return ext2_inode_write(inode, buffer, PAGE_SIZE, fpos);
+}
+
+#define SYMLNK_INPLACE \
+ sizeof(((struct ext2b_inode*)0)->i_block_arr)
+
+static inline int
+__readlink_symlink(struct v_inode *this, char* path)
+{
+ size_t size;
+ char* link = NULL;
+ int errno;
+ bbuf_t buf;
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(this);
+ size = e_ino->isize;
+ if (size <= SYMLNK_INPLACE) {
+ link = (char*) e_ino->ino->i_block_arr;
+ strncpy(path, link, size);
+ }
+ else {
+ buf = ext2db_get(this, 0);
+ if (blkbuf_errbuf(buf)) {
+ return EIO;
+ }
+
+ link = blkbuf_data(buf);
+ strncpy(path, link, size);
+
+ fsblock_put(buf);
+ }
+
+ return 0;
+}
+
+int
+ext2_get_symlink(struct v_inode *this, const char **path_out)
+{
+ int errno;
+ size_t size;
+ char* symlink;
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(this);
+ size = e_ino->isize;
+
+ if (!size) {
+ return ENOENT;
+ }
+
+ if (!e_ino->symlink) {
+ symlink = valloc(size);
+ if ((errno = __readlink_symlink(this, symlink))) {
+ vfree(symlink);
+ return errno;
+ }
+
+ e_ino->symlink = symlink;
+ }
+
+ *path_out = e_ino->symlink;
+
+ return size;
+}
+
+int
+ext2_set_symlink(struct v_inode *this, const char *target)
+{
+ int errno = 0;
+ bbuf_t buf = NULL;
+ char* link;
+ size_t size, new_len;
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(this);
+ size = e_ino->isize;
+ new_len = strlen(target);
+
+ if (new_len > this->sb->blksize) {
+ return ENAMETOOLONG;
+ }
+
+ if (size != new_len) {
+ vfree_safe(e_ino->symlink);
+ e_ino->symlink = valloc(new_len);
+ }
+
+ link = (char*) e_ino->ino->i_block_arr;
+
+    // if the new size has shrunk into the in-place range
+ if (size > SYMLNK_INPLACE && new_len <= SYMLNK_INPLACE)
+ {
+ ext2db_free_pos(this, 0);
+ }
+
+    // if the new size is too big to fit in place
+ if (new_len > SYMLNK_INPLACE) {
+
+ // repurpose the i_block array back to normal
+ if (size <= SYMLNK_INPLACE) {
+ memset(link, 0, SYMLNK_INPLACE);
+ }
+
+ errno = ext2db_acquire(this, 0, &buf);
+ if (errno) {
+ goto done;
+ }
+
+ link = blkbuf_data(buf);
+ }
+
+ strncpy(e_ino->symlink, target, new_len);
+ strncpy(link, target, new_len);
+
+ ext2ino_update(this);
+ ext2ino_resizing(this, new_len);
+
+ if (buf) {
+ fsblock_put(buf);
+ }
+
+done:
+ return errno;
+}
\ No newline at end of file
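
The symlink placement rule above in numbers: `SYMLNK_INPLACE` is `sizeof(i_block_arr)`, i.e. 15 × 4 = 60 bytes, so short targets never cost a data block:

```c
/* Worked example (targets assumed):
 *
 *   "/tmp/x"  (6 bytes, <= 60)  -> stored inside i_block_arr itself
 *   100-byte target    (> 60)   -> stored in data block 0; i_block_arr
 *                                  reverts to its block-pointer role
 */
```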
--- /dev/null
+#include <lunaix/mm/valloc.h>
+
+#include "ext2.h"
+
+bcache_zone_t gdesc_bcache_zone;
+
+static void
+__cached_gdesc_evict(struct bcache* bc, void* data)
+{
+ struct ext2_gdesc* gd;
+ gd = (struct ext2_gdesc*)data;
+
+ llist_delete(&gd->groups);
+ llist_delete(&gd->free_grps_blk);
+ llist_delete(&gd->free_grps_ino);
+
+ fsblock_put(gd->buf);
+
+ vfree(gd);
+}
+
+static void
+__cached_gdesc_sync(struct bcache*, unsigned long tag, void* data)
+{
+    // since all mods to the gdesc go directly into the fs buffer,
+    // we just need to invoke sync on the underlying buffer.
+
+ struct ext2_gdesc* gd;
+ gd = (struct ext2_gdesc*)data;
+
+ fsblock_sync(gd->buf);
+}
+
+static struct bcache_ops gdesc_bc_ops = {
+ .release_on_evict = __cached_gdesc_evict,
+ .sync_cached = __cached_gdesc_sync
+};
+
+void
+ext2gd_prepare_gdt(struct v_superblock* vsb)
+{
+ struct ext2b_super* sb;
+ unsigned int nr_parts;
+ unsigned int nr_gd_pb, nr_gd;
+ struct ext2_sbinfo* ext2sb;
+
+ ext2sb = EXT2_SB(vsb);
+ sb = ext2sb->raw;
+
+ nr_gd_pb = ext2sb->block_size / sizeof(struct ext2b_gdesc);
+ nr_gd = ICEIL(sb->s_blk_cnt, sb->s_blk_per_grp);
+ nr_parts = ICEIL(nr_gd, nr_gd_pb);
+
+ ext2sb->gdt_frag = (bbuf_t*)vcalloc(sizeof(bbuf_t), nr_parts);
+ ext2sb->nr_gdesc_pb = nr_gd_pb;
+ ext2sb->nr_gdesc = nr_gd;
+
+ bcache_init_zone(&ext2sb->gd_caches, gdesc_bcache_zone,
+ ilog2(64), 0, sizeof(struct ext2b_gdesc), &gdesc_bc_ops);
+
+ llist_init_head(&ext2sb->gds);
+ llist_init_head(&ext2sb->free_grps_blk);
+ llist_init_head(&ext2sb->free_grps_ino);
+}
+
+void
+ext2gd_release_gdt(struct v_superblock* vsb)
+{
+ unsigned int parts_cnt;
+ struct ext2_sbinfo* ext2sb;
+
+ ext2sb = EXT2_SB(vsb);
+ parts_cnt = ICEIL(ext2sb->nr_gdesc, ext2sb->nr_gdesc_pb);
+ for (size_t i = 0; i < parts_cnt; i++)
+ {
+ if (!ext2sb->gdt_frag[i]) {
+ continue;
+ }
+
+ fsblock_put(ext2sb->gdt_frag[i]);
+ ext2sb->gdt_frag[i] = NULL;
+ }
+}
+
+static inline bool
+__try_load_bitmap(struct v_superblock* vsb,
+ struct ext2_gdesc* gd, int type)
+{
+ struct ext2_sbinfo* ext2sb;
+ struct ext2_bmp* bmp;
+ struct llist_header* flist, *flist_entry;
+ bbuf_t buf;
+ unsigned int blk_id, bmp_blk_id, bmp_size;
+
+ ext2sb = EXT2_SB(vsb);
+
+ if (type == GDESC_INO_SEL) {
+ bmp_blk_id = gd->info->bg_ino_map;
+ bmp_size = ext2sb->raw->s_ino_per_grp;
+ bmp = &gd->ino_bmp;
+ }
+ else if (type == GDESC_BLK_SEL) {
+ bmp_blk_id = gd->info->bg_blk_map;
+ bmp_size = ext2sb->raw->s_blk_per_grp;
+ bmp = &gd->blk_bmp;
+ }
+ else {
+ fail_fs("unknown bitmap type");
+ }
+
+ flist = &ext2sb->free_list_sel[type];
+ flist_entry = &gd->free_list_sel[type];
+
+ blk_id = ext2_datablock(vsb, bmp_blk_id);
+ buf = fsblock_get(vsb, blk_id);
+ if (blkbuf_errbuf(buf)) {
+ return false;
+ }
+
+ ext2bmp_init(bmp, buf, bmp_size);
+
+ if (ext2bmp_check_free(bmp)) {
+ llist_append(flist, flist_entry);
+ }
+
+ return true;
+}
+
+int
+ext2gd_take(struct v_superblock* vsb,
+ unsigned int index, struct ext2_gdesc** out)
+{
+ bbuf_t part, buf;
+ struct ext2_sbinfo* ext2sb;
+ unsigned int blk_id, blk_off;
+
+ ext2sb = EXT2_SB(vsb);
+
+ if (index >= ext2sb->nr_gdesc) {
+ return ENOENT;
+ }
+
+ bcobj_t cached;
+ if (bcache_tryget(&ext2sb->gd_caches, index, &cached)) {
+ *out = (struct ext2_gdesc*)bcached_data(cached);
+ return 0;
+ }
+
+ blk_id = index / ext2sb->nr_gdesc_pb;
+ blk_off = index % ext2sb->nr_gdesc_pb;
+
+ part = ext2sb->gdt_frag[blk_id];
+ if (!part) {
+        // keep blk_id as the fragment index: derive the on-disk block
+        // id separately so gdt_frag is indexed correctly below
+        part = fsblock_get(vsb, ext2_datablock(vsb, blk_id + 1));
+        if (!part) {
+            return EIO;
+        }
+
+        ext2sb->gdt_frag[blk_id] = part;
+ }
+
+ struct ext2_gdesc* gd;
+
+ gd = valloc(sizeof(struct ext2_gdesc));
+ *gd = (struct ext2_gdesc) {
+ .info = &block_buffer(part, struct ext2b_gdesc)[blk_off],
+ .buf = part,
+ .base = index * ext2sb->raw->s_blk_per_grp,
+ .ino_base = index * ext2sb->raw->s_ino_per_grp
+ };
+
+ *out = gd;
+
+ if (!ext2sb->read_only) {
+ if (!__try_load_bitmap(vsb, gd, GDESC_INO_SEL)) {
+ goto cleanup;
+ }
+
+ if (!__try_load_bitmap(vsb, gd, GDESC_BLK_SEL)) {
+ llist_delete(&gd->free_grps_ino);
+ goto cleanup;
+ }
+ }
+
+ llist_append(&ext2sb->gds, &gd->groups);
+
+ cached = bcache_put_and_ref(&ext2sb->gd_caches, index, gd);
+ gd->cache_ref = cached;
+ gd->sb = ext2sb;
+
+ return 0;
+
+cleanup:
+ *out = NULL;
+
+ vfree(gd);
+ return EIO;
+}
+
+static void
+__ext2bmp_update_next_free_cell(struct ext2_bmp* e_bmp)
+{
+ unsigned int i;
+ unsigned int end;
+
+ i = valid_bmp_slot(e_bmp->next_free) ? e_bmp->next_free : 0;
+ end = i;
+
+    // next fit: try to maximize locality without resorting
+    // to anything fancy
+ do {
+ if (e_bmp->bmp[i] != 0xff) {
+ e_bmp->next_free = i;
+ return;
+ }
+
+ if (++i == e_bmp->nr_bytes) {
+ i = 0;
+ }
+ }
+ while (i != end);
+
+ e_bmp->next_free = ALLOC_FAIL;
+}
+
+void
+ext2bmp_init(struct ext2_bmp* e_bmp, bbuf_t bmp_buf, unsigned int nr_bits)
+{
+ assert(nr_bits % 8 == 0);
+
+ e_bmp->bmp = blkbuf_data(bmp_buf);
+ e_bmp->raw = bmp_buf;
+ e_bmp->nr_bytes = nr_bits / 8;
+
+ __ext2bmp_update_next_free_cell(e_bmp);
+}
+
+bool
+ext2bmp_check_free(struct ext2_bmp* e_bmp)
+{
+ assert(e_bmp->raw);
+
+ return valid_bmp_slot(e_bmp->next_free);
+}
+
+int
+ext2bmp_alloc_one(struct ext2_bmp* e_bmp)
+{
+ assert(e_bmp->raw);
+
+ u8_t cell;
+ int slot, next_free;
+
+ if (!valid_bmp_slot(e_bmp->next_free)) {
+ return ALLOC_FAIL;
+ }
+
+ slot = 0;
+ next_free = e_bmp->next_free;
+ cell = e_bmp->bmp[next_free];
+ assert(cell != 0xff);
+
+ while ((cell & (1 << slot++)));
+
+ cell |= (1 << --slot);
+ slot += (next_free * 8);
+ e_bmp->bmp[next_free] = cell;
+
+ if (cell == 0xff) {
+ __ext2bmp_update_next_free_cell(e_bmp);
+ }
+
+ fsblock_dirty(e_bmp->raw);
+ return slot;
+}
+
+void
+ext2bmp_free_one(struct ext2_bmp* e_bmp, unsigned int pos)
+{
+ assert(e_bmp->raw);
+
+ int cell_idx = pos / 8;
+ u8_t cell_mask = 1 << (pos % 8);
+ e_bmp->bmp[cell_idx] &= ~cell_mask;
+
+ if (!valid_bmp_slot(e_bmp->next_free)) {
+ e_bmp->next_free = cell_idx;
+ }
+
+ fsblock_dirty(e_bmp->raw);
+}
+
+void
+ext2bmp_discard(struct ext2_bmp* e_bmp)
+{
+ assert(e_bmp->raw);
+
+ fsblock_put(e_bmp->raw);
+ e_bmp->raw = NULL;
+}
\ No newline at end of file
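
`ext2bmp_alloc_one` is a byte-granular next-fit: `next_free` caches the index of a byte known to have a clear bit, and the allocator picks the lowest clear bit within it. A worked example with assumed state:

```c
/* Worked example (state assumed):
 *
 *   bmp[3]    = 0b11010111   (bit 3 is the lowest clear bit)
 *   next_free = 3
 *
 * ext2bmp_alloc_one then yields:
 *
 *   slot   = 3 + 3 * 8 = 27
 *   bmp[3] = 0b11011111      (still != 0xff, so next_free stays 3)
 */
```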
--- /dev/null
+#include <lunaix/fs/api.h>
+#include <lunaix/mm/valloc.h>
+
+#include <klibc/string.h>
+
+#include "ext2.h"
+
+static struct v_inode_ops ext2_inode_ops = {
+ .dir_lookup = ext2dr_lookup,
+ .open = ext2_open_inode,
+ .mkdir = ext2_mkdir,
+ .rmdir = ext2_rmdir,
+ .read_symlink = ext2_get_symlink,
+ .set_symlink = ext2_set_symlink,
+ .rename = ext2_rename,
+ .link = ext2_link,
+ .unlink = ext2_unlink,
+ .create = ext2_create,
+ .sync = ext2_sync_inode
+};
+
+static struct v_file_ops ext2_file_ops = {
+ .close = ext2_close_inode,
+
+ .read = ext2_inode_read,
+ .read_page = ext2_inode_read_page,
+
+ .write = ext2_inode_write,
+ .write_page = ext2_inode_write_page,
+
+ .readdir = ext2dr_read,
+ .seek = ext2_seek_inode,
+ .sync = ext2_file_sync
+};
+
+#define to_tag(e_ino, val) \
+ (((val) >> (e_ino)->inds_lgents) | (1 << msbiti))
+#define valid_tag(tag) ((tag) & (1 << msbiti))
+
+static void
+__btlb_insert(struct ext2_inode* e_inode, unsigned int blkid, bbuf_t buf)
+{
+ struct ext2_btlb* btlb;
+ struct ext2_btlb_entry* btlbe = NULL;
+ unsigned int cap_sel;
+
+ if (unlikely(!blkid)) {
+ return;
+ }
+
+ btlb = e_inode->btlb;
+
+ for (int i = 0; i < BTLB_SETS; i++)
+ {
+ if (valid_tag(btlb->buffer[i].tag)) {
+ continue;
+ }
+
+ btlbe = &btlb->buffer[i];
+ goto found;
+ }
+
+ /*
+       we have triggered a capacity miss.
+       since most file operations are heavily linear with strong
+       locality, we place our bet on that and avoid the whole
+       overhead of LRU eviction. A trivial random eviction
+       does the job just fine.
+ */
+ cap_sel = hash_32(blkid, ilog2(BTLB_SETS));
+ btlbe = &btlb->buffer[cap_sel];
+
+ fsblock_put(btlbe->block);
+
+found:
+ btlbe->tag = to_tag(e_inode, blkid);
+ btlbe->block = fsblock_take(buf);
+}
+
+static bbuf_t
+__btlb_hit(struct ext2_inode* e_inode, unsigned int blkid)
+{
+ struct ext2_btlb* btlb;
+ struct ext2_btlb_entry* btlbe = NULL;
+ unsigned int in_tag, ref_cnts;
+
+ btlb = e_inode->btlb;
+ in_tag = to_tag(e_inode, blkid);
+
+ for (int i = 0; i < BTLB_SETS; i++)
+ {
+ btlbe = &btlb->buffer[i];
+
+ if (btlbe->tag != in_tag) {
+ continue;
+ }
+
+ ref_cnts = blkbuf_refcounts(btlbe->block);
+ if (!ref_cnts) {
+ btlbe->tag = 0;
+ btlbe->block = bbuf_null;
+ break;
+ }
+
+ return fsblock_take(btlbe->block);
+ }
+
+ return NULL;
+}
+
+static void
+__btlb_flushall(struct ext2_inode* e_inode)
+{
+ struct ext2_btlb* btlb;
+ struct ext2_btlb_entry* btlbe = NULL;
+
+ btlb = e_inode->btlb;
+
+ for (int i = 0; i < BTLB_SETS; i++)
+ {
+ btlbe = &btlb->buffer[i];
+ if (!valid_tag(btlbe->tag)) {
+ continue;
+ }
+
+ btlbe->tag = 0;
+ fsblock_put(btlbe->block);
+ }
+}
+
+void
+ext2db_itbegin(struct ext2_iterator* iter, struct v_inode* inode)
+{
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(inode);
+ *iter = (struct ext2_iterator){
+ .pos = 0,
+ .inode = inode,
+ .blksz = inode->sb->blksize,
+ .end_pos = ICEIL(e_ino->isize, inode->sb->blksize)
+ };
+}
+
+void
+ext2db_itreset(struct ext2_iterator* iter)
+{
+ if (likely(iter->sel_buf)) {
+ fsblock_put(iter->sel_buf);
+ iter->sel_buf = NULL;
+ }
+
+ iter->pos = 0;
+}
+
+int
+ext2db_itffw(struct ext2_iterator* iter, int count)
+{
+ iter->pos += count;
+ return count;
+}
+
+void
+ext2db_itend(struct ext2_iterator* iter)
+{
+ if (likely(iter->sel_buf)) {
+ fsblock_put(iter->sel_buf);
+ iter->sel_buf = NULL;
+ }
+}
+
+bool
+ext2db_itnext(struct ext2_iterator* iter)
+{
+ bbuf_t buf;
+
+ if (unlikely(iter->has_error)) {
+ return false;
+ }
+
+ if (unlikely(iter->pos > iter->end_pos)) {
+ return false;
+ }
+
+ if (likely(iter->sel_buf)) {
+ fsblock_put(iter->sel_buf);
+ }
+
+ buf = ext2db_get(iter->inode, iter->pos);
+ iter->sel_buf = buf;
+
+ if (!buf || !ext2_itcheckbuf(iter)) {
+ return false;
+ }
+
+ iter->pos++;
+ iter->data = blkbuf_data(buf);
+
+ return true;
+}
+
+void
+ext2ino_init(struct v_superblock* vsb, struct v_inode* inode)
+{
+ // Placeholder, to make vsb happy
+}
+
+static void
+__destruct_ext2_inode(struct ext2_inode* e_inode)
+{
+ __btlb_flushall(e_inode);
+
+ fsblock_put(e_inode->ind_ord1);
+ fsblock_put(e_inode->buf);
+
+ ext2gd_put(e_inode->blk_grp);
+
+ vfree_safe(e_inode->symlink);
+ vfree(e_inode->btlb);
+ vfree(e_inode);
+}
+
+static void
+ext2_destruct_inode(struct v_inode* inode)
+{
+ struct ext2_inode* e_inode;
+
+ e_inode = EXT2_INO(inode);
+
+ assert(e_inode);
+ __destruct_ext2_inode(e_inode);
+}
+
+static inline void
+__ext2ino_fill_common(struct v_inode* inode, ino_t ino_id)
+{
+ fsapi_inode_setid(inode, ino_id, ino_id);
+ fsapi_inode_setfops(inode, &ext2_file_ops);
+ fsapi_inode_setops(inode, &ext2_inode_ops);
+ fsapi_inode_setdector(inode, ext2_destruct_inode);
+}
+
+
+static unsigned int
+__translate_vfs_itype(unsigned int v_itype)
+{
+ unsigned int e_itype = IMODE_IFREG;
+
+ if (v_itype == VFS_IFFILE) {
+ e_itype = IMODE_IFREG;
+ }
+ else if (check_itype(v_itype, VFS_IFDIR)) {
+ e_itype = IMODE_IFDIR;
+ e_itype |= IMODE_UEX;
+ }
+ else if (check_itype(v_itype, VFS_IFSEQDEV)) {
+ e_itype = IMODE_IFCHR;
+ }
+ else if (check_itype(v_itype, VFS_IFVOLDEV)) {
+ e_itype = IMODE_IFBLK;
+ }
+
+ if (check_itype(v_itype, VFS_IFSYMLINK)) {
+ e_itype |= IMODE_IFLNK;
+ }
+
+ // FIXME we keep this until we have our own user manager
+ e_itype |= (IMODE_URD | IMODE_GRD | IMODE_ORD);
+ return e_itype;
+}
+
+int
+ext2ino_fill(struct v_inode* inode, ino_t ino_id)
+{
+ struct ext2_sbinfo* sb;
+ struct ext2_inode* e_ino;
+ struct v_superblock* vsb;
+ struct ext2b_inode* b_ino;
+ unsigned int type = VFS_IFFILE;
+ int errno = 0;
+
+ vsb = inode->sb;
+ sb = EXT2_SB(vsb);
+
+ if ((errno = ext2ino_get(vsb, ino_id, &e_ino))) {
+ return errno;
+ }
+
+ b_ino = e_ino->ino;
+ ino_id = e_ino->ino_id;
+
+ fsapi_inode_setsize(inode, e_ino->isize);
+
+ fsapi_inode_settime(inode, b_ino->i_ctime,
+ b_ino->i_mtime,
+ b_ino->i_atime);
+
+ __ext2ino_fill_common(inode, ino_id);
+
+ if (check_itype(b_ino->i_mode, IMODE_IFLNK)) {
+ type = VFS_IFSYMLINK;
+ }
+ else if (check_itype(b_ino->i_mode, IMODE_IFDIR)) {
+ type = VFS_IFDIR;
+ }
+ else if (check_itype(b_ino->i_mode, IMODE_IFCHR)) {
+ type = VFS_IFSEQDEV;
+ }
+ else if (check_itype(b_ino->i_mode, IMODE_IFBLK)) {
+ type = VFS_IFVOLDEV;
+ }
+
+ fsapi_inode_settype(inode, type);
+
+ fsapi_inode_complete(inode, e_ino);
+
+ return 0;
+}
+
+static int
+__get_group_desc(struct v_superblock* vsb, int ino,
+ struct ext2_gdesc** gd_out)
+{
+ unsigned int blkgrp_id;
+ struct ext2_sbinfo* sb;
+
+ sb = EXT2_SB(vsb);
+
+ blkgrp_id = to_fsblock_id(ino) / sb->raw->s_ino_per_grp;
+ return ext2gd_take(vsb, blkgrp_id, gd_out);
+}
+
+static struct ext2b_inode*
+__get_raw_inode(struct v_superblock* vsb, struct ext2_gdesc* gd,
+ bbuf_t* buf_out, int ino_index)
+{
+ bbuf_t ino_tab;
+ struct ext2_sbinfo* sb;
+ struct ext2b_inode* b_inode;
+ unsigned int ino_tab_sel, ino_tab_off, tab_partlen;
+
+ assert(buf_out);
+
+ sb = gd->sb;
+ tab_partlen = sb->block_size / sb->raw->s_ino_size;
+ ino_tab_sel = ino_index / tab_partlen;
+ ino_tab_off = ino_index % tab_partlen;
+
+ ino_tab = fsblock_get(vsb, gd->info->bg_ino_tab + ino_tab_sel);
+ if (blkbuf_errbuf(ino_tab)) {
+ return NULL;
+ }
+
+ b_inode = (struct ext2b_inode*)blkbuf_data(ino_tab);
+ b_inode = &b_inode[ino_tab_off];
+
+ *buf_out = ino_tab;
+
+ return b_inode;
+}
+
+static struct ext2_inode*
+__create_inode(struct v_superblock* vsb, struct ext2_gdesc* gd, int ino_index)
+{
+ bbuf_t ino_tab;
+ struct ext2_sbinfo* sb;
+ struct ext2b_inode* b_inode;
+ struct ext2_inode* inode;
+ unsigned int ind_ents;
+ size_t inds_blks;
+
+ sb = gd->sb;
+ b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_index);
+ if (!b_inode) {
+ return NULL;
+ }
+
+ inode = vzalloc(sizeof(*inode));
+ inode->btlb = vzalloc(sizeof(struct ext2_btlb));
+ inode->buf = ino_tab;
+ inode->ino = b_inode;
+ inode->blk_grp = gd;
+ inode->isize = b_inode->i_size;
+
+ if (ext2_feature(vsb, FEAT_LARGE_FILE)) {
+ inode->isize |= (size_t)((u64_t)(b_inode->i_size_h32) << 32);
+ }
+
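+    /*
+     * i_blocks counts 512-byte sectors, data and indirection alike;
+     * subtracting the sectors implied by isize and dividing by the
+     * sectors-per-block leaves the count of indirect blocks owned
+     * by this inode.
+     */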
+ if (b_inode->i_blocks) {
+ inds_blks = (size_t)b_inode->i_blocks;
+ inds_blks -= ICEIL(inode->isize, 512);
+ inds_blks /= (sb->block_size / 512);
+
+ inode->indirect_blocks = inds_blks;
+ }
+
+ ind_ents = sb->block_size / sizeof(int);
+ assert(is_pot(ind_ents));
+
+ inode->inds_lgents = ilog2(ind_ents);
+ inode->ino_id = gd->ino_base + to_ext2ino_id(ino_index);
+
+ return inode;
+}
+
+int
+ext2ino_get_fast(struct v_superblock* vsb,
+ unsigned int ino, struct ext2_fast_inode* fast_ino)
+{
+ int errno;
+ bbuf_t ino_tab;
+ struct ext2_gdesc* gd;
+ struct ext2_sbinfo* sb;
+ struct ext2b_inode* b_inode;
+ unsigned int ino_rel_id;
+
+ sb = EXT2_SB(vsb);
+ errno = __get_group_desc(vsb, ino, &gd);
+ if (errno) {
+ return errno;
+ }
+
+ ino_rel_id = to_fsblock_id(ino) % sb->raw->s_ino_per_grp;
+ b_inode = __get_raw_inode(vsb, gd, &ino_tab, ino_rel_id);
+
+ fast_ino->buf = ino_tab;
+ fast_ino->ino = b_inode;
+
+ return 0;
+}
+
+int
+ext2ino_get(struct v_superblock* vsb,
+ unsigned int ino, struct ext2_inode** out)
+{
+ struct ext2_sbinfo* sb;
+ struct ext2_inode* inode;
+ struct ext2_gdesc* gd;
+ struct ext2b_inode* b_inode;
+ unsigned int ino_rel_id;
+ unsigned int tab_partlen;
+ unsigned int ind_ents, prima_ind;
+ int errno = 0;
+
+ sb = EXT2_SB(vsb);
+
+ if ((errno = __get_group_desc(vsb, ino, &gd))) {
+ return errno;
+ }
+
+ ino_rel_id = to_fsblock_id(ino) % sb->raw->s_ino_per_grp;
+ inode = __create_inode(vsb, gd, ino_rel_id);
+ if (!inode) {
+ return EIO;
+ }
+
+ b_inode = inode->ino;
+ prima_ind = b_inode->i_block.ind1;
+ *out = inode;
+
+ if (!prima_ind) {
+ return errno;
+ }
+
+ inode->ind_ord1 = fsblock_get(vsb, prima_ind);
+ if (blkbuf_errbuf(inode->ind_ord1)) {
+ vfree(inode->btlb);
+ vfree(inode);
+ *out = NULL;
+ return EIO;
+ }
+
+ return errno;
+}
+
+int
+ext2ino_alloc(struct v_superblock* vsb,
+ struct ext2_inode* hint, struct ext2_inode** out)
+{
+ int free_ino_idx;
+ struct ext2_gdesc* gd;
+ struct ext2_inode* inode;
+
+ free_ino_idx = ALLOC_FAIL;
+ if (hint) {
+ gd = hint->blk_grp;
+ free_ino_idx = ext2gd_alloc_inode(gd);
+ }
+
+ // locality hinted alloc failed, try entire fs
+ if (!valid_bmp_slot(free_ino_idx)) {
+ free_ino_idx = ext2ino_alloc_slot(vsb, &gd);
+ }
+
+ if (!valid_bmp_slot(free_ino_idx)) {
+ return EDQUOT;
+ }
+
+ inode = __create_inode(vsb, gd, free_ino_idx);
+ if (!inode) {
+ // what a shame!
+ ext2gd_free_inode(gd, free_ino_idx);
+ return EIO;
+ }
+
+ memset(inode->ino, 0, sizeof(*inode->ino));
+ fsblock_dirty(inode->buf);
+
+ *out = inode;
+ return 0;
+}
+
+static inline int
+__free_block_at(struct v_superblock *vsb, unsigned int block_pos)
+{
+ int errno, gd_index;
+ struct ext2_gdesc* gd;
+ struct ext2_sbinfo * sb;
+
+ if (!block_pos) {
+ return 0;
+ }
+
+ block_pos = ext2_datablock(vsb, block_pos);
+
+ sb = EXT2_SB(vsb);
+ gd_index = block_pos / sb->raw->s_blk_per_grp;
+
+ if ((errno = ext2gd_take(vsb, gd_index, &gd))) {
+ return errno;
+ }
+
+ assert(block_pos >= gd->base);
+ ext2gd_free_block(gd, block_pos - gd->base);
+
+ ext2gd_put(gd);
+ return 0;
+}
+
+static int
+__free_recursive_from(struct v_superblock *vsb, struct ext2_inode* inode,
+ struct walk_stack* stack, int depth)
+{
+ bbuf_t tab;
+ int idx, len, errno;
+ u32_t* db_tab;
+
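+    /* depth 0 scans the 15-slot i_block array (12 direct + 3 indirect);
+       deeper levels scan full indirect tables */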
+ int ind_entries = 1 << inode->inds_lgents;
+ int max_len[] = { 15, ind_entries, ind_entries, ind_entries };
+
+ u32_t* tables = stack->tables;
+ u32_t* indices = stack->indices;
+
+ if (depth > MAX_INDS_DEPTH || !tables[depth]) {
+ return 0;
+ }
+
+ idx = indices[depth];
+ len = max_len[depth];
+ tab = fsblock_get(vsb, ext2_datablock(vsb, tables[depth]));
+
+ if (blkbuf_errbuf(tab)) {
+ return EIO;
+ }
+
+ db_tab = blkbuf_data(tab);
+    if (depth == 0) {
+        // avoid shadowing the offset() helper with a local variable
+        int arr_off = offsetof(struct ext2b_inode, i_block_arr);
+        db_tab = offset(db_tab, arr_off);
+    }
+
+ errno = 0;
+ indices[depth] = 0;
+
+ for (; idx < len; idx++)
+ {
+ u32_t db_id = db_tab[idx];
+
+ if (!db_id) {
+ continue;
+ }
+
+ if (depth >= MAX_INDS_DEPTH) {
+ goto cont;
+ }
+
+ tables[depth] = db_id;
+        errno = __free_recursive_from(vsb, inode, stack, depth + 1);
+ if (errno) {
+ break;
+ }
+
+cont:
+ __free_block_at(vsb, db_id);
+ db_tab[idx] = 0;
+ }
+
+ fsblock_dirty(tab);
+ fsblock_put(tab);
+ return errno;
+}
+
+int
+ext2ino_free(struct v_inode* inode)
+{
+ int errno = 0;
+ unsigned int ino_slot;
+ struct ext2_inode* e_ino;
+ struct ext2_gdesc* e_gd;
+ struct ext2b_inode* b_ino;
+ struct ext2_sbinfo* sb;
+
+ sb = EXT2_SB(inode->sb);
+ e_ino = EXT2_INO(inode);
+ b_ino = e_ino->ino;
+ e_gd = e_ino->blk_grp;
+
+ assert_fs(b_ino->i_lnk_cnt > 0);
+ fsblock_dirty(e_ino->buf);
+
+ b_ino->i_lnk_cnt--;
+ if (b_ino->i_lnk_cnt >= 1) {
+ return 0;
+ }
+
+ ext2ino_resizing(inode, 0);
+
+ ino_slot = e_ino->ino_id;
+    ino_slot = to_fsblock_id(ino_slot - e_gd->ino_base);
+ ext2gd_free_inode(e_ino->blk_grp, ino_slot);
+
+ __destruct_ext2_inode(e_ino);
+
+ inode->data = NULL;
+
+ return errno;
+}
+
+static void
+__update_inode_access_metadata(struct ext2b_inode* b_ino,
+ struct v_inode* inode)
+{
+ b_ino->i_ctime = inode->ctime;
+ b_ino->i_atime = inode->atime;
+ b_ino->i_mtime = inode->mtime;
+}
+
+static inline void
+__update_inode_size(struct v_inode* inode, size_t size)
+{
+ struct ext2b_inode* b_ino;
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(inode);
+ b_ino = e_ino->ino;
+
+ e_ino->isize = size;
+
+ if (ext2_feature(inode->sb, FEAT_LARGE_FILE)) {
+ b_ino->i_size_l32 = (unsigned int)size;
+ b_ino->i_size_h32 = (unsigned int)((u64_t)size >> 32);
+ }
+ else {
+ b_ino->i_size = size;
+ }
+
+ b_ino->i_blocks = ICEIL(size, 512);
+ b_ino->i_blocks += e_ino->indirect_blocks;
+}
+
+int
+ext2ino_make(struct v_superblock* vsb, unsigned int itype,
+ struct ext2_inode* hint, struct v_inode** out)
+{
+ int errno = 0;
+ struct ext2_inode* e_ino;
+ struct ext2b_inode* b_ino;
+ struct v_inode* inode;
+
+ errno = ext2ino_alloc(vsb, hint, &e_ino);
+ if (errno) {
+ return errno;
+ }
+
+ b_ino = e_ino->ino;
+ inode = vfs_i_alloc(vsb);
+
+ __ext2ino_fill_common(inode, e_ino->ino_id);
+
+ __update_inode_access_metadata(b_ino, inode);
+ b_ino->i_mode = __translate_vfs_itype(itype);
+
+ fsapi_inode_settype(inode, itype);
+ fsapi_inode_complete(inode, e_ino);
+
+ *out = inode;
+ return errno;
+}
+
+int
+ext2_create(struct v_inode* this, struct v_dnode* dnode, unsigned int itype)
+{
+ int errno;
+ struct v_inode* created;
+
+ errno = ext2ino_make(this->sb, itype, EXT2_INO(this), &created);
+ if (errno) {
+ return errno;
+ }
+
+ return ext2_link(created, dnode);
+}
+
+int
+ext2_link(struct v_inode* this, struct v_dnode* new_name)
+{
+ int errno = 0;
+ struct v_inode* parent;
+ struct ext2_inode* e_ino;
+ struct ext2_dnode* e_dno;
+ struct ext2b_dirent dirent;
+
+ e_ino = EXT2_INO(this);
+ parent = fsapi_dnode_parent(new_name);
+
+ ext2dr_setup_dirent(&dirent, this, &new_name->name);
+ ext2ino_linkto(e_ino, &dirent);
+
+ errno = ext2dr_insert(parent, &dirent, &e_dno);
+ if (errno) {
+ goto done;
+ }
+
+ new_name->data = e_dno;
+ vfs_assign_inode(new_name, this);
+
+done:
+ return errno;
+}
+
+int
+ext2_unlink(struct v_inode* this, struct v_dnode* name)
+{
+ int errno = 0;
+ struct ext2_inode* e_ino;
+ struct ext2_dnode* e_dno;
+
+ e_ino = EXT2_INO(this);
+ e_dno = EXT2_DNO(name);
+
+ assert_fs(e_dno);
+ assert_fs(e_dno->self.dirent->inode == e_ino->ino_id);
+
+ errno = ext2dr_remove(e_dno);
+ if (errno) {
+ return errno;
+ }
+
+ return ext2ino_free(this);
+}
+
+void
+ext2ino_update(struct v_inode* inode)
+{
+ struct ext2_inode* e_ino;
+
+ e_ino = EXT2_INO(inode);
+ __update_inode_access_metadata(e_ino->ino, inode);
+
+ fsblock_dirty(e_ino->buf);
+}
+
+/* ******************* Data Blocks ******************* */
+
+static inline void
+__walkstate_set_stack(struct walk_state* state, int depth,
+ bbuf_t tab, unsigned int index)
+{
+ state->stack.tables[depth] = fsblock_id(tab);
+ state->stack.indices[depth] = index;
+}
+
+/**
+ * @brief Walk the indirection chain given the position of a data block
+ *        relative to the inode. Upon completion, walk_state is
+ *        populated with the result. On error, walk_state is untouched.
+ *
+ *        Note, the result is always one level above the stopping point.
+ *        That means, if pos points directly to a file-content block
+ *        (i.e., a leaf block), then the state holds the indirect block
+ *        containing the ID of that leaf block.
+ *
+ *        If `resolve` is set, any absence encountered during the walk
+ *        is resolved by allocating and chaining indirect blocks. This
+ *        requires the file system to be mounted writable.
+ *
+ * @param inode inode to walk
+ * @param pos flattened data block position to be located
+ * @param state contains the walk result
+ * @param resolve whether to auto-allocate the indirection structure
+ *                during the walk if `pos` does not exist
+ * @param full_walk bypass the bTLB fast path so the walk stack is
+ *                  recorded at every level
+ * @return int
+ */
+static int
+__walk_indirects(struct v_inode* inode, unsigned int pos,
+ struct walk_state* state, bool resolve, bool full_walk)
+{
+ int errno;
+ int inds, stride, shifts, level;
+ unsigned int *slotref, index, next, mask;
+ struct ext2_inode* e_inode;
+ struct ext2b_inode* b_inode;
+ struct v_superblock* vsb;
+ bbuf_t table, next_table;
+
+ e_inode = EXT2_INO(inode);
+ b_inode = e_inode->ino;
+ vsb = inode->sb;
+ level = 0;
+ resolve = resolve && !EXT2_SB(vsb)->read_only;
+
+ if (pos < 12) {
+ index = pos;
+ slotref = &b_inode->i_block_arr[pos];
+ table = fsblock_take(e_inode->buf);
+ inds = 0;
+ goto _return;
+ }
+
+ pos -= 12;
+ stride = e_inode->inds_lgents;
+ if (!(pos >> stride)) {
+ inds = 1;
+ }
+ else if (!(pos >> (stride * 2))) {
+ inds = 2;
+ }
+ else if (!(pos >> (stride * 3))) {
+ inds = 3;
+ }
+ else {
+ fail("unrealistic block pos");
+ }
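+
+    /*
+     * e.g., with 1KiB blocks an indirect table holds 256 entries
+     * (stride = 8): pos < 256 resolves via single indirection,
+     * pos < 256^2 via double, and pos < 256^3 via triple.
+     */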
+
+    // the bTLB caches the last-level indirect block
+ if (!full_walk && (table = __btlb_hit(e_inode, pos))) {
+ level = inds;
+ index = pos & ((1 << stride) - 1);
+ slotref = &block_buffer(table, u32_t)[index];
+ goto _return;
+ }
+
+ shifts = stride * (inds - 1);
+ mask = ((1 << stride) - 1) << shifts;
+
+ index = 12 + inds - 1;
+ slotref = &b_inode->i_block.inds[inds - 1];
+ table = fsblock_take(e_inode->buf);
+
+ for (; level < inds; level++)
+ {
+ __walkstate_set_stack(state, level, table, index);
+
+ next = *slotref;
+ if (!next) {
+ if (!resolve) {
+ goto _return;
+ }
+
+ if ((errno = ext2db_alloc(inode, &next_table))) {
+ fsblock_put(table);
+ return errno;
+ }
+
+ e_inode->indirect_blocks++;
+ *slotref = fsblock_id(next_table);
+ fsblock_dirty(table);
+ }
+ else {
+ next_table = fsblock_get(vsb, next);
+ }
+
+ fsblock_put(table);
+ table = next_table;
+
+ if (blkbuf_errbuf(table)) {
+ return EIO;
+ }
+
+ assert(shifts >= 0);
+
+ index = (pos & mask) >> shifts;
+
+ slotref = &block_buffer(table, u32_t)[index];
+
+ shifts -= stride;
+ mask = mask >> stride;
+ }
+
+ __btlb_insert(e_inode, pos, table);
+
+_return:
+ assert(blkbuf_refcounts(table) >= 1);
+ assert_fs(table);
+ assert_fs(slotref);
+
+ state->slot_ref = slotref;
+ state->table = table;
+ state->level = level;
+ state->indirections = inds;
+
+ __walkstate_set_stack(state, level, table, index);
+
+ return 0;
+}
+
+bbuf_t
+ext2db_get(struct v_inode* inode, unsigned int data_pos)
+{
+ int errno;
+ unsigned int blkid;
+ struct walk_state state;
+
+ ext2walk_init_state(&state);
+
+ errno = __walk_indirects(inode, data_pos, &state, false, false);
+ if (errno) {
+ return (bbuf_t)INVL_BUFFER;
+ }
+
+ blkid = *state.slot_ref;
+
+ ext2walk_free_state(&state);
+
+ if (!blkid) {
+ return NULL;
+ }
+
+ return fsblock_get(inode->sb, blkid);
+}
+
+int
+ext2db_acquire(struct v_inode* inode, unsigned int data_pos, bbuf_t* out)
+{
+ int errno = 0;
+ bbuf_t buf;
+ unsigned int block_id;
+ struct walk_state state;
+
+ ext2walk_init_state(&state);
+
+ errno = __walk_indirects(inode, data_pos, &state, true, false);
+ if (errno) {
+ return errno;
+ }
+
+ block_id = *state.slot_ref;
+ if (block_id) {
+ buf = fsblock_get(inode->sb, block_id);
+ goto done;
+ }
+
+ errno = ext2db_alloc(inode, &buf);
+ if (errno) {
+ ext2walk_free_state(&state);
+ return errno;
+ }
+
+ *state.slot_ref = fsblock_id(buf);
+ fsblock_dirty(state.table);
+
+done:
+ ext2walk_free_state(&state);
+
+ if (blkbuf_errbuf(buf)) {
+ return EIO;
+ }
+
+ *out = buf;
+ return 0;
+}
+
+int
+ext2db_alloc(struct v_inode* inode, bbuf_t* out)
+{
+ int free_ino_idx;
+ struct ext2_gdesc* gd;
+ struct ext2_inode* e_inode;
+ struct v_superblock* vsb;
+
+ free_ino_idx = ALLOC_FAIL;
+ e_inode = EXT2_INO(inode);
+ vsb = inode->sb;
+
+ gd = e_inode->blk_grp;
+ free_ino_idx = ext2gd_alloc_block(gd);
+
+ // locality alloc failed, try entire fs
+ if (!valid_bmp_slot(free_ino_idx)) {
+ free_ino_idx = ext2db_alloc_slot(vsb, &gd);
+ }
+
+ if (!valid_bmp_slot(free_ino_idx)) {
+ return EDQUOT;
+ }
+
+ free_ino_idx += gd->base;
+ free_ino_idx = ext2_datablock(vsb, free_ino_idx);
+ free_ino_idx = to_ext2ino_id(free_ino_idx);
+
+ bbuf_t buf = fsblock_get(vsb, free_ino_idx);
+ if (blkbuf_errbuf(buf)) {
+ return EIO;
+ }
+
+ *out = buf;
+ return 0;
+}
+
+void
+ext2db_free_pos(struct v_inode* inode, unsigned int block_pos)
+{
+ struct ext2_inode* e_inode;
+ struct ext2_gdesc* gd;
+
+ e_inode = EXT2_INO(inode);
+ gd = e_inode->blk_grp;
+
+ assert(block_pos >= gd->base);
+
+ block_pos -= gd->base;
+
+ ext2gd_free_block(gd, block_pos);
+}
+
+int
+ext2db_free(struct v_inode* inode, bbuf_t buf)
+{
+ assert(blkbuf_not_shared(buf));
+
+ ext2db_free_pos(inode, blkbuf_id(buf));
+ fsblock_put(buf);
+
+ return 0;
+}
+
+int
+ext2ino_resizing(struct v_inode* inode, size_t new_size)
+{
+ int errno;
+ unsigned int pos;
+ size_t oldsize;
+ struct walk_state state;
+ struct ext2_inode* e_ino;
+ struct ext2b_inode* b_ino;
+
+ e_ino = EXT2_INO(inode);
+ b_ino = e_ino->ino;
+ oldsize = e_ino->isize;
+
+ if (oldsize == new_size) {
+ return 0;
+ }
+
+ __update_inode_size(inode, new_size);
+ fsblock_dirty(e_ino->buf);
+
+ if (check_symlink_node(inode)) {
+ return 0;
+ }
+
+ if (oldsize < new_size) {
+ return 0;
+ }
+
+ ext2walk_init_state(&state);
+
+ pos = new_size / fsapi_block_size(inode->sb);
+ errno = __walk_indirects(inode, pos, &state, false, true);
+ if (errno) {
+ return errno;
+ }
+
+    errno = __free_recursive_from(inode->sb, e_ino, &state.stack, 0);
+
+ ext2walk_free_state(&state);
+ return errno;
+}
\ No newline at end of file
--- /dev/null
+#include <lunaix/fs/api.h>
+#include <lunaix/block.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/mm/page.h>
+#include <lunaix/syslog.h>
+
+#include "ext2.h"
+
+LOG_MODULE("EXT2")
+
+#define EXT2_COMPRESSION 0x0001
+#define EXT2_FILETYPE 0x0002
+#define EXT2_JOURNAL 0x0004
+#define EXT2_METABG 0x0008
+
+#define EXT2_SPARSESB 0x0001
+#define EXT2_LARGEFLE 0x0002
+#define EXT2_BTREEDIR 0x0004
+
+#define EXT2_SUPER_MAGIC 0xef53
+#define EXT2_BASE_BLKSZ 1024
+#define EXT2_PRIME_SB_OFF EXT2_BASE_BLKSZ
+
+// incompatible features currently supported
+#define EXT2_IMPL_REQFEAT (EXT2_FILETYPE)
+
+// read-only features currently supported
+#define EXT2_IMPL_ROFEAT (EXT2_SPARSESB)
+
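+// ext2 reserves inode #1 (bad blocks); the root directory is inode #2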
+#define EXT2_ROOT_INO to_ext2ino_id(1)
+
+#define check_compat_mnt(feat) \
+ (!((feat) & ~EXT2_IMPL_REQFEAT))
+
+#define check_compat_mnt_ro_fallback(feat) \
+ (((feat) & ~EXT2_IMPL_ROFEAT))
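+
+/*
+ * mounting is refused outright if the fs requires a feature outside
+ * EXT2_IMPL_REQFEAT; unknown ro-compat features merely force a
+ * read-only fallback.
+ */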
+
+static size_t
+ext2_rd_capacity(struct v_superblock* vsb)
+{
+ struct ext2_sbinfo* sb = fsapi_impl_data(vsb, struct ext2_sbinfo);
+ return sb->raw->s_blk_cnt * fsapi_block_size(vsb);
+}
+
+static void
+__vsb_release(struct v_superblock* vsb)
+{
+ ext2gd_release_gdt(vsb);
+ vfree(vsb->data);
+}
+
+static size_t
+ext2_rd_usage(struct v_superblock* vsb)
+{
+ struct ext2_sbinfo* sb = fsapi_impl_data(vsb, struct ext2_sbinfo);
+    size_t used = sb->raw->s_blk_cnt - sb->raw->s_free_blk_cnt;
+ return used * fsapi_block_size(vsb);
+}
+
+struct fsapi_vsb_ops vsb_ops = {
+ .read_capacity = ext2_rd_capacity,
+ .read_usage = ext2_rd_usage,
+ .init_inode = ext2ino_init,
+ .release = __vsb_release
+};
+
+static inline unsigned int
+__translate_feature(struct ext2b_super* sb)
+{
+ unsigned int feature = 0;
+ unsigned int req, opt, ro;
+
+ req = sb->s_required_feat;
+ opt = sb->s_optional_feat;
+ ro = sb->s_ro_feat;
+
+ if ((req & EXT2_COMPRESSION)) {
+ feature |= FEAT_COMPRESSION;
+ }
+
+ if ((req & EXT2_FILETYPE)) {
+ feature |= FEAT_FILETYPE;
+ }
+
+ if ((ro & EXT2_SPARSESB)) {
+ feature |= FEAT_SPARSE_SB;
+ }
+
+ if ((ro & EXT2_LARGEFLE)) {
+ feature |= FEAT_LARGE_FILE;
+ }
+
+ return feature;
+}
+
+static bool
+__check_mount(struct v_superblock* vsb, struct ext2b_super* sb)
+{
+ unsigned int req, opt, ro;
+
+ req = sb->s_required_feat;
+ opt = sb->s_optional_feat;
+ ro = sb->s_ro_feat;
+
+ if (sb->s_magic != EXT2_SUPER_MAGIC) {
+ ERROR("invalid magic: 0x%x", sb->s_magic);
+ return false;
+ }
+
+ if (!check_compat_mnt(req))
+ {
+ ERROR("unsupported feature: 0x%x, mount refused", req);
+ return false;
+ }
+
+ if (check_compat_mnt_ro_fallback(ro))
+ {
+ WARN("unsupported feature: 0x%x, mounted as readonly", ro);
+ fsapi_set_readonly_mount(vsb);
+ }
+
+#ifndef CONFIG_ARCH_BITS_64
+ if ((ro & EXT2_LARGEFLE)) {
+ WARN("large file not supported on 32bits machine");
+ fsapi_set_readonly_mount(vsb);
+ }
+#endif
+
+ return true;
+}
+
+static int
+ext2_mount(struct v_superblock* vsb, struct v_dnode* mnt)
+{
+ struct device* bdev;
+ struct ext2_sbinfo* ext2sb;
+ struct ext2b_super* rawsb;
+ struct v_inode* root_inode;
+ bbuf_t buf;
+ size_t block_size;
+ int errno = 0;
+ unsigned int req_feat;
+
+ bdev = fsapi_blockdev(vsb);
+ ext2sb = vzalloc(sizeof(*ext2sb));
+ rawsb = vzalloc(sizeof(*rawsb));
+
+ errno = bdev->ops.read(bdev, rawsb, EXT2_PRIME_SB_OFF, sizeof(*rawsb));
+ if (errno < 0) {
+ goto failed;
+ }
+
+ block_size = EXT2_BASE_BLKSZ << rawsb->s_log_blk_size;
+ fsapi_begin_vsb_setup(vsb, block_size);
+
+ if (!__check_mount(vsb, rawsb)) {
+ goto unsupported;
+ }
+
+ if (block_size > PAGE_SIZE) {
+        ERROR("block size must not be greater than page size");
+ errno = EINVAL;
+ goto failed;
+ }
+
+ ext2sb->bdev = bdev;
+ ext2sb->block_size = block_size;
+ ext2sb->vsb = vsb;
+ ext2sb->read_only = fsapi_readonly_mount(vsb);
+ ext2sb->raw = rawsb;
+ ext2sb->all_feature = __translate_feature(rawsb);
+
+ fsapi_set_vsb_ops(vsb, &vsb_ops);
+ fsapi_complete_vsb_setup(vsb, ext2sb);
+
+ ext2gd_prepare_gdt(vsb);
+
+ root_inode = vfs_i_alloc(vsb);
+ ext2ino_fill(root_inode, EXT2_ROOT_INO);
+ vfs_assign_inode(mnt, root_inode);
+
+    // replace the superblock's raw buffer with a bcache-managed one
+ buf = fsblock_get(vsb, ext2_datablock(vsb, 0));
+ if (block_size == EXT2_BASE_BLKSZ) {
+ ext2sb->raw = blkbuf_data(buf);
+ }
+ else {
+ ext2sb->raw = offset(blkbuf_data(buf), EXT2_BASE_BLKSZ);
+ }
+
+ ext2sb->buf = buf;
+ vfree(rawsb);
+ return 0;
+
+unsupported:
+ errno = ENOTSUP;
+
+failed:
+ vfree(ext2sb);
+ vfree(rawsb);
+ fsapi_reset_vsb(vsb);
+ return errno;
+}
+
+static int
+ext2_umount(struct v_superblock* vsb)
+{
+ // sync all dirty buffers
+ if (!blkbuf_syncall(vsb->blks, false)) {
+ return EAGAIN;
+ }
+
+ ext2gd_release_gdt(vsb);
+
+ blkbuf_release(vsb->blks);
+ return 0;
+}
+
+static void
+ext2_init()
+{
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("ext2", 0);
+
+ fsapi_fs_set_mntops(fs, ext2_mount, ext2_umount);
+ fsapi_fs_finalise(fs);
+
+ gdesc_bcache_zone = bcache_create_zone("ext2_gdesc");
+}
+EXPORT_FILE_SYSTEM(ext2fs, ext2_init);
\ No newline at end of file
#include <lunaix/fs.h>
-#include <lunaix/fs/iso9660.h>
+#include "iso9660.h"
#include <lunaix/mm/cake.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
}
done:
- if (!cache->name.len) {
- // Load ISO9660 file id if no NM found.
- u32_t l = drec->name.len;
- while (l < (u32_t)-1 && drec->name.content[l--] != ';')
- ;
+ if (cache->name.len) {
+ return;
+ }
- l = (l + 1) ? l : drec->name.len;
- l = MIN(l, ISO9660_IDLEN - 1);
+ // Load ISO9660 file id if no NM found.
- strncpy(cache->name_val, (const char*)drec->name.content, l);
+    char name_val = drec->name.content[0];
+ u32_t l = drec->name.len;
- cache->name = HSTR(cache->name_val, l);
- hstr_rehash(&cache->name, HSTR_FULL_HASH);
+ if (l == 1 && !name_val) {
+ cache->name = vfs_dot;
+ return;
+ }
+
+    if (l == 1 && name_val == 1) {
+ cache->name = vfs_ddot;
+ return;
}
+
+ while (l < (u32_t)-1 && drec->name.content[l--] != ';')
+ ;
+
+ l = (l + 1) ? l : drec->name.len;
+ l = MIN(l, ISO9660_IDLEN - 1);
+
+ strncpy(cache->name_val, (const char*)drec->name.content, l);
+ cache->name = HSTR(cache->name_val, l);
+ hstr_rehash(&cache->name, HSTR_FULL_HASH);
}
int
iso9660_setup_dnode(struct v_dnode* dnode, struct v_inode* inode)
{
- if ((inode->itype & F_FILE)) {
+ if (!check_directory_node(inode)) {
vfs_assign_inode(dnode, inode);
return 0;
}
break;
}
- // ignore the '.', '..' as we have built-in support
- if (drec->name.len == 1) {
- goto cont;
- }
-
struct iso_drecache* cache = cake_grab(drec_cache_pile);
iso9660_fill_drecache(cache, drec, mdu->len);
llist_append(&isoino->drecaches, &cache->caches);
- cont:
+
blk_offset += mdu->len;
} while (current_pos + blk_offset < max_pos);
int
iso9660_readdir(struct v_file* file, struct dir_context* dctx)
-{
+{
struct llist_header* lead = file->dnode->data;
struct iso_drecache *pos, *n;
- u32_t counter = dctx->index - 1;
+ u32_t counter = 0;
llist_for_each(pos, n, lead, caches)
{
- if (counter == (u32_t)-1 && !(pos->flags & ISO_FHIDDEN)) {
+ if (counter == file->f_pos && !(pos->flags & ISO_FHIDDEN)) {
dctx->read_complete_callback(
- dctx, pos->name_val, pos->name.len, __get_dtype(pos));
+ dctx, HSTR_VAL(pos->name), HSTR_LEN(pos->name), __get_dtype(pos));
return 1;
}
- counter--;
+ counter++;
}
+
return 0;
}
\ No newline at end of file
#include <lunaix/fs.h>
-#include <lunaix/fs/iso9660.h>
+#include "iso9660.h"
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
}
int
-iso9660_seek(struct v_inode* inode, size_t offset)
+iso9660_seek(struct v_file* file, size_t offset)
{
- // TODO
+ file->f_pos = offset;
return 0;
}
\ No newline at end of file
#include <klibc/string.h>
#include <lunaix/fs.h>
-#include <lunaix/fs/iso9660.h>
+#include "iso9660.h"
#include <lunaix/mm/cake.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
iso9660_write_page(struct v_inode* inode, void* buffer, size_t fpos);
int
-iso9660_seek(struct v_inode* inode, size_t offset);
+iso9660_seek(struct v_file* file, size_t offset);
int
isorr_parse_px(struct iso_drecache* cache, void* px_start);
#include <lunaix/block.h>
-#include <lunaix/fs.h>
-#include <lunaix/fs/iso9660.h>
+#include <lunaix/fs/api.h>
#include <lunaix/mm/valloc.h>
+#include <lunaix/mm/cake.h>
#include <lunaix/spike.h>
-#include <lunaix/mm/cake.h>
+#include "iso9660.h"
struct cake_pile* drec_cache_pile;
extern void
iso9660_init_inode(struct v_superblock* vsb, struct v_inode* inode);
-u32_t
-iso9660_rd_capacity(struct v_superblock* vsb)
+static size_t
+__iso9660_rd_capacity(struct v_superblock* vsb)
{
struct iso_superblock* isovsb = (struct iso_superblock*)vsb->data;
return isovsb->volume_size;
}
+static void
+__vsb_release(struct v_superblock* vsb)
+{
+ vfree(vsb->data);
+}
+
int
iso9660_mount(struct v_superblock* vsb, struct v_dnode* mount_point)
{
vsb->data = isovsb;
vsb->ops.init_inode = iso9660_init_inode;
- vsb->ops.read_capacity = iso9660_rd_capacity;
+ vsb->ops.read_capacity = __iso9660_rd_capacity;
+ vsb->ops.release = __vsb_release;
vsb->blksize = ISO9660_BLKSZ;
struct v_inode* rootino = vfs_i_alloc(vsb);
return errno;
}
+
+
int
iso9660_unmount(struct v_superblock* vsb)
{
- vfree(vsb->data);
-
return 0;
}
void
iso9660_init()
{
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("iso9660", FSTYPE_ROFS);
+
+ fsapi_fs_set_mntops(fs, iso9660_mount, iso9660_unmount);
+ fsapi_fs_finalise(fs);
+
drec_cache_pile =
cake_new_pile("iso_drec", sizeof(struct iso_drecache), 1, 0);
-
- struct filesystem* fs = fsm_new_fs("iso9660", -1);
- fs->types |= FSTYPE_ROFS;
- fs->mount = iso9660_mount;
- fs->unmount = iso9660_unmount;
-
- fsm_register(fs);
}
EXPORT_FILE_SYSTEM(iso9660, iso9660_init);
\ No newline at end of file
#include <klibc/string.h>
-#include <lunaix/fs/iso9660.h>
+#include "iso9660.h"
int
isorr_parse_px(struct iso_drecache* cache, void* px_start)
-#include <lunaix/fs/iso9660.h>
+#include "iso9660.h"
struct iso_drecord*
iso9660_get_drecord(struct iso_var_mdu* drecord_mdu)
#include <lunaix/foptions.h>
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/process.h>
#include <lunaix/spike.h>
mnt->parent = parent;
mnt->mnt_point = mnt_point;
- mnt->super_block = mnt_point->super_block;
+ vfs_vmnt_assign_sb(mnt, mnt_point->super_block);
if (parent) {
mnt_mkbusy(parent);
return mnt;
}
+void
+__vfs_release_vmnt(struct v_mount* mnt)
+{
+ assert(llist_empty(&mnt->submnts));
+
+ if (mnt->parent) {
+ mnt_chillax(mnt->parent);
+ }
+
+ llist_delete(&mnt->sibmnts);
+ llist_delete(&mnt->list);
+ atomic_fetch_sub(&mnt->mnt_point->ref_count, 1);
+ vfree(mnt);
+}
+
int
__vfs_do_unmount(struct v_mount* mnt)
{
return errno;
}
- llist_delete(&mnt->list);
- llist_delete(&mnt->sibmnts);
-
// detached the inodes from cache, and let lru policy to recycle them
for (size_t i = 0; i < VFS_HASHTABLE_SIZE; i++) {
struct hbucket* bucket = &sb->i_cache[i];
bucket->head->pprev = 0;
}
- mnt_chillax(mnt->parent);
-
mnt->mnt_point->mnt = mnt->parent;
vfs_sb_free(sb);
- atomic_fetch_sub(&mnt->mnt_point->ref_count, 1);
- vfree(mnt);
+ __vfs_release_vmnt(mnt);
return errno;
}
return ENOTBLK;
}
- if (mnt_point->inode && (mnt_point->inode->itype & F_MFILE)) {
+ if (mnt_point->inode && !check_directory_node(mnt_point->inode)) {
return ENOTDIR;
}
return ENODEV;
}
- if (fs->types == FSTYPE_ROFS) {
+ if ((fs->types & FSTYPE_ROFS)) {
options |= MNT_RO;
}
+ if (!(fs->types & FSTYPE_PSEUDO) && !device) {
+ return ENODEV;
+ }
+
+ int errno = 0;
char* dev_name = "sys";
struct v_mount* parent_mnt = mnt_point->mnt;
- struct v_superblock *sb = vfs_sb_alloc(), *old_sb = mnt_point->super_block;
- sb->dev = device;
- mnt_point->super_block = sb;
+ struct v_superblock *sb = vfs_sb_alloc(),
+ *old_sb = mnt_point->super_block;
if (device) {
dev_name = device->name_val;
}
- int errno = 0;
- if (!(errno = fs->mount(sb, mnt_point))) {
- sb->fs = fs;
- sb->root = mnt_point;
+    // prepare the v_superblock for the fs::mount invocation
+ sb->dev = device;
+ sb->fs = fs;
+ sb->root = mnt_point;
+ vfs_d_assign_sb(mnt_point, sb);
- if (!(mnt_point->mnt = vfs_create_mount(parent_mnt, mnt_point))) {
- errno = ENOMEM;
- goto cleanup;
- }
+ if (!(mnt_point->mnt = vfs_create_mount(parent_mnt, mnt_point))) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ mnt_point->mnt->flags = options;
+ if (!(errno = fs->mount(sb, mnt_point))) {
kprintf("mount: dev=%s, fs=%s, mode=%d", dev_name, fs_name, options);
-
- mnt_point->mnt->flags = options;
} else {
goto cleanup;
}
+ vfs_sb_free(old_sb);
return errno;
cleanup:
- ERROR("mount: dev=%s, fs=%s, mode=%d, err=%d",
+ ERROR("failed mount: dev=%s, fs=%s, mode=%d, err=%d",
dev_name,
fs_name,
options,
errno);
- mnt_point->super_block = old_sb;
+ vfs_d_assign_sb(mnt_point, old_sb);
vfs_sb_free(sb);
+    if (mnt_point->mnt) {
+        __vfs_release_vmnt(mnt_point->mnt);
+        mnt_point->mnt = parent_mnt;
+    }
+
return errno;
}
struct device* device = NULL;
if (dev) {
- if (!(dev->inode->itype & VFS_IFVOLDEV)) {
+ if (!check_voldev_node(dev->inode)) {
errno = ENOTDEV;
goto done;
}
#include <klibc/string.h>
#define VFS_SYMLINK_DEPTH 16
+#define VFS_SYMLINK_MAXLEN 512
extern struct lru_zone *dnode_lru, *inode_lru;
current_level = dnode;
current_inode = current_level->inode;
- if ((current_inode->itype & F_MSLNK) &&
+ assert(current_inode);
+
+ if (check_symlink_node(current_inode) &&
!(walk_options & VFS_WALK_NOFOLLOW)) {
const char* link;
+ struct v_inode_ops* iops;
- if (!current_inode->ops->read_symlink) {
+ iops = current_inode->ops;
+
+ if (!iops->read_symlink) {
errno = ENOTSUP;
goto error;
}
lock_inode(current_inode);
- if ((errno =
- current_inode->ops->read_symlink(current_inode, &link))) {
+
+ errno = iops->read_symlink(current_inode, &link);
+ if ((errno < 0)) {
unlock_inode(current_inode);
goto error;
}
+
unlock_inode(current_inode);
errno = __vfs_walk(current_level->parent,
#include <lunaix/mm/page.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
+#include <lunaix/bcache.h>
+#include <lunaix/syslog.h>
-#define PCACHE_DIRTY 0x1
+LOG_MODULE("pcache")
-static struct lru_zone* pcache_zone;
+#define pcache_obj(bcache) container_of(bcache, struct pcache, cache)
-static int
-__pcache_try_evict(struct lru_node* obj)
+void pcache_release_page(struct pcache* pcache, struct pcache_pg* page);
+void pcache_set_dirty(struct pcache* pcache, struct pcache_pg* pg);
+
+static bcache_zone_t pagecached_zone = NULL;
+
+static void
+__pcache_sync(struct bcache* bc, unsigned long tag, void* data)
{
- struct pcache_pg* page = container_of(obj, struct pcache_pg, lru);
- pcache_invalidate(page->holder, page);
- return 1;
+ struct pcache* cache;
+
+ cache = pcache_obj(bc);
+ pcache_commit(cache->master, (struct pcache_pg*)data);
}
static void
-pcache_free_page(void* va)
+__pcache_try_release(struct bcache* bc, void* data)
{
- pte_t* ptep = mkptep_va(VMS_SELF, (ptr_t)va);
- pte_t pte = pte_at(ptep);
- leaflet_return(pte_leaflet(pte));
+ struct pcache_pg* page;
+
+ page = (struct pcache_pg*)data;
+ pcache_release_page(pcache_obj(bc), page);
}
+static struct bcache_ops cache_ops = {
+ .release_on_evict = __pcache_try_release,
+ .sync_cached = __pcache_sync
+};
+
static void*
pcache_alloc_page()
{
return (void*)va;
}
+static void
+pcache_free_page(void* va)
+{
+ pte_t* ptep = mkptep_va(VMS_SELF, (ptr_t)va);
+ pte_t pte = pte_at(ptep);
+ leaflet_return(pte_leaflet(pte));
+}
+
void
pcache_init(struct pcache* pcache)
{
- btrie_init(&pcache->tree, PAGE_SHIFT);
+ if (unlikely(!pagecached_zone)) {
+ pagecached_zone = bcache_create_zone("pcache");
+ }
+
llist_init_head(&pcache->dirty);
- llist_init_head(&pcache->pages);
- pcache_zone = lru_new_zone(__pcache_try_evict);
+ bcache_init_zone(&pcache->cache, pagecached_zone, 4, -1,
+ sizeof(struct pcache_pg), &cache_ops);
}
void
pcache_release_page(struct pcache* pcache, struct pcache_pg* page)
{
- pcache_free_page(page->pg);
-
- llist_delete(&page->pg_list);
-
- btrie_remove(&pcache->tree, page->fpos);
+ pcache_free_page(page->data);
vfree(page);
}
struct pcache_pg*
-pcache_new_page(struct pcache* pcache, u32_t index)
+pcache_new_page(struct pcache* pcache)
{
- struct pcache_pg* ppg = vzalloc(sizeof(struct pcache_pg));
- void* pg = pcache_alloc_page();
-
- if (!ppg || !pg) {
- lru_evict_one(pcache_zone);
- if (!ppg && !(ppg = vzalloc(sizeof(struct pcache_pg)))) {
- return NULL;
- }
-
- if (!pg && !(pg = pcache_alloc_page())) {
- return NULL;
- }
+ struct pcache_pg* ppg;
+ void* data_page;
+
+ data_page = pcache_alloc_page();
+ if (!data_page) {
+ return NULL;
}
- ppg->pg = pg;
- ppg->holder = pcache;
-
- llist_append(&pcache->pages, &ppg->pg_list);
- btrie_set(&pcache->tree, index, ppg);
+ ppg = vzalloc(sizeof(struct pcache_pg));
+ ppg->data = data_page;
return ppg;
}
void
pcache_set_dirty(struct pcache* pcache, struct pcache_pg* pg)
{
- if (!(pg->flags & PCACHE_DIRTY)) {
- pg->flags |= PCACHE_DIRTY;
- pcache->n_dirty++;
- llist_append(&pcache->dirty, &pg->dirty_list);
+ if (pg->dirty) {
+ return;
}
+
+ pg->dirty = true;
+ pcache->n_dirty++;
+ llist_append(&pcache->dirty, &pg->dirty_list);
}
-int
-pcache_get_page(struct pcache* pcache,
- u32_t index,
- u32_t* offset,
- struct pcache_pg** page)
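+/*
+ * look up a cached page by tag (pfn of the file position): on hit,
+ * return the held bcache object with *page set to its payload; on
+ * miss, return NULL and hand back a freshly allocated page via *page
+ * (NULL if allocation failed), which the caller must publish with
+ * bcache_put() once filled.
+ */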
+static bcobj_t
+__getpage_and_lock(struct pcache* pcache, unsigned int tag,
+ struct pcache_pg** page)
{
- struct pcache_pg* pg = btrie_get(&pcache->tree, index);
- int is_new = 0;
- u32_t mask = ((1 << pcache->tree.truncated) - 1);
- *offset = index & mask;
- if (!pg && (pg = pcache_new_page(pcache, index))) {
- pg->fpos = index & ~mask;
- pcache->n_pages++;
- is_new = 1;
+ bcobj_t cobj;
+ struct pcache_pg* pg;
+
+ if (bcache_tryget(&pcache->cache, tag, &cobj))
+ {
+ *page = (struct pcache_pg*)bcached_data(cobj);
+ return cobj;
+ }
+
+ pg = pcache_new_page(pcache);
+ if (pg) {
+ pg->index = tag;
}
- if (pg)
- lru_use_one(pcache_zone, &pg->lru);
+
*page = pg;
- return is_new;
+
+ return NULL;
+}
+
+static inline int
+__fill_page(struct v_inode* inode, struct pcache_pg* pg, unsigned int index)
+{
+ return inode->default_fops->read_page(inode, pg->data, page_addr(index));
}
int
pcache_write(struct v_inode* inode, void* data, u32_t len, u32_t fpos)
{
int errno = 0;
- u32_t pg_off, buf_off = 0;
- struct pcache* pcache = inode->pg_cache;
+ unsigned int tag, off, wr_cnt;
+ unsigned int end = fpos + len;
+ struct pcache* pcache;
struct pcache_pg* pg;
+ bcobj_t obj;
- while (buf_off < len && errno >= 0) {
- u32_t wr_bytes = MIN(PAGE_SIZE - pg_off, len - buf_off);
+ pcache = inode->pg_cache;
+
+ while (fpos < end && errno >= 0) {
+ tag = pfn(fpos);
+ off = va_offset(fpos);
+ wr_cnt = MIN(end - fpos, PAGE_SIZE - off);
- int new_page = pcache_get_page(pcache, fpos, &pg_off, &pg);
+ obj = __getpage_and_lock(pcache, tag, &pg);
- if (new_page) {
- // Filling up the page
- errno = inode->default_fops->read_page(inode, pg->pg, pg->fpos);
+ if (!obj && !pg) {
+            errno = inode->default_fops->write(inode, data, wr_cnt, fpos);
+ goto cont;
+ }
+        // a new page with a partial write: fill it from disk first,
+        // so the bytes outside the written range stay valid
+ if (!obj && wr_cnt != PAGE_SIZE) {
+ errno = __fill_page(inode, pg, tag);
if (errno < 0) {
- break;
+ return errno;
}
- if (errno < (int)PAGE_SIZE) {
- // EOF
- len = MIN(len, buf_off + errno);
- }
- } else if (!pg) {
- errno = inode->default_fops->write(inode, data, wr_bytes, fpos);
- continue;
}
-
- memcpy(pg->pg + pg_off, (data + buf_off), wr_bytes);
+
+ memcpy(offset(pg->data, off), data, wr_cnt);
pcache_set_dirty(pcache, pg);
- pg->len = pg_off + wr_bytes;
- buf_off += wr_bytes;
- fpos += wr_bytes;
+ if (obj) {
+ bcache_return(obj);
+ } else {
+ bcache_put(&pcache->cache, tag, pg);
+ }
+
+cont:
+ data = offset(data, wr_cnt);
+ fpos += wr_cnt;
}
- return errno < 0 ? errno : (int)buf_off;
+ return errno < 0 ? errno : (int)(len - (end - fpos));
}
int
pcache_read(struct v_inode* inode, void* data, u32_t len, u32_t fpos)
{
- u32_t pg_off, buf_off = 0, new_pg = 0;
int errno = 0;
- struct pcache* pcache = inode->pg_cache;
+ unsigned int tag, off, rd_cnt;
+ unsigned int end = fpos + len, size = 0;
+ struct pcache* pcache;
struct pcache_pg* pg;
+ bcobj_t obj;
+
+ pcache = inode->pg_cache;
+
+ while (fpos < page_upaligned(end)) {
+ tag = pfn(fpos);
+ off = va_offset(fpos);
- while (buf_off < len) {
- int new_page = pcache_get_page(pcache, fpos, &pg_off, &pg);
- if (new_page) {
- // Filling up the page
- errno = inode->default_fops->read_page(inode, pg->pg, pg->fpos);
+        obj = __getpage_and_lock(pcache, tag, &pg);
+        if (!obj && !pg) {
+            return ENOMEM;  // page allocation failed
+        }
+
+        if (!obj) {
+            errno = __fill_page(inode, pg, tag);
if (errno < 0) {
- break;
- }
- if (errno < (int)PAGE_SIZE) {
- // EOF
- len = MIN(len, buf_off + errno);
+ return errno;
}
- pg->len = errno;
- } else if (!pg) {
- errno = inode->default_fops->read(
- inode, (data + buf_off), len - buf_off, pg->fpos);
- buf_off = len;
- break;
+ end -= (PAGE_SIZE - errno);
}
- u32_t rd_bytes = MIN(pg->len - pg_off, len - buf_off);
-
- if (!rd_bytes)
- break;
+ rd_cnt = MIN(end - fpos, PAGE_SIZE - off);
+ memcpy(data, pg->data + off, rd_cnt);
- memcpy((data + buf_off), pg->pg + pg_off, rd_bytes);
+ if (obj) {
+ bcache_return(obj);
+ } else {
+ bcache_put(&pcache->cache, tag, pg);
+ }
- buf_off += rd_bytes;
- fpos += rd_bytes;
+ data += rd_cnt;
+ size += rd_cnt;
+ fpos = page_aligned(fpos + PAGE_SIZE);
}
- return errno < 0 ? errno : (int)buf_off;
+ return errno < 0 ? errno : (int)size;
}
void
pcache_release(struct pcache* pcache)
{
- struct pcache_pg *pos, *n;
- llist_for_each(pos, n, &pcache->pages, pg_list)
- {
- lru_remove(pcache_zone, &pos->lru);
- vfree(pos);
- }
-
- btrie_release(&pcache->tree);
+ bcache_free(&pcache->cache);
}
int
pcache_commit(struct v_inode* inode, struct pcache_pg* page)
{
- if (!(page->flags & PCACHE_DIRTY)) {
+ if (!page->dirty) {
return 0;
}
- int errno = inode->default_fops->write_page(inode, page->pg, page->fpos);
-
+ int errno;
+ unsigned int fpos = page_addr(page->index);
+
+ errno = inode->default_fops->write_page(inode, page->data, fpos);
if (!errno) {
- page->flags &= ~PCACHE_DIRTY;
+ page->dirty = false;
llist_delete(&page->dirty_list);
inode->pg_cache->n_dirty--;
}
void
pcache_commit_all(struct v_inode* inode)
{
- if (!inode->pg_cache) {
+ struct pcache* cache = inode->pg_cache;
+ if (!cache) {
return;
}
- struct pcache* cache = inode->pg_cache;
struct pcache_pg *pos, *n;
-
llist_for_each(pos, n, &cache->dirty, dirty_list)
{
pcache_commit(inode, pos);
}
-}
-
-void
-pcache_invalidate(struct pcache* pcache, struct pcache_pg* page)
-{
- pcache_commit(pcache->master, page);
- pcache_release_page(pcache, page);
}
\ No newline at end of file
-#include <lunaix/fs/iso9660.h>
#include <lunaix/fs/probe_boot.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/syslog.h>
+#include "iso9660/iso9660.h"
+
LOG_MODULE("PROBE")
#define LUNAIX_ID 0x414e554cUL // "LUNA"
dev->ident.unique,
dev->name.value,
(char*)volp->vol_id);
- break;
+ goto done;
}
}
+ return NULL;
+
done:
vfree(volp);
return dev;
*
*/
#include <klibc/string.h>
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
#include <lunaix/fs/ramfs.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/spike.h>
rinode->flags = flags;
inode->data = rinode;
+ inode->itype = VFS_IFFILE;
- if (!(flags & RAMF_DIR)) {
- inode->itype = VFS_IFFILE;
+ if ((flags & RAMF_DIR)) {
+ inode->itype |= VFS_IFDIR;
}
if ((flags & RAMF_SYMLINK)) {
int
ramfs_readdir(struct v_file* file, struct dir_context* dctx)
{
- int i = 0;
+ unsigned int i = 2;
struct v_dnode *pos, *n;
+
+ if (fsapi_handle_pseudo_dirent(file, dctx)) {
+ return 1;
+ }
+
llist_for_each(pos, n, &file->dnode->children, siblings)
{
- if (i++ >= dctx->index) {
+ if (i++ >= file->f_pos) {
dctx->read_complete_callback(dctx,
pos->name.value,
pos->name.len,
}
int
-ramfs_create(struct v_inode* this, struct v_dnode* dnode)
+ramfs_create(struct v_inode* this, struct v_dnode* dnode, unsigned int itype)
{
return __ramfs_mknod(dnode, NULL, RAMF_FILE);
}
return 0;
}
-void
-ramfs_init()
-{
- struct filesystem* ramfs = fsm_new_fs("ramfs", -1);
- ramfs->mount = ramfs_mount;
- ramfs->unmount = ramfs_unmount;
-
- fsm_register(ramfs);
-}
-EXPORT_FILE_SYSTEM(ramfs, ramfs_init);
-
int
ramfs_mksymlink(struct v_inode* this, const char* target)
{
memcpy(symlink, target, len);
- this->itype |= VFS_IFSYMLINK;
+ this->itype = VFS_IFSYMLINK;
rinode->flags |= RAMF_SYMLINK;
rinode->symlink = symlink;
+ rinode->size = len;
return 0;
}
*path_out = rinode->symlink;
- return 0;
+ return rinode->size;
}
int
-ramfs_unlink(struct v_inode* this)
+ramfs_unlink(struct v_inode* this, struct v_dnode* name)
{
struct ram_inode* rinode = RAM_INODE(this->data);
if ((rinode->flags & RAMF_SYMLINK)) {
rinode->flags &= ~RAMF_SYMLINK;
- this->itype &= ~VFS_IFSYMLINK;
+ this->itype &= ~F_SYMLINK;
vfree(rinode->symlink);
return 0;
}
+static void
+ramfs_init()
+{
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("ramfs", FSTYPE_PSEUDO);
+
+ fsapi_fs_set_mntops(fs, ramfs_mount, ramfs_unmount);
+ fsapi_fs_finalise(fs);
+}
+EXPORT_FILE_SYSTEM(ramfs, ramfs_init);
+
const struct v_inode_ops ramfs_inode_ops = { .mkdir = ramfs_mkdir,
.rmdir = default_inode_rmdir,
.dir_lookup =
#include <klibc/strfmt.h>
#include <klibc/string.h>
#include <lunaix/clock.h>
-#include <lunaix/fs.h>
+#include <lunaix/fs/api.h>
#include <lunaix/fs/twifs.h>
#include <lunaix/fs/twimap.h>
#include <lunaix/mm/cake.h>
return 0;
}
+int
+__twifs_unmount(struct v_superblock* vsb)
+{
+ return 0;
+}
+
int
__twifs_fwrite(struct v_inode* inode, void* buffer, size_t len, size_t fpos)
{
{
struct twifs_node* twi_node = (struct twifs_node*)inode->data;
- if ((twi_node->itype & F_FILE)) {
+ if (!check_directory_node(inode)) {
return ENOTDIR;
}
__twifs_iterate_dir(struct v_file* file, struct dir_context* dctx)
{
struct twifs_node* twi_node = (struct twifs_node*)(file->inode->data);
- int counter = 0;
+ unsigned int counter = 2;
struct twifs_node *pos, *n;
+ if (fsapi_handle_pseudo_dirent(file, dctx)) {
+ return 1;
+ }
+
llist_for_each(pos, n, &twi_node->children, siblings)
{
- if (counter++ >= dctx->index) {
- dctx->index = counter;
- dctx->read_complete_callback(
+ if (counter++ >= file->f_pos) {
+ fsapi_dir_report(
dctx, pos->name.value, pos->name.len, vfs_get_dtype(pos->itype));
return 1;
}
int
twifs_rm_node(struct twifs_node* node)
{
- if (!(node->itype & F_FILE) && !llist_empty(&node->children)) {
+ if (check_itype(node->itype, VFS_IFDIR) && !llist_empty(&node->children)) {
return ENOTEMPTY;
}
llist_delete(&node->siblings);
void
twifs_init()
{
- twi_pile = cake_new_pile("twifs_node", sizeof(struct twifs_node), 1, 0);
-
- struct filesystem* twifs = vzalloc(sizeof(struct filesystem));
- twifs->fs_name = HSTR("twifs", 5);
- twifs->mount = __twifs_mount;
- twifs->types = FSTYPE_ROFS;
- twifs->fs_id = 0;
-
- fsm_register(twifs);
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("twifs", FSTYPE_PSEUDO | FSTYPE_ROFS);
+
+ fsapi_fs_set_mntops(fs, __twifs_mount, __twifs_unmount);
+ fsapi_fs_finalise(fs);
+ twi_pile = cake_new_pile("twifs_node", sizeof(struct twifs_node), 1, 0);
fs_root = twifs_dir_node(NULL, NULL, 0, 0);
}
EXPORT_FILE_SYSTEM(twifs, twifs_init);
twimap_read(struct twimap* map, void* buffer, size_t len, size_t fpos)
{
map->buffer = valloc(TWIMAP_BUFFER_SIZE);
+ map->size_acc = 0;
+
map->reset(map);
// FIXME what if TWIMAP_BUFFER_SIZE is not big enough?
- size_t pos = 0;
- do {
+ size_t pos = map->size_acc;
+ while (pos <= fpos) {
map->size_acc = 0;
map->read(map);
pos += map->size_acc;
- } while (pos <= fpos && map->go_next(map));
+
+ if (!map->go_next(map)) {
+ break;
+ }
+ }
if (pos <= fpos) {
vfree(map->buffer);
struct hstr vfs_dot = HSTR(".", 1);
struct hstr vfs_empty = HSTR("", 0);
-struct v_superblock*
-vfs_sb_alloc();
-
-void
-vfs_sb_free(struct v_superblock* sb);
-
static int
__vfs_try_evict_dnode(struct lru_node* obj);
dnode_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
- dnode_lru = lru_new_zone(__vfs_try_evict_dnode);
- inode_lru = lru_new_zone(__vfs_try_evict_inode);
+ dnode_lru = lru_new_zone("vfs_dnode", __vfs_try_evict_dnode);
+ inode_lru = lru_new_zone("vfs_inode", __vfs_try_evict_inode);
hstr_rehash(&vfs_ddot, HSTR_FULL_HASH);
hstr_rehash(&vfs_dot, HSTR_FULL_HASH);
return &dnode_cache[_hash & VFS_HASH_MASK];
}
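+
+/*
+ * flush dirty page-cache pages first, then let the filesystem sync
+ * the inode metadata itself; the caller must hold the inode lock.
+ */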
+static inline int
+__sync_inode_nolock(struct v_inode* inode)
+{
+ pcache_commit_all(inode);
+
+ int errno = ENOTSUP;
+ if (inode->ops->sync) {
+ errno = inode->ops->sync(inode);
+ }
+
+ return errno;
+}
+
struct v_dnode*
vfs_dcache_lookup(struct v_dnode* parent, struct hstr* str)
{
struct v_dnode *pos, *n;
hashtable_bucket_foreach(slot, pos, n, hash_list)
{
- if (pos->name.hash == hash) {
+ if (pos->name.hash == hash && pos->parent == parent) {
return pos;
}
}
vfile->ref_count = ATOMIC_VAR_INIT(1);
vfile->ops = inode->default_fops;
- if ((inode->itype & F_MFILE) && !inode->pg_cache) {
+ if (check_file_node(inode) && !inode->pg_cache) {
struct pcache* pcache = vzalloc(sizeof(struct pcache));
pcache_init(pcache);
pcache->master = inode;
llist_delete(&assign_to->aka_list);
assign_to->inode->link_count--;
}
+
llist_append(&inode->aka_dnodes, &assign_to->aka_list);
assign_to->inode = inode;
inode->link_count++;
int
vfs_pclose(struct v_file* file, pid_t pid)
{
+ struct v_inode* inode;
int errno = 0;
+
if (file->ref_count > 1) {
atomic_fetch_sub(&file->ref_count, 1);
- } else if (!(errno = file->ops->close(file))) {
- atomic_fetch_sub(&file->dnode->ref_count, 1);
- file->inode->open_count--;
-
- /*
- * Prevent dead lock.
- * This happened when process is terminated while blocking on read.
- * In that case, the process is still holding the inode lock and it
- will never get released.
- * The unlocking should also include ownership check.
- *
- * To see why, consider two process both open the same file both with
- * fd=x.
- * Process A: busy on reading x
- * Process B: do nothing with x
- * Assuming that, after a very short time, process B get terminated
- * while process A is still busy in it's reading business. By this
- * design, the inode lock of this file x is get released by B rather
- * than A. And this will cause a probable race condition on A if other
- * process is writing to this file later after B exit.
- */
- if (mutex_on_hold(&file->inode->lock)) {
- mutex_unlock_for(&file->inode->lock, pid);
- }
- mnt_chillax(file->dnode->mnt);
+ return 0;
+ }
+
+ inode = file->inode;
+
+ /*
+    /*
+     * Prevent deadlock.
+     * This happens when a process is terminated while blocking on read.
+     * In that case, the process is still holding the inode lock, and it
+     * will never get released.
+     * The unlocking should therefore include an ownership check.
+     *
+     * To see why, consider two processes that both open the same file,
+     * both with fd=x.
+     *   Process A: busy reading x
+     *   Process B: does nothing with x
+     * Suppose that, after a very short time, process B gets terminated
+     * while process A is still busy with its read. By this design, the
+     * inode lock of file x is then released by B rather than A, which
+     * can cause a race condition on A if another process writes to
+     * this file after B exits.
+     */
+ if (mutex_on_hold(&inode->lock)) {
+ mutex_unlock_for(&inode->lock, pid);
+ }
+
+ lock_inode(inode);
+
+ pcache_commit_all(inode);
+ if ((errno = file->ops->close(file))) {
+ goto unlock;
+ }
+
+ atomic_fetch_sub(&file->dnode->ref_count, 1);
+ inode->open_count--;
- pcache_commit_all(file->inode);
- cake_release(file_pile, file);
+ if (!inode->open_count) {
+ __sync_inode_nolock(inode);
}
+
+ mnt_chillax(file->dnode->mnt);
+ cake_release(file_pile, file);
+
+unlock:
+ unlock_inode(inode);
return errno;
}
cake_release(fd_pile, fd);
}
+int
+vfs_isync(struct v_inode* inode)
+{
+ lock_inode(inode);
+
+ int errno = __sync_inode_nolock(inode);
+
+ unlock_inode(inode);
+
+ return errno;
+}
+
int
vfs_fsync(struct v_file* file)
{
return errno;
}
- lock_inode(file->inode);
-
- pcache_commit_all(file->inode);
-
- errno = ENOTSUP;
- if (file->ops->sync) {
- errno = file->ops->sync(file);
- }
-
- unlock_inode(file->inode);
-
- return errno;
+ return vfs_isync(file->inode);
}
int
memset(sb, 0, sizeof(*sb));
llist_init_head(&sb->sb_list);
sb->i_cache = vzalloc(VFS_HASHTABLE_SIZE * sizeof(struct hbucket));
+ sb->ref_count = 1;
return sb;
}
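+
+/*
+ * superblocks are reference counted: every dnode and inode holding a
+ * superblock pointer owns one reference, and the final vfs_sb_free()
+ * invokes the fs-specific release hook.
+ */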
+void
+vfs_sb_ref(struct v_superblock* sb)
+{
+ sb->ref_count++;
+}
+
void
vfs_sb_free(struct v_superblock* sb)
{
+ assert(sb->ref_count);
+
+ sb->ref_count--;
+ if (sb->ref_count) {
+ return;
+ }
+
+ if (sb->ops.release) {
+ sb->ops.release(sb);
+ }
+
vfree(sb->i_cache);
cake_release(superblock_pile, sb);
}
hstrcpy(&dnode->name, name);
if (parent) {
- dnode->super_block = parent->super_block;
+ vfs_d_assign_sb(dnode, parent->super_block);
dnode->mnt = parent->mnt;
}
vfs_dcache_remove(pos);
}
+ if (dnode->destruct) {
+ dnode->destruct(dnode);
+ }
+
+ vfs_sb_free(dnode->super_block);
vfree((void*)dnode->name.value);
cake_release(dnode_pile, dnode);
}
sb->ops.init_inode(sb, inode);
- inode->sb = sb;
inode->ctime = clock_unixtime();
inode->atime = inode->ctime;
inode->mtime = inode->ctime;
+ vfs_i_assign_sb(inode, sb);
lru_use_one(inode_lru, &inode->lru);
return inode;
}
if (inode->destruct) {
inode->destruct(inode);
}
+
+ vfs_sb_free(inode->sb);
hlist_delete(&inode->hash_list);
cake_release(inode_pile, inode);
}
/* ---- System call definition and support ---- */
-#define FLOCATE_CREATE_EMPTY 1
-#define FLOCATE_CREATE_ONLY 2
-#define FLOCATE_NOFOLLOW 4
+// make a new name if it does not exist yet
+#define FLOC_MAYBE_MKNAME 1
+
+// the name must not exist and will be created
+#define FLOC_MKNAME 2
+
+// do not follow symlinks
+#define FLOC_NOFOLLOW 4
int
vfs_getfd(int fd, struct v_fd** fd_s)
return EBADF;
}
-int
+static int
+__vfs_mknod(struct v_inode* parent, struct v_dnode* dnode,
+ unsigned int itype, dev_t* dev)
+{
+ int errno;
+
+ errno = parent->ops->create(parent, dnode, itype);
+ if (errno) {
+ return errno;
+ }
+
+ return 0;
+}
+
+struct file_locator {
+ struct v_dnode* dir;
+ struct v_dnode* file;
+ bool fresh;
+};
+
+/**
+ * @brief unlock the file locator (floc) if possible.
+ *        If the file to be located does not exist and any
+ *        FLOC_*MKNAME flag is set, the parent dnode stays locked
+ *        until the new file has been properly finalised by the
+ *        subsequent logic; this helper releases that lock.
+ *
+ * @param floc
+ */
+static inline void
+__floc_try_unlock(struct file_locator* floc)
+{
+ if (floc->fresh) {
+ assert(floc->dir);
+ unlock_dnode(floc->dir);
+ }
+}
+
+static int
__vfs_try_locate_file(const char* path,
- struct v_dnode** fdir,
- struct v_dnode** file,
+ struct file_locator* floc,
int options)
{
char name_str[VFS_NAME_MAXLEN];
+ struct v_dnode *fdir, *file;
struct hstr name = HSTR(name_str, 0);
int errno, woption = 0;
- if ((options & FLOCATE_NOFOLLOW)) {
+ if ((options & FLOC_NOFOLLOW)) {
woption |= VFS_WALK_NOFOLLOW;
+ options &= ~FLOC_NOFOLLOW;
}
+ floc->fresh = false;
name_str[0] = 0;
- if ((errno = vfs_walk_proc(path, fdir, &name, woption | VFS_WALK_PARENT))) {
+ errno = vfs_walk_proc(path, &fdir, &name, woption | VFS_WALK_PARENT);
+ if (errno) {
return errno;
}
- errno = vfs_walk(*fdir, name.value, file, NULL, woption);
+ errno = vfs_walk(fdir, name.value, &file, NULL, woption);
- if (errno != ENOENT && (options & FLOCATE_CREATE_ONLY)) {
- return EEXIST;
+ if (errno && errno != ENOENT) {
+ goto done;
+ }
+
+ if (!errno) {
+ if ((options & FLOC_MKNAME)) {
+ errno = EEXIST;
+ }
+ goto done;
}
- if (errno != ENOENT ||
- !(options & (FLOCATE_CREATE_EMPTY | FLOCATE_CREATE_ONLY))) {
- return errno;
+ // errno == ENOENT
+ if (!options) {
+ goto done;
}
- struct v_dnode* parent = *fdir;
- struct v_dnode* file_new = vfs_d_alloc(parent, &name);
+ errno = vfs_check_writable(fdir);
+ if (errno) {
+ goto done;
+ }
+
+ floc->fresh = true;
+
+ file = vfs_d_alloc(fdir, &name);
- if (!file_new) {
+ if (!file) {
return ENOMEM;
}
- lock_dnode(parent);
+ lock_dnode(fdir);
- if (!(errno = parent->inode->ops->create(parent->inode, file_new))) {
- vfs_dcache_add(parent, file_new);
- *file = file_new;
- } else {
- vfs_d_free(file_new);
- }
+ vfs_dcache_add(fdir, file);
- unlock_dnode(parent);
+done:
+ floc->dir = fdir;
+ floc->file = file;
return errno;
}
int errno, fd, loptions = 0;
struct v_dnode *dentry, *file;
struct v_file* ofile = NULL;
+ struct file_locator floc;
+ struct v_inode* inode;
if ((options & FO_CREATE)) {
- loptions |= FLOCATE_CREATE_EMPTY;
+ loptions |= FLOC_MAYBE_MKNAME;
} else if ((options & FO_NOFOLLOW)) {
- loptions |= FLOCATE_NOFOLLOW;
+ loptions |= FLOC_NOFOLLOW;
}
- errno = __vfs_try_locate_file(path, &dentry, &file, loptions);
+ errno = __vfs_try_locate_file(path, &floc, loptions);
- if (!errno && !(errno = vfs_alloc_fdslot(&fd))) {
+ if (errno || (errno = vfs_alloc_fdslot(&fd))) {
+ return errno;
+ }
+
+ file = floc.file;
+ dentry = floc.dir;
- if (errno || (errno = vfs_open(file, &ofile))) {
+ if (floc.fresh) {
+ errno = __vfs_mknod(dentry->inode, file, VFS_IFFILE, NULL);
+ if (errno) {
+ vfs_d_free(file);
+ __floc_try_unlock(&floc);
return errno;
}
- struct v_fd* fd_s = cake_grab(fd_pile);
- memset(fd_s, 0, sizeof(*fd_s));
+ __floc_try_unlock(&floc);
+ }
+
- ofile->f_pos = ofile->inode->fsize & -((options & FO_APPEND) != 0);
- fd_s->file = ofile;
- fd_s->flags = options;
- __current->fdtable->fds[fd] = fd_s;
- return fd;
+ if ((errno = vfs_open(file, &ofile))) {
+ return errno;
}
- return errno;
+ inode = ofile->inode;
+ lock_inode(inode);
+
+ struct v_fd* fd_s = cake_grab(fd_pile);
+ memset(fd_s, 0, sizeof(*fd_s));
+
+ if ((options & O_TRUNC)) {
+        inode->fsize = 0;
+ }
+
+ if (vfs_get_dtype(inode->itype) == DT_DIR) {
+ ofile->f_pos = 0;
+ }
+
+ fd_s->file = ofile;
+ fd_s->flags = options;
+ __current->fdtable->fds[fd] = fd_s;
+
+ unlock_inode(inode);
+
+ return fd;
}
__DEFINE_LXSYSCALL2(int, open, const char*, path, int, options)
const int dtype)
{
struct lx_dirent* dent = (struct lx_dirent*)dctx->cb_data;
- strncpy(dent->d_name, name, DIRENT_NAME_MAX_LEN);
+ strncpy(dent->d_name, name, MIN(len, DIRENT_NAME_MAX_LEN));
dent->d_nlen = len;
dent->d_type = dtype;
}
lock_inode(inode);
- if ((inode->itype & F_FILE)) {
+ if (!check_directory_node(inode)) {
errno = ENOTDIR;
- } else {
- struct dir_context dctx = (struct dir_context){
- .cb_data = dent,
- .index = dent->d_offset,
- .read_complete_callback = __vfs_readdir_callback};
- errno = 1;
- if (dent->d_offset == 0) {
- __vfs_readdir_callback(&dctx, vfs_dot.value, vfs_dot.len, DT_DIR);
- } else if (dent->d_offset == 1) {
- __vfs_readdir_callback(&dctx, vfs_ddot.value, vfs_ddot.len, DT_DIR);
- } else {
- dctx.index -= 2;
- if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
- unlock_inode(inode);
- goto done;
- }
- }
- dent->d_offset++;
+ goto unlock;
+ }
+
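+    /*
+     * directory iteration is driven by the file position: each call
+     * reports exactly one entry and advances both f_pos and d_offset.
+     * synthetic "." and ".." entries are emitted by the filesystem
+     * itself (see fsapi_handle_pseudo_dirent).
+     */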
+ struct dir_context dctx = (struct dir_context) {
+ .cb_data = dent,
+ .read_complete_callback = __vfs_readdir_callback
+ };
+
+ if ((errno = fd_s->file->ops->readdir(fd_s->file, &dctx)) != 1) {
+ goto unlock;
}
+ dent->d_offset++;
+ fd_s->file->f_pos++;
+unlock:
unlock_inode(inode);
done:
}
struct v_file* file = fd_s->file;
- if (!(file->inode->itype & F_FILE)) {
+ if (check_directory_node(file->inode)) {
errno = EISDIR;
goto done;
}
file->inode->atime = clock_unixtime();
- if ((file->inode->itype & VFS_IFSEQDEV) || (fd_s->flags & FO_DIRECT)) {
+ if (check_seqdev_node(file->inode) || (fd_s->flags & FO_DIRECT)) {
errno = file->ops->read(file->inode, buf, count, file->f_pos);
} else {
errno = pcache_read(file->inode, buf, count, file->f_pos);
goto done;
}
+ struct v_inode* inode;
struct v_file* file = fd_s->file;
if ((errno = vfs_check_writable(file->dnode))) {
goto done;
}
- if (!(file->inode->itype & F_FILE)) {
+ if (check_directory_node(file->inode)) {
errno = EISDIR;
goto done;
}
- lock_inode(file->inode);
+ inode = file->inode;
+ lock_inode(inode);
- file->inode->mtime = clock_unixtime();
+ inode->mtime = clock_unixtime();
+ if ((fd_s->flags & O_APPEND)) {
+ file->f_pos = inode->fsize;
+ }
- if ((file->inode->itype & VFS_IFSEQDEV) || (fd_s->flags & FO_DIRECT)) {
- errno = file->ops->write(file->inode, buf, count, file->f_pos);
+ if (check_seqdev_node(inode) || (fd_s->flags & FO_DIRECT)) {
+ errno = file->ops->write(inode, buf, count, file->f_pos);
} else {
- errno = pcache_write(file->inode, buf, count, file->f_pos);
+ errno = pcache_write(inode, buf, count, file->f_pos);
}
if (errno > 0) {
file->f_pos += errno;
- unlock_inode(file->inode);
+ inode->fsize = MAX(inode->fsize, file->f_pos);
+
+ unlock_inode(inode);
return errno;
}
- unlock_inode(file->inode);
+ unlock_inode(inode);
done:
return DO_STATUS(errno);
}
struct v_file* file = fd_s->file;
+ struct v_inode* inode = file->inode;
if (!file->ops->seek) {
errno = ENOTSUP;
goto done;
}
- lock_inode(file->inode);
+ lock_inode(inode);
int overflow = 0;
int fpos = file->f_pos;
+
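+ // seeking relative to the end is meaningless for a directory
+ // stream, so degrade it to an absolute seek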
+ if (vfs_get_dtype(inode->itype) == DT_DIR) {
+ options = (options != FSEEK_END) ? options : FSEEK_SET;
+ }
+
switch (options) {
case FSEEK_CUR:
- overflow = sadd_overflow((int)file->f_pos, offset, &fpos);
+ overflow = sadd_of((int)file->f_pos, offset, &fpos);
break;
case FSEEK_END:
- overflow = sadd_overflow((int)file->inode->fsize, offset, &fpos);
+ overflow = sadd_of((int)inode->fsize, offset, &fpos);
break;
case FSEEK_SET:
fpos = offset;
break;
}
+
if (overflow) {
errno = EOVERFLOW;
- } else if (!(errno = file->ops->seek(file->inode, fpos))) {
- file->f_pos = fpos;
+ }
+ else {
+ errno = file->ops->seek(file, fpos);
}
- unlock_inode(file->inode);
+ unlock_inode(inode);
done:
return DO_STATUS(errno);
{
const char* link;
struct v_inode* inode = dnode->inode;
- if (inode->ops->read_symlink) {
- lock_inode(inode);
- int errno = inode->ops->read_symlink(inode, &link);
- strncpy(buf, link, size);
+ if (!check_symlink_node(inode)) {
+ return EINVAL;
+ }
- unlock_inode(inode);
- return errno;
+ if (!inode->ops->read_symlink) {
+ return ENOTSUP;
}
- return 0;
+
+ lock_inode(inode);
+
+ int errno = inode->ops->read_symlink(inode, &link);
+ if (errno >= 0) {
+ strncpy(buf, link, MIN(size, (size_t)errno));
+ }
+
+ unlock_inode(inode);
+ return errno;
}
int
vfs_get_dtype(int itype)
{
- if ((itype & VFS_IFSYMLINK) == VFS_IFSYMLINK) {
- return DT_SYMLINK;
- } else if (!(itype & VFS_IFFILE)) {
- return DT_DIR;
- } else {
- return DT_FILE;
+ int dtype = DT_FILE;
+ if (check_itype(itype, VFS_IFSYMLINK)) {
+ dtype |= DT_SYMLINK;
+ }
+
+ if (check_itype(itype, VFS_IFDIR)) {
+ dtype |= DT_DIR;
+ return dtype;
}
+
+ // TODO other types
+
+ return dtype;
}
__DEFINE_LXSYSCALL3(int, realpathat, int, fd, char*, buf, size_t, size)
lock_dnode(parent);
lock_inode(parent->inode);
- if (!(dnode->inode->itype & F_MFILE)) {
+ if (check_directory_node(dnode->inode)) {
errno = parent->inode->ops->rmdir(parent->inode, dnode);
if (!errno) {
vfs_dcache_remove(dnode);
goto done;
}
+ struct v_inode* inode = parent->inode;
+
lock_dnode(parent);
- lock_inode(parent->inode);
+ lock_inode(inode);
if ((parent->super_block->fs->types & FSTYPE_ROFS)) {
errno = ENOTSUP;
- } else if (!parent->inode->ops->mkdir) {
+ } else if (!inode->ops->mkdir) {
errno = ENOTSUP;
- } else if ((parent->inode->itype & F_FILE)) {
+ } else if (!check_directory_node(inode)) {
errno = ENOTDIR;
- } else if (!(errno = parent->inode->ops->mkdir(parent->inode, dir))) {
+ } else if (!(errno = inode->ops->mkdir(inode, dir))) {
vfs_dcache_add(parent, dir);
goto cleanup;
}
vfs_d_free(dir);
cleanup:
- unlock_inode(parent->inode);
+ unlock_inode(inode);
unlock_dnode(parent);
done:
return DO_STATUS(errno);
if (inode->open_count) {
errno = EBUSY;
- } else if ((inode->itype & F_MFILE)) {
- errno = inode->ops->unlink(inode);
+ } else if (!check_directory_node(inode)) {
+ errno = inode->ops->unlink(inode, dnode);
if (!errno) {
vfs_d_free(dnode);
}
__DEFINE_LXSYSCALL2(int, link, const char*, oldpath, const char*, newpath)
{
int errno;
- struct v_dnode *dentry, *to_link, *name_dentry, *name_file;
+ struct file_locator floc;
+ struct v_dnode *to_link, *name_file;
- errno = __vfs_try_locate_file(oldpath, &dentry, &to_link, 0);
+ errno = __vfs_try_locate_file(oldpath, &floc, 0);
+ if (errno) {
+ goto done;
+ }
+
+ __floc_try_unlock(&floc);
+
+ to_link = floc.file;
+ errno = __vfs_try_locate_file(newpath, &floc, FLOC_MKNAME);
- if (!errno) {
+ if (errno) {
- errno = __vfs_try_locate_file(
- newpath, &name_dentry, &name_file, FLOCATE_CREATE_ONLY);
- if (!errno) {
- errno = vfs_link(to_link, name_file);
- }
+ goto done;
+ }
+
+ name_file = floc.file;
+ errno = vfs_link(to_link, name_file);
+ if (errno) {
+ vfs_d_free(name_file);
}
+
+done:
+ __floc_try_unlock(&floc);
return DO_STATUS(errno);
}
int, symlink, const char*, pathname, const char*, link_target)
{
int errno;
- struct v_dnode *dnode, *file;
- if ((errno = __vfs_try_locate_file(
- pathname, &dnode, &file, FLOCATE_CREATE_ONLY))) {
+ struct file_locator floc;
+ struct v_dnode *file;
+ struct v_inode *f_ino;
+
+ errno = __vfs_try_locate_file(pathname, &floc, FLOC_MKNAME);
+ if (errno) {
goto done;
}
- if ((errno = vfs_check_writable(file))) {
+ file = floc.file;
+ errno = __vfs_mknod(floc.dir->inode, file, VFS_IFSYMLINK, NULL);
+ if (errno) {
+ vfs_d_free(file);
goto done;
}
- if (!file->inode->ops->set_symlink) {
+ f_ino = file->inode;
+
+ assert(f_ino);
+
+ errno = vfs_check_writable(file);
+ if (errno) {
+ goto done;
+ }
+
+ if (!f_ino->ops->set_symlink) {
errno = ENOTSUP;
goto done;
}
- lock_inode(file->inode);
+ lock_inode(f_ino);
- errno = file->inode->ops->set_symlink(file->inode, link_target);
+ errno = f_ino->ops->set_symlink(f_ino, link_target);
- unlock_inode(file->inode);
+ unlock_inode(f_ino);
done:
+ __floc_try_unlock(&floc);
return DO_STATUS(errno);
}
lock_dnode(dnode);
- if ((dnode->inode->itype & F_FILE)) {
+ if (!check_directory_node(dnode->inode)) {
errno = ENOTDIR;
goto done;
}
.st_ioblksize = PAGE_SIZE,
.st_blksize = vino->sb->blksize};
- if (VFS_DEVFILE(vino->itype)) {
+ if (check_device_node(vino)) {
struct device* rdev = resolve_device(vino->data);
if (!rdev || rdev->magic != DEV_STRUCT_MAGIC) {
errno = EINVAL;
kprintf_ml(component, level, fmt, args);
}
+void
+kprintf_v(const char* component, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ kprintf_m(component, fmt, args);
+ va_end(args);
+}
+
static void
__twimap_kprintf_read(struct twimap* map)
{
--- /dev/null
+#include <lunaix/ds/lru.h>
+#include <lunaix/mm/valloc.h>
+#include <lunaix/spike.h>
+#include <lunaix/fs/twimap.h>
+#include <lunaix/fs/twifs.h>
+
+#include <klibc/string.h>
+
+static struct llist_header zone_lead = { .next = &zone_lead, .prev = &zone_lead };
+
+DEFINE_SPINLOCK_OPS(struct lru_zone*, lock);
+
+
+static void
+__do_evict_lockless(struct lru_zone* zone, struct llist_header* elem)
+{
+ llist_delete(elem);
+ if (!zone->try_evict(container_of(elem, struct lru_node, lru_nodes))) {
+ // if the node cannot be evicted, raise its rank by one, so
+ // others can have a chance in the next round
+ struct llist_header* new_tail = zone->lead_node.prev;
+ llist_prepend(new_tail, elem);
+ } else {
+ zone->objects--;
+ }
+
+ zone->evict_stats.n_single++;
+}
+
+static void
+__lru_evict_all_lockless(struct lru_zone* zone)
+{
+ struct llist_header* tail = zone->lead_node.prev;
+ while (tail != &zone->lead_node) {
+ __do_evict_lockless(zone, tail);
+ tail = tail->prev;
+ }
+}
+
+struct lru_zone*
+lru_new_zone(const char* name, evict_cb try_evict_cb)
+{
+ struct lru_zone* zone = vzalloc(sizeof(struct lru_zone));
+ if (!zone) {
+ return NULL;
+ }
+
+ zone->try_evict = try_evict_cb;
+
+ strncpy(zone->name, name, sizeof(zone->name) - 1);
+ llist_init_head(&zone->lead_node);
+ llist_append(&zone_lead, &zone->zones);
+ spinlock_init(&zone->lock);
+
+ return zone;
+}
+
+void
+lru_free_zone(struct lru_zone* zone)
+{
+ lock(zone);
+
+ __lru_evict_all_lockless(zone);
+
+ if (llist_empty(&zone->lead_node)) {
+ llist_delete(&zone->zones);
+ vfree(zone);
+ return;
+ }
+
+ /*
+ We are unable to free it at this moment
+ (probably because something tricky happened
+ to some cached object). Thus, mark it and
+ let the daemon try to free it asynchronously.
+ */
+ zone->delayed_free = true;
+ zone->attempts++;
+
+ unlock(zone);
+}
+
+void
+lru_use_one(struct lru_zone* zone, struct lru_node* node)
+{
+ lock(zone);
+
+ assert(!zone->delayed_free);
+
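+ // a node with both links set is already in the zone;
+ // otherwise it is a newcomer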
+ if (node->lru_nodes.next && node->lru_nodes.prev) {
+ llist_delete(&node->lru_nodes);
+ }
+ else {
+ zone->objects++;
+ }
+
+ llist_prepend(&zone->lead_node, &node->lru_nodes);
+ zone->hotness++;
+
+ unlock(zone);
+}
+
+void
+lru_evict_one(struct lru_zone* zone)
+{
+ lock(zone);
+
+ struct llist_header* tail = zone->lead_node.prev;
+ if (tail == &zone->lead_node) {
+ unlock(zone);
+ return;
+ }
+
+ __do_evict_lockless(zone, tail);
+
+ unlock(zone);
+}
+
+void
+lru_evict_half(struct lru_zone* zone)
+{
+ lock(zone);
+
+ int target = (int)(zone->objects / 2);
+ struct llist_header* tail = zone->lead_node.prev;
+ while (tail != &zone->lead_node && target > 0) {
+ __do_evict_lockless(zone, tail);
+ tail = tail->prev;
+ target--;
+ }
+
+ zone->evict_stats.n_half++;
+
+ unlock(zone);
+}
+
+void
+lru_evict_all(struct lru_zone* zone)
+{
+ lock(zone);
+
+ __lru_evict_all_lockless(zone);
+
+ zone->evict_stats.n_full++;
+
+ unlock(zone);
+}
+
+void
+lru_remove(struct lru_zone* zone, struct lru_node* node)
+{
+ lock(zone);
+
+ if (node->lru_nodes.next && node->lru_nodes.prev) {
+ llist_delete(&node->lru_nodes);
+ }
+ zone->objects--;
+
+ unlock(zone);
+}
+
+static void
+read_lrulist_entry(struct twimap* map)
+{
+ struct lru_zone* zone;
+
+ zone = twimap_index(map, struct lru_zone*);
+ twimap_printf(map, "%s, %d, %d, %d, %d, %d, ",
+ zone->name,
+ zone->objects,
+ zone->hotness,
+ zone->evict_stats.n_single,
+ zone->evict_stats.n_half,
+ zone->evict_stats.n_full);
+
+ if (zone->delayed_free) {
+ twimap_printf(map, "freeing %d attempts\n", zone->attempts);
+ }
+ else {
+ twimap_printf(map, "active\n");
+ }
+}
+
+static void
+read_lrulist_reset(struct twimap* map)
+{
+ map->index = container_of(&zone_lead, struct lru_zone, zones);
+ twimap_printf(map, "name, n_objs, hot, n_evt, n_half, n_full, status\n");
+}
+
+static int
+read_lrulist_next(struct twimap* map)
+{
+ struct lru_zone* zone;
+ struct llist_header* next;
+
+ zone = twimap_index(map, struct lru_zone*);
+ next = zone->zones.next;
+ if (next == &zone_lead) {
+ return false;
+ }
+
+ map->index = container_of(next, struct lru_zone, zones);
+ return true;
+}
+
+static void
+lru_pool_twimappable()
+{
+ struct twimap* map;
+
+ map = twifs_mapping(NULL, NULL, "lru_pool");
+ map->read = read_lrulist_entry;
+ map->reset = read_lrulist_reset;
+ map->go_next = read_lrulist_next;
+}
+EXPORT_TWIFS_PLUGIN(__lru_twimap, lru_pool_twimappable);
\ No newline at end of file
static void
lunad_do_usr() {
// No, these are not preemptive
- cpu_disable_interrupt();
+ no_preemption();
if (!mount_bootmedium() || !exec_initd()) {
fail("failed to initd");
thread (which is preemptive!)
*/
- cpu_enable_interrupt();
+ set_preemption();
while (1)
{
cleanup_detached_threads();
- sched_pass();
+ yield_current();
}
}
void
__cake_stat_reset(struct twimap* map)
{
- map->index = container_of(piles.next, struct cake_pile, piles);
+ map->index = container_of(&piles, struct cake_pile, piles);
+ twimap_printf(map, "name, n_cakes, pg/cake, slices/cake, n_slices\n");
}
void
map->reset = __cake_stat_reset;
map->go_next = __cake_stat_gonext;
map->read = __cake_rd_stat;
- __cake_stat_reset(map);
struct cake_pile *pos, *n;
llist_for_each(pos, n, &piles, piles)
#define CLASS_LEN(class) (sizeof(class) / sizeof(class[0]))
-static char piles_names[][PILE_NAME_MAXLEN] = {"valloc_8",
- "valloc_16",
- "valloc_32",
- "valloc_64",
- "valloc_128",
- "valloc_256",
- "valloc_512",
- "valloc_1k",
- "valloc_2k",
- "valloc_4k",
- "valloc_8k"};
-
-static char piles_names_dma[][PILE_NAME_MAXLEN] = {"valloc_dma_128",
- "valloc_dma_256",
- "valloc_dma_512",
- "valloc_dma_1k",
- "valloc_dma_2k",
- "valloc_dma_4k"};
+static char piles_names[][PILE_NAME_MAXLEN] =
+{
+ "valloc_8", "valloc_16", "valloc_32", "valloc_64",
+ "valloc_128", "valloc_256", "valloc_512", "valloc_1k",
+ "valloc_2k", "valloc_4k", "valloc_8k"
+};
+
+static char piles_names_dma[][PILE_NAME_MAXLEN] =
+{
+ "valloc_dma_128", "valloc_dma_256", "valloc_dma_512",
+ "valloc_dma_1k", "valloc_dma_2k", "valloc_dma_4k"
+};
static struct cake_pile* piles[CLASS_LEN(piles_names)];
static struct cake_pile* piles_dma[CLASS_LEN(piles_names_dma)];
size_t len,
size_t boffset)
{
- size_t i = ILOG2(size);
+ size_t i = ilog2(size);
i += (size - (1 << i) != 0);
i -= boffset;
vcalloc(unsigned int size, unsigned int count)
{
unsigned int alloc_size;
- if (umul_overflow(size, count, &alloc_size)) {
+ if (umul_of(size, count, &alloc_size)) {
return 0;
}
"process.c",
"taskfs.c",
"task_attr.c",
- "thread.c"
+ "thread.c",
+ "preemption.c",
+ "switch.c",
])
\ No newline at end of file
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
#include <lunaix/signal.h>
+#include <lunaix/kpreempt.h>
#include <sys/abi.h>
#include <sys/mm/mm_defs.h>
struct leaflet* leaflet;
ptr_t kstack_pn = pfn(current_thread->kstack);
- kstack_pn -= pfn(KSTACK_SIZE) - 1;
+ kstack_pn -= pfn(KSTACK_SIZE);
// copy the kernel stack
pte_t* src_ptep = mkptep_pn(VMS_SELF, kstack_pn);
pte_t* dest_ptep = mkptep_pn(vm_mnt, kstack_pn);
- for (size_t i = 0; i < pfn(KSTACK_SIZE); i++) {
+ for (size_t i = 0; i <= pfn(KSTACK_SIZE); i++) {
pte_t p = *src_ptep;
if (pte_isguardian(p)) {
pid_t
dup_proc()
{
- // FIXME need investigate: issue with fork, as well as pthread
- // especially when involving frequent alloc and dealloc ops
- // (could be issue in allocator's segregated free list)
+ no_preemption();
+
struct proc_info* pcb = alloc_process();
if (!pcb) {
syscall_result(ENOMEM);
--- /dev/null
+#include <lunaix/kpreempt.h>
+#include <lunaix/process.h>
+#include <lunaix/switch.h>
+#include <lunaix/syslog.h>
+#include <lunaix/trace.h>
+
+LOG_MODULE("preempt");
+
+#ifdef CONFIG_CHECK_STALL
+bool
+preempt_check_stalled(struct thread* th)
+{
+ // we can't access the hart state here
+ // as th might be in another address space
+
+ if (thread_flags_test(th, TH_STALLED))
+ {
+ // already stalled, no further concern needed
+ return false;
+ }
+
+ struct thread_stats* stats;
+ stats = &th->stats;
+
+ if (!stats->kpreempt_count) {
+ return false;
+ }
+
+ if (stats->at_user) {
+ return false;
+ }
+
+#if defined(CONFIG_STALL_MAX_PREEMPTS) && CONFIG_STALL_MAX_PREEMPTS
+ if (stats->kpreempt_count > CONFIG_STALL_MAX_PREEMPTS) {
+ return true;
+ }
+#endif
+
+ ticks_t total_elapsed;
+ total_elapsed = thread_stats_kernel_elapse(th);
+
+ return total_elapsed > ticks_seconds(CONFIG_STALL_TIMEOUT);
+}
+
+#else
+bool
+preempt_check_stalled(struct thread* th)
+{
+ return false;
+}
+
+#endif
+
+void
+preempt_handle_stalled(struct signpost_result* result)
+{
+ if (!thread_flags_test(current_thread, TH_STALLED))
+ {
+ continue_switch(result);
+ return;
+ }
+
+ kprintf("+++++ ");
+ kprintf(" stalling detected (pid: %d, tid: %d)",
+ __current->pid, current_thread->tid);
+ kprintf(" (preempted: %d, elapsed: %dms)",
+ current_thread->stats.kpreempt_count,
+ thread_stats_kernel_elapse(current_thread));
+ kprintf(" are you keeping Luna too busy?");
+ kprintf("+++++ ");
+
+ kprintf("last known state:");
+ trace_dump_state(current_thread->hstate);
+
+ kprintf("trace from the point of stalling:");
+ trace_printstack_isr(current_thread->hstate);
+
+ kprintf("thread is blocked");
+
+ block_current_thread();
+ giveup_switch(result);
+}
\ No newline at end of file
set_current_executing(thread);
switch_context();
+
fail("unexpected return from switching");
}
cpu_enable_interrupt();
}
-int
+bool
can_schedule(struct thread* thread)
{
if (!thread) {
- return 0;
+ return false;
}
+ if (proc_terminated(thread)) {
+ return false;
+ }
+
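+ // let a stalled thread be scheduled once more, so the
+ // switch path gets a chance to report and block it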
+ if (preempt_check_stalled(thread)) {
+ thread_flags_set(thread, TH_STALLED);
+ return true;
+ }
+
if (unlikely(kernel_process(thread->process))) {
// a kernel process is always runnable
return thread->state == PS_READY;
if ((thread->state & PS_PAUSED)) {
return !!(sh->sig_pending & ~1);
}
+
if ((thread->state & PS_BLOCKED)) {
return sigset_test(sh->sig_pending, _SIGINT);
}
// all other threads are also SIGSTOP (as per POSIX-2008.1)
// In which case, the entire process is stopped.
thread->state = PS_STOPPED;
- return 0;
+ return false;
}
+
if (sigset_test(sh->sig_pending, _SIGCONT)) {
thread->state = PS_READY;
}
assert(sched_ctx.ptable_len && sched_ctx.ttable_len);
// Context switching is extremely sensitive! We don't want any interrupts messing up the stack order...
- cpu_disable_interrupt();
+ no_preemption();
if (!(current_thread->state & ~PS_RUNNING)) {
current_thread->state = PS_READY;
fail("unexpected return from scheduler");
}
-void
-sched_pass()
-{
- cpu_enable_interrupt();
- cpu_trap_sched();
-}
-
__DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
{
if (!seconds) {
}
wpid = wpid ? wpid : -__current->pgid;
+
repeat:
llist_for_each(proc, n, &__current->children, siblings)
{
return 0;
}
// give up the current chance to run
- sched_pass();
+ yield_current();
goto repeat;
done:
if (status) {
- *status = proc->exit_code | status_flags;
+ *status = PEXITNUM(status_flags, proc->exit_code);
}
return destroy_process(proc->pid);
}
#include <lunaix/syscall.h>
#include <lunaix/syslog.h>
#include <lunaix/mm/valloc.h>
+#include <lunaix/switch.h>
+#include <lunaix/kpreempt.h>
#include <klibc/string.h>
}
// Referenced in kernel/asm/x86/interrupt.S
-void*
-signal_dispatch()
+void
+signal_dispatch(struct signpost_result* result)
{
+ continue_switch(result);
+
if (kernel_process(__current)) {
// signal is undefined under 'kernel process'
- return 0;
+ return;
}
if (!pending_sigs(current_thread)) {
// no pending signals
- return 0;
+ return;
}
struct sigregistry* sigreg = __current->sigreg;
struct sigact* prev_working = active_signal(current_thread);
sigset_t mask = psig->sig_mask | (prev_working ? prev_working->sa_mask : 0);
- int sig_selected = 31 - clz(psig->sig_pending & ~mask);
+ int sig_selected = msbiti - clz(psig->sig_pending & ~mask);
sigset_clear(psig->sig_pending, sig_selected);
if (!sig_selected) {
// SIG0 is reserved
- return 0;
+ return;
}
struct sigact* action = sigreg->signals[sig_selected];
if (!action || !action->sa_actor) {
if (sigset_test(TERMSIG, sig_selected)) {
signal_terminate(sig_selected);
- schedule();
- // never return
+ giveup_switch(result);
}
- return 0;
+ return;
}
ptr_t ustack = current_thread->ustack_top;
ptr_t ustack_start = current_thread->ustack->start;
if ((int)(ustack - ustack_start) < (int)sizeof(struct proc_sig)) {
// no space left on the user stack for the signal context
- return 0;
+ return;
}
struct proc_sig* sigframe =
sigactive_push(current_thread, sig_selected);
- return sigframe;
+ redirect_switch(result, __ptr(sigframe));
}
static inline void must_inline
__DEFINE_LXSYSCALL(int, pause)
{
pause_current_thread();
- sched_pass();
+ yield_current();
syscall_result(EINTR);
return -1;
sigctx->sig_mask = (*mask) & ~UNMASKABLE;
pause_current_thread();
- sched_pass();
+ yield_current();
sigctx->sig_mask = tmp;
return -1;
--- /dev/null
+#include <lunaix/switch.h>
+#include <lunaix/signal.h>
+#include <lunaix/sched.h>
+#include <lunaix/process.h>
+
+extern void
+signal_dispatch(struct signpost_result* result);
+
+extern void
+preempt_handle_stalled(struct signpost_result* result);
+
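+/*
+ * Run one signposting step: a FAST result returns the (possibly
+ * redirected) stack right away; GIVEUP hands over to the scheduler
+ * and never returns; anything else falls through to the next step.
+ */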
+#define do_signpost(fn, result) \
+ do { \
+ fn((result)); \
+ if ((result)->mode == SWITCH_MODE_FAST) { \
+ thread_stats_update_leaving(); \
+ return (result)->stack; \
+ } \
+ if ((result)->mode == SWITCH_MODE_GIVEUP) { \
+ schedule(); \
+ fail("unexpected return"); \
+ } \
+ } while (0)
+
+ptr_t
+switch_signposting()
+{
+ struct signpost_result result;
+
+ do_signpost(preempt_handle_stalled, &result);
+
+ do_signpost(signal_dispatch, &result);
+
+ thread_stats_update_leaving();
+
+ return 0;
+}
\ No newline at end of file
#include <lunaix/fs/taskfs.h>
#include <lunaix/fs/twimap.h>
+#include <lunaix/fs/api.h>
#include <lunaix/mm/valloc.h>
#include <lunaix/process.h>
#include <lunaix/sched.h>
{
struct v_inode* inode = file->inode;
pid_t pid = inode->id >> 16;
- int counter = 0;
+ unsigned int counter = 0;
if ((inode->id & COUNTER_MASK)) {
return ENOTDIR;
}
+ if (fsapi_handle_pseudo_dirent(file, dctx)) {
+ return 1;
+ }
+
if (pid) {
struct task_attribute *pos, *n;
llist_for_each(pos, n, &attributes, siblings)
{
- if (counter == dctx->index) {
+ if (counter == file->f_pos) {
dctx->read_complete_callback(
dctx, pos->key_val, VFS_NAME_MAXLEN, DT_FILE);
return 1;
struct proc_info *root = get_process(pid), *pos, *n;
llist_for_each(pos, n, &root->tasks, tasks)
{
- if (counter == dctx->index) {
+ if (counter == file->f_pos) {
ksnprintf(name, VFS_NAME_MAXLEN, "%d", pos->pid);
dctx->read_complete_callback(dctx, name, VFS_NAME_MAXLEN, DT_DIR);
return 1;
return taskfs_mknod(mount_point, 0, 0, VFS_IFDIR);
}
+int
+taskfs_unmount(struct v_superblock* vsb)
+{
+ return 0;
+}
+
void
taskfs_invalidate(pid_t pid)
{
void
taskfs_init()
{
- struct filesystem* taskfs = fsm_new_fs("taskfs", 5);
- taskfs->mount = taskfs_mount;
-
- fsm_register(taskfs);
+ struct filesystem* fs;
+ fs = fsapi_fs_declare("taskfs", FSTYPE_PSEUDO);
+
+ fsapi_fs_set_mntops(fs, taskfs_mount, taskfs_unmount);
+ fsapi_fs_finalise(fs);
attr_export_table = vcalloc(ATTR_TABLE_LEN, sizeof(struct hbucket));
#include <lunaix/mm/mmap.h>
#include <lunaix/mm/page.h>
#include <lunaix/syslog.h>
+#include <lunaix/kpreempt.h>
#include <usr/lunaix/threads.h>
static ptr_t
__alloc_kernel_thread_stack(struct proc_info* proc, ptr_t vm_mnt)
{
- pfn_t kstack_top = leaf_count(KSTACK_AREA_END);
+ pfn_t kstack_top = pfn(KSTACK_AREA_END);
pfn_t kstack_end = pfn(KSTACK_AREA);
pte_t* ptep = mkptep_pn(vm_mnt, kstack_top);
while (ptep_pfn(ptep) > kstack_end) {
- ptep -= KSTACK_PAGES + 1;
+ ptep -= KSTACK_PAGES;
- pte_t pte = pte_at(ptep + 1);
+ pte_t pte = pte_at(ptep);
if (pte_isnull(pte)) {
goto found;
}
+
+ ptep--;
}
WARN("failed to create kernel stack: max stack num reach\n");
return 0;
}
- set_pte(ptep, guard_pte);
- ptep_map_leaflet(ptep + 1, mkpte_prot(KERNEL_DATA), leaflet);
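+ // the lowest slot is the guard page; the stack leaflet sits right above it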
+ set_pte(ptep++, guard_pte);
+ ptep_map_leaflet(ptep, mkpte_prot(KERNEL_DATA), leaflet);
ptep += KSTACK_PAGES;
return align_stack(ptep_va(ptep, LFT_SIZE) - 1);
pte_t* ptep = mkptep_va(vm_mnt, thread->kstack);
leaflet = pte_leaflet(*ptep);
- ptep -= KSTACK_PAGES - 1;
+ ptep -= KSTACK_PAGES;
set_pte(ptep, null_pte);
ptep_unmap_leaflet(ptep + 1, leaflet);
return NULL;
}
+void
+thread_stats_update(bool inbound, bool voluntary)
+{
+ struct thread_stats* stats;
+ time_t now;
+
+ now = clock_systime();
+ stats = &current_thread->stats;
+
+ stats->at_user = !kernel_context(current_thread->hstate);
+
+ if (!inbound) {
+ if (kernel_process(current_thread->process) ||
+ stats->at_user)
+ {
+ // exiting to user or kernel (kernel thread only), how graceful
+ stats->last_leave = now;
+ }
+ else {
+ // exiting to kernel, effectively reentry
+ stats->last_reentry = now;
+ }
+
+ stats->last_resume = now;
+ return;
+ }
+
+ stats->last_reentry = now;
+
+ if (!stats->at_user)
+ {
+ // entering from kernel, it is a kernel preempt
+ thread_stats_update_kpreempt();
+ return;
+ }
+
+ // entering from user space, a clean entrance.
+
+ if (!voluntary) {
+ stats->entry_count_invol++;
+ }
+ else {
+ stats->entry_count_vol++;
+ }
+
+ thread_stats_reset_kpreempt();
+ stats->last_entry = now;
+}
+
__DEFINE_LXSYSCALL3(int, th_create, tid_t*, tid,
struct uthread_param*, thparam, void*, entry)
{
+ no_preemption();
+
struct thread* th = create_thread(__current, true);
if (!th) {
return EAGAIN;
}
while (!proc_terminated(th)) {
- sched_pass();
+ yield_current();
}
if (val_ptr) {
*val_ptr = (void*)th->exit_val;
}
+ no_preemption();
destory_thread(th);
return 0;
--- /dev/null
+#include <lunaix/process.h>
+#include <lunaix/kpreempt.h>
+
+typedef reg_t (*syscall_fn)(reg_t p1, reg_t p2, reg_t p3, reg_t p4, reg_t p5);
+
+reg_t
+dispatch_syscall(void* syscall_fnptr,
+ reg_t p1, reg_t p2, reg_t p3, reg_t p4, reg_t p5)
+{
+ reg_t ret_val;
+
+ thread_stats_update_entering(true);
+
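+ // syscall bodies execute with kernel preemption enabled; it must
+ // be off again before returning to the trap exit path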
+ set_preemption();
+ ret_val = ((syscall_fn)syscall_fnptr)(p1, p2, p3, p4, p5);
+ no_preemption();
+
+ return ret_val;
+}
\ No newline at end of file
if (sched_ticks_counter >= sched_ticks) {
sched_ticks_counter = 0;
+ thread_stats_update_entering(false);
schedule();
}
}
b++;
}
return 0;
+}
+
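+/* Compare at most n characters of a and b for equality.
+ * Returns 1 when they match within that range, 0 otherwise. */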
+int
+strneq(const char* a, const char* b, unsigned long n)
+{
+ while (n-- && *a == *b) {
+ if (!(*a)) {
+ return 1;
+ }
+
+ a++;
+ b++;
+ }
+ return !(n + 1);
}
\ No newline at end of file
return &dest[i];
}
+/**
+ * @brief strcpy with a constraint on the number of characters.
+ * This version is smarter than the standard one: it automatically
+ * handles the null-terminator.
+ *
+ * @param dest
+ * @param src
+ * @param n
+ * @return char*
+ */
char* weak
strncpy(char* dest, const char* src, unsigned long n)
{
char c;
unsigned int i = 0;
- while ((c = src[i]) && i <= n)
+ while (i <= n && (c = src[i]))
dest[i++] = c;
- while (i <= n)
- dest[i++] = 0;
+
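+ // zero-pad the remainder, unless the source saturated the buffer,
+ // in which case the last copied byte becomes the terminator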
+ if (!(n < i && src[i - 1])) {
+ while (i <= n)
+ dest[i++] = 0;
+ }
+ else {
+ dest[i - 1] = 0;
+ }
+
return dest;
}
\ No newline at end of file
-v KIMG=build/lunaix.iso \
-v QMPORT=${hmp_port} \
-v GDB_PORT=${gdb_port} \
+ -v EXT2_TEST_DISC=machine/test_part.ext2 \
-v ARCH=${ARCH} &
QMPORT=${hmp_port} gdb build/bin/kernel.bin -ex "target remote localhost:${gdb_port}"
\ No newline at end of file
include $(mkinc_dir)/utils.mkinc
include $(mkinc_dir)/lunabuild.mkinc
+QEMU_HMP ?= 45454
ARCH ?= i386
MODE ?= debug
export ARCH
@cp -r usr/build/* $(os_img_dir)/usr
@cp -r $(kbin_dir)/* $(os_img_dir)/boot
@grub-mkrescue -o $(kimg) $(os_img_dir)\
- -- -volid "$(OS_ID) $(OS_VER)" -system_id "$(OS_NAME)" \
+ -- -volid "LUNA" -system_id "Lunaix" \
-report_about FAILURE -abort_on FAILURE
usr/build: user
@$(MAKE) -C usr clean -I $(mkinc_dir)
@$(MAKE) -f kernel.mk clean -I $(mkinc_dir)
@rm -rf $(kbuild_dir) || exit 1
- @rm -rf .builder || exit 1
\ No newline at end of file
+ @rm -rf .builder || exit 1
AR := $(CX_PREFIX)ar
LBUILD ?= $(shell realpath ./scripts/build-tools/luna_build.py)
-O := -O2
+
W := -Wall -Wextra -Werror \
-Wno-unknown-pragmas \
-Wno-unused-function \
CFLAGS := -std=gnu99 $(OFLAGS) $(W) -g
ifeq ($(MODE),debug)
- O = -Og
+ O := -Og
+else
+ O := -O2
endif
CFLAGS += $(O)
def has_config(self, name):
try:
- self.__env.lookup_value(name)
- return True
+ v = self.__env.lookup_value(name)
+ return bool(v)
except:
return False
\ No newline at end of file
import subprocess, time, os, re, argparse, json
from pathlib import PurePosixPath
+import logging
+
+logger = logging.getLogger("auto_qemu")
g_lookup = {}
d_ro = get_config(disk, "ro", default=False)
d_fmt = get_config(disk, "format", default="raw")
d_id = f"disk_{i}"
+
+ if not os.path.exists(d_img):
+ logger.warning(f"AHCI bus: {d_img} not exists, disk skipped")
+ continue
cmds += [
"-drive", join_attrs([
def get_qemu_general_opts(self):
return [
"-m", get_config(self._opt, "memory", required=True),
- "-smp", get_config(self._opt, "ncpu", default=1)
+ "-smp", str(get_config(self._opt, "ncpu", default=1))
]
def add_peripheral(self, peripheral):
qemu_path = os.path.join(qemu_dir_override, qemu_path)
cmds = [
qemu_path,
+ *self.get_qemu_general_opts(),
*self.get_qemu_arch_opts(),
*self.get_qemu_debug_opts()
]
q.start(arg_opt.qemu_dir)
if __name__ == "__main__":
- try:
- main()
- except Exception as e:
- print(e)
\ No newline at end of file
+ main()
\ No newline at end of file
{
"arch": "$ARCH",
"memory": "1G",
+ "ncpu": 1,
"machine": "q35",
"cpu": {
"type": "base",
"img": "$KIMG",
"ro": true,
"format": "raw"
+ },
+ {
+ "type": "ide-hd",
+ "img": "$EXT2_TEST_DISC",
+ "format": "raw"
}
]
},
"cat",
"stat",
"test_pthread",
- "maze"
+ "file_test",
+ "maze",
+ "mkdir",
+ "rm",
])
compile_opts([
return 1;
}
- if (!(stat.mode & F_MFILE)) {
+ if ((stat.mode & F_DIR)) {
printf("%s is a directory", argv[i]);
return 1;
}
do {
size = read(fd, buffer, BUFSIZE);
+ if (size < 0) {
+ printf("error while reading: %d\n", size);
+ break;
+ }
write(stdout, buffer, size);
} while (size == BUFSIZE);
--- /dev/null
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define BUFSIZE 4096
+
+static char buffer[BUFSIZE];
+
+#define _open(f, o) \
+ ({ \
+ int __fd = open(f, o); \
+ if (__fd < 0) { \
+ printf("open failed: %s (error: %d)", f, errno); \
+ _exit(__fd); \
+ } \
+ __fd; \
+ })
+
+int
+main(int argc, const char* argv[])
+{
+ int fd = 0, fdrand = 0;
+ int size = 0, sz2 = 0;
+
+ fd = _open(argv[1], O_RDWR | O_CREAT);
+ fdrand = _open("/dev/rand", O_RDONLY);
+
+ for (int i = 0; i < 100; i++)
+ {
+ printf("write to file: (round) {%d}/100\n", i + 1);
+
+ size = read(fdrand, buffer, BUFSIZE);
+ printf(">>> read random chars: %d\n", size);
+ for (int i = 0; i < size; i++)
+ {
+ buffer[i] = (char)((unsigned char)(buffer[i] % 94U) + 33U);
+ }
+
+ sz2 += write(fd, buffer, size);
+ sz2 += write(fd, "\n\n", 2);
+ }
+
+ close(fd);
+ close(fdrand);
+
+ return 0;
+}
\ No newline at end of file
} \
} while (0)
+#define maybe_mount(src, target, fs, opts) \
+ do { \
+ int err = 0; \
+ if ((err = mount(src, target, fs, opts))) { \
+ syslog(2, "mount fs %s to %s failed (%d)\n", fs, target, errno); \
+ } \
+ } while (0)
+
#define check(statement) \
({ \
int err = 0; \
mkdir("/dev");
mkdir("/sys");
mkdir("/task");
+ mkdir("/mnt/disk");
must_mount(NULL, "/dev", "devfs", 0);
must_mount(NULL, "/sys", "twifs", MNT_RO);
must_mount(NULL, "/task", "taskfs", MNT_RO);
+ maybe_mount("/dev/block/sdb", "/mnt/disk", "ext2", 0);
int fd = check(open("/dev/tty", 0));
pid_t pid;
int err = 0;
if (!(pid = fork())) {
-
-
err = execve(sh_argv[0], sh_argv, sh_envp);
printf("fail to execute (%d)\n", errno);
_exit(err);
struct lx_dirent* _lxd = &dir->_lxd;
int more = sys_readdir(dir->dirfd, _lxd);
+
+ if (more < 0) {
+ return NULL;
+ }
_dirent.d_type = _lxd->d_type;
strncpy(_dirent.d_name, _lxd->d_name, 256);
}
}
+ int err = errno;
+ if (err) {
+ printf("failed: %d\n",err);
+ }
+
closedir(dir);
- return 0;
+ return err;
}
\ No newline at end of file
--- /dev/null
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+
+int
+main(int argc, const char* argv[])
+{
+ if (argc != 2) {
+ printf("expect a directory name\n");
+ return 1;
+ }
+
+ int err;
+
+ err = mkdir(argv[1]);
+ if (err) {
+ printf("unable to mkdir: %d\n", errno);
+ }
+
+ return err;
+}
\ No newline at end of file
--- /dev/null
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+int
+main(int argc, const char* argv[])
+{
+ if (argc != 2) {
+ printf("expect a file name\n");
+ return 1;
+ }
+
+ int err, fd;
+ const char* path = argv[1];
+ struct file_stat stat;
+
+ fd = open(path, O_RDONLY);
+
+ if (fd < 0) {
+ printf("open failed: %s (error: %d)", path, fd);
+ return 1;
+ }
+
+ if (fstat(fd, &stat) < 0) {
+ printf("fail to get stat %d\n", errno);
+ return 1;
+ }
+
+ close(fd);
+
+ if ((stat.mode & F_DIR)) {
+ err = rmdir(path);
+ }
+ else {
+ err = unlink(path);
+ }
+
+ if (err) {
+ printf("fail to delete: %s (%d)", path, errno);
+ }
+
+ return err;
+}
\ No newline at end of file
void
sh_exec(const char** argv)
{
+ static int prev_exit;
const char* envp[] = { 0 };
char* name = argv[0];
if (!strcmp(name, "cd")) {
return;
}
+ if (!strcmp(name, "?")) {
+ printf("%d\n", prev_exit);
+ return;
+ }
+
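+ // bare command names are resolved under /usr/bin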
+ char buffer[1024];
+ strcpy(buffer, "/usr/bin/");
+ strcpy(&buffer[9], name);
+
pid_t p;
+ int res;
if (!(p = fork())) {
- if (execve(name, argv, envp)) {
+ if (execve(buffer, argv, envp)) {
sh_printerr();
}
_exit(1);
}
setpgid(p, getpgid());
- waitpid(p, NULL, 0);
+ waitpid(p, &res, 0);
+
+ prev_exit = WEXITSTATUS(res);
}
static char*
int fd = open(argv[1], FO_RDONLY | FO_NOFOLLOW);
if (fd < 0) {
- printf("fail to open %d\n", fd);
+ printf("fail to open %d\n", errno);
return 1;
}
char* ftype = "directory";
int mode = stat.mode;
- if ((mode & F_MDEV)) {
- if (!((mode & F_SEQDEV) ^ F_SEQDEV)) {
- ftype = "sequential device";
- } else if (!((mode & F_VOLDEV) ^ F_VOLDEV)) {
+ if ((mode & F_DEV)) {
+ ftype = "mappable (sequential) device";
+
+ if (!((mode & F_SVDEV) ^ F_SVDEV)) {
ftype = "volumetric device";
- } else {
- ftype = "regular device";
}
- } else if ((mode & F_MSLNK)) {
+
+ } else if ((mode & F_SYMLINK)) {
if (readlinkat(fd, NULL, buf, 256) < 0) {
printf("fail to readlink %d\n", errno);
} else {
printf("-> %s", buf);
}
ftype = "symbolic link";
- } else if ((mode & F_MFILE)) {
+ } else if (mode == F_FILE) {
ftype = "regular file";
}
printf("Inode: %d; ", stat.st_ino);
dev_t* dev;
- if (!(stat.mode & F_MDEV)) {
+ if (!(stat.mode & F_DEV)) {
dev = &stat.st_dev;
} else {
dev = &stat.st_rdev;