lunaix-os/kernel/process/sched.c

   1 #include <arch/x86/interrupts.h>
   2 #include <arch/x86/tss.h>
   3
   4 #include <hal/apic.h>
   5 #include <hal/cpu.h>
   6
   7 #include <lunaix/fs/taskfs.h>
   8 #include <lunaix/mm/cake.h>
   9 #include <lunaix/mm/kalloc.h>
  10 #include <lunaix/mm/pmm.h>
  11 #include <lunaix/mm/valloc.h>
  12 #include <lunaix/mm/vmm.h>
  13 #include <lunaix/process.h>
  14 #include <lunaix/sched.h>
  15 #include <lunaix/signal.h>
  16 #include <lunaix/spike.h>
  17 #include <lunaix/status.h>
  18 #include <lunaix/syscall.h>
  19 #include <lunaix/syslog.h>
  20
// The process the scheduler considers current. It is first pointed at
// dummy_proc by sched_init_dummy(); NOTE(review): presumably also updated by
// the context-switch code outside this file - confirm.
volatile struct proc_info* __current;

// Fallback process that schedule() runs when no regular process is ready.
static struct proc_info dummy_proc;

// NOTE(review): not referenced anywhere in this file - presumably used
// elsewhere or a leftover; confirm before touching it.
struct proc_info dummy;

// Scheduler state: the process table (_procs), the number of table slots in
// use (ptable_len) and the round-robin cursor (procs_index).
struct scheduler sched_ctx;

// Allocation pile from which proc_info objects are grabbed and released.
struct cake_pile* proc_pile;

LOG_MODULE("SCHED")
  32
  33 void
  34 sched_init_dummy();
  35
  36 void
  37 sched_init()
  38 {
  39     proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0);
  40     cake_set_constructor(proc_pile, cake_ctor_zeroing);
  41
  42     sched_ctx = (struct scheduler){ ._procs = vzalloc(PROC_TABLE_SIZE),
  43                                     .ptable_len = 0,
  44                                     .procs_index = 0 };
  45
  46     // TODO initialize dummy_proc
  47     sched_init_dummy();
  48 }
  49
// Size in bytes of the private stack the dummy process runs on.
#define DUMMY_STACK_SIZE 2048

/*
 * Build the dummy process: a kernel-mode context that starts executing at
 * my_dummy on its own static stack. schedule() falls back to it when no
 * regular process is ready. On return, __current points at dummy_proc.
 */
void
sched_init_dummy()
{
    // This surely needs to be simplified or encapsulated!
    // It is a living nightmare!

    extern void my_dummy();
    static char dummy_stack[DUMMY_STACK_SIZE] __attribute__((aligned(16)));

    // Zero the whole descriptor before filling it in.
    dummy_proc = (struct proc_info){};
    // Saved context: kernel data segments everywhere, kernel code segment,
    // entry point my_dummy. 0x0200 is the x86 EFLAGS.IF bit, so the dummy
    // runs with interrupts enabled. esp starts 20 bytes below the stack top;
    // the three words written below occupy the topmost 12 bytes.
    // NOTE(review): the remaining 8 bytes are presumably consumed by
    // switch_to - confirm against kernel/asm/x86/interrupt.S.
    dummy_proc.intr_ctx = (isr_param){
        .registers = { .ds = KDATA_SEG,
                       .es = KDATA_SEG,
                       .fs = KDATA_SEG,
                       .gs = KDATA_SEG,
                       .esp = (void*)dummy_stack + DUMMY_STACK_SIZE - 20 },
        .cs = KCODE_SEG,
        .eip = (void*)my_dummy,
        .ss = KDATA_SEG,
        .eflags = cpu_reflags() | 0x0200
    };

    // Pre-populate the top of the stack with eflags, cs and eip (from high
    // to low address), i.e. the order of a same-privilege x86 iret frame.
    *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 4]) = dummy_proc.intr_ctx.eflags;
    *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 8]) = KCODE_SEG;
    *(u32_t*)(&dummy_stack[DUMMY_STACK_SIZE - 12]) = dummy_proc.intr_ctx.eip;

    // Reuse the page directory currently loaded in CR3; the dummy is its own
    // parent and carries the kernel pid.
    dummy_proc.page_table = cpu_rcr3();
    dummy_proc.state = PS_READY;
    dummy_proc.parent = &dummy_proc;
    dummy_proc.pid = KERNEL_PID;

    __current = &dummy_proc;
}
  86
/*
 * Hand the CPU over to `proc`.
 *
 * Marks the process as running, points tss.esp0 at its saved esp, signals
 * end-of-interrupt to the APIC and jumps to switch_to with `proc` pushed on
 * the stack. The function ends in a jump, so it does not return to its
 * caller.
 */
void
run(struct proc_info* proc)
{
    proc->state = PS_RUNNING;

    /*
        Set tss.esp0 to the esp value recorded before the last reschedule.
        When a signal is handled the context is not restored; it is saved on
        the user stack and control jumps straight to sig_wrapper in user
        space to run the handler. Only when the user-defined handler returns
        does the sigreturn system call restore the context (in other words,
        perform another schedule).
        No address-space switch happens in between, so the second jump uses
        the same kernel stack, whereas tss.esp0 used to point at its very top
        all the time. That could overwrite earlier context information (for
        example with nested signal handlers).
    */
    tss_update_esp(proc->intr_ctx.registers.esp);

    apic_done_servicing();

    // Pass `proc` on the stack and jump (not call) into the assembly routine.
    asm volatile("pushl %0\n"
                 "jmp switch_to\n" ::"r"(proc)
                 : "memory"); // kernel/asm/x86/interrupt.S
}
 108
 109 int
 110 can_schedule(struct proc_info* proc)
 111 {
 112     if (__SIGTEST(proc->sig_pending, _SIGCONT)) {
 113         __SIGCLEAR(proc->sig_pending, _SIGSTOP);
 114     } else if (__SIGTEST(proc->sig_pending, _SIGSTOP)) {
 115         // 如果进程受到SIGSTOP，则该进程不给予调度。
 116         return 0;
 117     }
 118
 119     return 1;
 120 }
 121
 122 void
 123 check_sleepers()
 124 {
 125     struct proc_info* leader = sched_ctx._procs[0];
 126     struct proc_info *pos, *n;
 127     time_t now = clock_systime();
 128     llist_for_each(pos, n, &leader->sleep.sleepers, sleep.sleepers)
 129     {
 130         if (PROC_TERMINATED(pos->state)) {
 131             goto del;
 132         }
 133
 134         time_t wtime = pos->sleep.wakeup_time;
 135         time_t atime = pos->sleep.alarm_time;
 136
 137         if (wtime && now >= wtime) {
 138             pos->sleep.wakeup_time = 0;
 139             pos->state = PS_READY;
 140         }
 141
 142         if (atime && now >= atime) {
 143             pos->sleep.alarm_time = 0;
 144             __SIGSET(pos->sig_pending, _SIGALRM);
 145         }
 146
 147         if (!wtime && !atime) {
 148         del:
 149             llist_delete(&pos->sleep.sleepers);
 150         }
 151     }
 152 }
 153
 154 void
 155 schedule()
 156 {
 157     if (!sched_ctx.ptable_len) {
 158         return;
 159     }
 160
 161     // 上下文切换相当的敏感！我们不希望任何的中断打乱栈的顺序……
 162     cpu_disable_interrupt();
 163     struct proc_info* next;
 164     int prev_ptr = sched_ctx.procs_index;
 165     int ptr = prev_ptr;
 166
 167     if (!(__current->state & ~PS_RUNNING)) {
 168         __current->state = PS_READY;
 169     }
 170
 171     check_sleepers();
 172
 173     // round-robin scheduler
 174 redo:
 175     do {
 176         ptr = (ptr + 1) % sched_ctx.ptable_len;
 177         next = sched_ctx._procs[ptr];
 178     } while (!next || (next->state != PS_READY && ptr != prev_ptr));
 179
 180     sched_ctx.procs_index = ptr;
 181
 182     if (next->state != PS_READY) {
 183         // schedule the dummy process if we're out of choice
 184         next = &dummy_proc;
 185         goto done;
 186     }
 187
 188     if (!can_schedule(next)) {
 189         // 如果该进程不给予调度，则尝试重新选择
 190         goto redo;
 191     }
 192
 193 done:
 194     run(next);
 195 }
 196
/*
 * Give up the CPU from kernel code: enable interrupts, then raise the
 * LUNAIX_SCHED software interrupt.
 * NOTE(review): the handler for LUNAIX_SCHED presumably ends up in
 * schedule() - confirm in the interrupt setup code.
 */
void
sched_yieldk()
{
    cpu_enable_interrupt();
    cpu_int(LUNAIX_SCHED);
}
 203
 204 __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
 205 {
 206     if (!seconds) {
 207         return 0;
 208     }
 209
 210     if (__current->sleep.wakeup_time) {
 211         return (__current->sleep.wakeup_time - clock_systime()) / 1000U;
 212     }
 213
 214     struct proc_info* root_proc = sched_ctx._procs[0];
 215     __current->sleep.wakeup_time = clock_systime() + seconds * 1000;
 216
 217     if (llist_empty(&__current->sleep.sleepers)) {
 218         llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
 219     }
 220
 221     __current->intr_ctx.registers.eax = seconds;
 222
 223     block_current();
 224     schedule();
 225 }
 226
 227 __DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
 228 {
 229     time_t prev_ddl = __current->sleep.alarm_time;
 230     time_t now = clock_systime();
 231
 232     __current->sleep.alarm_time = seconds ? now + seconds * 1000 : 0;
 233
 234     struct proc_info* root_proc = sched_ctx._procs[0];
 235     if (llist_empty(&__current->sleep.sleepers)) {
 236         llist_append(&root_proc->sleep.sleepers, &__current->sleep.sleepers);
 237     }
 238
 239     return prev_ddl ? (prev_ddl - now) / 1000 : 0;
 240 }
 241
/*
 * exit(status): mark the calling process as terminated with `status` as its
 * exit code (see terminate_proc) and invoke the scheduler to pick another
 * process.
 */
__DEFINE_LXSYSCALL1(void, exit, int, status)
{
    terminate_proc(status);
    schedule();
}
 247
/*
 * yield(): voluntarily give up the CPU by invoking the scheduler.
 */
__DEFINE_LXSYSCALL(void, yield)
{
    schedule();
}
 252
// Shared implementation behind wait() and waitpid(); defined further below.
pid_t
_wait(pid_t wpid, int* status, int options);

/*
 * wait(status): wait for any child (wpid = -1) with no options set.
 * Returns whatever _wait() returns.
 */
__DEFINE_LXSYSCALL1(pid_t, wait, int*, status)
{
    return _wait(-1, status, 0);
}
 260
/*
 * waitpid(pid, status, options): forwards all three arguments unchanged to
 * _wait() and returns its result.
 */
__DEFINE_LXSYSCALL3(pid_t, waitpid, pid_t, pid, int*, status, int, options)
{
    return _wait(pid, status, options);
}
 265
/*
 * geterrno(): return the k_status field of the calling process, which
 * kernel code in this file sets to an error code such as EINVAL on failure.
 */
__DEFINE_LXSYSCALL(int, geterrno)
{
    return __current->k_status;
}
 270
 271 pid_t
 272 _wait(pid_t wpid, int* status, int options)
 273 {
 274     pid_t cur = __current->pid;
 275     int status_flags = 0;
 276     struct proc_info *proc, *n;
 277     if (llist_empty(&__current->children)) {
 278         return -1;
 279     }
 280
 281     wpid = wpid ? wpid : -__current->pgid;
 282 repeat:
 283     llist_for_each(proc, n, &__current->children, siblings)
 284     {
 285         if (!~wpid || proc->pid == wpid || proc->pgid == -wpid) {
 286             if (proc->state == PS_TERMNAT && !options) {
 287                 status_flags |= PEXITTERM;
 288                 goto done;
 289             }
 290             if (proc->state == PS_READY && (options & WUNTRACED)) {
 291                 status_flags |= PEXITSTOP;
 292                 goto done;
 293             }
 294         }
 295     }
 296     if ((options & WNOHANG)) {
 297         return 0;
 298     }
 299     // 放弃当前的运行机会
 300     sched_yieldk();
 301     goto repeat;
 302
 303 done:
 304     status_flags |= PEXITSIG * (proc->sig_inprogress != 0);
 305     if (status) {
 306         *status = proc->exit_code | status_flags;
 307     }
 308     return destroy_process(proc->pid);
 309 }
 310
 311 struct proc_info*
 312 alloc_process()
 313 {
 314     pid_t i = 0;
 315     for (; i < sched_ctx.ptable_len && sched_ctx._procs[i]; i++)
 316         ;
 317
 318     if (i == MAX_PROCESS) {
 319         panick("Panic in Ponyville shimmer!");
 320     }
 321
 322     if (i == sched_ctx.ptable_len) {
 323         sched_ctx.ptable_len++;
 324     }
 325
 326     struct proc_info* proc = cake_grab(proc_pile);
 327
 328     proc->state = PS_CREATED;
 329     proc->pid = i;
 330     proc->created = clock_systime();
 331     proc->pgid = proc->pid;
 332     proc->fdtable = vzalloc(sizeof(struct v_fdtable));
 333     proc->fxstate =
 334       vzalloc_dma(512); // FXSAVE需要十六位对齐地址，使用DMA块（128位对齐）
 335
 336     llist_init_head(&proc->mm.regions.head);
 337     llist_init_head(&proc->tasks);
 338     llist_init_head(&proc->children);
 339     llist_init_head(&proc->grp_member);
 340     llist_init_head(&proc->sleep.sleepers);
 341     waitq_init(&proc->waitqueue);
 342
 343     sched_ctx._procs[i] = proc;
 344
 345     return proc;
 346 }
 347
 348 void
 349 commit_process(struct proc_info* process)
 350 {
 351     assert(process == sched_ctx._procs[process->pid]);
 352
 353     if (process->state != PS_CREATED) {
 354         __current->k_status = EINVAL;
 355         return;
 356     }
 357
 358     // every process is the child of first process (pid=1)
 359     if (!process->parent) {
 360         process->parent = sched_ctx._procs[1];
 361     }
 362
 363     llist_append(&process->parent->children, &process->siblings);
 364     llist_append(&sched_ctx._procs[0]->tasks, &process->tasks);
 365
 366     process->state = PS_READY;
 367 }
 368
 369 // from <kernel/process.c>
 370 extern void
 371 __del_pagetable(pid_t pid, uintptr_t mount_point);
 372
 373 pid_t
 374 destroy_process(pid_t pid)
 375 {
 376     int index = pid;
 377     if (index <= 0 || index > sched_ctx.ptable_len) {
 378         __current->k_status = EINVAL;
 379         return;
 380     }
 381     struct proc_info* proc = sched_ctx._procs[index];
 382     sched_ctx._procs[index] = 0;
 383
 384     llist_delete(&proc->siblings);
 385     llist_delete(&proc->grp_member);
 386     llist_delete(&proc->tasks);
 387     llist_delete(&proc->sleep.sleepers);
 388
 389     taskfs_invalidate(pid);
 390
 391     if (proc->cwd) {
 392         vfs_unref_dnode(proc->cwd);
 393     }
 394
 395     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 396         struct v_fd* fd = proc->fdtable->fds[i];
 397         if (fd) {
 398             vfs_pclose(fd->file, pid);
 399             vfs_free_fd(fd);
 400         }
 401     }
 402
 403     vfree(proc->fdtable);
 404     vfree_dma(proc->fxstate);
 405
 406     struct mm_region *pos, *n;
 407     llist_for_each(pos, n, &proc->mm.regions.head, head)
 408     {
 409         vfree(pos);
 410     }
 411
 412     vmm_mount_pd(PD_MOUNT_1, proc->page_table);
 413
 414     __del_pagetable(pid, PD_MOUNT_1);
 415
 416     vmm_unmount_pd(PD_MOUNT_1);
 417
 418     cake_release(proc_pile, proc);
 419
 420     return pid;
 421 }
 422
/*
 * Mark the current process as terminated.
 *
 * Records `exit_code`, switches the state to PS_TERMNAT and flags SIGCHLD as
 * pending on the parent. Nothing is freed here; the descriptor is released
 * by destroy_process().
 */
void
terminate_proc(int exit_code)
{
    __current->state = PS_TERMNAT;
    __current->exit_code = exit_code;

    __SIGSET(__current->parent->sig_pending, _SIGCHLD);
}
 431
 432 struct proc_info*
 433 get_process(pid_t pid)
 434 {
 435     int index = pid;
 436     if (index < 0 || index > sched_ctx.ptable_len) {
 437         return NULL;
 438     }
 439     return sched_ctx._procs[index];
 440 }
 441
 442 int
 443 orphaned_proc(pid_t pid)
 444 {
 445     if (!pid)
 446         return 0;
 447     if (pid >= sched_ctx.ptable_len)
 448         return 0;
 449     struct proc_info* proc = sched_ctx._procs[pid];
 450     struct proc_info* parent = proc->parent;
 451
 452     // 如果其父进程的状态是terminated 或 destroy中的一种
 453     // 或者其父进程是在该进程之后创建的，那么该进程为孤儿进程
 454     return PROC_TERMINATED(parent->state) || parent->created > proc->created;
 455 }