lunaix-os/kernel/process/sched.c

   1 #include <sys/abi.h>
   2 #include <sys/mm/mempart.h>
   3
   4 #include <hal/intc.h>
   5 #include <sys/cpu.h>
   6
   7 #include <lunaix/fs/taskfs.h>
   8 #include <lunaix/mm/cake.h>
   9 #include <lunaix/mm/mmap.h>
  10 #include <lunaix/mm/pmm.h>
  11 #include <lunaix/mm/valloc.h>
  12 #include <lunaix/mm/vmm.h>
  13 #include <lunaix/mm/procvm.h>
  14 #include <lunaix/process.h>
  15 #include <lunaix/sched.h>
  16 #include <lunaix/signal.h>
  17 #include <lunaix/spike.h>
  18 #include <lunaix/status.h>
  19 #include <lunaix/syscall.h>
  20 #include <lunaix/syslog.h>
  21 #include <lunaix/pcontext.h>
  22 #include <lunaix/kpreempt.h>
  23
  24 #include <klibc/string.h>
  25
  26 struct thread empty_thread_obj;
  27
  28 volatile struct proc_info* __current;
  29 volatile struct thread* current_thread = &empty_thread_obj;
  30
  31 struct scheduler sched_ctx;
  32
  33 struct cake_pile *proc_pile ,*thread_pile;
  34
  35 LOG_MODULE("SCHED")
  36
  37 void
  38 sched_init()
  39 {
  40     proc_pile = cake_new_pile("proc", sizeof(struct proc_info), 1, 0);
  41     thread_pile = cake_new_pile("thread", sizeof(struct thread), 1, 0);
  42     cake_set_constructor(proc_pile, cake_ctor_zeroing);
  43     cake_set_constructor(thread_pile, cake_ctor_zeroing);
  44
  45     sched_ctx = (struct scheduler){
  46         .procs = vzalloc(PROC_TABLE_SIZE), .ptable_len = 0, .procs_index = 0};
  47
  48     llist_init_head(&sched_ctx.sleepers);
  49 }
  50
  51 void
  52 run(struct thread* thread)
  53 {
  54     thread->state = PS_RUNNING;
  55     thread->process->state = PS_RUNNING;
  56     thread->process->th_active = thread;
  57
  58     procvm_mount_self(vmspace(thread->process));
  59     set_current_executing(thread);
  60
  61     switch_context();
  62     fail("unexpected return from switching");
  63 }
  64
/*
    Currently, we do not allow a thread to destroy itself: doing
    so would eliminate the current kernel stack, which is a disaster.
    A compromise solution is to perform a regular scan and
    clean-up of these threads in the preemptible kernel thread.
*/
  71
  72 void _preemptible
  73 cleanup_detached_threads() {
  74     ensure_preempt_caller();
  75
  76     // XXX may be a lock on sched_context will ben the most appropriate?
  77     cpu_disable_interrupt();
  78
  79     int i = 0;
  80     struct thread *pos, *n;
  81     llist_for_each(pos, n, sched_ctx.threads, sched_sibs) {
  82         if (likely(!proc_terminated(pos) || !thread_detached(pos))) {
  83             continue;
  84         }
  85
  86         struct proc_mm* mm = vmspace(pos->process);
  87
  88         procvm_mount(mm);
  89         destory_thread(pos);
  90         procvm_unmount(mm);
  91
  92         i++;
  93     }
  94
  95     if (i) {
  96         INFO("cleaned %d terminated detached thread(s)", i);
  97     }
  98
  99     cpu_enable_interrupt();
 100 }
 101
 102 int
 103 can_schedule(struct thread* thread)
 104 {
 105     if (!thread) {
 106         return 0;
 107     }
 108
 109     if (unlikely(kernel_process(thread->process))) {
 110         // a kernel process is always runnable
 111         return thread->state == PS_READY;
 112     }
 113
 114     struct sigctx* sh = &thread->sigctx;
 115
 116     if ((thread->state & PS_PAUSED)) {
 117         return !!(sh->sig_pending & ~1);
 118     }
 119     if ((thread->state & PS_BLOCKED)) {
 120         return sigset_test(sh->sig_pending, _SIGINT);
 121     }
 122
 123     if (sigset_test(sh->sig_pending, _SIGSTOP)) {
 124         // If one thread is experiencing SIGSTOP, then we know
 125         // all other threads are also SIGSTOP (as per POSIX-2008.1)
 126         // In which case, the entire process is stopped.
 127         thread->state = PS_STOPPED;
 128         return 0;
 129     }
 130     if (sigset_test(sh->sig_pending, _SIGCONT)) {
 131         thread->state = PS_READY;
 132     }
 133
 134     return (thread->state == PS_READY) \
 135             && proc_runnable(thread->process);
 136 }
 137
 138 void
 139 check_sleepers()
 140 {
 141     struct thread *pos, *n;
 142     time_t now = clock_systime() / 1000;
 143
 144     llist_for_each(pos, n, &sched_ctx.sleepers, sleep.sleepers)
 145     {
 146         if (proc_terminated(pos)) {
 147             goto del;
 148         }
 149
 150         time_t wtime = pos->sleep.wakeup_time;
 151         time_t atime = pos->sleep.alarm_time;
 152
 153         if (wtime && now >= wtime) {
 154             pos->sleep.wakeup_time = 0;
 155             pos->state = PS_READY;
 156         }
 157
 158         if (atime && now >= atime) {
 159             pos->sleep.alarm_time = 0;
 160             thread_setsignal(pos, _SIGALRM);
 161         }
 162
 163         if (!wtime && !atime) {
 164         del:
 165             llist_delete(&pos->sleep.sleepers);
 166         }
 167     }
 168 }
 169
 170 void
 171 schedule()
 172 {
 173     assert(sched_ctx.ptable_len && sched_ctx.ttable_len);
 174
 175     // 上下文切换相当的敏感！我们不希望任何的中断打乱栈的顺序……
 176     cpu_disable_interrupt();
 177
 178     if (!(current_thread->state & ~PS_RUNNING)) {
 179         current_thread->state = PS_READY;
 180         __current->state = PS_READY;
 181
 182     }
 183
 184     procvm_unmount_self(vmspace(__current));
 185     check_sleepers();
 186
 187     // round-robin scheduler
 188
 189     struct thread* current = current_thread;
 190     struct thread* to_check = current;
 191
 192     do {
 193         to_check = list_next(to_check, struct thread, sched_sibs);
 194
 195         if (can_schedule(to_check)) {
 196             break;
 197         }
 198
 199         if (to_check == current) {
 200             // FIXME do something less leathal here
 201             fail("Ran out of threads!")
 202             goto done;
 203         }
 204
 205     } while (1);
 206
 207     sched_ctx.procs_index = to_check->process->pid;
 208
 209 done:
 210     intc_notify_eos(0);
 211     run(to_check);
 212
 213     fail("unexpected return from scheduler");
 214 }
 215
 216 void
 217 sched_pass()
 218 {
 219     cpu_enable_interrupt();
 220     cpu_trap_sched();
 221 }
 222
 223 __DEFINE_LXSYSCALL1(unsigned int, sleep, unsigned int, seconds)
 224 {
 225     if (!seconds) {
 226         return 0;
 227     }
 228
 229     time_t systime = clock_systime() / 1000;
 230     struct haybed* bed = &current_thread->sleep;
 231
 232     if (bed->wakeup_time) {
 233         return (bed->wakeup_time - systime);
 234     }
 235
 236     bed->wakeup_time = systime + seconds;
 237
 238     if (llist_empty(&bed->sleepers)) {
 239         llist_append(&sched_ctx.sleepers, &bed->sleepers);
 240     }
 241
 242     store_retval(seconds);
 243
 244     block_current_thread();
 245     schedule();
 246
 247     return 0;
 248 }
 249
 250 __DEFINE_LXSYSCALL1(unsigned int, alarm, unsigned int, seconds)
 251 {
 252     struct haybed* bed = &current_thread->sleep;
 253     time_t prev_ddl = bed->alarm_time;
 254     time_t now = clock_systime() / 1000;
 255
 256     bed->alarm_time = seconds ? now + seconds : 0;
 257
 258     struct proc_info* root_proc = sched_ctx.procs[0];
 259     if (llist_empty(&bed->sleepers)) {
 260         llist_append(&sched_ctx.sleepers, &bed->sleepers);
 261     }
 262
 263     return prev_ddl ? (prev_ddl - now) : 0;
 264 }
 265
 266 __DEFINE_LXSYSCALL1(void, exit, int, status)
 267 {
 268     terminate_current(status);
 269     schedule();
 270 }
 271
 272 __DEFINE_LXSYSCALL(void, yield)
 273 {
 274     schedule();
 275 }
 276
 277 pid_t
 278 _wait(pid_t wpid, int* status, int options);
 279
 280 __DEFINE_LXSYSCALL1(pid_t, wait, int*, status)
 281 {
 282     return _wait(-1, status, 0);
 283 }
 284
 285 __DEFINE_LXSYSCALL3(pid_t, waitpid, pid_t, pid, int*, status, int, options)
 286 {
 287     return _wait(pid, status, options);
 288 }
 289
 290 __DEFINE_LXSYSCALL(int, geterrno)
 291 {
 292     return current_thread->syscall_ret;
 293 }
 294
 295 pid_t
 296 _wait(pid_t wpid, int* status, int options)
 297 {
 298     pid_t cur = __current->pid;
 299     int status_flags = 0;
 300     struct proc_info *proc, *n;
 301     if (llist_empty(&__current->children)) {
 302         return -1;
 303     }
 304
 305     wpid = wpid ? wpid : -__current->pgid;
 306 repeat:
 307     llist_for_each(proc, n, &__current->children, siblings)
 308     {
 309         if (!~wpid || proc->pid == wpid || proc->pgid == -wpid) {
 310             if (proc->state == PS_TERMNAT && !options) {
 311                 status_flags |= PEXITTERM;
 312                 goto done;
 313             }
 314             if (proc->state == PS_READY && (options & WUNTRACED)) {
 315                 status_flags |= PEXITSTOP;
 316                 goto done;
 317             }
 318         }
 319     }
 320     if ((options & WNOHANG)) {
 321         return 0;
 322     }
 323     // 放弃当前的运行机会
 324     sched_pass();
 325     goto repeat;
 326
 327 done:
 328     if (status) {
 329         *status = proc->exit_code | status_flags;
 330     }
 331     return destroy_process(proc->pid);
 332 }
 333
 334 static inline pid_t
 335 get_free_pid() {
 336     pid_t i = 0;
 337
 338     for (; i < sched_ctx.ptable_len && sched_ctx.procs[i]; i++)
 339         ;
 340
 341     if (unlikely(i == MAX_PROCESS)) {
 342         panick("Panic in Ponyville shimmer!");
 343     }
 344
 345     return i;
 346 }
 347
 348 struct thread*
 349 alloc_thread(struct proc_info* process) {
 350     if (process->thread_count >= MAX_THREAD_PP) {
 351         return NULL;
 352     }
 353
 354     struct thread* th = cake_grab(thread_pile);
 355
 356     th->process = process;
 357     th->created = clock_systime();
 358
 359     // FIXME we need a better tid allocation method!
 360     th->tid = th->created;
 361     th->tid = (th->created ^ ((ptr_t)th)) % MAX_THREAD_PP;
 362
 363     th->state = PS_CREATED;
 364
 365     llist_init_head(&th->sleep.sleepers);
 366     llist_init_head(&th->sched_sibs);
 367     llist_init_head(&th->proc_sibs);
 368     waitq_init(&th->waitqueue);
 369
 370     return th;
 371 }
 372
 373 struct proc_info*
 374 alloc_process()
 375 {
 376     pid_t i = get_free_pid();
 377
 378     if (i == sched_ctx.ptable_len) {
 379         sched_ctx.ptable_len++;
 380     }
 381
 382     struct proc_info* proc = cake_grab(proc_pile);
 383     if (!proc) {
 384         return NULL;
 385     }
 386
 387     proc->state = PS_CREATED;
 388     proc->pid = i;
 389     proc->created = clock_systime();
 390     proc->pgid = proc->pid;
 391
 392     proc->sigreg = vzalloc(sizeof(struct sigregister));
 393     proc->fdtable = vzalloc(sizeof(struct v_fdtable));
 394
 395     proc->mm = procvm_create(proc);
 396
 397     llist_init_head(&proc->tasks);
 398     llist_init_head(&proc->children);
 399     llist_init_head(&proc->grp_member);
 400     llist_init_head(&proc->threads);
 401
 402     iopoll_init(&proc->pollctx);
 403
 404     sched_ctx.procs[i] = proc;
 405
 406     return proc;
 407 }
 408
 409 void
 410 commit_thread(struct thread* thread) {
 411     struct proc_info* process = thread->process;
 412
 413     assert(process && !proc_terminated(process));
 414
 415     llist_append(&process->threads, &thread->proc_sibs);
 416
 417     if (sched_ctx.threads) {
 418         llist_append(sched_ctx.threads, &thread->sched_sibs);
 419     } else {
 420         sched_ctx.threads = &thread->sched_sibs;
 421     }
 422
 423     sched_ctx.ttable_len++;
 424     process->thread_count++;
 425     thread->state = PS_READY;
 426 }
 427
 428 void
 429 commit_process(struct proc_info* process)
 430 {
 431     assert(process == sched_ctx.procs[process->pid]);
 432     assert(process->state == PS_CREATED);
 433
 434     // every process is the child of first process (pid=1)
 435     if (!process->parent) {
 436         if (likely(!kernel_process(process))) {
 437             process->parent = sched_ctx.procs[1];
 438         } else {
 439             process->parent = process;
 440         }
 441     } else {
 442         assert(!proc_terminated(process->parent));
 443     }
 444
 445     if (sched_ctx.proc_list) {
 446         llist_append(sched_ctx.proc_list, &process->tasks);
 447     } else {
 448         sched_ctx.proc_list = &process->tasks;
 449     }
 450
 451     llist_append(&process->parent->children, &process->siblings);
 452
 453     process->state = PS_READY;
 454 }
 455
 456 void
 457 destory_thread(struct thread* thread)
 458 {
 459     cake_ensure_valid(thread);
 460
 461     struct proc_info* proc = thread->process;
 462
 463     llist_delete(&thread->sched_sibs);
 464     llist_delete(&thread->proc_sibs);
 465     llist_delete(&thread->sleep.sleepers);
 466     waitq_cancel_wait(&thread->waitqueue);
 467
 468     thread_release_mem(thread);
 469
 470     proc->thread_count--;
 471     sched_ctx.ttable_len--;
 472
 473     cake_release(thread_pile, thread);
 474 }
 475
 476 void
 477 delete_process(struct proc_info* proc)
 478 {
 479     pid_t pid = proc->pid;
 480     struct proc_mm* mm = vmspace(proc);
 481
 482     assert(pid);    // long live the pid0 !!
 483
 484     sched_ctx.procs[pid] = NULL;
 485
 486     llist_delete(&proc->siblings);
 487     llist_delete(&proc->grp_member);
 488     llist_delete(&proc->tasks);
 489
 490     iopoll_free(proc);
 491
 492     taskfs_invalidate(pid);
 493
 494     if (proc->cwd) {
 495         vfs_unref_dnode(proc->cwd);
 496     }
 497
 498     if (proc->cmd) {
 499         vfree(proc->cmd);
 500     }
 501
 502     for (size_t i = 0; i < VFS_MAX_FD; i++) {
 503         struct v_fd* fd = proc->fdtable->fds[i];
 504         if (fd) {
 505             vfs_pclose(fd->file, pid);
 506             vfs_free_fd(fd);
 507         }
 508     }
 509
 510     vfree(proc->fdtable);
 511
 512     signal_free_registers(proc->sigreg);
 513
 514     procvm_mount(mm);
 515
 516     struct thread *pos, *n;
 517     llist_for_each(pos, n, &proc->threads, proc_sibs) {
 518         // terminate and destory all thread unconditionally
 519         destory_thread(pos);
 520     }
 521
 522     procvm_unmount_release(mm);
 523
 524     cake_release(proc_pile, proc);
 525 }
 526
 527 pid_t
 528 destroy_process(pid_t pid)
 529 {
 530     int index = pid;
 531     if (index <= 0 || index > sched_ctx.ptable_len) {
 532         syscall_result(EINVAL);
 533         return -1;
 534     }
 535
 536     struct proc_info* proc = sched_ctx.procs[index];
 537     delete_process(proc);
 538
 539     return pid;
 540 }
 541
 542 static void
 543 terminate_proc_only(struct proc_info* proc, int exit_code) {
 544     proc->state = PS_TERMNAT;
 545     proc->exit_code = exit_code;
 546
 547     proc_setsignal(proc->parent, _SIGCHLD);
 548 }
 549
 550 void
 551 terminate_thread(struct thread* thread, ptr_t val) {
 552     thread->exit_val = val;
 553     thread->state = PS_TERMNAT;
 554
 555     struct proc_info* proc = thread->process;
 556     if (proc->thread_count == 1) {
 557         terminate_proc_only(thread->process, 0);
 558     }
 559 }
 560
 561 void
 562 terminate_current_thread(ptr_t val) {
 563     terminate_thread(current_thread, val);
 564 }
 565
 566 void
 567 terminate_proccess(struct proc_info* proc, int exit_code) {
 568     assert(!kernel_process(proc));
 569
 570     if (proc->pid == 1) {
 571         panick("Attempt to kill init");
 572     }
 573
 574     terminate_proc_only(proc, exit_code);
 575
 576     struct thread *pos, *n;
 577     llist_for_each(pos, n, &__current->threads, proc_sibs) {
 578         pos->state = PS_TERMNAT;
 579     }
 580 }
 581
 582 void
 583 terminate_current(int exit_code)
 584 {
 585     terminate_proccess(__current, exit_code);
 586 }
 587
 588 struct proc_info*
 589 get_process(pid_t pid)
 590 {
 591     int index = pid;
 592     if (index < 0 || index > sched_ctx.ptable_len) {
 593         return NULL;
 594     }
 595     return sched_ctx.procs[index];
 596 }
 597
 598 int
 599 orphaned_proc(pid_t pid)
 600 {
 601     if (!pid)
 602         return 0;
 603     if (pid >= sched_ctx.ptable_len)
 604         return 0;
 605     struct proc_info* proc = sched_ctx.procs[pid];
 606     struct proc_info* parent = proc->parent;
 607
 608     // 如果其父进程的状态是terminated 或 destroy中的一种
 609     // 或者其父进程是在该进程之后创建的，那么该进程为孤儿进程
 610     return proc_terminated(parent) || parent->created > proc->created;
 611 }