Let's take this slowly~~
Timers and signals are supported.
Flowchart:
Source:
void schedule(void) { int i,next,c; struct task_struct ** p; /* check alarm, wake up any interruptible tasks that have got a signal */ for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) { if ((*p)->alarm && (*p)->alarm < jiffies) { (*p)->signal |= (1<<(SIGALRM-1)); (*p)->alarm = 0; } if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) && (*p)->state==TASK_INTERRUPTIBLE) (*p)->state=TASK_RUNNING; } /* this is the scheduler proper: */ while (1) { c = -1; next = 0; i = NR_TASKS; p = &task[NR_TASKS]; while (--i) { if (!*--p) continue; if ((*p)->state == TASK_RUNNING && (*p)->counter > c) c = (*p)->counter, next = i; } if (c) break; for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) (*p)->counter = ((*p)->counter >> 1) + (*p)->priority; } switch_to(next); }
Recalculating the time slice:
time = oldtime / 2 + priority
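This recurrence has a fixed point: c = c/2 + p settles at c = 2p, so a task that keeps sleeping sees its counter grow toward twice its priority and no further. A standalone sketch (plain user-space C; priority 15 follows the v0.01 init task's default, an assumption worth checking against the source):

#include <stdio.h>

/* Iterate counter = counter/2 + priority and watch it converge.
 * The fixed point of c = c/2 + p is c = 2p. */
int main(void)
{
    int counter = 0, priority = 15;

    for (int i = 0; i < 6; i++) {
        counter = (counter >> 1) + priority;
        printf("round %d: counter = %d\n", i, counter);
    }
    return 0;   /* prints 15, 22, 26, 28, 29, 29 */
}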
1. pause: give up the CPU for now; the wait is interruptible.
int sys_pause(void)
{
    current->state = TASK_INTERRUPTIBLE;
    schedule();
    return 0;
}
2. sleep_on: uninterruptible sleep, waiting on a resource.

void sleep_on(struct task_struct **p)
{
    struct task_struct *tmp;

    if (!p)
        return;
    if (current == &(init_task.task))
        panic("task[0] trying to sleep");
    tmp = *p;
    *p = current;
    current->state = TASK_UNINTERRUPTIBLE;
    schedule();
    if (tmp)
        tmp->state=0;
}

3. Interruptible sleep, mostly used when several processes wait for the same resource; the sleepers can form a wait queue. (Each sleeper saves the old queue head in its on-stack tmp and installs itself as the new head, so the waiters form an implicit LIFO chain through their kernel stacks, each one waking its predecessor.)
void interruptible_sleep_on(struct task_struct **p)
{
    struct task_struct *tmp;

    if (!p)
        return;
    if (current == &(init_task.task))
        panic("task[0] trying to sleep");
    tmp=*p;
    *p=current;
repeat: current->state = TASK_INTERRUPTIBLE;
    schedule();
    if (*p && *p != current) {
        (**p).state=0;
        goto repeat;
    }
    *p=NULL;
    if (tmp)
        tmp->state=0;
}
4. wake_up: wakes the task at the head of the wait queue.

void wake_up(struct task_struct **p)
{
    if (p && *p) {
        (**p).state=0;
        *p=NULL;
    }
}
5. nice: lowers the priority, making the process more willing to yield the CPU. The increment must be smaller than the current priority value, otherwise the call has no effect.
int sys_nice(long increment)
{
    if (current->priority-increment>0)
        current->priority -= increment;
    return 0;
}
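For comparison, the user-space side of this call is just the nice() wrapper. A hedged sketch (the modern libc idiom, not v0.01-era code): since nice() may legitimately return -1, errno has to be cleared first to detect errors:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    errno = 0;
    int ret = nice(5);               /* ask to give up 5 units of priority */
    if (ret == -1 && errno != 0)
        perror("nice");
    else
        printf("nice value is now %d\n", ret);
    return 0;
}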
reschedule:
    pushl $ret_from_sys_call
    jmp _schedule
_system_call:
    cmpl $nr_system_calls-1,%eax    # validate the system-call number
    ja bad_sys_call
    ...                             # push the registers
    call _sys_call_table(,%eax,4)   # dispatch to the matching handler
    pushl %eax
    movl _current,%eax              # fetch the current task_struct pointer
    cmpl $0,state(%eax)             # not TASK_RUNNING? go reschedule
    jne reschedule
    cmpl $0,counter(%eax)           # time slice just ran out? go reschedule
    je reschedule
The array of process pointers: all processes live in one fixed-size array, so the maximum number of processes the system supports is fixed.
Processes are also chained into lists through pointers between them.
struct task_struct * task[NR_TASKS] = {&(init_task.task), };
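A fixed table also dictates how processes are created: fork has to scan task[] for an empty slot and fails when none is left. A hedged sketch of that scan, in kernel context with simplified names (NR_TASKS was 64 in v0.01):

/* find a free slot in the task table, or fail with -EAGAIN */
int find_free_slot(void)
{
    int i;

    for (i = 1; i < NR_TASKS; i++)   /* slot 0 is init_task */
        if (!task[i])
            return i;
    return -EAGAIN;                  /* table full: no more processes */
}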
Timed operations are supported; for example, select can take a maximum wait time.
void schedule(void) { int i,next,c; struct task_struct ** p; /* check alarm, wake up any interruptible tasks that have got a signal */ for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) { if ((*p)->timeout && (*p)->timeout < jiffies) { (*p)->timeout = 0; if ((*p)->state == TASK_INTERRUPTIBLE) (*p)->state = TASK_RUNNING; } if ((*p)->alarm && (*p)->alarm < jiffies) { (*p)->signal |= (1<<(SIGALRM-1)); (*p)->alarm = 0; } if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) && (*p)->state==TASK_INTERRUPTIBLE) (*p)->state=TASK_RUNNING; } /* this is the scheduler proper: */ while (1) { c = -1; next = 0; i = NR_TASKS; p = &task[NR_TASKS]; while (--i) { if (!*--p) continue; if ((*p)->state == TASK_RUNNING && (*p)->counter > c) c = (*p)->counter, next = i; } if (c) break; for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) (*p)->counter = ((*p)->counter >> 1) + (*p)->priority; } switch_to(next); }
1. Added __sleep_on; both the uninterruptible and the interruptible sleep now call it directly:
static inline void __sleep_on(struct task_struct **p, int state) { struct task_struct *tmp; if (!p) return; if (current == &(init_task.task)) panic("task[0] trying to sleep"); tmp = *p; *p = current; current->state = state; repeat: schedule(); if (*p && *p != current) { (**p).state = 0; current->state = TASK_UNINTERRUPTIBLE; goto repeat; } if (!*p) printk("Warning: *P = NULL\n\r"); if (*p = tmp) tmp->state=0; } void interruptible_sleep_on(struct task_struct **p) { __sleep_on(p,TASK_INTERRUPTIBLE); } void sleep_on(struct task_struct **p) { __sleep_on(p,TASK_UNINTERRUPTIBLE); }
void wake_up(struct task_struct **p) { if (p && *p) { if ((**p).state == TASK_STOPPED) printk("wake_up: TASK_STOPPED"); if ((**p).state == TASK_ZOMBIE) printk("wake_up: TASK_ZOMBIE"); (**p).state=0; } }
Only one small change, to the signal blocking bits:
if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
    (*p)->state==TASK_INTERRUPTIBLE)
    (*p)->state=TASK_RUNNING;

==>

if (((*p)->signal & ~(*p)->blocked) &&
    (*p)->state==TASK_INTERRUPTIBLE)
    (*p)->state=TASK_RUNNING;
This is because v0.95's sys_ssetmask already strips SIGKILL and SIGSTOP from the mask:
int sys_ssetmask(int newmask)
{
    int old=current->blocked;

    current->blocked = newmask & ~(1<<(SIGKILL-1)) & ~(1<<(SIGSTOP-1));
    return old;
}
1. While temporarily yielding the CPU, pause now blocks every signal whose handler is SIG_IGN:
int sys_pause(void) { unsigned long old_blocked; unsigned long mask; struct sigaction * sa = current->sigaction; old_blocked = current->blocked; for (mask=1 ; mask ; sa++,mask += mask) if (sa->sa_handler == SIG_IGN) current->blocked |= mask; current->state = TASK_INTERRUPTIBLE; schedule(); current->blocked = old_blocked; return -EINTR; }
2. __sleep_on now saves EFLAGS and sleeps with interrupts explicitly enabled:

static inline void __sleep_on(struct task_struct **p, int state)
{
    struct task_struct *tmp;
    unsigned int flags;

    if (!p)
        return;
    if (current == &(init_task.task))
        panic("task[0] trying to sleep");
    __asm__("pushfl ; popl %0":"=r" (flags));
    tmp = *p;
    *p = current;
    current->state = state;
/* make sure interrupts are enabled: there should be no more races here */
    sti();
repeat: schedule();
    if (*p && *p != current) {
        current->state = TASK_UNINTERRUPTIBLE;
        (**p).state = 0;
        goto repeat;
    }
    if (!*p)
        printk("Warning: *P = NULL\n\r");
    if (*p = tmp)
        tmp->state=0;
    __asm__("pushl %0 ; popfl"::"r" (flags));
}

3. nice now handles an increment larger than the current priority value: the priority simply drops to 1:
int sys_nice(long increment)
{
    if (increment < 0 && !suser())
        return -EPERM;
    if (increment > current->priority)
        increment = current->priority-1;
    current->priority -= increment;
    return 0;
}
Fixed a bug in the timeout handling: the old code cleared timeout even when the task was not in interruptible sleep, silently losing the timeout for tasks in other states:
if ((*p)->timeout && (*p)->timeout < jiffies) {
    (*p)->timeout = 0;
    if ((*p)->state == TASK_INTERRUPTIBLE)
        (*p)->state = TASK_RUNNING;
}
==>
if ((*p)->timeout && (*p)->timeout < jiffies)
    if ((*p)->state == TASK_INTERRUPTIBLE) {
        (*p)->timeout = 0;
        (*p)->state = TASK_RUNNING;
    }
1. next_wait was added to task_struct and is now used to maintain the sleep wait list; zombie and stopped tasks are no longer woken.
void wake_up(struct task_struct **p) { struct task_struct * wakeup_ptr, * tmp; if (p && *p) { wakeup_ptr = *p; *p = NULL; while (wakeup_ptr && wakeup_ptr != task[0]) { if (wakeup_ptr->state == TASK_STOPPED) printk("wake_up: TASK_STOPPED\n"); else if (wakeup_ptr->state == TASK_ZOMBIE) printk("wake_up: TASK_ZOMBIE\n"); else wakeup_ptr->state = TASK_RUNNING; tmp = wakeup_ptr->next_wait; wakeup_ptr->next_wait = task[0]; wakeup_ptr = tmp; } } } static inline void __sleep_on(struct task_struct **p, int state) { unsigned int flags; if (!p) return; if (current == task[0]) panic("task[0] trying to sleep"); __asm__("pushfl ; popl %0":"=r" (flags)); current->next_wait = *p; task[0]->next_wait = NULL; *p = current; current->state = state; sti(); schedule(); if (current->next_wait != task[0]) wake_up(p); current->next_wait = NULL; __asm__("pushl %0 ; popfl"::"r" (flags)); }
No relevant changes.
A high-priority preemption feature was added:
When a newly woken process has higher priority (a larger counter) than the current one, reschedule.
Implementation:
1. schedule() merely gains one statement at the very start:
need_resched = 0;
void wake_up(struct task_struct **p)
{
    struct task_struct * wakeup_ptr, * tmp;

    if (p && *p) {
        wakeup_ptr = *p;
        *p = NULL;
        while (wakeup_ptr && wakeup_ptr != task[0]) {
            if (wakeup_ptr->state == TASK_ZOMBIE)
                printk("wake_up: TASK_ZOMBIE\n");
            else if (wakeup_ptr->state != TASK_STOPPED) {
                wakeup_ptr->state = TASK_RUNNING;
                if (wakeup_ptr->counter > current->counter)
                    need_resched = 1;
            }
            tmp = wakeup_ptr->next_wait;
            wakeup_ptr->next_wait = task[0];
            wakeup_ptr = tmp;
        }
    }
}
reschedule:
    pushl $ret_from_sys_call
    jmp _schedule
    .align 2
_system_call:
    pushl %eax                  # save orig_eax
    SAVE_ALL
    cmpl _NR_syscalls,%eax
    jae bad_sys_call
    call _sys_call_table(,%eax,4)
    movl %eax,EAX(%esp)         # save the return value
ret_from_sys_call:
    cmpw $0x0f,CS(%esp)         # was old code segment supervisor ?
    jne 2f
    cmpw $0x17,OLDSS(%esp)      # was stack segment = 0x17 ?
    jne 2f
1:  movl _current,%eax
    cmpl _task,%eax             # task[0] cannot have signals
    je 2f
    cmpl $0,_need_resched       # need_resched set? go reschedule
    jne reschedule
    cmpl $0,state(%eax)         # not running? go reschedule
    jne reschedule
    cmpl $0,counter(%eax)       # time slice used up? go reschedule
    je reschedule
1. The nice interface was changed to guarantee the priority can never be driven to 0:
int sys_nice(long increment)
{
    if (increment < 0 && !suser())
        return -EPERM;
    if (increment >= current->priority)   /* previously: increment > current->priority */
        increment = current->priority-1;
    current->priority -= increment;
    return 0;
}
1:  movl _current,%eax
    cmpl _task,%eax             # task[0] cannot have signals
    je 2f
    cmpl $0,_need_resched
    jne reschedule
    cmpl $0,state(%eax)         # state
    jne reschedule
    cmpl $0,counter(%eax)       # counter
    je reschedule
The timer handling was moved out of schedule(); do_timer takes over that work.
void schedule(void) { int i,next,c; struct task_struct ** p; /* check alarm, wake up any interruptible tasks that have got a signal */ need_resched = 0; for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) { if ((*p)->timeout && (*p)->timeout < jiffies) if ((*p)->state == TASK_INTERRUPTIBLE) { (*p)->timeout = 0; (*p)->state = TASK_RUNNING; } if (((*p)->signal & ~(*p)->blocked) && (*p)->state==TASK_INTERRUPTIBLE) (*p)->state=TASK_RUNNING; } /* this is the scheduler proper: */ while (1) { c = -1; next = 0; i = NR_TASKS; p = &task[NR_TASKS]; while (--i) { if (!*--p) continue; if ((*p)->state == TASK_RUNNING && (*p)->counter > c) c = (*p)->counter, next = i; } if (c) break; for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) if (*p) (*p)->counter = ((*p)->counter >> 1) + (*p)->priority; } switch_to(next); }
No relevant changes.
1. The new schedule():
void schedule(void)
{
    int i,next,c;
    struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

    need_resched = 0;
    for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
        if (*p) {
            if ((*p)->timeout && (*p)->timeout < jiffies)
                if ((*p)->state == TASK_INTERRUPTIBLE) {
                    (*p)->timeout = 0;
                    wake_one_task(*p);    /* previously this set (*p)->state directly */
                }
            if (((*p)->signal & ~(*p)->blocked) &&
                (*p)->state==TASK_INTERRUPTIBLE)
                wake_one_task(*p);        /* previously this set (*p)->state directly */
        }

/* this is the scheduler proper: */

    while (1) {
        c = -1;
        next = 0;
        i = NR_TASKS;
        p = &task[NR_TASKS];
        while (--i) {
            if (!*--p)
                continue;
            if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
                c = (*p)->counter, next = i;
        }
        if (c)
            break;
        for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
            if (*p)
                (*p)->counter = ((*p)->counter >> 1) + (*p)->priority;
    }
    sti();    /* interrupts were off, so the scan above ran atomically */
    switch_to(next);
}
Helper interface:
void wake_one_task(struct task_struct * p)
{
    p->state = TASK_RUNNING;
    if (p->counter > current->counter)
        need_resched = 1;
}
Interface code:
extern inline void add_wait_queue(struct wait_queue ** p, struct wait_queue * wait) { unsigned long flags; struct wait_queue * tmp; __asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags)); wait->next = *p; tmp = wait; while (tmp->next) if ((tmp = tmp->next)->next == *p) break; *p = tmp->next = wait; __asm__ __volatile__("pushl %0 ; popfl"::"r" (flags)); } extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue * wait) { unsigned long flags; struct wait_queue * tmp; __asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags)); if (*p == wait) if ((*p = wait->next) == wait) *p = NULL; tmp = wait; while (tmp && tmp->next != wait) tmp = tmp->next; if (tmp) tmp->next = wait->next; wait->next = NULL; __asm__ __volatile__("pushl %0 ; popfl"::"r" (flags)); }
static inline void __sleep_on(struct wait_queue **p, int state)
{
    unsigned long flags;

    if (!p)
        return;
    if (current == task[0])
        panic("task[0] trying to sleep");
    if (current->wait.next)
        printk("__sleep_on: wait->next exists\n");
    __asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
    current->state = state;
    add_wait_queue(p, &current->wait);
    sti();
    schedule();
    remove_wait_queue(p, &current->wait);
    __asm__("pushl %0 ; popfl"::"r" (flags));
}
First, schedule() takes the timers back, so the code grows more complex, but the flowchart returns to the V1.2 shape.
Second, to produce more efficient machine code, for(;;) replaces while and goto replaces if.
Finally, the scheduler no longer walks the task array to visit every process; it walks a circular linked list instead.
Source:
asmlinkage void schedule(void) { int c; struct task_struct * p; struct task_struct * next; unsigned long ticks; /* check alarm, wake up any interruptible tasks that have got a signal */ cli(); ticks = itimer_ticks; itimer_ticks = 0; itimer_next = ~0; sti(); need_resched = 0; p = &init_task; for (;;) { if ((p = p->next_task) == &init_task) goto confuse_gcc1; if (ticks && p->it_real_value) { if (p->it_real_value <= ticks) { send_sig(SIGALRM, p, 1); if (!p->it_real_incr) { p->it_real_value = 0; goto end_itimer; } do { p->it_real_value += p->it_real_incr; } while (p->it_real_value <= ticks); } p->it_real_value -= ticks; if (p->it_real_value < itimer_next) itimer_next = p->it_real_value; } end_itimer: if (p->state != TASK_INTERRUPTIBLE) continue; if (p->signal & ~p->blocked) { p->state = TASK_RUNNING; continue; } if (p->timeout && p->timeout <= jiffies) { p->timeout = 0; p->state = TASK_RUNNING; } } confuse_gcc1: /* this is the scheduler proper: */ #if 0 /* give processes that go to sleep a bit higher priority.. */ /* This depends on the values for TASK_XXX */ /* This gives smoother scheduling for some things, but */ /* can be very unfair under some circumstances, so.. */ if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state && current->counter < current->priority*2) { ++current->counter; } #endif c = -1; next = p = &init_task; for (;;) { if ((p = p->next_task) == &init_task) goto confuse_gcc2; if (p->state == TASK_RUNNING && p->counter > c) c = p->counter, next = p; } confuse_gcc2: if (!c) { for_each_task(p) p->counter = (p->counter >> 1) + p->priority; } if(current != next) kstat.context_swtch++; switch_to(next); /* Now maybe reload the debug registers */ if(current->debugreg[7]){ loaddebug(0); loaddebug(1); loaddebug(2); loaddebug(3); loaddebug(6); }; }
1. wake_up wakes every sleeping process on the circular list:
void wake_up(struct wait_queue **q) { struct wait_queue *tmp; struct task_struct * p; if (!q || !(tmp = *q)) return; do { if ((p = tmp->task) != NULL) { if ((p->state == TASK_UNINTERRUPTIBLE) || (p->state == TASK_INTERRUPTIBLE)) { p->state = TASK_RUNNING; if (p->counter > current->counter) need_resched = 1; } } if (!tmp->next) { printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]); printk(" q = %p\n",q); printk(" *q = %p\n",*q); printk(" tmp = %p\n",tmp); break; } tmp = tmp->next; } while (tmp != *q); }
2. Added wake_up_interruptible, which wakes only processes in interruptible sleep:
void wake_up_interruptible(struct wait_queue **q) { struct wait_queue *tmp; struct task_struct * p; if (!q || !(tmp = *q)) return; do { if ((p = tmp->task) != NULL) { if (p->state == TASK_INTERRUPTIBLE) { p->state = TASK_RUNNING; if (p->counter > current->counter) need_resched = 1; } } if (!tmp->next) { printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]); printk(" q = %p\n",q); printk(" *q = %p\n",*q); printk(" tmp = %p\n",tmp); break; } tmp = tmp->next; } while (tmp != *q); }
The semaphore down operation sleeps on the same wait queues:

void __down(struct semaphore * sem)
{
    struct wait_queue wait = { current, NULL };
    add_wait_queue(&sem->wait, &wait);
    current->state = TASK_UNINTERRUPTIBLE;
    while (sem->count <= 0) {
        schedule();
        current->state = TASK_UNINTERRUPTIBLE;
    }
    current->state = TASK_RUNNING;
    remove_wait_queue(&sem->wait, &wait);
}
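A hedged sketch of the usage pattern __down backs (2.x-era semaphore API; the MUTEX initializer is assumed from that era's <asm/semaphore.h>):

static struct semaphore lock = MUTEX;   /* count starts at 1 */

static void critical_section(void)
{
    down(&lock);    /* may fall into __down() and sleep uninterruptibly */
    /* ... exclusive work ... */
    up(&lock);      /* wakes a sleeper queued on lock.wait */
}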
sys_nice now clamps the priority into the range 1..35:

asmlinkage int sys_nice(long increment)
{
    int newprio;

    if (increment < 0 && !suser())
        return -EPERM;
    newprio = current->priority - increment;
    if (newprio < 1)
        newprio = 1;
    if (newprio > 35)
        newprio = 35;
    current->priority = newprio;
    return 0;
}
A diagnostic for scheduling from interrupt context was added:
if (intr_count) {
    printk("Aiee: scheduling in interrupt\n");
    intr_count = 0;
}
system_call.s was deleted; an arch directory was added, and the system-call entry moved into each architecture's entry.S.
1. Added support for running the tq_scheduler task queue inside schedule() (a usage sketch follows item 2):
run_task_queue(&tq_scheduler);
2. The number of runnable processes is now counted on every pass:
nr_running = 0;
...
nr_running++;
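As promised under item 1, a hedged sketch of how a driver could use tq_scheduler in this era (the tq_struct layout and queue_task() follow the 2.0-era <linux/tqueue.h> API; treat the field order as an assumption):

#include <linux/tqueue.h>

static void my_deferred_work(void *data)
{
    /* runs in process context, from run_task_queue(&tq_scheduler) */
}

static struct tq_struct my_work = {
    NULL,                 /* next: link used by the queue */
    0,                    /* sync: set while queued */
    my_deferred_work,     /* routine */
    NULL                  /* data passed to routine */
};

static void defer_it(void)
{
    queue_task(&my_work, &tq_scheduler);  /* runs on the next schedule() */
}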
A lot of changes; let's look at the source first, then explain them one by one:
asmlinkage void schedule(void)
{
    int c;
    struct task_struct * p;
    struct task_struct * prev, * next;
    unsigned long timeout = 0;
    int this_cpu=smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

    if (intr_count)
        goto scheduling_in_interrupt;

    if (bh_active & bh_mask) {
        intr_count = 1;
        do_bottom_half();    /* 1. bottom-half support was added */
        intr_count = 0;
    }

    run_task_queue(&tq_scheduler);

    need_resched = 0;
    prev = current;
    cli();
    /* move an exhausted RR process to be last.. */
    if (!prev->counter && prev->policy == SCHED_RR) {
        prev->counter = prev->priority;
        move_last_runqueue(prev);    /* 2. the run-queue concept was introduced */
    }
    switch (prev->state) {
        case TASK_INTERRUPTIBLE:
            if (prev->signal & ~prev->blocked)
                goto makerunnable;
            timeout = prev->timeout;
            if (timeout && (timeout <= jiffies)) {
                prev->timeout = 0;
                timeout = 0;
makerunnable:
                prev->state = TASK_RUNNING;
                break;
            }
        default:
            del_from_runqueue(prev);
        case TASK_RUNNING:
    }
    p = init_task.next_run;
    sti();

#ifdef __SMP__
    /*
     * This is safe as we do not permit re-entry of schedule()
     */
    prev->processor = NO_PROC_ID;
#define idle_task (task[cpu_number_map[this_cpu]])
#else
#define idle_task (&init_task)
#endif

/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
    c = -1000;
    next = idle_task;
    while (p != &init_task) {
        int weight = goodness(p, prev, this_cpu);   /* 3. the new priority ("goodness") calculation */
        if (weight > c)
            c = weight, next = p;
        p = p->next_run;
    }

    /* if all runnable processes have "counter == 0", re-calculate counters */
    if (!c) {
        for_each_task(p)
            p->counter = (p->counter >> 1) + p->priority;
    }
#ifdef __SMP__    /* 4. SMP (multi-CPU) support */
    /*
     * Allocate process to CPU
     */
    next->processor = this_cpu;
    next->last_processor = this_cpu;
#endif
#ifdef __SMP_PROF__
    /* mark processor running an idle thread */
    if (0==next->pid)
        set_bit(this_cpu,&smp_idle_map);
    else
        clear_bit(this_cpu,&smp_idle_map);
#endif
    if (prev != next) {
        struct timer_list timer;

        kstat.context_swtch++;
        if (timeout) {
            init_timer(&timer);
            timer.expires = timeout;
            timer.data = (unsigned long) prev;
            timer.function = process_timeout;
            add_timer(&timer);
        }
        get_mmu_context(next);
        switch_to(prev,next);
        if (timeout)
            del_timer(&timer);
    }
    return;

scheduling_in_interrupt:
    printk("Aiee: scheduling in interrupt %p\n", __builtin_return_address(0));
}
The two masks bh_active and bh_mask record pending soft-interrupt (bottom-half) work; every schedule() runs whatever is pending in one place:
if (bh_active & bh_mask) {
    intr_count = 1;
    do_bottom_half();
    intr_count = 0;
}
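A hedged sketch of the bottom-half API these masks serve (init_bh/mark_bh are the 2.0-era interface from <linux/interrupt.h>; the slot name here is illustrative): the interrupt handler does the minimal urgent work, and the marked bottom half runs later, here in schedule(), with interrupts enabled:

#include <linux/interrupt.h>

static void my_bottom_half(void)
{
    /* the deferred, interrupts-enabled part of interrupt handling */
}

static void my_driver_init(void)
{
    init_bh(CONSOLE_BH, my_bottom_half);   /* claim a bottom-half slot */
}

static void my_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
{
    /* ... minimal urgent work ... */
    mark_bh(CONSOLE_BH);                   /* sets the bit in bh_active */
}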
Apart from the doubly-linked circular list manipulation, this is mostly SMP support:
static inline void add_to_runqueue(struct task_struct * p) { #ifdef __SMP__ int cpu=smp_processor_id(); #endif #if 1 /* sanity tests */ if (p->next_run || p->prev_run) { printk("task already on run-queue\n"); return; } #endif if (p->counter > current->counter + 3) need_resched = 1; nr_running++; (p->prev_run = init_task.prev_run)->next_run = p; p->next_run = &init_task; init_task.prev_run = p; #ifdef __SMP__ /* this is safe only if called with cli()*/ while(set_bit(31,&smp_process_available)) { while(test_bit(31,&smp_process_available)) { if(clear_bit(cpu,&smp_invalidate_needed)) { local_flush_tlb(); set_bit(cpu,&cpu_callin_map[0]); } } } smp_process_available++; clear_bit(31,&smp_process_available); if ((0!=p->pid) && smp_threads_ready) { int i; for (i=0;i<smp_num_cpus;i++) { if (0==current_set[cpu_logical_map[i]]->pid) { smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0); break; } } } #endif } static inline void del_from_runqueue(struct task_struct * p) { struct task_struct *next = p->next_run; struct task_struct *prev = p->prev_run; #if 1 /* sanity tests */ if (!next || !prev) { printk("task not on run-queue\n"); return; } #endif if (p == &init_task) { static int nr = 0; if (nr < 5) { nr++; printk("idle task may not sleep\n"); } return; } nr_running--; next->prev_run = prev; prev->next_run = next; p->next_run = NULL; p->prev_run = NULL; } static inline void move_last_runqueue(struct task_struct * p) { struct task_struct *next = p->next_run; struct task_struct *prev = p->prev_run; /* remove from list */ next->prev_run = prev; prev->next_run = next; /* add back to list */ p->next_run = &init_task; prev = init_task.prev_run; init_task.prev_run = p; p->prev_run = prev; prev->next_run = p; }
The concept of scheduling policies was introduced:
/*
 * Scheduling policies
 */
#define SCHED_OTHER 0   // ordinary processes
#define SCHED_FIFO  1   // real-time: one process runs to completion before the next
#define SCHED_RR    2   // real-time: fixed time slices, executed round-robin
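These constants still back today's POSIX scheduling API. A hedged user-space sketch of requesting SCHED_RR (needs root or the equivalent privilege):

#include <sched.h>
#include <stdio.h>

int main(void)
{
    struct sched_param sp = { .sched_priority = 10 };

    if (sched_setscheduler(0, SCHED_RR, &sp) == -1) {  /* 0 = this process */
        perror("sched_setscheduler");
        return 1;
    }
    printf("now real-time round-robin, rt_priority 10\n");
    return 0;
}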
/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *   -1000: never select this
 *       0: out of time, recalculate counters (but it might still be
 *          selected)
 *     +ve: "goodness" value (the larger, the better)
 *   +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
{
    int weight;

#ifdef __SMP__
    /* We are not permitted to run a task someone else is running */
    if (p->processor != NO_PROC_ID)
        return -1000;    /* already running on another CPU: skip it */
#ifdef PAST_2_0
    /* This process is locked to a processor group */
    if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)))
        return -1000;    /* bound to other CPUs, not this one */
#endif
#endif

    /*
     * Realtime process, select the first one on the
     * runqueue (taking priorities within processes
     * into account).
     */
    if (p->policy != SCHED_OTHER)
        return 1000 + p->rt_priority;    /* real-time: select immediately */

    /*
     * Give the process a first-approximation goodness value
     * according to the number of clock-ticks it has left.
     *
     * Don't do any other calculations if the time slice is
     * over..
     */
    weight = p->counter;
    if (weight) {
#ifdef __SMP__
        /* Give a largish advantage to the same processor...   */
        /* (this is equivalent to penalizing other processors) */
        if (p->last_processor == this_cpu)
            weight += PROC_CHANGE_PENALTY;    /* stay on the same CPU */
#endif
        /* .. and a slight advantage to the current process */
        if (p == prev)
            weight += 1;    /* slight bonus: it was the previous process */
    }
    return weight;
}
Added wake_up_process, built on add_to_runqueue; wake_up and wake_up_interruptible now call it in place of the bare p->state = TASK_RUNNING assignment.
inline void wake_up_process(struct task_struct * p)
{
    unsigned long flags;

    save_flags(flags);
    cli();
    p->state = TASK_RUNNING;
    if (!p->next_run)
        add_to_runqueue(p);
    restore_flags(flags);
}
The nice policy changed: the priority value is clamped to the range 1 to DEF_PRIORITY*2, and the traditional -20..20 nice step is first normalized into timeslice ticks:
asmlinkage int sys_nice(int increment)
{
    unsigned long newprio;
    int increase = 0;

    newprio = increment;
    if (increment < 0) {
        if (!suser())
            return -EPERM;
        newprio = -increment;
        increase = 1;
    }
    if (newprio > 40)
        newprio = 40;
    /*
     * do a "normalization" of the priority (traditionally
     * unix nice values are -20..20, linux doesn't really
     * use that kind of thing, but uses the length of the
     * timeslice instead (default 150 msec). The rounding is
     * why we want to avoid negative values.
     */
    newprio = (newprio * DEF_PRIORITY + 10) / 20;
    increment = newprio;
    if (increase)
        increment = -increment;
    newprio = current->priority - increment;
    if (newprio < 1)
        newprio = 1;
    if (newprio > DEF_PRIORITY*2)
        newprio = DEF_PRIORITY*2;
    current->priority = newprio;
    return 0;
}
The wakeup functions were reworked to use wait queues uniformly:
void wake_up(struct wait_queue **q)
{
    struct wait_queue *next;
    struct wait_queue *head;

    if (!q || !(next = *q))
        return;
    head = WAIT_QUEUE_HEAD(q);
    while (next != head) {
        struct task_struct *p = next->task;
        next = next->next;
        if (p != NULL) {
            if ((p->state == TASK_UNINTERRUPTIBLE) ||
                (p->state == TASK_INTERRUPTIBLE))
                wake_up_process(p);
        }
        if (!next)
            goto bad;
    }
    return;
bad:
    printk("wait_queue is bad (eip = %p)\n", __builtin_return_address(0));
    printk("        q = %p\n",q);
    printk("       *q = %p\n",*q);
}

void wake_up_interruptible(struct wait_queue **q)
{
    struct wait_queue *next;
    struct wait_queue *head;

    if (!q || !(next = *q))
        return;
    head = WAIT_QUEUE_HEAD(q);
    while (next != head) {
        struct task_struct *p = next->task;
        next = next->next;
        if (p != NULL) {
            if (p->state == TASK_INTERRUPTIBLE)
                wake_up_process(p);
        }
        if (!next)
            goto bad;
    }
    return;
bad:
    printk("wait_queue is bad (eip = %p)\n", __builtin_return_address(0));
    printk("        q = %p\n",q);
    printk("       *q = %p\n",*q);
}

Atomic protection was also added to the sleep_on interfaces.
Fixed a bug in nice: newprio is unsigned, so a negative result wraps around and the lower-bound check never fires. The comparison is now done as signed:
if (newprio < 1)

==>

if ((signed) newprio < 1)
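A standalone sketch of the wraparound that motivated the cast (plain C, with long standing in for the kernel's types):

#include <stdio.h>

int main(void)
{
    unsigned long newprio = 5 - 20;   /* "negative" result wraps to a huge value */

    printf("unsigned: %lu  (< 1 ? %d)\n", newprio, newprio < 1);
    printf("signed:   %ld  (< 1 ? %d)\n", (long)newprio, (long)newprio < 1);
    return 0;
}

With the unsigned comparison the lower-bound clamp never fires, so a large increment could still wrap the priority; the signed cast restores the intended check.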
Quite a few changes this time.
1. More SMP/CPU support was added.
2. Every queue operation is now protected by spinlocks.
Source:
asmlinkage void schedule(void) { struct schedule_data * sched_data; struct task_struct * prev, * next; int this_cpu; prev = current; this_cpu = prev->processor; /* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */ sched_data = & aligned_data[this_cpu].schedule_data; if (in_interrupt()) goto scheduling_in_interrupt; release_kernel_lock(prev, this_cpu); /* Do "administrative" work here while we don't hold any locks */ if (bh_active & bh_mask) do_bottom_half(); run_task_queue(&tq_scheduler); spin_lock(&scheduler_lock); spin_lock_irq(&runqueue_lock); /* move an exhausted RR process to be last.. */ prev->need_resched = 0; if (!prev->counter && prev->policy == SCHED_RR) { prev->counter = prev->priority; move_last_runqueue(prev); } switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { prev->state = TASK_RUNNING; break; } default: del_from_runqueue(prev); case TASK_RUNNING: } sched_data->prevstate = prev->state; { struct task_struct * p = init_task.next_run; /* * This is subtle. * Note how we can enable interrupts here, even * though interrupts can add processes to the run- * queue. This is because any new processes will * be added to the front of the queue, so "p" above * is a safe starting point. * run-queue deletion and re-ordering is protected by * the scheduler lock */ spin_unlock_irq(&runqueue_lock); #ifdef __SMP__ prev->has_cpu = 0; #endif /* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. */ /* this is the scheduler proper: */ { int c = -1000; next = idle_task; while (p != &init_task) { if (can_schedule(p)) { int weight = goodness(p, prev, this_cpu); if (weight > c) c = weight, next = p; } p = p->next_run; } /* Do we need to re-calculate counters? */ if (!c) { struct task_struct *p; read_lock(&tasklist_lock); for_each_task(p) p->counter = (p->counter >> 1) + p->priority; read_unlock(&tasklist_lock); } } } /* * maintain the per-process 'average timeslice' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP: */ #ifdef __SMP__ { cycles_t t, this_slice; t = get_cycles(); this_slice = t - sched_data->last_schedule; sched_data->last_schedule = t; /* * Simple, exponentially fading average calculation: */ prev->avg_slice = this_slice + prev->avg_slice; prev->avg_slice >>= 1; } /* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */ prev->has_cpu = 1; next->has_cpu = 1; next->processor = this_cpu; spin_unlock(&scheduler_lock); #endif /* __SMP__ */ if (prev != next) { #ifdef __SMP__ sched_data->prev = prev; #endif kstat.context_swtch++; get_mmu_context(next); switch_to(prev,next); __schedule_tail(); } reacquire_kernel_lock(current); return; scheduling_in_interrupt: printk("Scheduling in interrupt\n"); *(int *)0 = 0; }
Added reschedule_idle, which does more work after a wakeup before deciding whether to reschedule:
void wake_up_process(struct task_struct * p)
{
    unsigned long flags;

    spin_lock_irqsave(&runqueue_lock, flags);
    p->state = TASK_RUNNING;
    if (!p->next_run) {
        add_to_runqueue(p);
        reschedule_idle(p);
    }
    spin_unlock_irqrestore(&runqueue_lock, flags);
}

Most of this is SMP support. "Higher priority" also gets a new definition: the woken task's counter must exceed the current task's by more than 3.
static inline void reschedule_idle(struct task_struct * p)
{
    if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) {
        current->need_resched = 1;
        return;
    }
#ifdef __SMP__
    /*
     * ("wakeup()" should not be called before we've initialized
     * SMP completely.
     * Basically a not-yet initialized SMP subsystem can be
     * considered as a not-yet working scheduler, simply dont use
     * it before it's up and running ...)
     *
     * SMP rescheduling is done in 2 passes:
     *  - pass #1: faster: 'quick decisions'
     *  - pass #2: slower: 'lets try and find another CPU'
     */

    /*
     * Pass #1
     *
     * There are two metrics here:
     *
     * first, a 'cutoff' interval, currently 0-200 usecs on
     * x86 CPUs, depending on the size of the 'SMP-local cache'.
     * If the current process has longer average timeslices than
     * this, then we utilize the idle CPU.
     *
     * second, if the wakeup comes from a process context,
     * then the two processes are 'related'. (they form a
     * 'gang')
     *
     * An idle CPU is almost always a bad thing, thus we skip
     * the idle-CPU utilization only if both these conditions
     * are true. (ie. a 'process-gang' rescheduling with rather
     * high frequency should stay on the same CPU).
     *
     * [We can switch to something more finegrained in 2.3.]
     */
    if ((current->avg_slice < cacheflush_time) && related(current, p))
        return;

    reschedule_idle_slow(p);
#endif /* __SMP__ */
}
New sleep interfaces were added, supporting sleep with a timeout:
signed long schedule_timeout(signed long timeout) { struct timer_list timer; unsigned long expire; switch (timeout) { case MAX_SCHEDULE_TIMEOUT: /* * These two special cases are useful to be comfortable * in the caller. Nothing more. We could take * MAX_SCHEDULE_TIMEOUT from one of the negative value * but I' d like to return a valid offset (>=0) to allow * the caller to do everything it want with the retval. */ schedule(); goto out; default: /* * Another bit of PARANOID. Note that the retval will be * 0 since no piece of kernel is supposed to do a check * for a negative retval of schedule_timeout() (since it * should never happens anyway). You just have the printk() * that will tell you if something is gone wrong and where. */ if (timeout < 0) { printk(KERN_ERR "schedule_timeout: wrong timeout " "value %lx from %p\n", timeout, __builtin_return_address(0)); goto out; } } expire = timeout + jiffies; init_timer(&timer); timer.expires = expire; timer.data = (unsigned long) current; timer.function = process_timeout; add_timer(&timer); schedule(); del_timer(&timer); timeout = expire - jiffies; out: return timeout < 0 ? 0 : timeout; } /* * This one aligns per-CPU data on cacheline boundaries. */ static union { struct schedule_data { struct task_struct * prev; long prevstate; cycles_t last_schedule; } schedule_data; char __pad [L1_CACHE_BYTES]; } aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; static inline void __schedule_tail (void) { #ifdef __SMP__ struct schedule_data * sched_data; /* * We might have switched CPUs: */ sched_data = & aligned_data[smp_processor_id()].schedule_data; /* * Subtle. In the rare event that we got a wakeup to 'prev' just * during the reschedule (this is possible, the scheduler is pretty * parallel), we should do another reschedule in the next task's * context. schedule() will do the right thing next time around. * this is equivalent to 'delaying' the wakeup until the reschedule * has finished. */ if (sched_data->prev->state != sched_data->prevstate) current->need_resched = 1; /* * Release the previous process ... * * We have dropped all locks, and we must make sure that we * only mark the previous process as no longer having a CPU * after all other state has been seen by other CPU's. Thus * the write memory barrier! */ wmb(); sched_data->prev->has_cpu = 0; #endif /* __SMP__ */ } void interruptible_sleep_on(struct wait_queue **p) { SLEEP_ON_VAR current->state = TASK_INTERRUPTIBLE; SLEEP_ON_HEAD schedule(); SLEEP_ON_TAIL } long interruptible_sleep_on_timeout(struct wait_queue **p, long timeout) { SLEEP_ON_VAR current->state = TASK_INTERRUPTIBLE; SLEEP_ON_HEAD timeout = schedule_timeout(timeout); SLEEP_ON_TAIL return timeout; } void sleep_on(struct wait_queue **p) { SLEEP_ON_VAR current->state = TASK_UNINTERRUPTIBLE; SLEEP_ON_HEAD schedule(); SLEEP_ON_TAIL } long sleep_on_timeout(struct wait_queue **p, long timeout) { SLEEP_ON_VAR current->state = TASK_UNINTERRUPTIBLE; SLEEP_ON_HEAD timeout = schedule_timeout(timeout); SLEEP_ON_TAIL return timeout; }
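A hedged sketch of the idiom schedule_timeout() enables (kernel-side code in the 2.2 style): sleep for up to two seconds, but wake early on a signal or an explicit wake_up:

/* returns 0 if the full timeout elapsed, else the ticks remaining */
static long sleep_up_to_two_seconds(void)
{
    current->state = TASK_INTERRUPTIBLE;
    return schedule_timeout(2 * HZ);
}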
1. Further SMP changes, not discussed here.
2. The logic of schedule() itself barely changed, but goto now replaces many of the original if { ... } blocks.
3. Locking is used more extensively.
Source:
asmlinkage void schedule(void) { struct schedule_data * sched_data; struct task_struct *prev, *next, *p; int this_cpu, c; if (tq_scheduler) goto handle_tq_scheduler; tq_scheduler_back: prev = current; this_cpu = prev->processor; if (in_interrupt()) goto scheduling_in_interrupt; release_kernel_lock(prev, this_cpu); /* Do "administrative" work here while we don't hold any locks */ if (bh_mask & bh_active) goto handle_bh; handle_bh_back: /* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */ sched_data = & aligned_data[this_cpu].schedule_data; spin_lock_irq(&runqueue_lock); /* move an exhausted RR process to be last.. */ if (prev->policy == SCHED_RR) goto move_rr_last; move_rr_back: switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { prev->state = TASK_RUNNING; break; } default: del_from_runqueue(prev); case TASK_RUNNING: } prev->need_resched = 0; repeat_schedule: /* * this is the scheduler proper: */ p = init_task.next_run; /* Default process to select.. */ next = idle_task(this_cpu); c = -1000; if (prev->state == TASK_RUNNING) goto still_running; still_running_back: /* * This is subtle. * Note how we can enable interrupts here, even * though interrupts can add processes to the run- * queue. This is because any new processes will * be added to the front of the queue, so "p" above * is a safe starting point. * run-queue deletion and re-ordering is protected by * the scheduler lock */ /* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. */ while (p != &init_task) { if (can_schedule(p)) { int weight = goodness(prev, p, this_cpu); if (weight > c) c = weight, next = p; } p = p->next_run; } /* Do we need to re-calculate counters? */ if (!c) goto recalculate; /* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. */ sched_data->curr = next; #ifdef __SMP__ next->has_cpu = 1; next->processor = this_cpu; #endif spin_unlock_irq(&runqueue_lock); if (prev == next) goto same_process; #ifdef __SMP__ /* * maintain the per-process 'average timeslice' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. */ { cycles_t t, this_slice; t = get_cycles(); this_slice = t - sched_data->last_schedule; sched_data->last_schedule = t; /* * Exponentially fading average calculation, with * some weight so it doesnt get fooled easily by * smaller irregularities. */ prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2; } /* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). 
*/ #endif /* __SMP__ */ kstat.context_swtch++; get_mmu_context(next); switch_to(prev, next, prev); __schedule_tail(prev); same_process: reacquire_kernel_lock(current); return; recalculate: { struct task_struct *p; spin_unlock_irq(&runqueue_lock); read_lock(&tasklist_lock); for_each_task(p) p->counter = (p->counter >> 1) + p->priority; read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); goto repeat_schedule; } still_running: c = prev_goodness(prev, prev, this_cpu); next = prev; goto still_running_back; handle_bh: do_bottom_half(); goto handle_bh_back; handle_tq_scheduler: run_task_queue(&tq_scheduler); goto tq_scheduler_back; move_rr_last: if (!prev->counter) { prev->counter = prev->priority; move_last_runqueue(prev); } goto move_rr_back; scheduling_in_interrupt: printk("Scheduling in interrupt\n"); *(int *)0 = 0; return; }
The wakeup function:
void __wake_up(struct wait_queue **q, unsigned int mode)
{
    struct task_struct *p;
    struct wait_queue *head, *next;

    if (!q)
        goto out;
    /*
     * this is safe to be done before the check because it
     * means no deference, just pointer operations.
     */
    head = WAIT_QUEUE_HEAD(q);

    read_lock(&waitqueue_lock);
    next = *q;
    if (!next)
        goto out_unlock;

    while (next != head) {
        p = next->task;
        next = next->next;
        if (p->state & mode) {
            /*
             * We can drop the read-lock early if this
             * is the only/last process.
             */
            if (next == head) {
                read_unlock(&waitqueue_lock);
                wake_up_process(p);
                goto out;
            }
            wake_up_process(p);
        }
    }
out_unlock:
    read_unlock(&waitqueue_lock);
out:
    return;
}
__schedule_tail now also calls reschedule_idle; again this targets SMP.
/*
 * schedule_tail() is getting called from the fork return path. This
 * cleans up all remaining scheduler things, without impacting the
 * common case.
 */
static inline void __schedule_tail (struct task_struct *prev)
{
#ifdef __SMP__
    if ((prev->state == TASK_RUNNING) &&
        (prev != idle_task(smp_processor_id())))
        reschedule_idle(prev);
    wmb();
    prev->has_cpu = 0;
#endif /* __SMP__ */
}

void schedule_tail (struct task_struct *prev)
{
    __schedule_tail(prev);
}
Two details of the new context-switch path stand out: an active_mm sanity check, and lazy-TLB handling for kernel threads, which own no mm of their own and borrow the previous task's active_mm:

if (!current->active_mm) BUG();

prepare_to_switch();
{
    struct mm_struct *mm = next->mm;
    struct mm_struct *oldmm = prev->active_mm;
    if (!mm) {                          /* kernel thread: borrow the old mm */
        if (next->active_mm) BUG();
        next->active_mm = oldmm;
        atomic_inc(&oldmm->mm_count);
        enter_lazy_tlb(oldmm, next, this_cpu);
    } else {                            /* user task: switch address spaces */
        if (next->active_mm != mm) BUG();
        switch_mm(oldmm, mm, next, this_cpu);
    }
    if (!prev->mm) {                    /* the borrower releases it again */
        prev->active_mm = NULL;
        mmdrop(oldmm);
    }
}
asmlinkage void schedule(void) { struct schedule_data * sched_data; struct task_struct *prev, *next, *p; struct list_head *tmp; int this_cpu, c; if (!current->active_mm) BUG(); need_resched_back: prev = current; this_cpu = prev->processor; if (in_interrupt()) goto scheduling_in_interrupt; release_kernel_lock(prev, this_cpu); /* Do "administrative" work here while we don't hold any locks */ if (softirq_active(this_cpu) & softirq_mask(this_cpu)) goto handle_softirq; handle_softirq_back: /* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */ sched_data = & aligned_data[this_cpu].schedule_data; spin_lock_irq(&runqueue_lock); /* move an exhausted RR process to be last.. */ if (prev->policy == SCHED_RR) goto move_rr_last; move_rr_back: switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { prev->state = TASK_RUNNING; break; } default: del_from_runqueue(prev); case TASK_RUNNING: } prev->need_resched = 0; /* * this is the scheduler proper: */ repeat_schedule: /* * Default process to select.. */ next = idle_task(this_cpu); c = -1000; if (prev->state == TASK_RUNNING) goto still_running; still_running_back: list_for_each(tmp, &runqueue_head) { p = list_entry(tmp, struct task_struct, run_list); if (can_schedule(p, this_cpu)) { int weight = goodness(p, this_cpu, prev->active_mm); if (weight > c) c = weight, next = p; } } /* Do we need to re-calculate counters? */ if (!c) goto recalculate; /* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. */ sched_data->curr = next; #ifdef CONFIG_SMP next->has_cpu = 1; next->processor = this_cpu; #endif spin_unlock_irq(&runqueue_lock); if (prev == next) goto same_process; #ifdef CONFIG_SMP /* * maintain the per-process 'last schedule' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. */ sched_data->last_schedule = get_cycles(); /* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */ #endif /* CONFIG_SMP */ kstat.context_swtch++; /* * there are 3 processes which are affected by a context switch: * * prev == .... ==> (last => next) * * It's the 'much more previous' 'prev' that is on next's stack, * but prev is set to (the just run) 'last' process by switch_to(). * This might sound slightly confusing but makes tons of sense. */ prepare_to_switch(); { struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; if (!mm) { if (next->active_mm) BUG(); next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next, this_cpu); } else { if (next->active_mm != mm) BUG(); switch_mm(oldmm, mm, next, this_cpu); } if (!prev->mm) { prev->active_mm = NULL; mmdrop(oldmm); } } /* * This just switches the register state and the * stack. 
*/ switch_to(prev, next, prev); __schedule_tail(prev); same_process: reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; return; recalculate: { struct task_struct *p; spin_unlock_irq(&runqueue_lock); read_lock(&tasklist_lock); for_each_task(p) p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); } goto repeat_schedule; still_running: c = goodness(prev, this_cpu, prev->active_mm); next = prev; goto still_running_back; handle_softirq: do_softirq(); goto handle_softirq_back; move_rr_last: if (!prev->counter) { prev->counter = NICE_TO_TICKS(prev->nice); move_last_runqueue(prev); } goto move_rr_back; scheduling_in_interrupt: printk("Scheduling in interrupt\n"); BUG(); return; }
1. The runqueue operations changed: the doubly-linked list primitives are now abstracted in list.h, which provides the generic interfaces.
Usage examples:
static inline void add_to_runqueue(struct task_struct * p)
{
    list_add(&p->run_list, &runqueue_head);
    nr_running++;
}

static inline void move_last_runqueue(struct task_struct * p)
{
    list_del(&p->run_list);
    list_add_tail(&p->run_list, &runqueue_head);
}

static inline void move_first_runqueue(struct task_struct * p)
{
    list_del(&p->run_list);
    list_add(&p->run_list, &runqueue_head);
}
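A hedged sketch of the generic list.h pattern these helpers build on: embed a list_head inside your own struct, then recover the container with list_entry() while iterating (the struct and its fields here are illustrative):

#include <linux/list.h>

struct item {
    int value;
    struct list_head node;      /* the linkage lives inside the object */
};

static LIST_HEAD(items);        /* an empty circular list head */

static int sum_items(void)
{
    struct list_head *pos;
    int sum = 0;

    list_for_each(pos, &items) {
        struct item *it = list_entry(pos, struct item, node);
        sum += it->value;
    }
    return sum;
}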
3. Added a synchronous wakeup interface that skips the reschedule check after waking (the waker is about to call schedule() itself anyway):
static inline void wake_up_process_synchronous(struct task_struct * p)
{
    unsigned long flags;

    /*
     * We want the common case fall through straight, thus the goto.
     */
    spin_lock_irqsave(&runqueue_lock, flags);
    p->state = TASK_RUNNING;
    if (task_on_runqueue(p))
        goto out;
    add_to_runqueue(p);
out:
    spin_unlock_irqrestore(&runqueue_lock, flags);
}
4. Added a family of wakeup interfaces that can prefer a CPU and choose the wakeup mode:
static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode, const int sync) { struct list_head *tmp, *head; struct task_struct *p, *best_exclusive; unsigned long flags; int best_cpu, irq; if (!q) goto out; best_cpu = smp_processor_id(); irq = in_interrupt(); best_exclusive = NULL; wq_write_lock_irqsave(&q->lock, flags); #if WAITQUEUE_DEBUG CHECK_MAGIC_WQHEAD(q); #endif head = &q->task_list; #if WAITQUEUE_DEBUG if (!head->next || !head->prev) WQ_BUG(); #endif tmp = head->next; while (tmp != head) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); tmp = tmp->next; #if WAITQUEUE_DEBUG CHECK_MAGIC(curr->__magic); #endif p = curr->task; state = p->state; if (state & mode) { #if WAITQUEUE_DEBUG curr->__waker = (long)__builtin_return_address(0); #endif /* * If waking up from an interrupt context then * prefer processes which are affine to this * CPU. */ if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) { if (!best_exclusive) best_exclusive = p; if (p->processor == best_cpu) { best_exclusive = p; break; } } else { if (sync) wake_up_process_synchronous(p); else wake_up_process(p); if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE) break; } } } if (best_exclusive) { if (sync) wake_up_process_synchronous(best_exclusive); else wake_up_process(best_exclusive); } wq_write_unlock_irqrestore(&q->lock, flags); out: return; } void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode) { __wake_up_common(q, mode, wq_mode, 0); } void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode) { __wake_up_common(q, mode, wq_mode, 1); }
1. goto is no longer used so heavily; instead, if conditions are annotated with unlikely().
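The annotation is a branch-prediction hint for GCC; in 2.4 it boils down to essentially this (from <linux/compiler.h>):

#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

The scheduler's hot path is then laid out for the common case, with the rare branches (RR slice expiry, counter recalculation) compiled out of line.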
Source:
asmlinkage void schedule(void) { struct schedule_data * sched_data; struct task_struct *prev, *next, *p; struct list_head *tmp; int this_cpu, c; spin_lock_prefetch(&runqueue_lock); if (!current->active_mm) BUG(); need_resched_back: prev = current; this_cpu = prev->processor; if (unlikely(in_interrupt())) { printk("Scheduling in interrupt\n"); BUG(); } release_kernel_lock(prev, this_cpu); /* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */ sched_data = & aligned_data[this_cpu].schedule_data; spin_lock_irq(&runqueue_lock); /* move an exhausted RR process to be last.. */ if (unlikely(prev->policy == SCHED_RR)) if (!prev->counter) { prev->counter = NICE_TO_TICKS(prev->nice); move_last_runqueue(prev); } switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { prev->state = TASK_RUNNING; break; } default: del_from_runqueue(prev); case TASK_RUNNING:; } prev->need_resched = 0; /* * this is the scheduler proper: */ repeat_schedule: /* * Default process to select.. */ next = idle_task(this_cpu); c = -1000; list_for_each(tmp, &runqueue_head) { p = list_entry(tmp, struct task_struct, run_list); if (can_schedule(p, this_cpu)) { int weight = goodness(p, this_cpu, prev->active_mm); if (weight > c) c = weight, next = p; } } /* Do we need to re-calculate counters? */ if (unlikely(!c)) { struct task_struct *p; spin_unlock_irq(&runqueue_lock); read_lock(&tasklist_lock); for_each_task(p) p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); goto repeat_schedule; } /* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. */ sched_data->curr = next; task_set_cpu(next, this_cpu); spin_unlock_irq(&runqueue_lock); if (unlikely(prev == next)) { /* We won't go through the normal tail, so do this by hand */ prev->policy &= ~SCHED_YIELD; goto same_process; } #ifdef CONFIG_SMP /* * maintain the per-process 'last schedule' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. */ sched_data->last_schedule = get_cycles(); /* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */ #endif /* CONFIG_SMP */ kstat.context_swtch++; /* * there are 3 processes which are affected by a context switch: * * prev == .... ==> (last => next) * * It's the 'much more previous' 'prev' that is on next's stack, * but prev is set to (the just run) 'last' process by switch_to(). * This might sound slightly confusing but makes tons of sense. */ prepare_to_switch(); { struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; if (!mm) { if (next->active_mm) BUG(); next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next, this_cpu); } else { if (next->active_mm != mm) BUG(); switch_mm(oldmm, mm, next, this_cpu); } if (!prev->mm) { prev->active_mm = NULL; mmdrop(oldmm); } } /* * This just switches the register state and the * stack. */ switch_to(prev, next, prev); __schedule_tail(prev); same_process: reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; return; }
1. The wakeup functions were consolidated, and argument-validity checks were added:
static inline int try_to_wake_up(struct task_struct * p, int synchronous)
{
    unsigned long flags;
    int success = 0;

    /*
     * We want the common case fall through straight, thus the goto.
     */
    spin_lock_irqsave(&runqueue_lock, flags);
    p->state = TASK_RUNNING;
    if (task_on_runqueue(p))
        goto out;
    add_to_runqueue(p);
    if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
        reschedule_idle(p);
    success = 1;
out:
    spin_unlock_irqrestore(&runqueue_lock, flags);
    return success;
}

inline int wake_up_process(struct task_struct * p)
{
    return try_to_wake_up(p, 0);
}
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
{
    if (q) {
        unsigned long flags;
        wq_read_lock_irqsave(&q->lock, flags);
        __wake_up_common(q, mode, nr, 0);
        wq_read_unlock_irqrestore(&q->lock, flags);
    }
}

void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
{
    if (q) {
        unsigned long flags;
        wq_read_lock_irqsave(&q->lock, flags);
        __wake_up_common(q, mode, nr, 1);
        wq_read_unlock_irqrestore(&q->lock, flags);
    }
}