http://hi.baidu.com/%B0%B5%D4%C2%C1%F7%B9%E2/blog/item/0f68f850e5af7215377abe5d.html
(有时间我会给原版打个patch,让其更完整:)
文件:xen-3.4.2/xen/common/schedule.c
0、Per-CPU schedule data
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
struct vcpu *curr; /* current task */
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
struct timer s_timer; /* scheduling timer */
} __cacheline_aligned; (和cache line对齐)
DECLARE_PER_CPU(struct schedule_data, schedule_data);
1、SCHEDULE_SOFTIRQ
这个软中断用于触发调度器的执行。它在调度器初始化(scheduler_init)时通过open_softirq注册,对应的处理函数就是调度函数schedule。
代码:
/*
 * Initialise the data structures.
 * Abridged excerpt: the dotted lines stand for code omitted from the
 * original function, including its closing brace.
 */
void __init scheduler_init(void)
{
....................................
/* Register schedule() as the handler that runs when SCHEDULE_SOFTIRQ is raised. */
open_softirq(SCHEDULE_SOFTIRQ, schedule);
....................................
使用:
raise_softirq(SCHEDULE_SOFTIRQ); 执行该代码之后会导致所在cpu执行调度
2、schedule
选择下一个待执行的vcpu,执行vcpu切换
/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 *
 * Abridged excerpt: the dotted lines stand for code omitted from the
 * original function (sd, for instance, is assigned in an omitted part).
 */
static void schedule(void)
{
struct vcpu *prev = current, *next = NULL;
s_time_t now = NOW();
struct schedule_data *sd;
struct task_slice next_slice;
....................................
/* get policy-specific decision on scheduling... */
next_slice = ops.do_schedule(now); // with the credit scheduler this runs csched_schedule, the function that scheduler registered
next = next_slice.task;
sd->curr = next;
if ( next_slice.time >= 0 ) /* -ve means no limit */
set_timer(&sd->s_timer, now + next_slice.time); // arm the scheduling timer for the time slice granted to next
if ( unlikely(prev == next) )
{
spin_unlock_irq(&sd->schedule_lock);
trace_continue_running(next);
return continue_running(prev); // goes straight to schedule_tail, skipping the vcpu switch
}
..................................
/* Record the new runstate of the outgoing vcpu: blocked, runnable or offline. */
if (test_bit(_VPF_blocked, &prev->pause_flags)) { // the original statement was hard to read; rewritten here in an easier form
vcpu_runstate_change(prev, RUNSTATE_blocked, now);
} else {
if (vcpu_runnable(prev))
vcpu_runstate_change(prev, RUNSTATE_runnable, now);
else
vcpu_runstate_change(prev, RUNSTATE_offline, now);
}
prev->last_run_time = now;
/* The incoming vcpu must not already be accounted as running. */
ASSERT(next->runstate.state != RUNSTATE_running);
vcpu_runstate_change(next, RUNSTATE_running, now);
ASSERT(!next->is_running);
next->is_running = 1; // set the "currently running" flag
.........................
context_switch(prev, next); // perform the vcpu switch
}
----------------------------------------------------------------------------
Per-VCPU pause flags. The _VPF_* names are bit numbers (used with set_bit/test_bit/clear_bit); the VPF_* names are the corresponding bit masks.
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
3rd 2nd 1st 0th
v v v v
migration blk-xen down blocked
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
/*
 * Per-VCPU pause flags.
 * Each flag has a bit number (_VPF_x) and a mask (VPF_x) derived from it.
 */

/* Bit 0: domain is blocked waiting for an event. */
#define _VPF_blocked        0
#define VPF_blocked         (1 << _VPF_blocked)

/* Bit 1: VCPU is offline. */
#define _VPF_down           1
#define VPF_down            (1 << _VPF_down)

/* Bit 2: VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen 2
#define VPF_blocked_in_xen  (1 << _VPF_blocked_in_xen)

/* Bit 3: VCPU affinity has changed, migrating to a new CPU. */
#define _VPF_migrating      3
#define VPF_migrating       (1 << _VPF_migrating)
----------------------------------------------------------------------------
3、block
/*
 * Block the currently-executing domain until a pertinent event occurs.
 * Always returns 0.
 */
static long do_block(void)
{
    struct vcpu *vcpu = current;

    local_event_delivery_enable();

    /* Mark the vcpu as blocked (_VPF_blocked) before looking for events. */
    set_bit(_VPF_blocked, &vcpu->pause_flags);

    /*
     * Check for events /after/ blocking: avoids wakeup waiting race.
     * The check covers whether the current vcpu has a pending event and
     * whether event delivery is enabled.
     */
    if ( !local_events_need_delivery() )
    {
        /* The vcpu stays flagged as blocked; request a reschedule so
         * that another vcpu gets to run. */
        raise_softirq(SCHEDULE_SOFTIRQ);
        return 0;
    }

    /* An event is already deliverable: drop the blocked flag again. */
    clear_bit(_VPF_blocked, &vcpu->pause_flags);
    return 0;
}
4、wake
唤醒指定的vcpu,使其从block状态切换到可运行状态
/*
 * Wake the given vcpu.
 * Runs with the vcpu's schedule lock held and the interrupt flags saved.
 * A runnable vcpu is moved to RUNSTATE_runnable if needed and handed to
 * the scheduler's wake hook; a non-runnable vcpu that is no longer flagged
 * _VPF_blocked but still in RUNSTATE_blocked is moved to RUNSTATE_offline.
 */
void vcpu_wake(struct vcpu *v)
{
    unsigned long irq_flags;

    vcpu_schedule_lock_irqsave(v, irq_flags);

    if ( likely(vcpu_runnable(v)) )
    {
        if ( v->runstate.state >= RUNSTATE_blocked )
        {
            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
        }

        /* Call the wake function of the scheduler. */
        SCHED_OP(wake, v);
    }
    else if ( !test_bit(_VPF_blocked, &v->pause_flags) &&
              v->runstate.state == RUNSTATE_blocked )
    {
        vcpu_runstate_change(v, RUNSTATE_offline, NOW());
    }

    vcpu_schedule_unlock_irqrestore(v, irq_flags);
}
重要函数记录:
struct domain *d;
struct vcpu *v;
for_each_vcpu ( d, v ) 遍历domain中的vcpu
{
if ( v != current )
vcpu_pause(v);
}
struct domain *rd;
domid_t dom;

/* Look up the domain structure that corresponds to the domain id. */
rd = rcu_lock_domain_by_id(dom);