当一个 task 被 fork 或者被唤醒(wakeup)时,内核都要先为它选择一个目标 CPU,然后再执行入队操作。完成这一选择的入口是 core.c 中的 select_task_rq 函数,它会根据 task 所属的调度类(sched class)进入不同的分支。下面分析 RT 调度类对应的实现:select_task_rq_rt。
其源码如下:
/*
 * select_task_rq_rt - pick the CPU on which RT task @p should be enqueued.
 * @p:       task being woken up or forked
 * @cpu:     CPU to fall back on; returned as-is unless a better one is found
 * @sd_flag: balancing reason; a search is only attempted for
 *           SD_BALANCE_WAKE and SD_BALANCE_FORK
 * @flags:   not used by this function
 *
 * Returns @cpu, or the CPU reported by find_lowest_rq() when that CPU's
 * highest RT priority is numerically greater (i.e. lower) than @p's.
 */
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *running;
	struct rq *src_rq;
	bool must_look_elsewhere = false;

	/*
	 * According to the article, sd_flag is set to one of these two values
	 * by try_to_wake_up()/wake_up_new_task(). It decides whether any
	 * balancing is done while choosing a CPU; without it the task simply
	 * stays on the CPU it was given and no new CPU is selected.
	 *
	 * For anything but wake ups, just return the task_cpu.
	 */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
		return cpu;

	src_rq = cpu_rq(cpu);

	rcu_read_lock();
	running = READ_ONCE(src_rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away
	 *
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We take into account the capacity of the CPU to ensure it fits the
	 * requirement of the task - which is only important on heterogeneous
	 * systems like big.LITTLE.
	 */
	if (running && unlikely(rt_task(running))) {
		/*
		 * The running RT task is either pinned (fewer than two
		 * allowed CPUs) or its prio value is not greater than @p's,
		 * so @p would not run here promptly.
		 */
		must_look_elsewhere = running->nr_cpus_allowed < 2 ||
				      running->prio <= p->prio;
	}

	/*
	 * Either condition triggers a search for another CPU:
	 *  1. the check above held, so @p has to look for a CPU running
	 *     lower-priority work in order to run sooner;
	 *  2. the check did not hold, but rt_task_fits_capacity() reports
	 *     that @cpu does not fit @p.
	 */
	if (must_look_elsewhere || !rt_task_fits_capacity(p, cpu)) {
		/*
		 * The article's "CPU priority" chapter covers this lookup: it
		 * favours CPUs that are running low-priority tasks.
		 */
		int candidate = find_lowest_rq(p);

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task. A smaller prio value
		 * means a higher priority, so @p must compare strictly below
		 * the candidate's highest recorded RT priority.
		 */
		if (candidate != -1) {
			int top_prio = cpu_rq(candidate)->rt.highest_prio.curr;

			if (p->prio < top_prio)
				cpu = candidate;
		}
	}
	rcu_read_unlock();

	return cpu;
}
/*
 * Article note: uclamp is conceptually similar to the earlier ARM schedtune
 * feature. In short it limits or boosts the utilization of a task, task
 * group or runqueue; the author plans to cover it in a separate chapter.
 */
#ifdef CONFIG_UCLAMP_TASK
/*
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp
 * settings.
 *
 * This check is only important for heterogeneous systems where uclamp_min value
 * is higher than the capacity of a @cpu. For non-heterogeneous system this
 * function will always return true.
 *
 * The function will return true if the capacity of the @cpu is >= the
 * uclamp_min and false otherwise.
 *
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
 * > uclamp_max.
 */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int clamp_lo;
	unsigned int clamp_hi;
	unsigned int available;
	unsigned int required;

	/* Only heterogeneous systems can benefit from this check */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return true;

	/* Effective uclamp min/max values of the task. */
	clamp_lo = uclamp_eff_value(p, UCLAMP_MIN);
	clamp_hi = uclamp_eff_value(p, UCLAMP_MAX);

	/* Original capacity of the CPU, as reported by capacity_orig_of(). */
	available = capacity_orig_of(cpu);

	/* The requirement is uclamp_min, capped by uclamp_max. */
	required = min(clamp_lo, clamp_hi);

	/* Does the CPU still fit @p once its clamps are taken into account? */
	return available >= required;
}
#else
/* Without CONFIG_UCLAMP_TASK there is nothing to check: every CPU fits. */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif
接下来分析find_lowest_rq原理:
/*
 * find_lowest_rq - choose a CPU for @task among those reported by cpupri.
 * @task: the RT task looking for a CPU
 *
 * Builds a candidate mask through cpupri_find()/cpupri_find_fitness() and
 * then narrows it down, in order of preference:
 *   1. the CPU returned by task_cpu(@task), if it is in the mask;
 *   2. walking the sched domains of that CPU that have SD_WAKE_AFFINE set:
 *      the CPU executing this function if it is in both the mask and the
 *      domain span, otherwise the first CPU in (mask & span);
 *   3. the CPU executing this function, if it is in the mask;
 *   4. any CPU from the mask.
 *
 * Returns the selected CPU number, or -1 when no target is available.
 */
static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	int ret;	/* cpupri lookup result; zero means no candidate CPUs */

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	/* CPU affinity pins the task: it cannot run anywhere else. */
	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	/*
	 * Per the article: the cpupri lookup maps task->prio to a CPU
	 * priority, scans the lower CPU priority levels and stores the first
	 * non-empty cpumask in lowest_mask (see the cpupri chapter for the
	 * details; the lookup itself is not visible here).
	 *
	 * If we're on asym system ensure we consider the different capacities
	 * of the CPUs when searching for the lowest_mask.
	 */
	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
		/* Take CPU capacity into account. */
		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
					  task, lowest_mask,
					  rt_task_fits_capacity);
	} else {
		/* Capacity is irrelevant: all CPUs are identical. */
		ret = cpupri_find(&task_rq(task)->rd->cpupri,
				  task, lowest_mask);
	}

	if (!ret)
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system. Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location. If that CPU is
	 * in lowest_mask it is used directly as the target.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	/*
	 * The idea of the loop below:
	 *  1. if the CPU running this code is in lowest_mask and inside the
	 *     current sd->span, use it as the target;
	 *  2. otherwise take the first CPU in (lowest_mask & sd->span).
	 */
	for_each_domain(cpu, sd) {
		/*
		 * Only domains that allow wake-affine placement are
		 * considered; the article notes this flag is usually set.
		 */
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}
整体逻辑比较简单。其中负责构建 lowest_mask 的核心函数 cpupri_find / cpupri_find_fitness 已经在前面的章节分析过: [scheduler]二. CPU priority概念以及原理
总之,整个流程是层层推进、逐步筛选,最终选出符合要求的 CPU id。