[scheduler] Part 3. How an RT task selects a target CPU to run on

Overview

When a task is forked or woken up, a target CPU must be chosen before the task is enqueued. This choice is made by select_task_rq() in core.c, which then dispatches to the hook of the task's scheduling class. Below we analyze the RT implementation: select_task_rq_rt.
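For context, here is a simplified sketch of that dispatch in core.c, based on kernels of the same era as the rt.c code quoted below (the trailing fallback checks such as select_fallback_rq() are omitted, so treat it as illustrative rather than the exact implementation):

/* Simplified sketch of kernel/sched/core.c::select_task_rq(); the final
 * is_cpu_allowed()/select_fallback_rq() handling is left out. */
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
{
        lockdep_assert_held(&p->pi_lock);

        if (p->nr_cpus_allowed > 1)
                /* For an RT task this calls select_task_rq_rt() */
                cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
        else
                cpu = cpumask_any(p->cpus_ptr);

        return cpu;
}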

select_task_rq_rt

Its source code is as follows:

static int  
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)  
{  
        struct task_struct *curr;  
        struct rq *rq;  
        bool test;  
        /* sd_flag is set to one of the two flags below in
         * try_to_wake_up()/wake_up_new_task(). It determines whether a
         * balance pass is done when choosing a CPU; if neither flag is set,
         * the task simply keeps the CPU it last ran on and no new CPU is
         * selected. */
        /* For anything but wake ups, just return the task_cpu */  
        if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)  
                goto out;  
  
        rq = cpu_rq(cpu);  
  
        rcu_read_lock();  
        curr = READ_ONCE(rq->curr); /* unlocked access */  
  
        /* 
         * If the current task on @p's runqueue is an RT task, then 
         * try to see if we can wake this RT task up on another 
         * runqueue. Otherwise simply start this RT task 
         * on its current runqueue. 
         * 
         * We want to avoid overloading runqueues. If the woken 
         * task is a higher priority, then it will stay on this CPU 
         * and the lower prio task should be moved to another CPU. 
         * Even though this will probably make the lower prio task 
         * lose its cache, we do not want to bounce a higher task 
         * around just because it gave up its CPU, perhaps for a 
         * lock? 
         * 
         * For equal prio tasks, we just let the scheduler sort it out. 
         * 
         * Otherwise, just let it ride on the affined RQ and the 
         * post-schedule router will push the preempted task away 
         * 
         * This test is optimistic, if we get it wrong the load-balancer 
         * will have to sort it out. 
         * 
         * We take into account the capacity of the CPU to ensure it fits the 
         * requirement of the task - which is only important on heterogeneous 
         * systems like big.LITTLE. 
         */  
        test = curr &&  
               unlikely(rt_task(curr)) &&  
               (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);  
        /* Search for a different target CPU if either condition holds:
         * 1. test is true: the task currently running on this rq is an RT
         *    task, and either it is pinned to this CPU (nr_cpus_allowed < 2)
         *    or its priority is higher than or equal to p's
         *    (curr->prio <= p->prio). To let p run sooner, look for a CPU
         *    that is running lower-priority work.
         * 2. !rt_task_fits_capacity(p, cpu): p's (uclamp-adjusted) utilization
         *    is too large for this CPU's capacity, so we also want p to pick
         *    a CPU that both fits it and runs lower-priority work.
         * These are the rules for choosing the target CPU. */
        if (test || !rt_task_fits_capacity(p, cpu)) {  
                /* find_lowest_rq() uses the cpupri mechanism described in the
                 * second article: it prefers CPUs whose current work has the
                 * lowest priority (idle CPUs first, then CFS, then
                 * low-priority RT). */
                int target = find_lowest_rq(p);  
                /* 
                 * Don't bother moving it if the destination CPU is 
                 * not running a lower priority task. 
                 */
                /* Double-check that the target CPU is not running a task with
                 * priority higher than or equal to p's; only then is it worth
                 * moving p there. A numerically lower prio value means a
                 * higher priority. */
                if (target != -1 &&  
                    p->prio < cpu_rq(target)->rt.highest_prio.curr)  
                        cpu = target;  
        }  
        rcu_read_unlock();  
  
out:  
        return cpu;  
}  
/* uclamp is similar in concept to ARM's earlier schedtune feature and will be
 * covered in a separate article. In short, it clamps/boosts the utilization of
 * a task, a task group, or a runqueue. */
#ifdef CONFIG_UCLAMP_TASK  
/* 
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp 
 * settings. 
 * 
 * This check is only important for heterogeneous systems where uclamp_min value 
 * is higher than the capacity of a @cpu. For non-heterogeneous system this 
 * function will always return true. 
 * 
 * The function will return true if the capacity of the @cpu is >= the 
 * uclamp_min and false otherwise. 
 * 
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min 
 * > uclamp_max. 
 */  
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)  
{  
        unsigned int min_cap;  
        unsigned int max_cap;  
        unsigned int cpu_cap;  
        /* sched_asym_cpucapacity is only enabled on asymmetric-capacity
         * (e.g. big.LITTLE) systems, so only they perform this check. */
        /* Only heterogeneous systems can benefit from this check */  
        if (!static_branch_unlikely(&sched_asym_cpucapacity))  
                return true;  
        /* Get p's effective uclamp min/max values */
        min_cap = uclamp_eff_value(p, UCLAMP_MIN);
        max_cap = uclamp_eff_value(p, UCLAMP_MAX);
        /* Get the CPU's original (maximum) capacity */
        cpu_cap = capacity_orig_of(cpu);
        /* Check whether the CPU can still hold p after uclamp has adjusted its utilization */
        return cpu_cap >= min(min_cap, max_cap);  
}  
#else  
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)  
{  
        return true;  
}  
#endif  
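
To make the selection rules above concrete, here is a hypothetical userspace mock; struct mock_task and should_search_other_cpu() are invented for illustration and are not kernel code. Remember that a smaller ->prio value means a higher priority.

#include <stdbool.h>
#include <stdio.h>

struct mock_task {
        int prio;               /* RT priority value: lower is more urgent */
        int nr_cpus_allowed;
        bool is_rt;
};

static bool should_search_other_cpu(const struct mock_task *curr,
                                    const struct mock_task *p,
                                    bool fits_capacity)
{
        /* Mirrors: test = curr && rt_task(curr) &&
         *          (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio) */
        bool test = curr && curr->is_rt &&
                    (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);

        /* Look for another CPU if curr blocks p, or if p does not fit here */
        return test || !fits_capacity;
}

int main(void)
{
        struct mock_task curr = { .prio = 10, .nr_cpus_allowed = 8, .is_rt = true };
        struct mock_task p    = { .prio = 20, .nr_cpus_allowed = 8, .is_rt = true };

        /* curr (prio 10) outranks p (prio 20), so p should look elsewhere: prints 1 */
        printf("search other CPU: %d\n",
               should_search_other_cpu(&curr, &p, true));
        return 0;
}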

find_lowest_rq

Next, let's analyze how find_lowest_rq works:

static int find_lowest_rq(struct task_struct *task)  
{  
        struct sched_domain *sd;  
        struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);  
        int this_cpu = smp_processor_id();  
        int cpu      = task_cpu(task);
        int ret;
  
        /* Make sure the mask is initialized first */  
        if (unlikely(!lowest_mask))  
                return -1;  
        /* CPU affinity pins the task to a single CPU; it cannot run anywhere else. */
        if (task->nr_cpus_allowed == 1)  
                return -1; /* No other targets possible */  
        /* Build lowest_mask: walk the cpupri levels from the lowest priority
         * upward and copy the first non-empty set of CPUs running work of
         * lower priority than task->prio into lowest_mask. */
        /*
         * If we're on asym system ensure we consider the different capacities
         * of the CPUs when searching for the lowest_mask.
         */
        if (static_branch_unlikely(&sched_asym_cpucapacity)) {
                /* Take per-CPU capacity into account */
                ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
                                          task, lowest_mask,
                                          rt_task_fits_capacity);
        } else {
                /* Capacity can be ignored: all CPUs are identical */
                ret = cpupri_find(&task_rq(task)->rd->cpupri,
                                  task, lowest_mask);
        }

        if (!ret)
                return -1; /* No targets found */
 
  
        /* 
         * At this point we have built a mask of CPUs representing the 
         * lowest priority tasks in the system.  Now we want to elect 
         * the best one based on our affinity and topology. 
         * 
         * We prioritize the last CPU that the task executed on since 
         * it is most likely cache-hot in that location. 
         */
        /* The CPU the task last ran on is in lowest_mask: use it directly as
         * the target for cache hotness. */
        if (cpumask_test_cpu(cpu, lowest_mask))  
                return cpu;  
  
        /* 
         * Otherwise, we consult the sched_domains span maps to figure 
         * out which CPU is logically closest to our hot cache data. 
         */  
        if (!cpumask_test_cpu(this_cpu, lowest_mask))  
                this_cpu = -1; /* Skip this_cpu opt if not among lowest */  
  
        rcu_read_lock();  
        /* The idea below is straightforward:
         * 1. If this_cpu (the CPU doing the wakeup) is in lowest_mask and
         *    within the current sched domain's span, use it as the target.
         * 2. Otherwise, pick the first CPU in
         *    lowest_mask & sched_domain_span(sd). */
        for_each_domain(cpu, sd) {
                /* SD_WAKE_AFFINE: place the woken task close to the waker;
                 * most sched domains have this flag set. */
                if (sd->flags & SD_WAKE_AFFINE) {  
                        int best_cpu;  
  
                        /* 
                         * "this_cpu" is cheaper to preempt than a 
                         * remote processor. 
                         */  
                        if (this_cpu != -1 &&  
                            cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {  
                                rcu_read_unlock();  
                                return this_cpu;  
                        }  
  
                        best_cpu = cpumask_first_and(lowest_mask,  
                                                     sched_domain_span(sd));  
                        if (best_cpu < nr_cpu_ids) {  
                                rcu_read_unlock();  
                                return best_cpu;  
                        }  
                }  
        }  
        rcu_read_unlock();  
  
        /* 
         * And finally, if there were no matches within the domains 
         * just give the caller *something* to work with from the compatible 
         * locations. 
         */  
        if (this_cpu != -1)  
                return this_cpu;  
  
        cpu = cpumask_any(lowest_mask);  
        if (cpu < nr_cpu_ids)  
                return cpu;  
  
        return -1;  
}  
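
To visualize the cpumask_first_and(lowest_mask, sched_domain_span(sd)) step above, here is a hypothetical userspace sketch in which plain unsigned long bitmaps stand in for struct cpumask; first_cpu_in_both() is invented for illustration only.

#include <stdio.h>

static int first_cpu_in_both(unsigned long lowest_mask, unsigned long sd_span,
                             int nr_cpu_ids)
{
        unsigned long both = lowest_mask & sd_span;
        int cpu;

        for (cpu = 0; cpu < nr_cpu_ids; cpu++)
                if (both & (1UL << cpu))
                        return cpu;     /* analogous to cpumask_first_and() */

        return nr_cpu_ids;              /* no candidate in this domain */
}

int main(void)
{
        /* lowest_mask = {2, 3, 6}, current domain span = {4, 5, 6, 7} */
        unsigned long lowest_mask = (1UL << 2) | (1UL << 3) | (1UL << 6);
        unsigned long sd_span     = (1UL << 4) | (1UL << 5) | (1UL << 6) | (1UL << 7);

        /* Prints "best_cpu = 6": the only CPU present in both masks */
        printf("best_cpu = %d\n", first_cpu_in_both(lowest_mask, sd_span, 8));
        return 0;
}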

This function is fairly simple; its core helper, cpupri_find_fitness, was already analyzed in: [scheduler] Part 2. CPU priority: concept and principles.
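As a reminder of the shape of that scan, here is a conceptual sketch only (not the actual kernel implementation; see kernel/sched/cpupri.c and the article above for the real code, which also has a fallback path that retries without the fitness filter). It shows how cpupri_find_fitness() walks the priority vectors from the least-important level upward and filters candidates with the fitness callback:

static int cpupri_scan_sketch(struct cpupri *cp, struct task_struct *p,
                              struct cpumask *lowest_mask,
                              bool (*fitness_fn)(struct task_struct *p, int cpu))
{
        int idx, cpu;

        /* Scan from the idle/CFS levels up to just below p's own level */
        for (idx = 0; idx < convert_prio(p->prio); idx++) {
                /* Collect CPUs whose current work sits at priority level idx */
                if (!__cpupri_find(cp, p, lowest_mask, idx))
                        continue;

                /* Drop CPUs whose capacity cannot fit p's clamped utilization */
                if (fitness_fn) {
                        for_each_cpu(cpu, lowest_mask)
                                if (!fitness_fn(p, cpu))
                                        cpumask_clear_cpu(cpu, lowest_mask);
                }

                if (!cpumask_empty(lowest_mask))
                        return 1;       /* candidates found in lowest_mask */
        }

        return 0;                       /* caller treats this as "no target" */
}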

In short, the search narrows down layer by layer until a CPU id that meets the requirements is selected.
