/* The corresponding governor is started from within the code below. */
static void cpufreq_init_policy(struct cpufreq_policy *policy)
{
struct cpufreq_policy new_policy;
int ret = 0;
memcpy(&new_policy, policy, sizeof(*policy));
/* assure that the starting sequence is run in cpufreq_set_policy */
policy->governor = NULL;
/* set default policy */
ret = cpufreq_set_policy(policy, &new_policy);
policy->user_policy.policy = policy->policy;
policy->user_policy.governor = policy->governor;
if (ret) {
pr_debug("setting policy failed\n");
if (cpufreq_driver->exit)
cpufreq_driver->exit(policy);
}
}
/*
* cpufreq_set_policy - validate a requested policy and apply it.
* policy : current policy.
* new_policy: policy to be set.
*
* The requested range is verified by the driver, passed through the policy
* notifier chain (ADJUST, INCOMPATIBLE), verified again and then announced
* (NOTIFY) before policy->min/max are updated. Afterwards either the driver's
* setpolicy hook is called, or the governor is switched if it differs and is
* then sent CPUFREQ_GOV_LIMITS.
*
* Returns 0 on success; -EINVAL when the requested range does not overlap the
* user limits or the new governor could not be started; otherwise the value
* returned by verify(), setpolicy() or the LIMITS event.
*
* NOTE(review): policy->rwsem is released and re-taken around POLICY_EXIT,
* which implies the caller holds it for writing - confirm against callers.
*/
static int cpufreq_set_policy(struct cpufreq_policy *policy,
struct cpufreq_policy *new_policy)
{
int ret = 0, failed = 1;
pr_debug("setting new policy for CPU %u: %u - %u kHz\n", new_policy->cpu,
new_policy->min, new_policy->max);
/* carry the current cpuinfo over into the requested policy */
memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
/* reject a range that does not overlap the user-requested limits */
if (new_policy->min > policy->user_policy.max
|| new_policy->max < policy->user_policy.min) {
ret = -EINVAL;
goto error_out;
}
/* verify the cpu speed can be set within this limit */
// the driver checks whether the min/max frequencies in new_policy are acceptable
ret = cpufreq_driver->verify(new_policy);
if (ret)
goto error_out;
/* adjust if necessary - all reasons */
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_ADJUST, new_policy);
/* adjust if necessary - hardware incompatibility*/
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_INCOMPATIBLE, new_policy);
/*
* verify the cpu speed can be set within this limit, which might be
* different to the first one
*/
ret = cpufreq_driver->verify(new_policy);
if (ret)
goto error_out;
/* notification of the new policy */
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
policy->min = new_policy->min;
policy->max = new_policy->max;
trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu);
pr_debug("new min and max freqs are %u - %u kHz\n",
policy->min, policy->max);
if (cpufreq_driver->setpolicy) {// original annotation: the driver in question does not implement setpolicy, so the else branch runs (platform-specific - confirm)
policy->policy = new_policy->policy;
pr_debug("setting range\n");
ret = cpufreq_driver->setpolicy(new_policy);
} else {
if (new_policy->governor != policy->governor) {// on the cpufreq_init_policy() path policy->governor was set to NULL beforehand, so the two differ
/* save old, working values */
struct cpufreq_governor *old_gov = policy->governor;
pr_debug("governor switch\n");
/* end old governor */
if (policy->governor) {
__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
up_write(&policy->rwsem);
__cpufreq_governor(policy,
CPUFREQ_GOV_POLICY_EXIT);
down_write(&policy->rwsem);
}
/* start new governor */
// send POLICY_INIT then START to the new governor; LIMITS follows further down
policy->governor = new_policy->governor;
if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) {
if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) {
failed = 0;
} else {
/* START failed: undo the successful POLICY_INIT */
up_write(&policy->rwsem);
__cpufreq_governor(policy,
CPUFREQ_GOV_POLICY_EXIT);
down_write(&policy->rwsem);
}
}
if (failed) {
/* new governor failed, so re-start old one */
pr_debug("starting governor %s failed\n",
policy->governor->name);
if (old_gov) {
policy->governor = old_gov;
__cpufreq_governor(policy,
CPUFREQ_GOV_POLICY_INIT);
__cpufreq_governor(policy,
CPUFREQ_GOV_START);
}
ret = -EINVAL;
goto error_out;
}
/* might be a policy change, too, so fall through */
}
pr_debug("governor: change or update limits\n");
ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
}
error_out:
return ret;
}
/*
* __cpufreq_governor - deliver one governor event to policy->governor.
* policy: policy whose governor receives the event.
* event: one of the CPUFREQ_GOV_* events.
*
* If the governor declares a max_transition_latency that the hardware
* exceeds, policy->governor is replaced by the performance governor when it
* is built in; otherwise -EINVAL is returned.
*
* A module reference on the governor is taken for POLICY_INIT and dropped
* again if POLICY_INIT fails or when POLICY_EXIT succeeds.
*
* Returns -EINVAL (latency check or try_module_get failure), -EBUSY when the
* event does not match the current governor_enabled state, otherwise the
* governor callback's return value.
*/
static int __cpufreq_governor(struct cpufreq_policy *policy,
unsigned int event)
{
int ret;
/* Only must be defined when default governor is known to have latency
restrictions, like e.g. conservative or ondemand.
That this is the case is already ensured in Kconfig
*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
struct cpufreq_governor *gov = NULL;
#endif
/* hardware transition latency too high for this governor: fall back or fail */
if (policy->governor->max_transition_latency &&
policy->cpuinfo.transition_latency >
policy->governor->max_transition_latency) {
if (!gov)
return -EINVAL;
else {
printk(KERN_WARNING "%s governor failed, too long"
" transition latency of HW, fallback"
" to %s governor\n",
policy->governor->name,
gov->name);
policy->governor = gov;
}
}
/* pin the governor module for the lifetime of the policy's use of it */
if (event == CPUFREQ_GOV_POLICY_INIT)
if (!try_module_get(policy->governor->owner))
return -EINVAL;
pr_debug("__cpufreq_governor for CPU %u, event %u\n",
policy->cpu, event);
mutex_lock(&cpufreq_governor_lock);
/* START on a running governor, or LIMITS/STOP on a stopped one, is refused */
if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
|| (!policy->governor_enabled
&& (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
mutex_unlock(&cpufreq_governor_lock);
return -EBUSY;
}
/* update the enabled flag optimistically; it is rolled back below on failure */
if (event == CPUFREQ_GOV_STOP)
policy->governor_enabled = false;
else if (event == CPUFREQ_GOV_START)
policy->governor_enabled = true;
mutex_unlock(&cpufreq_governor_lock);
// invoke the governor's main callback
// original annotation (platform-specific, not verifiable from this code): at boot the
// performance governor is used first so the CPU runs at its highest frequency for fast
// start-up, and the system later switches to interactive
ret = policy->governor->governor(policy, event);
if (!ret) {
/* track how many policies currently have this governor initialised */
if (event == CPUFREQ_GOV_POLICY_INIT)
policy->governor->initialized++;
else if (event == CPUFREQ_GOV_POLICY_EXIT)
policy->governor->initialized--;
} else {
/* Restore original values */
mutex_lock(&cpufreq_governor_lock);
if (event == CPUFREQ_GOV_STOP)
policy->governor_enabled = true;
else if (event == CPUFREQ_GOV_START)
policy->governor_enabled = false;
mutex_unlock(&cpufreq_governor_lock);
}
/* drop the module reference on failed POLICY_INIT or successful POLICY_EXIT */
if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
module_put(policy->governor->owner);
return ret;
}
// Per-CPU state of the interactive governor. The governor samples CPU workload from a
// timer and derives the operating frequency from it; the central pieces are the two
// timers here and struct cpufreq_interactive_tunables.
struct cpufreq_interactive_cpuinfo {
// the two timers that drive the governor
struct timer_list cpu_timer;
struct timer_list cpu_slack_timer;
spinlock_t load_lock; /* protects the next 4 fields */
u64 time_in_idle; // idle-time reading stored by the last update_load() call
u64 time_in_idle_timestamp;// timestamp at which time_in_idle was last updated
u64 cputime_speedadj; // running sum of (active time * current frequency), accumulated in update_load()
u64 cputime_speedadj_timestamp; // original annotation: equals time_in_idle_timestamp as captured each time the timer is started (assigned outside this excerpt)
u64 last_evaluated_jiffy;
struct cpufreq_policy *policy;
struct cpufreq_frequency_table *freq_table; // frequency table used for target-frequency lookups
spinlock_t target_freq_lock; /*protects target freq */
unsigned int target_freq;// target frequency chosen for this CPU
unsigned int floor_freq;
unsigned int max_freq;
unsigned int min_freq;
u64 floor_validate_time;
u64 local_fvtime; /* per-cpu floor_validate_time */
u64 hispeed_validate_time; /* cluster hispeed_validate_time */
u64 local_hvtime; /* per-cpu hispeed_validate_time */
u64 max_freq_hyst_start_time;
struct rw_semaphore enable_sem;
bool reject_notification;
int governor_enabled;// nonzero while the governor is running on this CPU (set on GOV_START, cleared on GOV_STOP)
struct cpufreq_interactive_tunables *cached_tunables;
int first_cpu;
};
/* per-CPU governor state, indexed by CPU id */
static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
/* realtime thread handles frequency scaling */
static struct task_struct *speedchange_task;
/* CPUs with a pending frequency change, consumed by speedchange_task */
static cpumask_t speedchange_cpumask;
/* protects speedchange_cpumask */
static spinlock_t speedchange_cpumask_lock;
/* serialises the per-CPU loops of GOV_START and GOV_STOP */
static struct mutex gov_lock;
/* NOTE(review): the next three are presumably used by the scheduler-input helpers, which are outside this excerpt */
static int set_window_count;
static int migration_register_count;
static struct mutex sched_lock;
/* Target load. Lower values result in higher CPU speeds. */
#define DEFAULT_TARGET_LOAD 90
static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
/* default sampling period: 20 ms expressed in microseconds */
#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
static unsigned int default_above_hispeed_delay[] = {// 20000 us
DEFAULT_ABOVE_HISPEED_DELAY };
/*
* Tunable parameters of the interactive governor. One instance is attached to
* a policy through policy->governor_data, or shared by all policies through
* common_tunables when governors are not per-policy.
*/
struct cpufreq_interactive_tunables {
int usage_count;// reference count of this tunables object
/* Hi speed to bump to from lo speed when load burst (default max) */
// Frequency jumped to once the load reaches go_hispeed_load; GOV_START sets it to
// policy->max when it is still 0. It is an intermediate step: raising the frequency
// further is subject to the above_hispeed_delay check.
unsigned int hispeed_freq;
/* Go to hi speed when CPU load at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 99
// Load threshold in percent (default 99): at or above it the timer path selects at
// least hispeed_freq; below it the frequency comes from choose_freq() alone.
unsigned long go_hispeed_load;
/* Target load. Lower values result in higher CPU speeds. */
spinlock_t target_loads_lock;
// Array describing the desired CPU load; the governor picks a frequency that brings
// the load close to the target, so a smaller target yields a higher frequency.
// NOTE(review): the array layout is defined by freq_to_targetload(), not shown here.
unsigned int *target_loads;
int ntarget_loads;// number of entries in target_loads
/*
* The minimum amount of time to spend at a frequency before we can ramp
* down.
*/
#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)// 80 ms expressed in microseconds
unsigned long min_sample_time;// minimum hold time before ramping down, in microseconds (default presumably DEFAULT_MIN_SAMPLE_TIME = 80000 us, assigned outside this excerpt)
/*
* The sample rate of the timer used to increase frequency
*/
// While the CPU is not idle, timer_rate is the period at which the timer samples
// the CPU workload.
unsigned long timer_rate;
/*
* Wait this long before raising speed above hispeed, by default a
* single timer interval.
*/
spinlock_t above_hispeed_delay_lock;
// When the frequency is already at or above hispeed_freq and the load keeps rising
// (continued high load), wait for above_hispeed_delay before raising it further.
unsigned int *above_hispeed_delay;// array: the delay may differ per frequency range
// number of entries in above_hispeed_delay
int nabove_hispeed_delay;
/* Non-zero means indefinite speed boost active */
/*
echo 1 > /sys/devices/system/cpu/cpufreq/interactive/boost
immediately raises all CPUs to at least hispeed_freq. Writing 0 lets the
frequency drop again according to the workload. Default is 0.
boostpulse: each trigger immediately raises all CPUs to hispeed_freq and keeps
them there for at least boostpulse_duration; only after that may the frequency
be lowered according to the current workload.
*/
int boost_val;
/* Duration of a boost pulse in usecs */
// how long each boost pulse lasts
int boostpulse_duration_val;
/* End time of boost pulse in ktime converted to usecs */
// time at which the current boost pulse ends
u64 boostpulse_endtime;
bool boosted;// boost currently active: recomputed by the timer as boost_val || now < boostpulse_endtime
/*
* Max additional time to wait in idle, beyond timer_rate, at speeds
* above minimum before wakeup to reduce speed, or -1 if unnecessary.
*/
/*
While the CPU is idle the sampling timer is deferrable, so the CPU is not woken
from idle just to service it. timer_slack is the maximum extra time the timer
may be deferred; the default is 80000 us (4 * DEFAULT_TIMER_RATE).
Original annotation: the default value is used here.
*/
#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
int timer_slack_val;
bool io_is_busy;
/* scheduler input related flags */
bool use_sched_load;
bool use_migration_notif;
/*
* Whether to align timer windows across all CPUs. When
* use_sched_load is true, this flag is ignored and windows
* will always be aligned.
*/
bool align_windows;
/*
* Stay at max freq for at least max_freq_hysteresis before dropping
* frequency.
*/
unsigned int max_freq_hysteresis;
};
/*
 * cpufreq_interactive_init - set up the interactive governor.
 *
 * Initialises the per-CPU timers, locks and semaphores, creates and starts
 * the SCHED_FIFO "cfinteractive" kthread that performs the frequency
 * changes, and registers the governor with the cpufreq core.
 *
 * Returns 0 on success, the kthread_create() error if the thread cannot be
 * created, or the cpufreq_register_governor() error. In the latter case the
 * thread is stopped and the extra task reference dropped so nothing leaks.
 */
static int __init cpufreq_interactive_init(void)
{
	unsigned int i;
	struct cpufreq_interactive_cpuinfo *pcpu;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	int ret;

	/* Initialize per-cpu timers */
	for_each_possible_cpu(i) {
		pcpu = &per_cpu(cpuinfo, i);

		/* sampling timer: deferrable, callback receives the CPU id */
		init_timer_deferrable(&pcpu->cpu_timer);
		pcpu->cpu_timer.function = cpufreq_interactive_timer;
		pcpu->cpu_timer.data = i;

		/* slack timer: regular (non-deferrable) timer with a no-op callback */
		init_timer(&pcpu->cpu_slack_timer);
		pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;

		spin_lock_init(&pcpu->load_lock);
		spin_lock_init(&pcpu->target_freq_lock);
		init_rwsem(&pcpu->enable_sem);
	}

	spin_lock_init(&speedchange_cpumask_lock);
	mutex_init(&gov_lock);
	mutex_init(&sched_lock);

	speedchange_task =
		kthread_create(cpufreq_interactive_speedchange_task, NULL,
			       "cfinteractive");
	if (IS_ERR(speedchange_task))
		return PTR_ERR(speedchange_task);

	/* was "¶m": an entity-mangled "&param" that does not compile */
	sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
	get_task_struct(speedchange_task);

	/* NB: wake up so the thread does not look hung to the freezer */
	wake_up_process(speedchange_task);

	/* register the governor; undo the thread setup if that fails */
	ret = cpufreq_register_governor(&cpufreq_gov_interactive);
	if (ret) {
		kthread_stop(speedchange_task);
		put_task_struct(speedchange_task);
	}
	return ret;
}
// cpufreq_governor_interactive - main entry point of the interactive governor,
// invoked by the cpufreq core (see __cpufreq_governor()) for every governor event.
//
// policy: policy the event applies to.
// event: one of CPUFREQ_GOV_POLICY_INIT / POLICY_EXIT / START / STOP / LIMITS.
//
// Returns 0, except for POLICY_INIT which may return the alloc_tunable() error or
// the sysfs_create_group() error.
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
unsigned int event)
{
int rc;
unsigned int j;
struct cpufreq_interactive_cpuinfo *pcpu;
struct cpufreq_frequency_table *freq_table;
struct cpufreq_interactive_tunables *tunables;
unsigned long flags;
int first_cpu;
// With governor-per-policy each policy carries its own tunables in governor_data;
// otherwise a single shared set (common_tunables) is used.
// Original annotation: Qualcomm enables per-policy governors via device tree (not verifiable here).
if (have_governor_per_policy())
tunables = policy->governor_data;
else
tunables = common_tunables;
// every event other than POLICY_INIT requires existing tunables
BUG_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT));
switch (event) {
case CPUFREQ_GOV_POLICY_INIT:
if (have_governor_per_policy()) {
WARN_ON(tunables);
} else if (tunables) {// shared tunables already exist: take another reference and finish
tunables->usage_count++;
policy->governor_data = tunables;
return 0;
}
// first CPU among the policy's related CPUs
first_cpu = cpumask_first(policy->related_cpus);
for_each_cpu(j, policy->related_cpus)// record first_cpu in every related CPU's per-CPU state
per_cpu(cpuinfo, j).first_cpu = first_cpu;
// try to reuse previously cached tunables; restore_tunables() is defined elsewhere
// (original annotation: it returns the cached_tunables of CPU 0 or of the policy's first CPU)
tunables = restore_tunables(policy);
if (!tunables) {
tunables = alloc_tunable(policy);// nothing cached: allocate a fresh tunables object
if (IS_ERR(tunables))
return PTR_ERR(tunables);
}
tunables->usage_count = 1;
policy->governor_data = tunables;
if (!have_governor_per_policy()) {
WARN_ON(cpufreq_get_global_kobject());
common_tunables = tunables; // publish as the shared tunables
}
// create the governor's sysfs attribute group under the kobject returned by
// get_governor_parent_kobj()
rc = sysfs_create_group(get_governor_parent_kobj(policy),
get_sysfs_attr());
if (rc) {
// sysfs setup failed: undo everything done above
kfree(tunables);
policy->governor_data = NULL;
if (!have_governor_per_policy()) {
common_tunables = NULL;
cpufreq_put_global_kobject();
}
return rc;
}
// first initialisation of this governor: register the notifiers
if (!policy->governor->initialized) {
idle_notifier_register(&cpufreq_interactive_idle_nb);// idle notifier
cpufreq_register_notifier(&cpufreq_notifier_block, // frequency-transition notifier
CPUFREQ_TRANSITION_NOTIFIER);
}
if (tunables->use_sched_load)
cpufreq_interactive_enable_sched_input(tunables);
break;
case CPUFREQ_GOV_POLICY_EXIT:
// last reference dropped: tear down notifiers and sysfs
if (!--tunables->usage_count) {
if (policy->governor->initialized == 1) {// last initialised policy: unregister the notifiers
cpufreq_unregister_notifier(&cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
idle_notifier_unregister(&cpufreq_interactive_idle_nb);
}
// remove the governor's sysfs attribute group
sysfs_remove_group(get_governor_parent_kobj(policy),
get_sysfs_attr());
if (!have_governor_per_policy())
cpufreq_put_global_kobject();
common_tunables = NULL;
}
policy->governor_data = NULL;
// NOTE(review): the tunables object is not freed here - presumably it stays cached for
// restore_tunables(); confirm against the helpers outside this excerpt
if (tunables->use_sched_load)
cpufreq_interactive_disable_sched_input(tunables);
break;
case CPUFREQ_GOV_START:// the key step is starting the workload sampling timer
mutex_lock(&gov_lock);
// fetch the CPU's frequency table
freq_table = cpufreq_frequency_get_table(policy->cpu);
// hispeed_freq defaults to the policy maximum when unset
if (!tunables->hispeed_freq)
tunables->hispeed_freq = policy->max;
// iterate over the CPUs in policy->cpus
for_each_cpu(j, policy->cpus) {
pcpu = &per_cpu(cpuinfo, j);
pcpu->policy = policy;
pcpu->target_freq = policy->cur;
pcpu->freq_table = freq_table;
pcpu->floor_freq = pcpu->target_freq;
pcpu->floor_validate_time =
ktime_to_us(ktime_get());
pcpu->local_fvtime = pcpu->floor_validate_time;
pcpu->hispeed_validate_time =
pcpu->floor_validate_time;
pcpu->local_hvtime = pcpu->floor_validate_time;
pcpu->max_freq = policy->max;
pcpu->min_freq = policy->min;
pcpu->reject_notification = true;
down_write(&pcpu->enable_sem);
del_timer_sync(&pcpu->cpu_timer);// workload sampling timer
del_timer_sync(&pcpu->cpu_slack_timer);// slack timer
pcpu->last_evaluated_jiffy = get_jiffies_64();
cpufreq_interactive_timer_start(tunables, j);
pcpu->governor_enabled = 1;
up_write(&pcpu->enable_sem);
pcpu->reject_notification = false;
}
mutex_unlock(&gov_lock);
break;
case CPUFREQ_GOV_STOP:// disable the governor on each CPU and stop its timers
mutex_lock(&gov_lock);
for_each_cpu(j, policy->cpus) {
pcpu = &per_cpu(cpuinfo, j);
pcpu->reject_notification = true;
down_write(&pcpu->enable_sem);
pcpu->governor_enabled = 0;
pcpu->target_freq = 0;
del_timer_sync(&pcpu->cpu_timer);
del_timer_sync(&pcpu->cpu_slack_timer);
up_write(&pcpu->enable_sem);
pcpu->reject_notification = false;
}
mutex_unlock(&gov_lock);
break;
case CPUFREQ_GOV_LIMITS:// re-apply the current frequency under the new limits, then adjust targets and timers
__cpufreq_driver_target(policy,
policy->cur, CPUFREQ_RELATION_L);
for_each_cpu(j, policy->cpus) {
pcpu = &per_cpu(cpuinfo, j);
down_read(&pcpu->enable_sem);
if (pcpu->governor_enabled == 0) {
up_read(&pcpu->enable_sem);
continue;
}
// clamp the stored target frequency into the new [min, max] range
spin_lock_irqsave(&pcpu->target_freq_lock, flags);
if (policy->max < pcpu->target_freq)
pcpu->target_freq = policy->max;
else if (policy->min > pcpu->target_freq)
pcpu->target_freq = policy->min;
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
// the minimum was lowered: reschedule the timer
if (policy->min < pcpu->min_freq)
cpufreq_interactive_timer_resched(j, true);
pcpu->min_freq = policy->min;
up_read(&pcpu->enable_sem);
/* Reschedule timer only if policy->max is raised.
* Delete the timers, else the timer callback may
* return without re-arm the timer when failed
* acquire the semaphore. This race may cause timer
* stopped unexpectedly.
*/
if (policy->max > pcpu->max_freq) {
pcpu->reject_notification = true;
down_write(&pcpu->enable_sem);
del_timer_sync(&pcpu->cpu_timer);
del_timer_sync(&pcpu->cpu_slack_timer);
cpufreq_interactive_timer_resched(j, false);
up_write(&pcpu->enable_sem);
pcpu->reject_notification = false;
}
pcpu->max_freq = policy->max;
}
break;
}
return 0;
}
/*
Sampling path of the governor: computes the CPU's current workload, decides the
target frequency and, if it changed, hands the CPU to the speed-change thread.
The workload is essentially the CPU's active time as a percentage of the
sampling interval.
data: CPU id.
is_notif: when false, the max_freq_hysteresis and floor-frequency hold-offs below
are applied (original annotation: false on the timer path - the callers are
outside this excerpt).
*/
static void __cpufreq_interactive_timer(unsigned long data, bool is_notif)
{
u64 now;
unsigned int delta_time;
u64 cputime_speedadj;
int cpu_load;
struct cpufreq_interactive_cpuinfo *pcpu =
&per_cpu(cpuinfo, data);
// tunables of the policy this CPU belongs to
struct cpufreq_interactive_tunables *tunables =
pcpu->policy->governor_data;
unsigned int new_freq;
unsigned int loadadjfreq;
unsigned int index;
unsigned long flags;
struct cpufreq_govinfo int_info;
u64 max_fvtime;
// a writer (start/stop/limits) holds the semaphore: skip this sample without re-arming
if (!down_read_trylock(&pcpu->enable_sem))
return;
if (!pcpu->governor_enabled)
goto exit;
spin_lock_irqsave(&pcpu->load_lock, flags);
pcpu->last_evaluated_jiffy = get_jiffies_64();// record the current jiffies count
now = update_load(data);// fold the latest active time into cputime_speedadj; returns the current timestamp
if (tunables->use_sched_load) {// set: use the busy time reported by the scheduler
/*
* Unlock early to avoid deadlock.
*
* load_change_callback() for thread migration already
* holds rq lock. Then it locks load_lock to avoid racing
* with cpufreq_interactive_timer_resched/start().
* sched_get_busy() will also acquire rq lock. Thus we
* can't hold load_lock when calling sched_get_busy().
*
* load_lock used in this function protects time
* and load information. These stats are not used when
* scheduler input is available. Thus unlocking load_lock
* early is perfectly OK.
*/
spin_unlock_irqrestore(&pcpu->load_lock, flags);
// total work estimate: scheduler busy time scaled by the maximum frequency
cputime_speedadj = (u64)sched_get_busy(data) *
pcpu->policy->cpuinfo.max_freq;
do_div(cputime_speedadj, tunables->timer_rate);// normalise to one timer period
// i.e. cputime_speedadj = cputime_speedadj / tunables->timer_rate
} else {
delta_time = (unsigned int)// time elapsed since cputime_speedadj_timestamp
(now - pcpu->cputime_speedadj_timestamp);
cputime_speedadj = pcpu->cputime_speedadj;// accumulated active time * frequency
spin_unlock_irqrestore(&pcpu->load_lock, flags);
if (WARN_ON_ONCE(!delta_time))
goto rearm;
do_div(cputime_speedadj, delta_time);
}
// scale by 100 so the divisions below yield integer percentages (no floating point in the kernel)
loadadjfreq = (unsigned int)cputime_speedadj * 100;
int_info.cpu = data;
int_info.load = loadadjfreq / pcpu->policy->max;
int_info.sampling_rate_us = tunables->timer_rate;
// publish the load to listeners on cpufreq_govinfo_notifier_list
// (original annotation: consumed by the Qualcomm driver's perf_govinfo_notify - not shown here)
atomic_notifier_call_chain(&cpufreq_govinfo_notifier_list,
CPUFREQ_LOAD_CHANGE, &int_info);
spin_lock_irqsave(&pcpu->target_freq_lock, flags);
// load in percent relative to the current frequency
cpu_load = loadadjfreq / pcpu->policy->cur;
// boost is active when boost_val is set or a boost pulse has not yet expired
tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime;
/*
When cpu_load reaches go_hispeed_load, or boost is active,
new_freq is made at least tunables->hispeed_freq.
*/
if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
if (pcpu->policy->cur < tunables->hispeed_freq &&
cpu_load <= MAX_LOCAL_LOAD) {// currently below hispeed_freq: jump straight to hispeed_freq
new_freq = tunables->hispeed_freq;
} else {// otherwise let choose_freq() decide, but never go below hispeed_freq
new_freq = choose_freq(pcpu, loadadjfreq);
if (new_freq < tunables->hispeed_freq)
new_freq = tunables->hispeed_freq;
}
} else {// load below the threshold and no boost: choose_freq() alone picks the frequency
new_freq = choose_freq(pcpu, loadadjfreq);
}
// Hold-off for raising above hispeed_freq: if less than the above_hispeed_delay for
// the current frequency has passed since hispeed_validate_time, skip this change.
if (cpu_load <= MAX_LOCAL_LOAD &&
pcpu->policy->cur >= tunables->hispeed_freq &&
new_freq > pcpu->policy->cur &&
now - pcpu->hispeed_validate_time <
freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) {// the delay depends on the current frequency
trace_cpufreq_interactive_notyet(
data, cpu_load, pcpu->target_freq,
pcpu->policy->cur, new_freq);
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
goto rearm;
}
pcpu->local_hvtime = now;
// map new_freq onto the frequency table: lowest table frequency at or above new_freq
if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
new_freq, CPUFREQ_RELATION_L,
&index)) {
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
goto rearm;
}
new_freq = pcpu->freq_table[index].frequency;
// max-frequency hysteresis: do not lower the target while still inside the
// max_freq_hysteresis window (timer path only)
if (!is_notif && new_freq < pcpu->target_freq &&
now - pcpu->max_freq_hyst_start_time <
tunables->max_freq_hysteresis) {
trace_cpufreq_interactive_notyet(data, cpu_load,
pcpu->target_freq, pcpu->policy->cur, new_freq);
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
goto rearm;
}
/*
* Do not scale below floor_freq unless we have been at or above the
* floor frequency for the minimum sample time since last validated.
*/
// i.e. when new_freq < floor_freq and less than min_sample_time has passed since the
// most recent floor validation, keep the current frequency
max_fvtime = max(pcpu->floor_validate_time, pcpu->local_fvtime);
if (!is_notif && new_freq < pcpu->floor_freq &&
pcpu->target_freq >= pcpu->policy->cur) {
if (now - max_fvtime < tunables->min_sample_time) {
trace_cpufreq_interactive_notyet(
data, cpu_load, pcpu->target_freq,
pcpu->policy->cur, new_freq);
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
goto rearm;
}
}
/*
* Update the timestamp for checking whether speed has been held at
* or above the selected frequency for a minimum of min_sample_time,
* if not boosted to hispeed_freq. If boosted to hispeed_freq then we
* allow the speed to drop as soon as the boostpulse duration expires
* (or the indefinite boost is turned off).
*/
// taken when not boosted, or when new_freq is above hispeed_freq
if (!tunables->boosted || new_freq > tunables->hispeed_freq) {
pcpu->floor_freq = new_freq;
if (pcpu->target_freq >= pcpu->policy->cur ||
new_freq >= pcpu->policy->cur)
pcpu->local_fvtime = now;
}
// (re)start the max-frequency hysteresis window whenever the policy maximum is selected
if (new_freq == pcpu->policy->max)
pcpu->max_freq_hyst_start_time = now;
// target unchanged: nothing to hand to the speed-change thread
if (pcpu->target_freq == new_freq) {
trace_cpufreq_interactive_already(
data, cpu_load, pcpu->target_freq,
pcpu->policy->cur, new_freq);
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
goto rearm;
}
trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
pcpu->policy->cur, new_freq);
pcpu->target_freq = new_freq;// store the newly chosen target; until here it held the previous sample's target
spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
spin_lock_irqsave(&speedchange_cpumask_lock, flags);
cpumask_set_cpu(data, &speedchange_cpumask);// mark this CPU as needing a frequency change
spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
wake_up_process(speedchange_task);// wake the speed-change thread, which performs the actual frequency change
rearm:
if (!timer_pending(&pcpu->cpu_timer))// re-arm the sampling timer unless it is already pending
cpufreq_interactive_timer_resched(data, false);
exit:
up_read(&pcpu->enable_sem);
return;
}
static u64 update_load(int cpu)
{
struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
struct cpufreq_interactive_tunables *tunables =
pcpu->policy->governor_data;
u64 now;
u64 now_idle;
unsigned int delta_idle;
unsigned int delta_time;
u64 active_time;
//now_idle:cpu自启动后总的idle时间,now:总开机时间,当前时间戳
now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);//上次计算workload时idle差值
delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);//距离上次计算负载的时间间隔
if (delta_time <= delta_idle)
active_time = 0;
else
active_time = delta_time - delta_idle;//cpu 活跃时间
//一个定时周期的cpu活跃时间跟当前运行频率的乘积,
//每次定时器启动时cputime_speedadj被设置为0,
//此值可以表示定时周期内cpu工作的总输出
pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
pcpu->time_in_idle = now_idle;//idle时间戳
pcpu->time_in_idle_timestamp = now;//time_in_idle_timestamp:每次计算负载的时间戳
return now;
}
/*
choose_freq - pick a frequency for the measured load.
The aim is the lowest table frequency at which the resulting load is at or below
the target load configured for that frequency.
pcpu: per-CPU governor state (supplies the policy and the frequency table).
loadadjfreq: load-adjusted frequency, i.e. the amount of work measured over the
sampling interval (see __cpufreq_interactive_timer()).
Returns the chosen frequency. If a frequency-table lookup fails, the search stops
and the last candidate is returned.
*/
static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu,
unsigned int loadadjfreq)
{
unsigned int freq = pcpu->policy->cur;// start from the current frequency
unsigned int prevfreq, freqmin, freqmax;
unsigned int tl;// target load
int index;
// freqmin: highest frequency found to be too low; freqmax: lowest found to be high enough
freqmin = 0;
freqmax = UINT_MAX;
do {
prevfreq = freq;
// target load configured for the candidate frequency
tl = freq_to_targetload(pcpu->policy->governor_data, freq);
/*
* Find the lowest frequency where the computed load is less
* than or equal to the target load.
*/
// lowest table frequency at or above loadadjfreq / tl
if (cpufreq_frequency_table_target(
pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
CPUFREQ_RELATION_L, &index))
break;
freq = pcpu->freq_table[index].frequency;
if (freq > prevfreq) {// candidate moved up
/* The previous frequency is too low. */
freqmin = prevfreq;
if (freq >= freqmax) {
/*
* Find the highest frequency that is less
* than freqmax.
*/
if (cpufreq_frequency_table_target(
pcpu->policy, pcpu->freq_table,
freqmax - 1, CPUFREQ_RELATION_H,
&index))
break;
freq = pcpu->freq_table[index].frequency;
if (freq == freqmin) {
/*
* The first frequency below freqmax
* has already been found to be too
* low. freqmax is the lowest speed
* we found that is fast enough.
*/
freq = freqmax;
break;
}
}
} else if (freq < prevfreq) {
/* The previous frequency is high enough. */
freqmax = prevfreq;
if (freq <= freqmin) {
/*
* Find the lowest frequency that is higher
* than freqmin.
*/
if (cpufreq_frequency_table_target(
pcpu->policy, pcpu->freq_table,
freqmin + 1, CPUFREQ_RELATION_L,
&index))
break;
freq = pcpu->freq_table[index].frequency;
/*
* If freqmax is the first frequency above
* freqmin then we have already found that
* this speed is fast enough.
*/
if (freq == freqmax)
break;
}
}
/* If same frequency chosen as previous then done. */
} while (freq != prevfreq);
return freq;
}
/*
cpufreq_interactive_speedchange_task - kthread that applies frequency changes.
For every CPU flagged in speedchange_cpumask, the policy's frequency is set to the
highest pcpu->target_freq among all CPUs sharing that policy.
data: unused.
Returns 0 once kthread_should_stop() is observed after a wake-up.
*/
static int cpufreq_interactive_speedchange_task(void *data)
{
unsigned int cpu;
cpumask_t tmp_mask;
unsigned long flags;
struct cpufreq_interactive_cpuinfo *pcpu;
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
spin_lock_irqsave(&speedchange_cpumask_lock, flags);
if (cpumask_empty(&speedchange_cpumask)) {// no CPU needs a change: sleep until woken
spin_unlock_irqrestore(&speedchange_cpumask_lock,
flags);
schedule();
if (kthread_should_stop())
break;
spin_lock_irqsave(&speedchange_cpumask_lock, flags);
}
set_current_state(TASK_RUNNING);
// take a private copy of the pending set so the lock can be dropped
tmp_mask = speedchange_cpumask;
cpumask_clear(&speedchange_cpumask);// clear the shared mask
spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
for_each_cpu(cpu, &tmp_mask) {// iterate over every CPU that needs a frequency change
unsigned int j;
unsigned int max_freq = 0;
struct cpufreq_interactive_cpuinfo *pjcpu;
u64 hvt = ~0ULL, fvt = 0;
pcpu = &per_cpu(cpuinfo, cpu);
// skip CPUs whose governor is being reconfigured or is disabled
if (!down_read_trylock(&pcpu->enable_sem))
continue;
if (!pcpu->governor_enabled) {
up_read(&pcpu->enable_sem);
continue;
}
// across all CPUs sharing the policy: find the highest target frequency, the latest
// local_fvtime, and the local_hvtime belonging to the highest target (earliest on ties)
for_each_cpu(j, pcpu->policy->cpus) {
pjcpu = &per_cpu(cpuinfo, j);
fvt = max(fvt, pjcpu->local_fvtime);
if (pjcpu->target_freq > max_freq) {
max_freq = pjcpu->target_freq;
hvt = pjcpu->local_hvtime;
} else if (pjcpu->target_freq == max_freq) {
hvt = min(hvt, pjcpu->local_hvtime);
}
}
for_each_cpu(j, pcpu->policy->cpus) {// second pass: publish the combined floor-validate time to every CPU of the policy
pjcpu = &per_cpu(cpuinfo, j);
pjcpu->floor_validate_time = fvt;
}
// change the policy's frequency; all CPUs sharing the policy change together
if (max_freq != pcpu->policy->cur) {
// ask the cpufreq driver to switch frequency
__cpufreq_driver_target(pcpu->policy,
max_freq,
CPUFREQ_RELATION_H);
for_each_cpu(j, pcpu->policy->cpus) {// update each CPU's hispeed_validate_time
pjcpu = &per_cpu(cpuinfo, j);
pjcpu->hispeed_validate_time = hvt;
}
}
trace_cpufreq_interactive_setspeed(cpu,
pcpu->target_freq,
pcpu->policy->cur);
up_read(&pcpu->enable_sem);
}
}
return 0;
}