prio 动态优先级 0~139 调度器最终使用的优先级值
/*
 * Load weight for every nice level.  Slot i holds the weight of nice
 * (i - 20): slot 0 is nice -20 (largest weight), slot 20 is nice 0
 * (weight 1024) and slot 39 is nice +19 (smallest weight).  Adjacent
 * slots differ by a factor of roughly 1.25.
 */
static const int prio_to_weight[40] = {
    /* nice -20 .. -11 */
    88761, 71755, 56483, 46273, 36291, 29154, 23254, 18705, 14949, 11916,
    /* nice -10 .. -1 */
    9548, 7620, 6100, 4904, 3906, 3121, 2501, 1991, 1586, 1277,
    /* nice 0 .. 9 */
    1024, 820, 655, 526, 423, 335, 272, 215, 172, 137,
    /* nice 10 .. 19 */
    110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
};
/*
 * Precomputed inverse weights, indexed like prio_to_weight: slot i is
 * 2^32 / prio_to_weight[i] for nice (i - 20).  Multiplying by an entry
 * and shifting right by 32 stands in for a division by the weight.
 */
static const u32 prio_to_wmult[40] = {
    /* nice -20 .. -11 */
    48388, 59856, 76040, 92818, 118348,
    147320, 184698, 229616, 287308, 360437,
    /* nice -10 .. -1 */
    449829, 563644, 704093, 875809, 1099582,
    1376151, 1717300, 2157191, 2708050, 3363326,
    /* nice 0 .. 9 */
    4194304, 5237765, 6557202, 8165337, 10153587,
    12820798, 15790321, 19976592, 24970740, 31350126,
    /* nice 10 .. 19 */
    39045157, 49367440, 61356676, 76695844, 95443717,
    119304647, 148102320, 186737708, 238609294, 286331153,
};
prio_to_wmult[i]=2^32/prio_to_weight[i]
nice_0_weight =1024, 每降低一个nice级别,优先级提高一个级别,相应进程多获得10% cpu时间;每升高一个nice级别,优先级降低一个级别,相应进程少获得10% cpu时间。如A和B进程的nice值为0,权重都是1024,各获得50%的cpu时间。如果A进程增加一个nice值,权重变为820,B进程nice不变,权重不变,则A进程获得cpu时间:820/(1024+820)≈44.5%,B进程获得cpu时间:1024/(1024+820)≈55.5%。cpu时间差约为10%
在2.6.23版本引入cfs调度算法,引入虚拟时间:
vruntime = delta_exec * nice_0_weight / weight
OR
vruntime = (delta_exec * (nice_0_weight * inv_weight)) >> 32
nice_0_weight = 1024
weight = prio_to_weight[i]
inv_weight = prio_to_wmult[i]
inv_weight = 2^32/weight
进程优先级越高(nice值越小),进程权重weight值越大;相对于实际运行时间来说,其虚拟时间增长得越慢。cfs中的就绪队列是以vruntime为键值的红黑树,按虚拟时间从小到大排序,虚拟时间最小的进程最先被调度;各进程相互追赶,以达到虚拟运行时间相同的目的。因而优先级越高的进程,折算(scale)到真实时钟上可运行的时间越长,也可以理解为按weight的比重给进程分配cpu时间。
调度实体某一个历史period周期的负载,每经过32ms,都会衰减一半,后边也使用此方法计算经过32ms整数倍的负载衰减。
[kernel/sched/sched-pelt.h]
/*
 * Decay factors as 32-bit fixed-point fractions: slot i holds
 * (2^32 - 1) * y^i for i = 0..31, where y^32 = 1/2.  Slot 16 is
 * therefore about 2^32 / sqrt(2).
 */
static const u32 runnable_avg_yN_inv[] = {
    /*  0 ..  3 */ 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a,
    /*  4 ..  7 */ 0xeac0c6e6, 0xe5b906e6, 0xe0ccdeeb, 0xdbfbb796,
    /*  8 .. 11 */ 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
    /* 12 .. 15 */ 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46,
    /* 16 .. 19 */ 0xb504f333, 0xb123f581, 0xad583ee9, 0xa9a15ab4,
    /* 20 .. 23 */ 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
    /* 24 .. 27 */ 0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a,
    /* 28 .. 31 */ 0x8b95c1e3, 0x88980e80, 0x85aac367, 0x82cd8698,
};
runnable_avg_yN_inv[i]=(2^32-1) * y^i
y^32=1/2
i=0~31
y~=0.9785
static const double runnable_avg_yN_org[] = {   /* 仅为示意的伪代码:y^i 的小数近似值 */
0.999, 0.978, 0.957, 0.937, 0.917, 0.897,
……
0.522, 0.510
};
runnable_avg_yN_org[i] = runnable_avg_yN_inv[i] / 2^32 ≈ y^i(把定点数换算成小数;这里不是整数右移,整数 >>32 的结果恒为0)
static u64 decay_load(u64 val, u64 n)
val表示n个周期前的负载值,n表示经过的周期数
t0时刻负载为val,到t1时刻经过了n个周期,红色历史时间区域衰减后的负载值是:val*y^n = (val * runnable_avg_yN_inv[n]) >> 32(n<32时直接查表;n>=32时,由于y^32=1/2,先把val右移n/32位,再用n%32查表)
/*
 * Partial sums of the decay series: slot n holds
 * 1024 * (y^1 + y^2 + ... + y^n) for n = 0..32, with y^32 = 1/2.
 */
static const u32 runnable_avg_yN_sum[] = {
    /*  0 .. 10 */     0,  1002,  1982,  2941,  3880,  4798,  5697,  6576,  7437,  8279,  9103,
    /* 11 .. 21 */  9909, 10698, 11470, 12226, 12966, 13690, 14398, 15091, 15769, 16433, 17082,
    /* 22 .. 32 */ 17718, 18340, 18949, 19545, 20128, 20698, 21256, 21802, 22336, 22859, 23371,
};
runnable_avg_yN_sum[n]=1024*(y^1+y^2+y^3+……+y^n)
1024:1ms
period=1024us
y~=0.9785; y^32=1/2
* d1:上一个未完成周期的剩余部分;d2:中间若干完整周期;d3:当前未完成周期中已经过的部分
*
*           d1          d2           d3
*           ^           ^            ^
*           |           |            |
*         |<->|<----------------->|<--->|
* ... |---x---|------| ... |------|-----x (now)
*
*                           p-1
* u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
*                           n=1
*
*    = u y^p +                                 (Step 1)
*
*                     p-1
*      d1 y^p + 1024 \Sum y^n + d3 y^0         (Step 2)
*                     n=1
*
*             p-1
* c2 = 1024 \Sum y^n
*             n=1
*
*              inf        inf
*    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
*              n=0        n=p
u´= (u+delta1)*y^p + 1024* (y^1+y^2+……+y^(p-1)) + delta3*y^0
= u*y^p + delta1*y^p + 1024* (y^1+y^2+……+y^(p-1)) + delta3*y^0
= decay_load(u, p)+ decay_load(delta1, p)+ LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, p)-1024 + delta3
由此可见,进程负载=历史负载衰减+本次更新的负载累加衰减=udecay+contrib
LOAD_AVG_MAX=47742= 1024+1024*y+1024*y^2+……+1024*y^n+…+1024*y^inf (n>345时,整数运算下的累加结果不再增长)
LOAD_AVG_MAX=1024+1024* (y^1+y^2+…+y^(p-1)) + 1024* (y^p+…+y^inf)
1024* (y^p+…+y^inf) =1024*y^p(1+y+y^2+…+y^inf)=y^p*LOAD_AVG_MAX=decay_load(LOAD_AVG_MAX, p)
load_avg = load.weight * load_sum/(LOAD_AVG_MAX-1024+period_contrib)
runnable_load_avg = runnable_weight * runnable_load_sum/(LOAD_AVG_MAX-1024+period_contrib)
util_avg=util_sum/(LOAD_AVG_MAX-1024+period_contrib)
divider = LOAD_AVG_MAX - 1024 + sa->period_contrib
t1时刻平均负载的计算:
load_avg={decay(t0) + contrib(t1-t0)*weight}/divider
runnable_load_avg={decay(t0) + contrib(t1-t0)*weight}/divider
util_avg={decay(t0) + contrib(t1-t0)*weight}/divider
t2时刻平均负载的计算:
load_avg={decay(t1) + contrib(t2-t1)*weight}/divider
runnable_load_avg={decay(t1) + contrib(t2-t1)*weight}/divider
util_avg=decay(t1) /divider (contrib=0)
t4时刻平均负载的计算:
load_avg=decay(t3)/divider
runnable_load_avg=decay(t3)/divider
util_avg=decay(t3) /divider
V4.15-rc1 Commit: 1ea6c46a23 对task平均负载的计算发生了变化:
se->avg=