内核中有一个叫loops_per_jiffy的全局变量,它保存了每0.5个TICK时间范围内CPU可以执行空指令的条数。其定义在init/main.c中:
/*
 * Starting value for the delay-loop calibration: approximately 2 BogoMips
 * (note the initial shift).  Starting out too large is harmless -- the
 * calibration still works, it just takes slightly longer.
 */
unsigned long loops_per_jiffy = 1UL << 12;
在start_kernel函数中,将调用calibrate_delay函数对此值进行初始化:
/*
 * This is the number of bits of precision for the loops_per_jiffy.  Each
 * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
 * better than 1%
 */
#define LPS_PREC 8
/*
 * calibrate_delay - establish loops_per_jiffy and report it as BogoMIPS.
 *
 * Three sources are tried, in order:
 *   1. preset_lpj: if non-zero it is used as-is and calibration is skipped;
 *   2. calibrate_delay_direct(): used if it returns a non-zero value;
 *   3. otherwise the value is measured against the jiffies counter, with a
 *      coarse doubling search followed by a bit-by-bit refinement of up to
 *      LPS_PREC bits.
 *
 * In every case the result is left in loops_per_jiffy and printed.
 */
void __devinit calibrate_delay(void)
{
	unsigned long ticks, loopbit;
	int lps_precision = LPS_PREC;

	if (preset_lpj) {
		loops_per_jiffy = preset_lpj;
		printk("Calibrating delay loop (skipped)... "
			"%lu.%02lu BogoMIPS preset\n",
			loops_per_jiffy/(500000/HZ),
			(loops_per_jiffy/(5000/HZ)) % 100);
	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
		printk("Calibrating delay using timer specific routine.. ");
		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
			loops_per_jiffy/(500000/HZ),
			(loops_per_jiffy/(5000/HZ)) % 100,
			loops_per_jiffy);
	} else {
		loops_per_jiffy = (1<<12);

		printk(KERN_DEBUG "Calibrating delay loop... ");

		/*
		 * Coarse search: keep doubling loops_per_jiffy until a single
		 * __delay() call spans at least one jiffies increment.  The
		 * "!= 0" test ends the loop if the value is shifted out to 0.
		 */
		while ((loops_per_jiffy <<= 1) != 0) {
			/* wait for "start of" clock tick */
			ticks = jiffies;
			while (ticks == jiffies)
				/* nothing */;
			/* Go .. */
			ticks = jiffies;
			__delay(loops_per_jiffy);
			ticks = jiffies - ticks;
			if (ticks)
				break;
		}

		/*
		 * Do a binary approximation to get loops_per_jiffy set to
		 * equal one clock (up to lps_precision bits)
		 */
		/* Step back to the last value that did not span a tick. */
		loops_per_jiffy >>= 1;
		loopbit = loops_per_jiffy;
		while (lps_precision-- && (loopbit >>= 1)) {
			/* Tentatively set the next lower bit ... */
			loops_per_jiffy |= loopbit;
			/* ... align to the start of a tick ... */
			ticks = jiffies;
			while (ticks == jiffies)
				/* nothing */;
			ticks = jiffies;
			__delay(loops_per_jiffy);
			/* ... and drop the bit again if the delay overran. */
			if (jiffies != ticks)	/* longer than 1 tick */
				loops_per_jiffy &= ~loopbit;
		}

		/* Report the calibrated value */
		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
			loops_per_jiffy/(500000/HZ),
			(loops_per_jiffy/(5000/HZ)) % 100,
			loops_per_jiffy);
	}
}
其中,preset_lpj是一个可以通过引导程序设置的值,默认情况下为0。calibrate_delay_direct()函数则直接返回0。因而此函数直接进入else部分。
这段代码分成了两个部分:第一部分的while循环用于快速确定loops_per_jiffy在较大范围内的值;第二部分的while循环则采用逐渐逼近的方法得到loops_per_jiffy的一个比较准确的值。
经过这两轮的计算,可以得到CPU(单核)每0.5个TICK时间范围内可以执行空指令(NOP)的条数。经过计算,当内核运行速度为594M时(27M x 22),由此得到每秒钟执行NOP指令的条数为1183.74M,此时loops_per_jiffy的值为0x242000;当内核运行速度为540M时(27M x 20),由此得到每秒钟执行NOP指令的条数为1077.24M,此时loops_per_jiffy的值为0x20e000。
在上述函数中,__delay的实现位于include/asm/delay.h:
/*
 * __delay - busy-wait by running a one-instruction NOP loop.
 * @loops: iteration count loaded into the LC0 loop counter.
 *
 * Two variants are selected by ANOMALY_05000312:
 *  - anomaly present: LC0/LT0/LB0 are saved on the stack by hand, the
 *    NOP loop runs, and the registers are restored between CLI and STI;
 *  - otherwise: the NOP loop runs directly and LT0/LB0/LC0 are listed
 *    as clobbers so the compiler knows they are overwritten.
 *
 * NOTE(review): `inline` is commented out, presumably because the asm
 * uses fixed label names that would clash if the body were emitted
 * more than once -- confirm.
 */
static
/*inline*/ void __delay(unsigned long loops)
{
if (ANOMALY_05000312) {
/* Interrupted loads to loop registers -> bad */
/* tmp receives the value written by CLI and is handed back to STI. */
unsigned long tmp;
__asm__ __volatile__(
/* Push the three loop registers before reusing loop 0. */
"[--SP] = LC0;"
"[--SP] = LT0;"
"[--SP] = LB0;"
/* Loop whose top and bottom are the same NOP; count comes from %1 (loops). */
"LSETUP (__delay_anomaly_1,__delay_anomaly_1) LC0 = %1;"
"__delay_anomaly_1: NOP;"
/* We take advantage of the fact that LC0 is 0 at
* the end of the loop. Otherwise we'd need some
* NOPs after the CLI here.
*/
"CLI %0;"
/* Pop in reverse order of the pushes above, then STI with the saved %0. */
"LB0 = [SP++];"
"LT0 = [SP++];"
"LC0 = [SP++];"
"STI %0;"
: "=d" (tmp)
: "a" (loops)
);
} else
/* No anomaly: run the NOP loop and declare the loop registers clobbered. */
__asm__ __volatile__ (
"LSETUP(__delay_1, __delay_1) LC0 = %0;"
"__delay_1: NOP;"
:
: "a" (loops)
: "LT0", "LB0", "LC0"
);
}
就是一个简单的循环,不断执行NOP指令。