Tom Duff曾经写过这样的函数,其中使用了奇怪的switch...case,他自己称之为Duff's device。猛一看,似乎不合乎C的语法规则,但实际上能在所有C编译器下编译通过。据说其效率可以大大提高。
Anoop写了一个程序进行测试。转贴如下:
/* The Duff device
*
* An infamous example of how a compiler can accept code that should
* be illegal as per the language definition. To add insult to injury,
* the illegal code actually runs faster.
*
* The functions send and send2 accomplish the same goal (copying a
* string from one location to another) but send2 manages to screw
* with your head and achieve its goal much faster (on most
* architectures).
*
* The answer to the puzzle of how send2 actually works is given in
* the comment that follows the function send2.
*
* This strange piece of code is named after the programmer who
* discovered this 'optimization' technique.
*
* -- Anoop Sarkar <anoop at cs.sfu.ca>
**/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Number of bytes copied by each timed run in main.  Keep it large
   enough that the running-time gap between send and send2 shows up
   at whole-second resolution. */
const size_t BUFLEN = (size_t) 100 * 1000 * 1000;
/*
 * send - plain byte-by-byte copy of count bytes from `from` to `to`.
 *
 * One byte is copied per loop iteration, so the loop condition is
 * evaluated once for every byte copied.
 *
 *   to     destination; must have room for at least count bytes
 *   from   source; must hold at least count readable bytes
 *   count  number of bytes to copy; zero or negative copies nothing
 */
void
send (register char *to, register char *from, register int count)
{
    /* Test the count BEFORE the first store: the earlier do/while form
       executed its body once even when count was <= 0, writing a byte
       the caller never asked for. */
    for (; count > 0; --count) {
        *to++ = *from++;
    }
}
/*
 * send2 - copy count bytes from `from` to `to` using Duff's device.
 *
 * The copy loop is unrolled eight times.  The switch jumps INTO the
 * body of the do/while so that the first, partial pass performs
 * count % 8 assignments (or all eight when count is a multiple of 8);
 * every later pass performs all eight.  The case labels fall through
 * into one another on purpose.
 *
 *   to     destination; must have room for at least count bytes
 *   from   source; must hold at least count readable bytes
 *   count  number of bytes to copy; zero or negative copies nothing
 */
void
send2 (register char *to, register char *from, register int count)
{
    register int n;

    /* Without this guard a count of 0 enters at `case 0` and copies
       eight bytes before the loop condition is ever tested. */
    if (count <= 0)
        return;

    /* Number of passes through the unrolled body, i.e. count / 8
       rounded up.  Written without `count + 7` so that it cannot
       overflow for counts close to INT_MAX. */
    n = count / 8 + (count % 8 != 0);

    /* count > 0 here, so count % 8 is always one of 0..7 and every
       value has a label below. */
    switch (count % 8) {
    case 0: do { *to++ = *from++;   /* fall through */
    case 7:      *to++ = *from++;   /* fall through */
    case 6:      *to++ = *from++;   /* fall through */
    case 5:      *to++ = *from++;   /* fall through */
    case 4:      *to++ = *from++;   /* fall through */
    case 3:      *to++ = *from++;   /* fall through */
    case 2:      *to++ = *from++;   /* fall through */
    case 1:      *to++ = *from++;
            } while (--n > 0);
    }
}
/* The answer to the mystery turns out to be simple loop unrolling.
* send2 uses the semantics for switch statements in C to provide a
* mnemonic for how many assignments should occur within the body of
* the do-while loop.
*
* So why is send2 faster than send on some architectures? The
* conditional is a slow instruction to execute on many machine
* architectures.
*
* Try compiling with gcc with and without the -O3 flag. Turning the
* optimizer on (using -O3) shows the power of code optimization: send
* runs as fast as send2 with the optimizer on.
**/
/*
 * main - time send() and send2() on the same BUFLEN-byte buffer.
 *
 * Allocates a source and a destination buffer of BUFLEN bytes, fills
 * the source with 'a', runs each copy routine once while measuring
 * the elapsed wall-clock time in whole seconds with gettimeofday(),
 * and finally reports whether the destination matches the source.
 *
 * The command-line arguments are not used.
 *
 * Returns 0 on completion, EXIT_FAILURE if a buffer cannot be
 * allocated.
 */
int main (int argc, char **argv)
{
    char *from, *to;
    struct timeval before, after;

    (void) argc;
    (void) argv;

    from = malloc(BUFLEN);
    to = malloc(BUFLEN);
    if (from == NULL || to == NULL) {
        fprintf(stderr, "cannot allocate two buffers of %lu bytes\n",
                (unsigned long) BUFLEN);
        free(from);     /* free(NULL) is a no-op */
        free(to);
        return EXIT_FAILURE;
    }

    memset(from, 'a', BUFLEN);
    printf("array init done\n");

    /* NOTE: send and send2 take an int count, so BUFLEN must not
       exceed INT_MAX. */

    printf("calling send\n");
    /* Start each timed run from the same, known destination contents. */
    memset(to, 0, BUFLEN);
    gettimeofday(&before, NULL);
    send(to, from, (int) BUFLEN);
    gettimeofday(&after, NULL);
    /* tv_sec is a time_t, which need not be an int: convert explicitly
       so that the argument matches the format specifier. */
    printf("secs=%ld\n", (long) (after.tv_sec - before.tv_sec));

    printf("calling send2\n");
    /* Wipe the result left behind by send so that the comparison below
       can only succeed if send2 itself performed the copy. */
    memset(to, 0, BUFLEN);
    gettimeofday(&before, NULL);
    send2(to, from, (int) BUFLEN);
    gettimeofday(&after, NULL);
    printf("secs=%ld\n", (long) (after.tv_sec - before.tv_sec));

    /* The buffers are raw bytes with no terminating NUL, so compare
       exactly BUFLEN bytes instead of using strcmp. */
    if (memcmp(from, to, BUFLEN) == 0) {
        printf("from=to\n");
    } else {
        printf("from!=to\n");
    }

    free(from);
    free(to);
    return 0;
}
运行结果:
[redbull@calabash tmp]$gcc -O3 duff.c
[redbull@calabash tmp]$ ./a.out
array init done
calling send
secs=4
calling send2
secs=1
from=to