虽然 ARM CPU 是 32 位的,但是 SDRAM 为 16 位总线,所以猜测 16 位的 Thumb 指令可以带来一些优化。
修改了原来的测试代码,避免 O2 把所有代码优化掉(这可是O2的特长)
测试结果表明,对于我们的浮点运算,-O2 和 -mthumb 这两个选项的作用远没有 -ffast-math 明显。
测试结果也表明,对我们这个测试程序,-ffast-math 对精度没有影响。
分别使用 Marvell 编译器(gcc 4.1.1)以及 maemo 编译器(CodeSourcery 2005q3-2,gcc 3.4.4),并使用不同的编译选项进行测试对比。程序运行在 PXA310(624 MHz)CPU 上。
#include <stdio.h> /* printf, snprintf */
#include <stdlib.h> /* exit */
#include <sys/time.h> /* struct timeval and friends */
#include <time.h> /* gettimeofday */
#define MAX_DIVIDEND 1000000.231
#define MIN_DIVIDEND 0.29
#define STEP_DIVIDEND 0.33
#define DIVISOR 23.0
#define BUFFER_SIZE 200
/**
 * Format the wall-clock time elapsed since the first call to this
 * function as "<seconds>.<microseconds>" into a caller-supplied buffer.
 *
 * The first call records the reference instant and therefore always
 * writes "0.000000". The fractional part is zero-padded to six digits
 * so that the text reads as a proper decimal number of seconds.
 *
 * @param buffer Destination, at least BUFFER_SIZE bytes long. It is
 *               written through, so it must not point to const storage.
 *
 * Keeps its reference time in static storage: not safe to use in
 * threaded programs!
 */
static void timestamp(char *buffer) {
    /* Reference instant plus an explicit "captured" flag, so that a
       clock reading of 0 seconds is not mistaken for "uninitialized". */
    static int started = 0;
    static struct timeval start;
    /* Current time as reported by gettimeofday. */
    struct timeval now;
    /* Difference between start and now, split in seconds/microseconds. */
    long delta_sec;
    long delta_usec;

    gettimeofday(&now, NULL);

    /* Running for the first time? Then now == start and the delta is 0. */
    if (!started) {
        start = now;
        started = 1;
    }

    delta_sec = (long)(now.tv_sec - start.tv_sec);
    delta_usec = (long)now.tv_usec - (long)start.tv_usec;

    /* tv_usec wraps every second, so the raw difference may be negative;
       borrow one second to keep the fraction within [0, 999999]. */
    if (delta_usec < 0) {
        delta_sec -= 1;
        delta_usec += 1000000L;
    }

    /* Signed conversions and six-digit zero padding: 6 s + 38597 us is
       printed as "6.038597", not "6.38597". */
    snprintf(buffer, BUFFER_SIZE, "%ld.%06ld", delta_sec, delta_usec);
}
/**
 * Floating-point micro-benchmark driver.
 *
 * Runs four identical sweeps of `divident` from MIN_DIVIDEND up to (but
 * not including) MAX_DIVIDEND in steps of STEP_DIVIDEND, applying one
 * operation against DIVISOR per sweep (divide, multiply, add, subtract).
 * A timestamp is taken before the first sweep and after each sweep, and
 * all five timestamps plus the accumulated checksum are printed at the
 * end.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return Always 0.
 */
int main(int argc, char * argv[])
{
    double divident, result;
    /* Running sum of every intermediate result. It is printed at the end,
       which keeps the optimizer from discarding the loops. It must start
       from zero: accumulating into an uninitialized double is undefined
       behavior. */
    double result1 = 0.0;
    char buffer[BUFFER_SIZE];
    char buffer1[BUFFER_SIZE];
    char buffer2[BUFFER_SIZE];
    char buffer3[BUFFER_SIZE];
    char buffer4[BUFFER_SIZE];

    /* The command line is not used by this benchmark. */
    (void)argc;
    (void)argv;

    timestamp(buffer);
    for(divident=MIN_DIVIDEND; divident<MAX_DIVIDEND; divident+=STEP_DIVIDEND) {
        result = divident/DIVISOR;
        result1 += result;
    }
    timestamp(buffer1);
    for(divident=MIN_DIVIDEND; divident<MAX_DIVIDEND; divident+=STEP_DIVIDEND) {
        result = divident*DIVISOR;
        result1 += result;
    }
    timestamp(buffer2);
    for(divident=MIN_DIVIDEND; divident<MAX_DIVIDEND; divident+=STEP_DIVIDEND) {
        result = divident+DIVISOR;
        result1 += result;
    }
    timestamp(buffer3);
    for(divident=MIN_DIVIDEND; divident<MAX_DIVIDEND; divident+=STEP_DIVIDEND) {
        result = divident-DIVISOR;
        result1 += result;
    }
    timestamp(buffer4);

    /* "\n" (newline escape), not the two literal characters "/n". */
    printf("Start time is: %s\n",buffer);
    printf("DIV End time is: %s\n",buffer1);
    printf("MUL End time is: %s\n",buffer2);
    printf("ADD End time is: %s\n",buffer3);
    printf("SUB End time is: %s\n",buffer4);
    printf("result1:%f\n", result1);
    return 0;
}
# Build one variant of float.c with the Marvell cross toolchain, save its
# full disassembly next to it as "<output>d", then strip the binary.
# Usage: build_variant OUTPUT [GCC_FLAGS...]
build_variant() {
    target=$1
    shift
    arm-iwmmxt-linux-gnueabi-gcc "$@" float.c -o "$target"
    arm-iwmmxt-linux-gnueabi-objdump -D "$target" > "${target}d"
    arm-iwmmxt-linux-gnueabi-strip "$target"
}

# One binary per combination of compiler options under test.
build_variant float1
build_variant float2 -O2
build_variant float3 -ffast-math
build_variant float4 -ffast-math -O2
build_variant float5 -mthumb -ffast-math
build_variant float6 -mthumb -ffast-math -O2
/mnt/share # ./float1;./float2;./float3;./float4;./float5;./float6
Start time is: 0.0
DIV End time is: 4.4294917314
MUL End time is: 5.360439
ADD End time is: 7.4294759123
SUB End time is: 8.257485
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.4294863363
MUL End time is: 5.353536
ADD End time is: 7.4294732719
SUB End time is: 8.177472
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294389481
MUL End time is: 3.4294799614
ADD End time is: 5.4294230977
SUB End time is: 6.4294696195
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294326050
MUL End time is: 3.4294682183
ADD End time is: 4.131570
SUB End time is: 6.4294510822
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294628850
MUL End time is: 3.315699
ADD End time is: 5.4294958414
SUB End time is: 7.4294668472
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 1.451045
MUL End time is: 2.895364
ADD End time is: 4.361311
SUB End time is: 5.861070
result1:37944672758190.882812
/mnt/share # ./float1;./float2;./float3;./float4;./float5;./float6
Start time is: 0.0
DIV End time is: 4.4294916422
MUL End time is: 5.359094
ADD End time is: 6.790939
SUB End time is: 8.256269
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.4294863825
MUL End time is: 5.353796
ADD End time is: 7.4294733205
SUB End time is: 8.178055
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294389534
MUL End time is: 3.4294799570
ADD End time is: 4.263373
SUB End time is: 6.4294696403
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 1.358483
MUL End time is: 3.4294682785
ADD End time is: 4.132290
SUB End time is: 6.4294511573
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 1.661561
MUL End time is: 3.315752
ADD End time is: 5.4294958877
SUB End time is: 6.700518
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294418996
MUL End time is: 3.4294863151
ADD End time is: 5.4294328820
SUB End time is: 6.4294828801
result1:37944672758190.882812
[sbox-CHINOOK_ARMEL: ~] > cat 1.sh
# Build one variant of float.c with the scratchbox gcc (soft-float ABI,
# iwmmxt architecture), save its full disassembly next to it as
# "<output>d", then strip the binary.
# Usage: build_cs_variant OUTPUT [EXTRA_GCC_FLAGS...]
build_cs_variant() {
    target=$1
    shift
    gcc -mfloat-abi=soft -march=iwmmxt "$@" float.c -o "$target"
    objdump -D "$target" > "${target}d"
    strip "$target"
}

# One binary per combination of compiler options under test.
build_cs_variant cs_float1
build_cs_variant cs_float2 -O2
build_cs_variant cs_float3 -ffast-math
build_cs_variant cs_float4 -ffast-math -O2
build_cs_variant cs_float5 -mthumb -ffast-math
build_cs_variant cs_float6 -mthumb -ffast-math -O2
/mnt/share # ./cs_float1;./cs_float2;./cs_float3;./cs_float4;./cs_float5;./cs_float6
Start time is: 0.0
DIV End time is: 5.4294362943
MUL End time is: 7.4294184865
ADD End time is: 9.4294103937
SUB End time is: 11.4294057006
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.371613
MUL End time is: 6.208795
ADD End time is: 8.142781
SUB End time is: 10.145741
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294797294
MUL End time is: 4.4294618808
ADD End time is: 5.610039
SUB End time is: 7.564100
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 1.843834
MUL End time is: 3.641867
ADD End time is: 5.536020
SUB End time is: 7.503841
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294952400
MUL End time is: 4.4294930974
ADD End time is: 6.38597
SUB End time is: 8.148249
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294830987
MUL End time is: 4.4294686170
ADD End time is: 6.4294639970
SUB End time is: 8.4294627964
result1:37944672758190.882812
/mnt/share # ./cs_float1;./cs_float2;./cs_float3;./cs_float4;./cs_float5;./cs_float6
Start time is: 0.0
DIV End time is: 4.396515
MUL End time is: 6.217185
ADD End time is: 8.137225
SUB End time is: 10.91183
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.371910
MUL End time is: 6.208795
ADD End time is: 8.143338
SUB End time is: 10.146496
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294796584
MUL End time is: 4.4294619369
ADD End time is: 6.4294578720
SUB End time is: 8.4294533344
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294811396
MUL End time is: 4.4294608746
ADD End time is: 6.4294504581
SUB End time is: 7.505244
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294953577
MUL End time is: 4.4294932613
ADD End time is: 6.41299
SUB End time is: 8.152282
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294830429
MUL End time is: 3.719345
ADD End time is: 5.673306
SUB End time is: 7.661928
result1:37944672758190.882812
Nokia-N810-42-19:/mnt/share# ./cs_float1;./cs_float2;./cs_float3;./cs_float4;./cs_float5;./cs_float6
Start time is: 0.0
DIV End time is: 4.376203
MUL End time is: 6.189725
ADD End time is: 8.100677
SUB End time is: 10.45632
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.350762
MUL End time is: 6.178718
ADD End time is: 8.104297
SUB End time is: 10.97945
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294787951
MUL End time is: 3.634264
ADD End time is: 5.584049
SUB End time is: 7.528977
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294802638
MUL End time is: 4.4294591854
ADD End time is: 6.4294478357
SUB End time is: 8.4294437883
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294943224
MUL End time is: 4.4294912370
ADD End time is: 6.11563
SUB End time is: 8.111958
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294821997
MUL End time is: 4.4294669333
ADD End time is: 6.4294614187
SUB End time is: 8.4294593228
result1:37944672758190.882812
Nokia-N810-42-19:/mnt/share# ./cs_float1;./cs_float2;./cs_float3;./cs_float4;./cs_float5;./cs_float6
Start time is: 0.0
DIV End time is: 4.377758
MUL End time is: 6.191395
ADD End time is: 8.102222
SUB End time is: 10.47143
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 4.351143
MUL End time is: 6.179261
ADD End time is: 8.104836
SUB End time is: 10.98367
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294787966
MUL End time is: 3.634115
ADD End time is: 5.584012
SUB End time is: 7.528855
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294802441
MUL End time is: 4.4294591819
ADD End time is: 6.4294478172
SUB End time is: 8.4294438132
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294943712
MUL End time is: 4.4294912686
ADD End time is: 6.11965
SUB End time is: 8.112546
result1:37944672758190.882812
Start time is: 0.0
DIV End time is: 2.4294821782
MUL End time is: 4.4294669237
ADD End time is: 5.646800
SUB End time is: 7.626067
result1:37944672758190.882812