#include <stdio.h>
#include <sys/time.h>
/*
 * Minimal wall-clock timing helpers built on gettimeofday().
 *
 * INIT_TIMER_VALIABLE   declares the start/end timestamps (tpstart, tpend)
 *                       and the elapsed-time variable (timeuse) in the
 *                       current scope.
 * START_TIMER           records the start timestamp.
 * END_PRINTF_TIMER(name) records the end timestamp, stores the elapsed time
 *                       in microseconds in timeuse and prints it, labelled
 *                       with the string `name`.
 *
 * Each macro expands to complete statements including the trailing ';',
 * so use sites are written without a semicolon of their own.
 *
 * The elapsed time is computed from the *difference* of the seconds fields.
 * Scaling an absolute tv_sec by 1,000,000 overflows a 32-bit signed long
 * (undefined behaviour); the difference is small and is scaled in double.
 */
#define INIT_TIMER_VALIABLE \
struct timeval tpstart,tpend; \
double timeuse;
#define START_TIMER gettimeofday(&tpstart,NULL);
#define END_PRINTF_TIMER(name) \
gettimeofday(&tpend,NULL); \
timeuse=(tpend.tv_sec-tpstart.tv_sec)*1000000.0+(tpend.tv_usec-tpstart.tv_usec); \
printf("func :%s:time use(us) %f\n",name,timeuse);
float vfp_operate(float f1, float f2)
{
float sum=0;
__asm__ __volatile__(
"vmov s1, %1\n"
"vmov s2, %2\n"
"fmuls s0, s1, s2\n"
"vmov %0, s0\n"
:"=r"(sum)
:"r"(f1),"r"(f2)
);
return sum;
}
/*
 * Benchmark driver: reads two floats from stdin, then times and prints
 * their product twice - once using the compiler-generated multiply and
 * once using vfp_operate().
 *
 * Returns 0 on success, 1 if either input cannot be parsed as a float
 * (the operands would otherwise be used uninitialized).
 */
int main(void)
{
    float f1,f2;
    float result;
    INIT_TIMER_VALIABLE   /* timer macros carry their own ';' */

    printf("input float data1:");
    if (scanf("%f",&f1) != 1) {
        fprintf(stderr,"invalid input for float data1\n");
        return 1;
    }
    printf("input float data2:");
    if (scanf("%f",&f2) != 1) {
        fprintf(stderr,"invalid input for float data2\n");
        return 1;
    }

    /* Measurement 1: multiplication as emitted by the compiler. */
    START_TIMER
    result=f1*f2;
    END_PRINTF_TIMER("use system function")
    printf("result is %f\n",result);

    /* Measurement 2: multiplication through the inline-asm helper. */
    START_TIMER
    result=vfp_operate(f1,f2);
    END_PRINTF_TIMER("use vfp_operate")
    printf("result is %f\n",result);

    return 0;
}
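/*
Test results:
./vfp_helloworld
input float data1:0.125
input float data2:1.684
func :use system function:time use(us) 12.000000
result is 0.210500
func :use system function:time use(us) 5.000000
result is 0.210500
Note: the second "func" line is the vfp_operate measurement. The code above
labels it "use vfp_operate", so this capture appears to come from an earlier
revision that printed the same label for both measurements.
*/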
测试使用编译选项 :
源代码:
#include <stdio.h>
/*
 * Minimal test program used for the compiler-option comparison: reads two
 * floats from stdin and prints them together with their product.
 * The scanf results are not checked, and there is no explicit return.
 * Kept exactly as compiled, because the assembly listings that follow are
 * the compiler output for this source.
 */
int main()
{
float f1,f2;
printf("input float data1:");
scanf("%f",&f1);
printf("input float data2:");
scanf("%f",&f2);
/* f1*f2 is the single-precision multiply whose generated code is compared
   in the listings; all three float arguments are promoted to double for
   the variadic printf call. */
printf("float %f x %f =%f\n", f1,f2,f1*f2);
}
编译选项:(两次编译的区别只是多增加了 -mfloat-abi=softfp;测试发现 -mfloat-abi=softfp 和 -mfloat-abi=hard 编译出的汇编一样)
硬件浮点(VFP)版本:arm-none-linux-gnueabi-gcc vfp_helloworld.c -S -mfloat-abi=softfp -o vfp_helloworld.asm
软件浮点版本:arm-none-linux-gnueabi-gcc vfp_helloworld.c -S -o vfp_helloworld.asm
硬件浮点(VFP)版本(使用 -mfloat-abi=softfp 编译)生成的汇编:
        .cpu arm10tdmi
        .eabi_attribute 27, 3
        .fpu vfp
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 6
        .eabi_attribute 18, 4
        .file "vfp_helloworld.c"
        .section .rodata
        .align 2
.LC0:
        .ascii "input float data1:\000"
        .align 2
.LC1:
        .ascii "%f\000"
        .align 2
.LC2:
        .ascii "input float data2:\000"
        .align 2
.LC3:
        .ascii "float %f x %f =%f\012\000"
        .text
        .align 2
        .global main
        .type main, %function
main:
        .fnstart
.LFB2:
        @ args = 0, pretend = 0, frame = 16
        @ frame_needed = 1, uses_anonymous_args = 0
        stmfd sp!, {fp, lr}
        .save {fp, lr}
.LCFI0:
        .setfp fp, sp, #4
        add fp, sp, #4
.LCFI1:
        .pad #32
        sub sp, sp, #32
.LCFI2:
        ldr r0, .L3
        bl printf
        sub r3, fp, #8
        ldr r0, .L3+4
        mov r1, r3
        bl scanf
        ldr r0, .L3+8
        bl printf
        sub r3, fp, #12
        ldr r0, .L3+4
        mov r1, r3
        bl scanf
        flds s15, [fp, #-8]
        fcvtds d5, s15
        flds s15, [fp, #-12]
        fcvtds d6, s15
        flds s14, [fp, #-8]
        flds s15, [fp, #-12]
        fmuls s15, s14, s15     @ <<<-----直接使用硬件浮点指令
        fcvtds d7, s15
        fstd d6, [sp, #0]
        fstd d7, [sp, #8]
        ldr r0, .L3+12
        fmrrd r2, r3, d5
        bl printf
        sub sp, fp, #4
        ldmfd sp!, {fp, pc}
.L4:
        .align 2
.L3:
        .word .LC0
        .word .LC1
        .word .LC2
        .word .LC3
.LFE2:
        .fnend
        .size main, .-main
        .ident "GCC: (Sourcery G++ Lite 2009q1-203) 4.3.3"
        .section .note.GNU-stack,"",%progbits

软件浮点版本(不加 -mfloat-abi 选项编译)生成的汇编:
        .cpu arm10tdmi
        .fpu softvfp
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 6
        .eabi_attribute 18, 4
        .file "vfp_helloworld.c"
        .section .rodata
        .align 2
.LC0:
        .ascii "input float data1:\000"
        .align 2
.LC1:
        .ascii "%f\000"
        .align 2
.LC2:
        .ascii "input float data2:\000"
        .global __aeabi_f2d
        .global __aeabi_fmul    @ <<<------这里说明软件浮点运算。
        .align 2
.LC3:
        .ascii "float %f x %f =%f\012\000"
        .text
        .align 2
        .global main
        .type main, %function
main:
        .fnstart
.LFB2:
        @ args = 0, pretend = 0, frame = 16
        @ frame_needed = 1, uses_anonymous_args = 0
        stmfd sp!, {r4, r5, r6, r7, r8, fp, lr}
        .save {r4, r5, r6, r7, r8, fp, lr}
.LCFI0:
        .setfp fp, sp, #24
        add fp, sp, #24
.LCFI1:
        .pad #36
        sub sp, sp, #36
.LCFI2:
        ldr r0, .L3
        bl printf
        sub r3, fp, #32
        ldr r0, .L3+4
        mov r1, r3
        bl scanf
        ldr r0, .L3+8
        bl printf
        sub r3, fp, #36
        ldr r0, .L3+4
        mov r1, r3
        bl scanf
        ldr r3, [fp, #-32] @ float
        mov r0, r3
        bl __aeabi_f2d
        mov r5, r0
        mov r6, r1
        ldr r3, [fp, #-36] @ float
        mov r0, r3
        bl __aeabi_f2d
        mov r7, r0
        mov r8, r1
        ldr r3, [fp, #-32] @ float
        ldr r2, [fp, #-36] @ float
        mov r0, r3
        mov r1, r2
        bl __aeabi_fmul         @ <<<------这里调用软件浮点运算。
        mov r3, r0
        mov r0, r3
        bl __aeabi_f2d
        mov r3, r0
        mov r4, r1
        stmia sp, {r7-r8}
        str r3, [sp, #8]
        str r4, [sp, #12]
        ldr r0, .L3+12
        mov r2, r5
        mov r3, r6
        bl printf
        sub sp, fp, #24
        ldmfd sp!, {r4, r5, r6, r7, r8, fp, pc}
.L4:
        .align 2
.L3:
        .word .LC0
        .word .LC1
        .word .LC2
        .word .LC3
.LFE2:
        .fnend
        .size main, .-main
        .ident "GCC: (Sourcery G++ Lite 2009q1-203) 4.3.3"
        .section .note.GNU-stack,"",%progbits
摘自:ARM 的 pdf 文档的说明
浮点运算的支持
ARM 处理器内核不包含浮点硬件。必须使用以下两种方法之一,另行提供对浮点算法的支持:硬件 VFP 协处理器,或软件浮点库 fplib。缺省情况下,如果有 VFP 协处理器,则编译器会生成 VFP 指令;如果没有 VFP 协处理器,则编译器会生成调用软件浮点库 fplib 的代码,用于执行浮点运算。fplib 是 RealView Development Suite 标准分发的 C 库的组成部分。