Linux下VFP NEON浮点编译

NEON:SIMD(Single Instruction Multiple Data 单指令多重数据) 指令集, 其针对多媒体和讯号处理程式具备标准化的加速能力。

VFP:(Vector Floating Point),向量浮点运算单元。ARM11(s3c6410)支持 VFPv2,Cortex-A8(s5pv210)支持 VFPv3。

NEON 和 VFPv3 浮点协处理器共享寄存器组,所以对于本文这种标量浮点运算,-mfpu=neon 和 -mfpu=vfpv3 编译出的汇编指令是一样的。

编译选项:

-mfpu=name(neon 或 vfpvx):指定 FPU 单元(注意等号两边不能有空格)

-mfloat-abi=name(soft、hard、softfp):指定使用软件浮点(soft)、硬件浮点(hard),或兼容软浮点调用接口的硬件浮点(softfp)

如果只指定 -mfpu,那么默认编译不会选择硬件浮点指令集

如果只指定 -mfloat-abi=hard 或者 -mfloat-abi=softfp,那么编译会使用硬件浮点指令集

测试C文件

/*
 * Minimal test program used for every compiler invocation in this note.
 * Its only floating-point operation is one single-precision division, so
 * the generated assembly shows directly whether the compiler emitted a
 * library call or a hardware floating-point instruction for it.
 * The result f3 is never read; the program always returns 0.
 */
int main(void)
{
	float f1, f2, f3;
	f1 = 1.2;	/* stored as a float; appears as .word 1067030938 in the listings */
	f2 = 1.3;	/* stored as a float; appears as .word 1067869798 in the listings */
	f3 = f1 / f2;	/* the float division whose code generation is being compared */
	return 0;
}

1、 arm-eabi-gcc  -S hello.c -mfpu=neon

	@ Compiler output for hello.c built with only -mfpu=neon.
	@ The .fpu directive below is still softvfp, and the float division is
	@ performed by a call to __aeabi_fdiv with operands and result in core
	@ registers; no VFP/NEON instruction appears in this listing.
	.arch armv5te
	.fpu softvfp
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.global	__aeabi_fdiv	@ division helper called by the bl below
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	stmfd	sp!, {fp, lr}	@ save fp and lr; the epilogue pops the saved lr into pc
	.save {fp, lr}
.LCFI0:
	.setfp fp, sp, #4
	add	fp, sp, #4	@ fp = sp + 4
.LCFI1:
	.pad #16
	sub	sp, sp, #16	@ reserve 16 bytes for the locals
.LCFI2:
	ldr	r3, .L3	@ float: r3 = bit pattern of 1.2f from the literal pool
	str	r3, [fp, #-16]	@ float: f1 = 1.2f
	ldr	r3, .L3+4	@ float: r3 = bit pattern of 1.3f from the literal pool
	str	r3, [fp, #-12]	@ float: f2 = 1.3f
	ldr	r0, [fp, #-16]	@ float: first operand (f1) in r0
	ldr	r1, [fp, #-12]	@ float: second operand (f2) in r1
	bl	__aeabi_fdiv	@ f1 / f2 via library call; result is taken from r0
	mov	r3, r0
	str	r3, [fp, #-8]	@ float: f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	sub	sp, fp, #4	@ sp back to the saved {fp, lr} pair
	ldmfd	sp!, {fp, pc}	@ restore fp and return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

2、 arm-eabi-gcc  -S hello.c -mfpu=vfp

	@ Compiler output for hello.c built with only -mfpu=vfp.
	@ As with -mfpu alone in general, the .fpu directive is softvfp and the
	@ float division is a call to __aeabi_fdiv using core registers; no VFP
	@ instruction appears in this listing.
	.arch armv5te
	.fpu softvfp
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.global	__aeabi_fdiv	@ division helper called by the bl below
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	stmfd	sp!, {fp, lr}	@ save fp and lr; the epilogue pops the saved lr into pc
	.save {fp, lr}
.LCFI0:
	.setfp fp, sp, #4
	add	fp, sp, #4	@ fp = sp + 4
.LCFI1:
	.pad #16
	sub	sp, sp, #16	@ reserve 16 bytes for the locals
.LCFI2:
	ldr	r3, .L3	@ float: r3 = bit pattern of 1.2f from the literal pool
	str	r3, [fp, #-16]	@ float: f1 = 1.2f
	ldr	r3, .L3+4	@ float: r3 = bit pattern of 1.3f from the literal pool
	str	r3, [fp, #-12]	@ float: f2 = 1.3f
	ldr	r0, [fp, #-16]	@ float: first operand (f1) in r0
	ldr	r1, [fp, #-12]	@ float: second operand (f2) in r1
	bl	__aeabi_fdiv	@ f1 / f2 via library call; result is taken from r0
	mov	r3, r0
	str	r3, [fp, #-8]	@ float: f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	sub	sp, fp, #4	@ sp back to the saved {fp, lr} pair
	ldmfd	sp!, {fp, pc}	@ restore fp and return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

可以看到上面两个例子,使用的都是 .fpu softvfp,浮点除法是通过调用库函数 __aeabi_fdiv 完成的,没有生成任何硬件浮点指令。


3、 arm-eabi-gcc  -S hello.c -mfpu=neon -mfloat-abi=hard

	@ Compiler output for hello.c built with -mfpu=neon -mfloat-abi=hard.
	@ The division is now a single hardware instruction (fdivs) operating on
	@ VFP registers s14/s15; there is no call to __aeabi_fdiv.
	.arch armv5te
	.eabi_attribute 27, 3	@ Tag_ABI_HardFP_use (name per the ARM EABI build-attribute list)
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args (name per the ARM EABI build-attribute list)
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this function contains no bl, so lr is not saved
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	flds	s15, .L3	@ s15 = 1.2f from the literal pool
	fsts	s15, [fp, #-16]	@ f1 = 1.2f
	flds	s15, .L3+4	@ s15 = 1.3f from the literal pool
	fsts	s15, [fp, #-12]	@ f2 = 1.3f
	flds	s14, [fp, #-16]	@ s14 = f1
	flds	s15, [fp, #-12]	@ s15 = f2
	fdivs	s15, s14, s15	@ s15 = f1 / f2, done in hardware
	fsts	s15, [fp, #-8]	@ f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	add	sp, fp, #0	@ release the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

4、 arm-eabi-gcc  -S hello.c -mfpu=neon -mfloat-abi=softfp

	@ Compiler output for hello.c built with -mfpu=neon -mfloat-abi=softfp.
	@ The division is a single hardware instruction (fdivs) on VFP registers
	@ s14/s15; there is no call to __aeabi_fdiv. Unlike the -mfloat-abi=hard
	@ listings, this header has no ".eabi_attribute 28" line.
	.arch armv5te
	.eabi_attribute 27, 3	@ Tag_ABI_HardFP_use (name per the ARM EABI build-attribute list)
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this function contains no bl, so lr is not saved
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	flds	s15, .L3	@ s15 = 1.2f from the literal pool
	fsts	s15, [fp, #-16]	@ f1 = 1.2f
	flds	s15, .L3+4	@ s15 = 1.3f from the literal pool
	fsts	s15, [fp, #-12]	@ f2 = 1.3f
	flds	s14, [fp, #-16]	@ s14 = f1
	flds	s15, [fp, #-12]	@ s15 = f2
	fdivs	s15, s14, s15	@ s15 = f1 / f2, done in hardware
	fsts	s15, [fp, #-8]	@ f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	add	sp, fp, #0	@ release the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

5、 arm-eabi-gcc  -S hello.c -mfpu=vfpv3 -mfloat-abi=softfp

	@ Compiler output for hello.c built with -mfpu=vfpv3 -mfloat-abi=softfp.
	@ The division is a single hardware instruction (fdivs) on VFP registers
	@ s14/s15; there is no call to __aeabi_fdiv. The header carries
	@ ".fpu vfpv3" and has no ".eabi_attribute 28" line.
	.arch armv5te
	.eabi_attribute 27, 3	@ Tag_ABI_HardFP_use (name per the ARM EABI build-attribute list)
	.fpu vfpv3
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this function contains no bl, so lr is not saved
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	flds	s15, .L3	@ s15 = 1.2f from the literal pool
	fsts	s15, [fp, #-16]	@ f1 = 1.2f
	flds	s15, .L3+4	@ s15 = 1.3f from the literal pool
	fsts	s15, [fp, #-12]	@ f2 = 1.3f
	flds	s14, [fp, #-16]	@ s14 = f1
	flds	s15, [fp, #-12]	@ s15 = f2
	fdivs	s15, s14, s15	@ s15 = f1 / f2, done in hardware
	fsts	s15, [fp, #-8]	@ f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	add	sp, fp, #0	@ release the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

6、 arm-eabi-gcc  -S hello.c -mfpu=vfpv3 -mfloat-abi=hard

	@ Compiler output for hello.c built with -mfpu=vfpv3 -mfloat-abi=hard.
	@ The division is a single hardware instruction (fdivs) on VFP registers
	@ s14/s15; there is no call to __aeabi_fdiv. The header carries
	@ ".fpu vfpv3" plus the additional ".eabi_attribute 28, 1" line.
	.arch armv5te
	.eabi_attribute 27, 3	@ Tag_ABI_HardFP_use (name per the ARM EABI build-attribute list)
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args (name per the ARM EABI build-attribute list)
	.fpu vfpv3
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this function contains no bl, so lr is not saved
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	flds	s15, .L3	@ s15 = 1.2f from the literal pool
	fsts	s15, [fp, #-16]	@ f1 = 1.2f
	flds	s15, .L3+4	@ s15 = 1.3f from the literal pool
	fsts	s15, [fp, #-12]	@ f2 = 1.3f
	flds	s14, [fp, #-16]	@ s14 = f1
	flds	s15, [fp, #-12]	@ s15 = f2
	fdivs	s15, s14, s15	@ s15 = f1 / f2, done in hardware
	fsts	s15, [fp, #-8]	@ f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	add	sp, fp, #0	@ release the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

从上面可以看到,使用 softfp 和 hard 生成的指令是一样的,都是硬件浮点指令(hard 的输出仅多出一行 .eabi_attribute 28, 1);neon 和 vfp 的区别,仅仅体现在 .fpu vfpv3 和 .fpu neon。


7、 arm-eabi-gcc  -S hello.c -mfloat-abi=hard

	@ Compiler output for hello.c built with only -mfloat-abi=hard (no -mfpu).
	@ The header shows ".fpu vfp", and the division is a single hardware
	@ instruction (fdivs) on VFP registers s14/s15; there is no call to
	@ __aeabi_fdiv.
	.arch armv5te
	.eabi_attribute 27, 3	@ Tag_ABI_HardFP_use (name per the ARM EABI build-attribute list)
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args (name per the ARM EABI build-attribute list)
	.fpu vfp
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this function contains no bl, so lr is not saved
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	flds	s15, .L3	@ s15 = 1.2f from the literal pool
	fsts	s15, [fp, #-16]	@ f1 = 1.2f
	flds	s15, .L3+4	@ s15 = 1.3f from the literal pool
	fsts	s15, [fp, #-12]	@ f2 = 1.3f
	flds	s14, [fp, #-16]	@ s14 = f1
	flds	s15, [fp, #-12]	@ s15 = f2
	fdivs	s15, s14, s15	@ s15 = f1 / f2, done in hardware
	fsts	s15, [fp, #-8]	@ f3 = result
	mov	r3, #0
	mov	r0, r3	@ return value 0
	add	sp, fp, #0	@ release the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns of the constants
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

当直接使用-mfloat-abi=hard时,会默认使用.fpu vfp硬件浮点。


你可能感兴趣的:(Linux下VFP NEON浮点编译)