声明:转载请注明原链接http://my.oschina.net/u/1167407/blog/486311
写了一个汇编函数调用printf用来打印字符串如下
# File: myprint.s
#
# int myprint(char *s)
#
# Prints a string by forwarding it to printf.
#   in : %rdi = address of a NUL-terminated string. It is handed to printf
#        unchanged as its first argument, i.e. it is used as the format string.
#   out: %eax = whatever printf returned (left untouched after the call).
        .section .text
        .global myprint
        .type   myprint, @function
myprint:
        pushq   %rbp
        movq    %rsp, %rbp
        # printf is variadic: the caller must pass in %al an upper bound on the
        # number of vector (%xmm) registers used for arguments. None are used
        # here. A C caller that sees the prototype int myprint(char *) does not
        # set %eax itself, so it has to be cleared here.
        xorl    %eax, %eax
        call    printf
        leave
        ret
        .size   myprint, .-myprint
用C语言调用上边的汇编函数:
//文件名helloworld.c int main(){ myprint("Hello World!\n"); return 0; }
用gcc myprint.s helloworld.c -o helloworld编译得到可执行程序
main函数编译出的代码如下:
000000000040054d <main>: 40054d: 55 push %rbp 40054e: 48 89 e5 mov %rsp,%rbp 400551: bf 04 06 40 00 mov $0x400604,%edi 400556: b8 00 00 00 00 mov $0x0,%eax 40055b: e8 07 00 00 00 callq 400567 <myprint> 400560: b8 00 00 00 00 mov $0x0,%eax 400565: 5d pop %rbp 400566: c3 retq
后来觉得还是加上个extern语句,比较合适。于是C源程序变为:
/* Prototype for the assembly routine: takes one char * and returns int. */
extern int myprint(char *);

/* Same program as before, but the call is now made with a prototype in scope. */
int main(){
    myprint("Hello World!\n");
    return 0;
}
再次编译,main函数部分如下:
000000000040054d <main>: 40054d: 55 push %rbp 40054e: 48 89 e5 mov %rsp,%rbp 400551: bf f4 05 40 00 mov $0x4005f4,%edi 400556: e8 07 00 00 00 callq 400562 <myprint> 40055b: b8 00 00 00 00 mov $0x0,%eax 400560: 5d pop %rbp 400561: c3 retq
比较两次编译后的main函数,发现没加extern语句时编译出的代码多出了一句mov $0x0,%eax,当时不明就里。
后来有一次遇到了段错误,用GDB调试时发现是在执行printf函数时发生的,不明白原因的我于是就去查看printf的汇编代码。发现printf函数在%al为0时,不会操作%xmm寄存器(下面的代码说明了这一现象,test %al,%al测试%al是否为0,je 0x7ffff7a946b3 <printf+99>这一语句表明,若%al为0则跳转到<printf+99>处执行,刚好将对%xmm寄存器的操作略去)
Dump of assembler code for function printf: 0x00007ffff7a94650 <+0>: sub $0xd8,%rsp 0x00007ffff7a94657 <+7>: test %al,%al 0x00007ffff7a94659 <+9>: mov %rsi,0x28(%rsp) 0x00007ffff7a9465e <+14>: mov %rdx,0x30(%rsp) 0x00007ffff7a94663 <+19>: mov %rcx,0x38(%rsp) 0x00007ffff7a94668 <+24>: mov %r8,0x40(%rsp) 0x00007ffff7a9466d <+29>: mov %r9,0x48(%rsp) 0x00007ffff7a94672 <+34>: je 0x7ffff7a946b3 <printf+99> 0x00007ffff7a94674 <+36>: vmovaps %xmm0,0x50(%rsp) 0x00007ffff7a9467a <+42>: vmovaps %xmm1,0x60(%rsp) 0x00007ffff7a94680 <+48>: vmovaps %xmm2,0x70(%rsp) 0x00007ffff7a94686 <+54>: vmovaps %xmm3,0x80(%rsp) 0x00007ffff7a9468f <+63>: vmovaps %xmm4,0x90(%rsp) 0x00007ffff7a94698 <+72>: vmovaps %xmm5,0xa0(%rsp) 0x00007ffff7a946a1 <+81>: vmovaps %xmm6,0xb0(%rsp) 0x00007ffff7a946aa <+90>: vmovaps %xmm7,0xc0(%rsp) 0x00007ffff7a946b3 <+99>: lea 0x8(%rsp),%rdx 0x00007ffff7a946b8 <+104>: mov %rdi,%rsi 0x00007ffff7a946bb <+107>: movl $0x8,0x8(%rsp) 0x00007ffff7a946c3 <+115>: lea 0xe0(%rsp),%rax 0x00007ffff7a946cb <+123>: movl $0x30,0xc(%rsp) 0x00007ffff7a946d3 <+131>: mov %rax,0x10(%rsp) 0x00007ffff7a946d8 <+136>: lea 0x20(%rsp),%rax 0x00007ffff7a946dd <+141>: mov %rax,0x18(%rsp) 0x00007ffff7a946e2 <+146>: mov 0x341857(%rip),%rax # 0x7ffff7dd5f40 0x00007ffff7a946e9 <+153>: mov (%rax),%rdi 0x00007ffff7a946ec <+156>: callq 0x7ffff7a89af0 <vfprintf> 0x00007ffff7a946f1 <+161>: add $0xd8,%rsp 0x00007ffff7a946f8 <+168>: retq End of assembler dump.
由于%xmm寄存器和浮点操作有关,联想到可能和浮点参数有关。于是就写一个浮点操作的函数测试一下:
/* Non-variadic test function: returns the sum of its two double arguments. */
double foo(double a,double b){
    return a+b;
}

/* Calls foo with two double constants; the result is discarded. */
int main(){
    foo(1.0,2.0);
    return 0;
}
编译出的foo和main函数汇编代码如下:
00000000004004fd <foo>: 4004fd: 55 push %rbp 4004fe: 48 89 e5 mov %rsp,%rbp 400501: f2 0f 11 45 f8 movsd %xmm0,-0x8(%rbp) 400506: f2 0f 11 4d f0 movsd %xmm1,-0x10(%rbp) 40050b: f2 0f 10 45 f8 movsd -0x8(%rbp),%xmm0 400510: f2 0f 58 45 f0 addsd -0x10(%rbp),%xmm0 400515: f2 0f 11 45 e8 movsd %xmm0,-0x18(%rbp) 40051a: 48 8b 45 e8 mov -0x18(%rbp),%rax 40051e: 48 89 45 e8 mov %rax,-0x18(%rbp) 400522: f2 0f 10 45 e8 movsd -0x18(%rbp),%xmm0 400527: 5d pop %rbp 400528: c3 retq 0000000000400529 <main>: 400529: 55 push %rbp 40052a: 48 89 e5 mov %rsp,%rbp 40052d: 48 83 ec 08 sub $0x8,%rsp 400531: 48 b8 00 00 00 00 00 movabs $0x4000000000000000,%rax 400538: 00 00 40 40053b: 48 89 45 f8 mov %rax,-0x8(%rbp) 40053f: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1 400544: f2 0f 10 05 9c 00 00 movsd 0x9c(%rip),%xmm0 # 4005e8 <_IO_stdin_used+0x8> 40054b: 00 40054c: e8 ac ff ff ff callq 4004fd <foo> 400551: b8 00 00 00 00 mov $0x0,%eax 400556: c9 leaveq 400557: c3 retq 400558: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) 40055f: 00
似乎在callq之前也没有给%eax赋值啊。再一想printf的特点是接受变长参数列表,于是将foo函数也改成接受变长参数列表的形式(即double foo(double a,double b,...),main中仍然调用foo(1.0,2.0)),重新编译后foo和main函数的汇编代码如下:
00000000004004fd <foo>: 4004fd: 55 push %rbp 4004fe: 48 89 e5 mov %rsp,%rbp 400501: 48 83 ec 50 sub $0x50,%rsp 400505: f2 0f 11 85 48 ff ff movsd %xmm0,-0xb8(%rbp) 40050c: ff 40050d: 48 89 bd 50 ff ff ff mov %rdi,-0xb0(%rbp) 400514: 48 89 b5 58 ff ff ff mov %rsi,-0xa8(%rbp) 40051b: 48 89 95 60 ff ff ff mov %rdx,-0xa0(%rbp) 400522: 48 89 8d 68 ff ff ff mov %rcx,-0x98(%rbp) 400529: 4c 89 85 70 ff ff ff mov %r8,-0x90(%rbp) 400530: 4c 89 8d 78 ff ff ff mov %r9,-0x88(%rbp) 400537: 84 c0 test %al,%al 400539: 74 18 je 400553 <foo+0x56> 40053b: 0f 29 55 a0 movaps %xmm2,-0x60(%rbp) 40053f: 0f 29 5d b0 movaps %xmm3,-0x50(%rbp) 400543: 0f 29 65 c0 movaps %xmm4,-0x40(%rbp) 400547: 0f 29 6d d0 movaps %xmm5,-0x30(%rbp) 40054b: 0f 29 75 e0 movaps %xmm6,-0x20(%rbp) 40054f: 0f 29 7d f0 movaps %xmm7,-0x10(%rbp) 400553: f2 0f 11 8d 40 ff ff movsd %xmm1,-0xc0(%rbp) 40055a: ff 40055b: f2 0f 10 85 48 ff ff movsd -0xb8(%rbp),%xmm0 400562: ff 400563: f2 0f 58 85 40 ff ff addsd -0xc0(%rbp),%xmm0 40056a: ff 40056b: f2 0f 11 85 38 ff ff movsd %xmm0,-0xc8(%rbp) 400572: ff 400573: 48 8b 85 38 ff ff ff mov -0xc8(%rbp),%rax 40057a: 48 89 85 38 ff ff ff mov %rax,-0xc8(%rbp) 400581: f2 0f 10 85 38 ff ff movsd -0xc8(%rbp),%xmm0 400588: ff 400589: c9 leaveq 40058a: c3 retq 000000000040058b <main>: 40058b: 55 push %rbp 40058c: 48 89 e5 mov %rsp,%rbp 40058f: 48 83 ec 10 sub $0x10,%rsp 400593: 48 b8 00 00 00 00 00 movabs $0x4000000000000000,%rax 40059a: 00 00 40 40059d: 48 89 45 f8 mov %rax,-0x8(%rbp) 4005a1: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1 4005a6: f2 0f 10 05 9a 00 00 movsd 0x9a(%rip),%xmm0 # 400648 <_IO_stdin_used+0x8> 4005ad: 00 4005ae: b8 02 00 00 00 mov $0x2,%eax 4005b3: e8 45 ff ff ff callq 4004fd <foo> 4005b8: b8 00 00 00 00 mov $0x0,%eax 4005bd: c9 leaveq 4005be: c3 retq 4005bf: 90 nop
这下在callq 4004fd <foo>之前有了对%eax赋值的mov $0x2,%eax语句,而且似乎这个%eax的值还和浮点数个数有关。再试一下改变传给foo的参数列表
/* Variadic test function: returns a+b; the variable arguments are never read. */
double foo(double a,double b,...){
    return a+b;
}

/* Passes seven double arguments followed by one int; the result is discarded. */
int main(){
    foo(1.0,2.0,3.0,4.0,5.0,6.0,7.0,10);
    return 0;
}
这下main函数部分对应的代码变成了下面的形式
000000000040058b <main>: 40058b: 55 push %rbp 40058c: 48 89 e5 mov %rsp,%rbp 40058f: 48 83 ec 10 sub $0x10,%rsp 400593: 49 b9 00 00 00 00 00 movabs $0x401c000000000000,%r9 40059a: 00 1c 40 40059d: 49 b8 00 00 00 00 00 movabs $0x4018000000000000,%r8 4005a4: 00 18 40 4005a7: 48 be 00 00 00 00 00 movabs $0x4014000000000000,%rsi 4005ae: 00 14 40 4005b1: 48 b9 00 00 00 00 00 movabs $0x4010000000000000,%rcx 4005b8: 00 10 40 4005bb: 48 ba 00 00 00 00 00 movabs $0x4008000000000000,%rdx 4005c2: 00 08 40 4005c5: 48 b8 00 00 00 00 00 movabs $0x4000000000000000,%rax 4005cc: 00 00 40 4005cf: bf 0a 00 00 00 mov $0xa,%edi 4005d4: 4c 89 4d f8 mov %r9,-0x8(%rbp) 4005d8: f2 0f 10 75 f8 movsd -0x8(%rbp),%xmm6 4005dd: 4c 89 45 f8 mov %r8,-0x8(%rbp) 4005e1: f2 0f 10 6d f8 movsd -0x8(%rbp),%xmm5 4005e6: 48 89 75 f8 mov %rsi,-0x8(%rbp) 4005ea: f2 0f 10 65 f8 movsd -0x8(%rbp),%xmm4 4005ef: 48 89 4d f8 mov %rcx,-0x8(%rbp) 4005f3: f2 0f 10 5d f8 movsd -0x8(%rbp),%xmm3 4005f8: 48 89 55 f8 mov %rdx,-0x8(%rbp) 4005fc: f2 0f 10 55 f8 movsd -0x8(%rbp),%xmm2 400601: 48 89 45 f8 mov %rax,-0x8(%rbp) 400605: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1 40060a: f2 0f 10 05 a6 00 00 movsd 0xa6(%rip),%xmm0 # 4006b8 <_IO_stdin_used+0x8> 400611: 00 400612: b8 07 00 00 00 mov $0x7,%eax 400617: e8 e1 fe ff ff callq 4004fd <foo> 40061c: b8 00 00 00 00 mov $0x0,%eax 400621: c9 leaveq 400622: c3 retq 400623: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 40062a: 00 00 00 40062d: 0f 1f 00 nopl (%rax)
可以看到向foo函数传递了7个浮点数和一个整数,在callq 4004fd <foo>前变成了mov $0x7,%eax,刚好是参数列表中浮点数的个数。我们还可以试验浮点参数多于8个的情况,以及foo的固定参数(即...之前的具名参数)中没有浮点数时编译出的foo函数的代码等。
由此我们得到了以下的猜想和结论
在X86_64平台下有
猜想:对于没有事先声明(调用处看不到函数原型)的外部函数,GCC不知道它的参数列表,因此都按可变参数列表的函数来处理,在call之前设置%eax。起作用的并不是extern关键字本身,而是调用处能否看到函数原型。
结论1:浮点参数是用%xmm0~%xmm7总共8个寄存器传递的,多过8个的那部分浮点参数则压到栈上
结论2:对于可变参数列表的函数,GCC在调用前用%eax来表示参数列表中是否有通过%xmm寄存器传递的浮点数(被调函数实际只检查其低8位%al,见上面printf和foo中的test %al,%al),而且很有可能是用于这类浮点数的个数的计数(由于用于传递参数的%xmm寄存器一共只有8个,故参数列表中多过8个浮点数则%eax直接赋8)