Linux C++ 调试神技--如何将Linux C++ 可执行文件反汇编为Intel格式的汇编
对于许多在Windows上调试代码的人而言,Intel IA32格式的汇编代码可能并不陌生。因为种种原因,很多软件工程师不得不去尝试理解汇编代码。Windows PE的反汇编格式默认是Intel格式的,但是由于历史的原因,这个世界上还存在另外一种汇编格式——AT&T格式(在gdb中写作att),这也是Linux C++ 可执行代码的默认反汇编格式。
难道还真需要哥们学习两种格式的汇编么?一种学会了已经很NB了?
难道哥们在Windows上学的汇编到Linux上就白费了么?玩不转了么?
底层的处理器都是一个架构的,机器码都是一样的,这两种汇编代码一定可以互相转换,否则工具设计者智商一定低到写不出来工具。
对于这个问题,且听兄弟以一个例子详细道来。假设有如下的C++代码,我们将其在Linux上编译为一个名为hellod的可执行文件。
1 #include<iostream> 2 int main() 3 { 4 std::cout << "Enter two numbers:" << std::endl; 5 int v1 = 0, v2 = 0; 6 std::cin >> v1 >> v2; 7 std::cout << "The sum of " << v1 << " and " << v2 8 << " is " << v1 + v2 << std::endl; 9 return 0; 10 }
如果想看看现在的默认反汇编格式是什么,可以在gdb中使用如下命令。可以看到,Linux上gdb默认使用的是AT&T(att)格式:
(gdb) show disassembly-flavor The disassembly flavor is "att".
反汇编结果如下:
(gdb) disassemble main Dump of assembler code for function main(): 0x000000000040092d <+0>: push %rbp 0x000000000040092e <+1>: mov %rsp,%rbp 0x0000000000400931 <+4>: push %r13 0x0000000000400933 <+6>: push %r12 0x0000000000400935 <+8>: push %rbx 0x0000000000400936 <+9>: sub $0x18,%rsp 0x000000000040093a <+13>: mov $0x400ad4,%esi 0x000000000040093f <+18>: mov $0x6011a0,%edi 0x0000000000400944 <+23>: callq 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x0000000000400949 <+28>: mov $0x400830,%esi 0x000000000040094e <+33>: mov %rax,%rdi 0x0000000000400951 <+36>: callq 0x400820 <_ZNSolsEPFRSoS_E@plt> 0x0000000000400956 <+41>: movl $0x0,-0x28(%rbp) 0x000000000040095d <+48>: movl $0x0,-0x24(%rbp) 0x0000000000400964 <+55>: lea -0x28(%rbp),%rax 0x0000000000400968 <+59>: mov %rax,%rsi 0x000000000040096b <+62>: mov $0x601080,%edi 0x0000000000400970 <+67>: callq 0x400810 <_ZNSirsERi@plt> 0x0000000000400975 <+72>: lea -0x24(%rbp),%rdx 0x0000000000400979 <+76>: mov %rdx,%rsi 0x000000000040097c <+79>: mov %rax,%rdi ---Type <return> to continue, or q <return> to quit--- 0x000000000040097f <+82>: callq 0x400810 <_ZNSirsERi@plt> 0x0000000000400984 <+87>: mov -0x28(%rbp),%edx 0x0000000000400987 <+90>: mov -0x24(%rbp),%eax 0x000000000040098a <+93>: lea (%rdx,%rax,1),%r13d 0x000000000040098e <+97>: mov -0x24(%rbp),%ebx 0x0000000000400991 <+100>: mov -0x28(%rbp),%r12d 0x0000000000400995 <+104>: mov $0x400ae7,%esi 0x000000000040099a <+109>: mov $0x6011a0,%edi 0x000000000040099f <+114>: callq 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x00000000004009a4 <+119>: mov %r12d,%esi 0x00000000004009a7 <+122>: mov %rax,%rdi 0x00000000004009aa <+125>: callq 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009af <+130>: mov $0x400af3,%esi 0x00000000004009b4 <+135>: mov %rax,%rdi 0x00000000004009b7 <+138>: callq 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x00000000004009bc <+143>: mov %ebx,%esi 0x00000000004009be <+145>: mov 
%rax,%rdi 0x00000000004009c1 <+148>: callq 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009c6 <+153>: mov $0x400af9,%esi 0x00000000004009cb <+158>: mov %rax,%rdi 0x00000000004009ce <+161>: callq 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> ---Type <return> to continue, or q <return> to quit--- 0x00000000004009d3 <+166>: mov %r13d,%esi 0x00000000004009d6 <+169>: mov %rax,%rdi 0x00000000004009d9 <+172>: callq 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009de <+177>: mov $0x400830,%esi 0x00000000004009e3 <+182>: mov %rax,%rdi 0x00000000004009e6 <+185>: callq 0x400820 <_ZNSolsEPFRSoS_E@plt> 0x00000000004009eb <+190>: mov $0x0,%eax 0x00000000004009f0 <+195>: add $0x18,%rsp 0x00000000004009f4 <+199>: pop %rbx 0x00000000004009f5 <+200>: pop %r12 0x00000000004009f7 <+202>: pop %r13 0x00000000004009f9 <+204>: pop %rbp 0x00000000004009fa <+205>: retq End of assembler dump.
那如果我看不懂,我想使用Intel格式的汇编怎么办呢?下面的命令就可以做到,牛X吧?哈哈
(gdb) set disassembly-flavor intel (gdb) show disassembly-flavor The disassembly flavor is "intel".
再来看看这下我们反出来的汇编代码, 已经变成Intel 格式的了
(gdb) disassemble main Dump of assembler code for function main(): 0x000000000040092d <+0>: push rbp 0x000000000040092e <+1>: mov rbp,rsp 0x0000000000400931 <+4>: push r13 0x0000000000400933 <+6>: push r12 0x0000000000400935 <+8>: push rbx 0x0000000000400936 <+9>: sub rsp,0x18 0x000000000040093a <+13>: mov esi,0x400ad4 0x000000000040093f <+18>: mov edi,0x6011a0 0x0000000000400944 <+23>: call 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x0000000000400949 <+28>: mov esi,0x400830 0x000000000040094e <+33>: mov rdi,rax 0x0000000000400951 <+36>: call 0x400820 <_ZNSolsEPFRSoS_E@plt> 0x0000000000400956 <+41>: mov DWORD PTR [rbp-0x28],0x0 0x000000000040095d <+48>: mov DWORD PTR [rbp-0x24],0x0 0x0000000000400964 <+55>: lea rax,[rbp-0x28] 0x0000000000400968 <+59>: mov rsi,rax 0x000000000040096b <+62>: mov edi,0x601080 0x0000000000400970 <+67>: call 0x400810 <_ZNSirsERi@plt> 0x0000000000400975 <+72>: lea rdx,[rbp-0x24] 0x0000000000400979 <+76>: mov rsi,rdx 0x000000000040097c <+79>: mov rdi,rax ---Type <return> to continue, or q <return> to quit--- 0x000000000040097f <+82>: call 0x400810 <_ZNSirsERi@plt> 0x0000000000400984 <+87>: mov edx,DWORD PTR [rbp-0x28] 0x0000000000400987 <+90>: mov eax,DWORD PTR [rbp-0x24] 0x000000000040098a <+93>: lea r13d,[rdx+rax*1] 0x000000000040098e <+97>: mov ebx,DWORD PTR [rbp-0x24] 0x0000000000400991 <+100>: mov r12d,DWORD PTR [rbp-0x28] 0x0000000000400995 <+104>: mov esi,0x400ae7 0x000000000040099a <+109>: mov edi,0x6011a0 0x000000000040099f <+114>: call 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x00000000004009a4 <+119>: mov esi,r12d 0x00000000004009a7 <+122>: mov rdi,rax 0x00000000004009aa <+125>: call 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009af <+130>: mov esi,0x400af3 0x00000000004009b4 <+135>: mov rdi,rax 0x00000000004009b7 <+138>: call 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x00000000004009bc <+143>: mov esi,ebx 0x00000000004009be <+145>: mov rdi,rax 
0x00000000004009c1 <+148>: call 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009c6 <+153>: mov esi,0x400af9 0x00000000004009cb <+158>: mov rdi,rax 0x00000000004009ce <+161>: call 0x400800 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> ---Type <return> to continue, or q <return> to quit--- 0x00000000004009d3 <+166>: mov esi,r13d 0x00000000004009d6 <+169>: mov rdi,rax 0x00000000004009d9 <+172>: call 0x4007a0 <_ZNSolsEi@plt> 0x00000000004009de <+177>: mov esi,0x400830 0x00000000004009e3 <+182>: mov rdi,rax 0x00000000004009e6 <+185>: call 0x400820 <_ZNSolsEPFRSoS_E@plt> 0x00000000004009eb <+190>: mov eax,0x0 0x00000000004009f0 <+195>: add rsp,0x18 0x00000000004009f4 <+199>: pop rbx 0x00000000004009f5 <+200>: pop r12 0x00000000004009f7 <+202>: pop r13 0x00000000004009f9 <+204>: pop rbp 0x00000000004009fa <+205>: ret End of assembler dump.
总结
对于很多计算机工程领域的技术问题,理解原理是最重要的。软件工程师很忙,忙着学东西,但是有的东西你一旦知道了道理,靠分析就能节省很多的时间,根本不用再去学一遍。就拿本例来说,如果不去分析,而是再去学一遍另一种格式的汇编,可能也是事倍功半。站得高方能望得远,和大家共勉。