Linux
可执行程序如何产生?
/*
* filename: 01_helloWorld.c
* */
#include <stdio.h>

/*
 * Program entry point: prints "hello world" followed by a newline to
 * standard output, then returns 0 as the exit status.
 */
int main(void)
{
    printf("hello world\n");
    return 0;
}
预处理–处理源文件中的预处理命令,包括头文件的展开、宏替换和注释的删除
#include
是一条预处理命令,它的作用是将头文件的内容包含到本文件中
“包含”是指将头文件中的所有代码都会在#include
处展开
gcc -E 01_helloWorld.c > 01_helloWorld.i
-E选项让gcc在预处理之后自动停止后面的操作,这里再把预处理的结果重定向到01_helloWorld.i
这个文件中。
为什么不能在头文件中定义全局变量?
因为定义全局变量的代码会存在于所有以#include
包含该头文件的文件中。也就是说,所有的这些文件,都会定义一个同样的全局变量,这样就不可避免的造成了冲突;
编译环节----对源代码进行语法分析,并优化产生对应的汇编代码
gcc -S 01_helloWorld.c -o 01_helloWorld.s
这里产生的是汇编文件(文本格式),不是最终的二进制目标文件。
汇编过程–将汇编代码翻译成机器指令,并生成目标文件
gcc -c 01_helloWorld.c -o 01_helloWorld.o
链接过程–将各个目标文件,包括库文件,链接成一个可执行程序
地址和空间的分配
符号解析
重定位
在Linux
环境下,该工作是由GNU
的链接器ld
完成的
gcc -g -Wall -v 01_helloWorld.c -o 01_helloWorld
通过-v
选项可以查看完整和详细的gcc
编译过程
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC)
COLLECT_GCC_OPTIONS='-g' '-Wall' '-v' '-o' '01_helloWorld' '-mtune=generic' '-march=x86-64'
/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/cc1 -quiet -v 01_helloWorld.c -quiet -dumpbase 01_helloWorld.c -mtune=generic -march=x86-64 -auxbase 01_helloWorld -g -Wall -version -o /tmp/ccjd04QA.s
GNU C (GCC) version 4.8.5 20150623 (Red Hat 4.8.5-36) (x86_64-redhat-linux)
compiled by GNU C version 4.8.5 20150623 (Red Hat 4.8.5-36), GMP version 6.0.0, MPFR version 3.1.1, MPC version 1.0.1
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring nonexistent directory "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/include-fixed"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../x86_64-redhat-linux/include"
#include "..." search starts here:
#include <...> search starts here:
/usr/lib/gcc/x86_64-redhat-linux/4.8.5/include
/usr/local/include
/usr/include
End of search list.
GNU C (GCC) version 4.8.5 20150623 (Red Hat 4.8.5-36) (x86_64-redhat-linux)
compiled by GNU C version 4.8.5 20150623 (Red Hat 4.8.5-36), GMP version 6.0.0, MPFR version 3.1.1, MPC version 1.0.1
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: 592abcad67b46aec035d56e51f71d007
COLLECT_GCC_OPTIONS='-g' '-Wall' '-v' '-o' '01_helloWorld' '-mtune=generic' '-march=x86-64'
as -v --64 -o /tmp/ccFbz7iT.o /tmp/ccjd04QA.s
GNU assembler version 2.27 (x86_64-redhat-linux) using BFD version version 2.27-34.base.el7
COMPILER_PATH=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/:/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/:/usr/libexec/gcc/x86_64-redhat-linux/:/usr/lib/gcc/x86_64-redhat-linux/4.8.5/:/usr/lib/gcc/x86_64-redhat-linux/
LIBRARY_PATH=/usr/lib/gcc/x86_64-redhat-linux/4.8.5/:/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/:/lib/../lib64/:/usr/lib/../lib64/:/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-g' '-Wall' '-v' '-o' '01_helloWorld' '-mtune=generic' '-march=x86-64'
/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/collect2 --build-id --no-add-needed --eh-frame-hdr --hash-style=gnu -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o 01_helloWorld /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crt1.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crti.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtbegin.o -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../.. /tmp/ccFbz7iT.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-redhat-linux/4.8.5/crtend.o /usr/lib/gcc/x86_64-redhat-linux/4.8.5/../../../../lib64/crtn.o
Linux
下二进制可执行程序的一般格式为ELF
格式,可以通过readelf
命令查看可执行程序的ELF
格式:
readelf -h 01_helloWorld
# 查看ELF Header
ELF Header:
Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
Class: ELF64
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: Advanced Micro Devices X86-64
Version: 0x1
Entry point address: 0x400430
Start of program headers: 64 (bytes into file)
Start of section headers: 7168 (bytes into file)
Flags: 0x0
Size of this header: 64 (bytes)
Size of program headers: 56 (bytes)
Number of program headers: 9
Size of section headers: 64 (bytes)
Number of section headers: 36
Section header string table index: 35
readelf -S 01_helloWorld
# 查看Section Headers
Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .interp PROGBITS 0000000000400238 00000238
000000000000001c 0000000000000000 A 0 0 1
[ 2] .note.ABI-tag NOTE 0000000000400254 00000254
0000000000000020 0000000000000000 A 0 0 4
[ 3] .note.gnu.build-i NOTE 0000000000400274 00000274
0000000000000024 0000000000000000 A 0 0 4
[ 4] .gnu.hash GNU_HASH 0000000000400298 00000298
000000000000001c 0000000000000000 A 5 0 8
[ 5] .dynsym DYNSYM 00000000004002b8 000002b8
0000000000000060 0000000000000018 A 6 1 8
[ 6] .dynstr STRTAB 0000000000400318 00000318
000000000000003d 0000000000000000 A 0 0 1
[ 7] .gnu.version VERSYM 0000000000400356 00000356
0000000000000008 0000000000000002 A 5 0 2
[ 8] .gnu.version_r VERNEED 0000000000400360 00000360
0000000000000020 0000000000000000 A 6 1 8
[ 9] .rela.dyn RELA 0000000000400380 00000380
0000000000000018 0000000000000018 A 5 0 8
[10] .rela.plt RELA 0000000000400398 00000398
0000000000000030 0000000000000018 AI 5 24 8
[11] .init PROGBITS 00000000004003c8 000003c8
000000000000001a 0000000000000000 AX 0 0 4
[12] .plt PROGBITS 00000000004003f0 000003f0
0000000000000030 0000000000000010 AX 0 0 16
[13] .plt.got PROGBITS 0000000000400420 00000420
0000000000000008 0000000000000000 AX 0 0 8
[14] .text PROGBITS 0000000000400430 00000430
0000000000000182 0000000000000000 AX 0 0 16
[15] .fini PROGBITS 00000000004005b4 000005b4
0000000000000009 0000000000000000 AX 0 0 4
[16] .rodata PROGBITS 00000000004005c0 000005c0
000000000000001c 0000000000000000 A 0 0 8
[17] .eh_frame_hdr PROGBITS 00000000004005dc 000005dc
0000000000000034 0000000000000000 A 0 0 4
[18] .eh_frame PROGBITS 0000000000400610 00000610
00000000000000f4 0000000000000000 A 0 0 8
[19] .init_array INIT_ARRAY 0000000000600e10 00000e10
0000000000000008 0000000000000008 WA 0 0 8
[20] .fini_array FINI_ARRAY 0000000000600e18 00000e18
0000000000000008 0000000000000008 WA 0 0 8
[21] .jcr PROGBITS 0000000000600e20 00000e20
0000000000000008 0000000000000000 WA 0 0 8
[22] .dynamic DYNAMIC 0000000000600e28 00000e28
00000000000001d0 0000000000000010 WA 6 0 8
[23] .got PROGBITS 0000000000600ff8 00000ff8
0000000000000008 0000000000000008 WA 0 0 8
[24] .got.plt PROGBITS 0000000000601000 00001000
0000000000000028 0000000000000008 WA 0 0 8
[25] .data PROGBITS 0000000000601028 00001028
0000000000000004 0000000000000000 WA 0 0 1
[26] .bss NOBITS 000000000060102c 0000102c
0000000000000004 0000000000000000 WA 0 0 1
[27] .comment PROGBITS 0000000000000000 0000102c
000000000000002d 0000000000000001 MS 0 0 1
[28] .debug_aranges PROGBITS 0000000000000000 00001059
0000000000000030 0000000000000000 0 0 1
[29] .debug_info PROGBITS 0000000000000000 00001089
0000000000000091 0000000000000000 0 0 1
[30] .debug_abbrev PROGBITS 0000000000000000 0000111a
0000000000000044 0000000000000000 0 0 1
[31] .debug_line PROGBITS 0000000000000000 0000115e
0000000000000044 0000000000000000 0 0 1
[32] .debug_str PROGBITS 0000000000000000 000011a2
00000000000000c1 0000000000000001 MS 0 0 1
[33] .symtab SYMTAB 0000000000000000 00001268
0000000000000678 0000000000000018 34 52 8
[34] .strtab STRTAB 0000000000000000 000018e0
00000000000001d2 0000000000000000 0 0 1
[35] .shstrtab STRTAB 0000000000000000 00001ab2
000000000000014c 0000000000000000 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
L (link order), O (extra OS processing required), G (group), T (TLS),
C (compressed), x (unknown), o (OS specific), E (exclude),
l (large), p (processor specific)
ELF
文件的主要内容是由各个section
及symbol
表组成
section
列表中,最熟悉的有:
text段
:代码段,用于保存可执行指令
data段
:数据段,用于保存有非0初始值的全局变量和静态变量
bss段
:用于保存没有初始值或者初值为0的全局变量和静态变量
当程序加载时,bss段
中的变量会初始化为0
Linux
环境下,可以使用strace
跟踪系统调用,帮助自己研究系统程序加载、运行和退出的过程。
strace ./01_helloWorld
execve("./01_helloWorld", ["./01_helloWorld"], [/* 22 vars */]) = 0
brk(NULL) = 0x244a000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff59fe3f000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=44167, ...}) = 0
mmap(NULL, 44167, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7ff59fe34000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340$\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2151672, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7ff59f852000
mprotect(0x7ff59fa14000, 2097152, PROT_NONE) = 0
mmap(0x7ff59fc14000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7ff59fc14000
mmap(0x7ff59fc1a000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7ff59fc1a000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff59fe33000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff59fe31000
arch_prctl(ARCH_SET_FS, 0x7ff59fe31740) = 0
mprotect(0x7ff59fc14000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ) = 0
mprotect(0x7ff59fe40000, 4096, PROT_READ) = 0
munmap(0x7ff59fe34000, 44167) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff59fe3e000
write(1, "hello world\n", 12hello world
) = 12
exit_group(0) = ?
+++ exited with 0 +++
在Linux
环境中,执行一个命令时,首先是由shell
调用fork
,然后在子进程中来真正执行这个命令。
首先是调用execve
来加载01_helloWorld
,然后动态链接器ld.so(下文简称ld)
会分别检查ld.so.nohwcap
和ld.so.preload
。其中,如果ld.so.nohwcap
存在,则ld
会加载其中未优化版本的库。如果ld.so.preload
存在,则ld
会加载其中的库,之后利用mmap
将ld.so.cache
映射到内存中,ld.so.cache
中保存了库的路径,这样就完成了所有的准备工作。接着ld
加载c
库–libc.so.6
,利用mmap
及mprotect
设置程序的各个内存区域,到这里,程序运行环境已经完成。后面的write
会向文件描述符1
(标准输出)输出hello world\n
,返回值为12,它表示write
成功写入的字节数。最后调用exit_group
退出程序,此时参数为0,表示程序退出状态。
Linux
环境下,使用的C库一般都是glibc
,它封装了几乎所有的系统调用,代码中使用的“系统调用”实际上就是调用C库中的库函数。C库函数同样位于用户态,所以编译器可以统一处理所有的函数调用,而不必区分该函数到底是不是系统调用。