深度探索Linux操作系统 —— 编译过程分析

在这里插入图片描述

linux系统构建——1.交叉编译工具链

深度探索Linux操作系统 —— 编译过程分析
深度探索Linux操作系统 —— 构建工具链
深度探索Linux操作系统 —— 构建内核
深度探索Linux操作系统 —— 构建initramfs
深度探索Linux操作系统 —— 从内核空间到用户空间
深度探索Linux操作系统 —— 构建根文件系统
深度探索Linux操作系统 —— 构建桌面环境
深度探索Linux操作系统 —— Linux图形原理探讨


文章目录

  • 一、源码
    • foo.h
    • hello.c
    • foo1.c
    • foo2.c
  • GCC 指令
  • 预处理
    • 命令
    • hello.i
  • 编译(Compile only)
    • 命令
    • foo2.s
  • 汇编
    • 命令
    • readelf
    • readelf -h
    • readelf -S
    • readelf -r
    • readelf -s
    • strip
  • 链接


    本文基于《深度探索Linux操作系统:系统构建和原理解析》

一、源码

foo.h

#if !defined(_FOO_H_)  // include guard: the body is skipped once _FOO_H_ is defined
#define _FOO_H_

#define PI 3.1415926  // object-like macro; hello.c uses it in its arithmetic
#define AREA          // defined with no value; hello.c only tests it with #ifdef

struct foo_struct {
  int a;
};

#endif // _FOO_H_

hello.c

#include "foo.h"  // provides PI, AREA and struct foo_struct

void foo2_func();  // defined in foo2.c
extern int foo2;   // defined in foo2.c

int main(int argc, char const *argv[]) {
  int result;
  int r = 5;
#ifdef AREA  // AREA is defined in foo.h, so this branch is the one compiled
  result = PI * r * r;  // PI expands to 3.1415926; the double product is converted to int
#else
  result = PI * r * 2;
#endif
  r = foo2;     // reads the external variable; resolved at link time
  foo2_func();  // external call; resolved at link time

  return 0;
}

foo1.c

#include <stdio.h>  /* declares printf; the header name was missing */

/* Global variable with external linkage. */
int foo1;
/* File-local variable (internal linkage). */
static int foo1_static;

/* File-local helper: prints the fixed marker "====>". */
static void foo1_func_1() { printf("====>"); }

/* Prints the value of a local variable, producing "a: 1". */
void foo1_func() {
  int foo1_local = 1;
  printf("a: %d", foo1_local);
}

foo2.c

#include <stdio.h>  /* declares printf; the header name was missing */

/* Global variable with external linkage; hello.c refers to it via extern. */
int foo2;
/* File-local variable (internal linkage). */
static int foo2_static;

/* File-local helper: prints the fixed marker "====>". */
static void foo2_func_1() { printf("====>"); }

/* Called from main in hello.c; prints a local variable, producing "a: 1". */
void foo2_func() {
  int foo2_local = 1;
  printf("a: %d", foo2_local);
}

GCC 指令

gcc --help
  -v                       Display the programs invoked by the compiler.
  -E                       Preprocess only; do not compile, assemble or link.
  -S                       Compile only; do not assemble or link.
  -c                       Compile and assemble, but do not link.
  -pie                     Create a dynamically linked position independent
                           executable.
  -shared                  Create a shared library.
  -x <language>            Specify the language of the following input files.
                           Permissible languages include: c c++ assembler none
                           'none' means revert to the default behavior of
                           guessing the language based on the file's extension.

预处理

命令

gcc -E hello.c -o hello.i

hello.i

# 0 "hello.c"
# 0 "<built-in>"
# 0 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 0 "<command-line>" 2
# 1 "hello.c"
# 1 "foo.h" 1






struct foo_struct {
  int a;
};
# 2 "hello.c" 2

void foo2_func();
extern int foo2;

int main(int argc, char const *argv[]) {
  int result;
  int r = 5;

  result = 3.1415926 * r * r;



  r = foo2;
  foo2_func();

  return 0;
}

编译(Compile only)

命令

  -S                       Compile only; do not assemble or link.
  
gcc -S foo2.c

foo2.s

	# Data definitions generated for foo2.c: the global foo2, the file-local
	# foo2_static, and the first string literal.
	.file	"foo2.c"
	.text
	.globl	foo2			# foo2 gets global binding
	.bss				# switch to the .bss section
	.align 4			# align foo2 to 4 bytes
	.type	foo2, @object
	.size	foo2, 4			# foo2 occupies 4 bytes
foo2:
	.zero	4			# four zero bytes for foo2
	.local	foo2_static		# foo2_static is not exported
	.comm	foo2_static,4,4		# size 4, alignment 4
	.section	.rodata
.LC0:					# string loaded by foo2_func_1
	.string	"====>"
	# foo2_func_1: file-local function (no .globl) that calls printf("====>").
	# The .cfi_* lines are assembler directives describing the call frame;
	# they produce no instructions.
	.text
	.type	foo2_func_1, @function
foo2_func_1:
.LFB0:
	.cfi_startproc
	endbr64				# CET/IBT marker at function entry
	pushq	%rbp			# save the caller's frame pointer
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp		# set up this function's frame pointer
	.cfi_def_cfa_register 6
	leaq	.LC0(%rip), %rax	# RIP-relative address of "====>"
	movq	%rax, %rdi		# first argument: format string
	movl	$0, %eax		# al = 0: no vector-register arguments for variadic printf
	call	printf@PLT		# call through the PLT
	nop
	popq	%rbp			# restore the caller's frame pointer
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE0:
	.size	foo2_func_1, .-foo2_func_1	# size = current location minus function start
	# foo2_func: exported function; stores 1 in a stack local and calls
	# printf("a: %d", local).
	.section	.rodata
.LC1:					# format string loaded by foo2_func
	.string	"a: %d"
	.text
	.globl	foo2_func		# foo2_func gets global binding
	.type	foo2_func, @function
foo2_func:
.LFB1:
	.cfi_startproc
	endbr64				# CET/IBT marker at function entry
	pushq	%rbp			# save the caller's frame pointer
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp		# set up this function's frame pointer
	.cfi_def_cfa_register 6
	subq	$16, %rsp		# reserve 16 bytes of stack for locals
	movl	$1, -4(%rbp)		# local at rbp-4 = 1
	movl	-4(%rbp), %eax		# reload the local
	movl	%eax, %esi		# second argument: the local's value
	leaq	.LC1(%rip), %rax	# RIP-relative address of "a: %d"
	movq	%rax, %rdi		# first argument: format string
	movl	$0, %eax		# al = 0: no vector-register arguments for variadic printf
	call	printf@PLT		# call through the PLT
	nop
	leave				# rsp = rbp, then pop rbp
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE1:
	.size	foo2_func, .-foo2_func	# size = current location minus function start
	# Trailer emitted by the compiler: identification string and note sections.
	.ident	"GCC: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0"	# compiler version string
	.section	.note.GNU-stack,"",@progbits	# empty section; NOTE(review): conventionally marks the stack non-executable, confirm
	.section	.note.gnu.property,"a"
	.align 8
	.long	1f - 0f			# byte distance from label 0 to label 1 (the "GNU" string)
	.long	4f - 1f			# byte distance from label 1 to label 4
	.long	5			# NOTE(review): presumably the note type NT_GNU_PROPERTY_TYPE_0, confirm
0:
	.string	"GNU"
1:
	.align 8
	.long	0xc0000002		# NOTE(review): presumably GNU_PROPERTY_X86_FEATURE_1_AND, verify against the psABI
	.long	3f - 2f			# byte distance from label 2 to label 3
2:
	.long	0x3			# NOTE(review): presumably the IBT and SHSTK feature bits, confirm
3:
	.align 8
4:

    在文件 foo2.c 中,除定义了全局变量 foo2 和静态变量 foo2_static 外,仅定义了 foo2_func_1 和 foo2_func 两个函数,而函数体中也只有区区一两行代码,但为什么产生的汇编代码如此之长?事实上,仔细观察可以发现,文件 foo2.s 中相当一部分是汇编器的伪指令。伪指令不会被翻译成 CPU 执行的机器指令,只用于指导汇编和链接过程。比如,代码中以 “.cfi” 开头的伪指令是辅助汇编器创建栈帧(stack frame)信息的。

汇编

命令

  -c                       Compile and assemble, but do not link.
  
gcc -c hello.c foo1.c foo2.c
# 执行后产生 hello.o foo1.o foo2.o

readelf

readelf --help
用法:readelf <选项> elf-文件
 显示关于 ELF 格式文件内容的信息
 Options are:
  -a --all               Equivalent to: -h -l -S -s -r -d -V -A -I
  -h --file-header       Display the ELF file header
  -l --program-headers   Display the program headers
     --segments          An alias for --program-headers
  -S --section-headers   Display the sections' header
     --sections          An alias for --section-headers
  -g --section-groups    Display the section groups
  -t --section-details   Display the section details
  -e --headers           Equivalent to: -h -l -S
  -s --syms              Display the symbol table
     --symbols           An alias for --syms
     --dyn-syms          Display the dynamic symbol table
     --lto-syms          Display LTO symbol tables
     --sym-base=[0|8|10|16]
                         Force base for symbol sizes.  The options are
                         mixed (the default), octal, decimal, hexadecimal.
  -C --demangle[=STYLE]  Decode mangled/processed symbol names
                           STYLE can be "none", "auto", "gnu-v3", "java",
                           "gnat", "dlang", "rust"
     --no-demangle       Do not demangle low-level symbol names.  (default)
     --recurse-limit     Enable a demangling recursion limit.  (default)
     --no-recurse-limit  Disable a demangling recursion limit
     -U[dlexhi] --unicode=[default|locale|escape|hex|highlight|invalid]
                         Display unicode characters as determined by the current locale
                          (default), escape sequences, "<hex sequences>", highlighted
                          escape sequences, or treat them as invalid and display as
                          "{hex sequences}"
  -n --notes             Display the core notes (if present)
  -r --relocs            Display the relocations (if present)
  -u --unwind            Display the unwind info (if present)
  -d --dynamic           Display the dynamic section (if present)
  -V --version-info      Display the version sections (if present)
  -A --arch-specific     Display architecture specific information (if any)
  -c --archive-index     Display the symbol/file index in an archive
  -D --use-dynamic       Use the dynamic section info when displaying symbols
  -L --lint|--enable-checks
                         Display warning messages for possible problems
  -x --hex-dump=<number|name>
                         Dump the contents of section <number|name> as bytes
  -p --string-dump=<number|name>
                         Dump the contents of section <number|name> as strings
  -R --relocated-dump=<number|name>
                         Dump the relocated contents of section <number|name>
  -z --decompress        Decompress section before dumping it
  -w --debug-dump[a/=abbrev, A/=addr, r/=aranges, c/=cu_index, L/=decodedline,
                  f/=frames, F/=frames-interp, g/=gdb_index, i/=info, o/=loc,
                  m/=macro, p/=pubnames, t/=pubtypes, R/=Ranges, l/=rawline,
                  s/=str, O/=str-offsets, u/=trace_abbrev, T/=trace_aranges,
                  U/=trace_info]
                         Display the contents of DWARF debug sections
  -wk --debug-dump=links Display the contents of sections that link to separate
                          debuginfo files
  -P --process-links     Display the contents of non-debug sections in separate
                          debuginfo files.  (Implies -wK)
  -wK --debug-dump=follow-links
                         Follow links to separate debug info files (default)
  -wN --debug-dump=no-follow-links
                         Do not follow links to separate debug info files
  --dwarf-depth=N        Do not display DIEs at depth N or greater
  --dwarf-start=N        Display DIEs starting at offset N
  --ctf=<number|name>    Display CTF info from section <number|name>
  --ctf-parent=<name>    Use CTF archive member <name> as the CTF parent
  --ctf-symbols=<number|name>
                         Use section <number|name> as the CTF external symtab
  --ctf-strings=<number|name>
                         Use section <number|name> as the CTF external strtab
  -I --histogram         Display histogram of bucket list lengths
  -W --wide              Allow output width to exceed 80 characters
  -T --silent-truncation If a symbol name is truncated, do not add [...] suffix
  @<file>                Read options from <file>
  -H --help              Display this information
  -v --version           Display the version number of readelf

readelf -h

#   -h --file-header       Display the ELF file header

readelf -h foo2.o
ELF 头:
  Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
  类别:                              ELF64
  数据:                              2 补码,小端序 (little endian)
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI 版本:                          0
  类型:                              REL (可重定位文件)
  系统架构:                          Advanced Micro Devices X86-64
  版本:                              0x1
  入口点地址:               0x0
  程序头起点:          0 (bytes into file)
  Start of section headers:          856 (bytes into file)
  标志:             0x0
  Size of this header:               64 (bytes)
  Size of program headers:           0 (bytes)
  Number of program headers:         0
  Size of section headers:           64 (bytes)
  Number of section headers:         14
  Section header string table index: 13

readelf -S

#   -S --section-headers   Display the sections' header

readelf -S foo2.o
There are 14 section headers, starting at offset 0x358:

节头:
  [号] 名称              类型             地址              偏移量
       大小              全体大小          旗标   链接   信息   对齐
  [ 0]                   NULL             0000000000000000  00000000
       0000000000000000  0000000000000000           0     0     0
  [ 1] .text             PROGBITS         0000000000000000  00000040
       000000000000004e  0000000000000000  AX       0     0     1
  [ 2] .rela.text        RELA             0000000000000000  00000250
       0000000000000060  0000000000000018   I      11     1     8
  [ 3] .data             PROGBITS         0000000000000000  0000008e
       0000000000000000  0000000000000000  WA       0     0     1
  [ 4] .bss              NOBITS           0000000000000000  00000090
       0000000000000008  0000000000000000  WA       0     0     4
  [ 5] .rodata           PROGBITS         0000000000000000  00000090
       000000000000000c  0000000000000000   A       0     0     1
  [ 6] .comment          PROGBITS         0000000000000000  0000009c
       000000000000002c  0000000000000001  MS       0     0     1
  [ 7] .note.GNU-stack   PROGBITS         0000000000000000  000000c8
       0000000000000000  0000000000000000           0     0     1
  [ 8] .note.gnu.pr[...] NOTE             0000000000000000  000000c8
       0000000000000020  0000000000000000   A       0     0     8
  [ 9] .eh_frame         PROGBITS         0000000000000000  000000e8
       0000000000000058  0000000000000000   A       0     0     8
  [10] .rela.eh_frame    RELA             0000000000000000  000002b0
       0000000000000030  0000000000000018   I      11     9     8
  [11] .symtab           SYMTAB           0000000000000000  00000140
       00000000000000d8  0000000000000018          12     6     8
  [12] .strtab           STRTAB           0000000000000000  00000218
       0000000000000036  0000000000000000           0     0     1
  [13] .shstrtab         STRTAB           0000000000000000  000002e0
       0000000000000074  0000000000000000           0     0     1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  D (mbind), l (large), p (processor specific)

readelf -r

#  -r --relocs            Display the relocations (if present)

readelf -r hello.o

重定位节 '.rela.text' at offset 0x1f8 contains 3 entries:
  偏移量          信息           类型           符号值        符号名称 + 加数
000000000027  000300000002 R_X86_64_PC32     0000000000000000 .rodata - 4
000000000045  000500000002 R_X86_64_PC32     0000000000000000 foo2 - 4
000000000052  000600000004 R_X86_64_PLT32    0000000000000000 foo2_func - 4

重定位节 '.rela.eh_frame' at offset 0x240 contains 1 entry:
  偏移量          信息           类型           符号值        符号名称 + 加数
000000000020  000200000002 R_X86_64_PC32     0000000000000000 .text + 0

readelf -s

#  -s --syms              Display the symbol table
#     --symbols           An alias for --syms
#     --dyn-syms          Display the dynamic symbol table
#     --lto-syms          Display LTO symbol tables
#     --sym-base=[0|8|10|16]
#                         Force base for symbol sizes.  The options are
#                         mixed (the default), octal, decimal, hexadecimal.
                         
readelf -s foo2.o

Symbol table '.symtab' contains 9 entries:
   Num:    Value          Size Type    Bind   Vis      Ndx Name
     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS foo2.c
     2: 0000000000000000     0 SECTION LOCAL  DEFAULT    1 .text
     3: 0000000000000004     4 OBJECT  LOCAL  DEFAULT    4 foo2_static
     4: 0000000000000000     0 SECTION LOCAL  DEFAULT    5 .rodata
     5: 0000000000000000    31 FUNC    LOCAL  DEFAULT    1 foo2_func_1
     6: 0000000000000000     4 OBJECT  GLOBAL DEFAULT    4 foo2
     7: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT  UND printf
     8: 000000000000001f    47 FUNC    GLOBAL DEFAULT    1 foo2_func


# ======================================================================

readelf -s hello.o

Symbol table '.symtab' contains 7 entries:
   Num:    Value          Size Type    Bind   Vis      Ndx Name
     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS hello.c
     2: 0000000000000000     0 SECTION LOCAL  DEFAULT    1 .text
     3: 0000000000000000     0 SECTION LOCAL  DEFAULT    5 .rodata
     4: 0000000000000000    93 FUNC    GLOBAL DEFAULT    1 main
     5: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT  UND foo2
     6: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT  UND foo2_func
     

    符号 foo2、foo2_func 都在模块 foo2 中定义,对于模块 hello 来说是外部符号,没有在任何一个段中,所以在列 Ndx 中,foo2、foo2_func 的值是 UND。UND 是 Undefined 的缩写,表示符号 foo2、foo2_func 是未定义的。

strip

Linux 可执行文件瘦身指令 strip 使用示例

    在链接时,对于模块中引用的外部符号,链接器将根据符号表进行符号的重定位。如果我们将符号表删除了,那么链接器在链接时将找不到符号的定义,从而不能进行正确的符号解析。如我们将 foo2.o 中的符号表删除(例如执行 strip foo2.o),再次进行链接,则链接器将因找不到符号定义而终止链接,如下所示:

gcc -o hello *.o
/usr/bin/ld: hello.o: warning: relocation against `foo2' in read-only section `.text'
/usr/bin/ld: error in foo2.o(.eh_frame); no .eh_frame_hdr table will be created
/usr/bin/ld: hello.o: in function `main':
hello.c:(.text+0x45): undefined reference to `foo2'
/usr/bin/ld: hello.c:(.text+0x52): undefined reference to `foo2_func'
/usr/bin/ld: warning: creating DT_TEXTREL in a PIE
collect2: error: ld returned 1 exit status

链接

    按照前面我们提到的目标文件合并理论,理论上三个目标文件 hello.o、foo1.o、foo2.o 的 “.text” 段的尺寸加起来应该与可执行文件 hello 的 “.text” 段的尺寸大小相等。但是,通过 readelf 的输出可见,三个目标文件的 “.text” 段的尺寸加起来是 0x46(0x26+0x10+0x10)字节,远小于可执行文件 hello 的 “.text” 段的大小 0x1b8。如果读者在编译时向 gcc 传递了参数 -v,仔细观察 gcc 的输出可以发现,实际上在链接时链接器自作主张地链接了一些特别的文件,包括 crt1.o、crti.o、crtn.o、crtbegin.o、crtend.o 等,其实就是我们前面提到的启动文件。所以多出来的尺寸都是合并这些文件的 “.text” 导致的。

   

你可能感兴趣的:(编译与链接,Linux内核,linux,运维,服务器,GCC)