C语言到汇编-函数与程序结构3

经过前面的学习，这章还剩下两个内容：

4.头文件
10.C预处理器

前面说过，编译过程可以分为“预处理、编译、汇编、连接”四个步骤。
以下面程序为例：

#include <stdio.h>
main()
{
  printf("hello, world\n");
}

假设这段代码保存在hello.c文件中。
第1步预处理过程中，C预处理器cpp将头文件“stdio.h”中的内容复制到包含文件“hello.c”中，并处理其中的宏定义等内容，最终得到后缀为.i的预处理文件hello.i。
hello.i文件内容如下（中间省略了很多）：

# 1 "hello.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "hello.c"
# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 1 3
# 19 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 3
# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 1 3
# 31 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 3
       
# 32 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 3
# 20 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 2 3

# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 1 3 4
# 213 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef unsigned int size_t;
# 325 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef short unsigned int wchar_t;
# 354 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef short unsigned int wint_t;
# 27 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 2 3
...
...
...
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fgetwchar (void);
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fputwchar (wint_t);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _getw (FILE*);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _putw (int, FILE*);


 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) fgetwchar (void);
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) fputwchar (wint_t);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) getw (FILE*);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) putw (int, FILE*);
# 2 "hello.c" 2
int main()
{
 printf("hello,world\n");
}

先不分析具体代码，总之能看到hello.c文件中多了很多东西，这些都是包含了stdio.h 文件带来的。
而第2步编译过程中，编译器根据头文件中的函数声明（已复制到hello.i文件中），来检查我们自己写的程序hello.c中调用的函数（即printf 函数）是否正确。如果正确，则编译通过，生成hello.s文件。
hello.s文件内容如下：

    .file   "hello.c"
    .intel_syntax
    .def    ___main;    .scl    2;  .type   32; .endef
    .section .rdata,"dr"
LC0:
    .ascii "hello,world\12\0"
    .text
.globl _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
    push    ebp
    mov ebp, esp
    sub esp, 8
    and esp, -16
    mov eax, 0
    add eax, 15
    add eax, 15
    shr eax, 4
    sal eax, 4
    mov DWORD PTR [ebp-4], eax
    mov eax, DWORD PTR [ebp-4]
    call    __alloca
    call    ___main
    mov DWORD PTR [esp], OFFSET FLAT:LC0
    call    _printf
    leave
    ret
    .def    _printf;    .scl    2;  .type   32; .endef

可以看到，hello.s文件的内容又变少了许多，只有原hello.c中的代码被编译成了汇编代码。接下来的汇编、连接过程都是针对这个hello.s文件的，也就是说，头文件里的内容后面都用不到了。
在上面这个程序中，stdio.h 头文件的唯一作用就是对hello.c 文件main 函数中调用的printf 函数进行类型检查，因为stdio.h 头文件中有printf 函数的声明。
若函数未声明会怎样？
假设程序如下：

main()
{
  printf("hello, world\n");
}

没有第一行的“#include <stdio.h>”，程序还是能正确地编译并执行。为了更明显一些，这里将标准库函数printf 换成自己写的函数进行对比：

main()
{
  a(1,2);
}

上面的a 函数并没有定义，也没有声明，编译后会报一个连接器ld的错误。但是如果不进行连接，只进行预处理、编译、汇编，则不会报错。
预处理后的代码：

# 1 "hello.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "hello.c"

main()
{
  a(1,2);
}

编译后的代码：

    .file   "hello.c"
    .intel_syntax
    .def    ___main;    .scl    2;  .type   32; .endef
    .text
.globl _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
    push    ebp
    mov ebp, esp
    sub esp, 24
    and esp, -16
    mov eax, 0
    add eax, 15
    add eax, 15
    shr eax, 4
    sal eax, 4
    mov DWORD PTR [ebp-4], eax
    mov eax, DWORD PTR [ebp-4]
    call    __alloca
    call    ___main
    mov DWORD PTR [esp+4], 2
    mov DWORD PTR [esp], 1
    call    _a
    leave
    ret
    .def    _a; .scl    2;  .type   32; .endef

可以看到，预处理器和编译器并没有对a 函数做任何检查。在编译后的汇编代码中，_main 函数像正常情况一样处理参数，调用_a 函数（也就是源代码中的a 函数）：

    mov DWORD PTR [esp+4], 2
    mov DWORD PTR [esp], 1
    call    _a

当编译过程执行到第4步“连接”的时候，才会发现不存在a 这个函数，随即引发错误。
这样看来，前面没有包含头文件的printf 函数也是同样的处理过程，只不过在第4步的“连接”过程中，连接器从标准库中找到了printf 函数的定义，所以没有报错。
如果代码这样呢：

int a(int);
main()
{
  a(1,2);
}

依然没有定义a 函数，但是添加了a 函数的函数声明。添加声明后编译器就会在编译到a(1,2);这一行的时候，根据上面的声明进行检查，发现参数个数不匹配，所以在这里（第2步，编译）就会抛出一个错误，无法编译成功。
经过上面的例子，应该对函数声明和头文件的作用有了一定理解。当然头文件中也可以有变量定义、函数定义等代码，如果有的话，在进行预处理、编译这两步后，它们也会被加入到原程序中并被编译成汇编程序。
之所以上面的hello.i文件编译后没有增加什么东西，是因为stdio.h 文件中只有一堆函数声明、宏定义、类型定义等内容，它们无法编译成汇编指令，只是给预处理器和编译器处理用的。
最后，再来看一下预处理之后hello.i文件中的内容：

# 1 "hello.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "hello.c"
# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 1 3
# 19 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 3
# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 1 3
# 31 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 3
       
# 32 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/_mingw.h" 3
# 20 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 2 3

# 1 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 1 3 4
# 213 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef unsigned int size_t;
# 325 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef short unsigned int wchar_t;
# 354 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/include/stddef.h" 3 4
typedef short unsigned int wint_t;
# 27 "D:/code/MinGW/bin/../lib/gcc/mingw32/3.4.5/../../../../include/stdio.h" 2 3
...
...
...
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fgetwchar (void);
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fputwchar (wint_t);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _getw (FILE*);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _putw (int, FILE*);


 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) fgetwchar (void);
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) fputwchar (wint_t);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) getw (FILE*);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) putw (int, FILE*);
# 2 "hello.c" 2
int main()
{
 printf("hello,world\n");
}

解释一下这里面#号后面的数字以及行末尾的数字的意思。#号后面的数字是行号，表示接下来的内容来自后面那个文件的这一行。而行末尾的数字是标志：1 表示开始进入一个新文件，2 表示（在包含完其它文件之后）返回到这个文件。因为被包含的头文件中也可能包含其它的头文件，用这种行号加标志的方式，就能在递归地将各个文件的内容都复制到hello.i中的同时，记录每段内容来自哪个文件的哪一行。
数字3 表示以下文本来自系统头文件，因此应禁止某些警告。数字4 表示应将以下文本视为包含在隐式extern "C"块中。
参考预处理器cpp官方文档：
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
下面的代码，例如：

 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fgetwchar (void);
 wint_t __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _fputwchar (wint_t);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _getw (FILE*);
 int __attribute__((__cdecl__)) __attribute__ ((__nothrow__)) _putw (int, FILE*);

这几行都是函数声明，中间多出来的__attribute__ 是GCC的函数属性：__cdecl__ 指定函数的调用约定（调用规则），__nothrow__ 表示函数不会抛出异常。而其它被省略的代码也都是类似的函数声明、类型定义等（头文件中的宏定义、条件包含等预处理指令在预处理阶段已经被处理掉了），总之都是不会被编译成汇编代码的东西。
（以上所有内容并不保证完全正确。）
好了，第4章的内容就先学习到这里，下一篇开始学习第5章，指针与数组。

C语言到汇编-函数与程序结构3

你可能感兴趣的:(C语言到汇编-函数与程序结构3)