命令行上定义的宏先于源代码中的宏定义被处理。如果命令行和源代码给出了同名的宏定义,编译器将给出警告,并且源文件中的定义将覆盖命令行上的定义。
finish_options (continue)
1422 /* We're about to send user input to cpplib, so make it warn for
1423 things that we previously (when we sent it internal definitions)
1424 told it to not warn.
1425
1426 C99 permits implementation-defined characters in identifiers.
1427 The documented meaning of -std= is to turn off extensions that
1428 conflict with the specified standard, and since a strictly
1429 conforming program cannot contain a '$', we do not condition
1430 their acceptance on the -std= setting. */
1431 cpp_opts->warn_dollars = (cpp_opts->pedantic && !cpp_opts->c99);
1432
1433 cpp_change_file (parse_in, LC_RENAME, _("<command line>"));
1434 for (i = 0; i < deferred_count; i++)
1435 {
1436 struct deferred_opt *opt = &deferred_opts[i];
1437
1438 if (opt->code == OPT_D)
1439 cpp_define (parse_in, opt->arg);
1440 else if (opt->code == OPT_U)
1441 cpp_undef (parse_in, opt->arg);
1442 else if (opt->code == OPT_A)
1443 {
1444 if (opt->arg[0] == '-')
1445 cpp_unassert (parse_in, opt->arg + 1);
1446 else
1447 cpp_assert (parse_in, opt->arg);
1448 }
1449 }
命令行选项“-imacros file”指示GCC引入file中的宏定义。这些宏定义应该排在-D和-U选项之后处理。
finish_options (continue)
1451 /* Handle -imacros after -D and -U. */
1452 for (i = 0; i < deferred_count; i++)
1453 {
1454 struct deferred_opt *opt = &deferred_opts[i];
1455
1456 if (opt->code == OPT_imacros
1457 && cpp_push_include (parse_in, opt->arg))
1458 {
1459 /* Disable push_command_line_include callback for now. */
1460 include_cursor = deferred_count + 1;
1461 cpp_scan_nooutput (parse_in);
1462 }
1463 }
1464 }
给定这个文件名,GCC在读入前,需要定位它。
1049 bool
1050 cpp_push_include (cpp_reader *pfile, const char *fname) in cppfiles.c
1051 {
1052 /* Make the command line directive take up a line. */
1053 pfile->line++;
1054 return _cpp_stack_include (pfile, fname, false, IT_CMDLINE);
1055 }
注意到,cpp_push_include调用_cpp_stack_include时,传入的参数angle_brackets是false,表示由-imacros引入的头文件应当视为用户定义文件(即按#include "file"的方式查找)。
750 bool
751 _cpp_stack_include (cpp_reader *pfile, const char *fname, int angle_brackets, in cppfiles.c
752 enum include_type type)
753 {
754 struct cpp_dir *dir;
755
756 dir = search_path_head (pfile, fname, angle_brackets, type);
757 if (!dir)
758 return false;
759
760 return _cpp_stack_file (pfile, _cpp_find_file (pfile, fname, dir, false,
761 angle_brackets),
762 type == IT_IMPORT);
763 }
文件定位由search_path_head完成。回忆在cpp_reader中,域bracket_include保存系统头文件(尖括号形式)的查找目录,quote_include保存用户定义文件(引号形式)的查找目录,而no_search_path则用于文件名本身就是绝对路径的情形(见下面700行),此时无需沿查找路径搜索。
692 static struct cpp_dir *
693 search_path_head (cpp_reader *pfile, const char *fname, int angle_brackets, in cppfiles.c
694 enum include_type type)
695 {
696 cpp_dir *dir;
697 _cpp_file *file;
698
699 if (IS_ABSOLUTE_PATH (fname))
700 return &pfile->no_search_path;
701
702 /* pfile->buffer is NULL when processing an -include command-line flag. */
703 file = pfile->buffer == NULL ? pfile->main_file : pfile->buffer->file;
704
705 /* For #include_next, skip in the search path past the dir in which
706 the current file was found, but if it was found via an absolute
707 path use the normal search logic. */
708 if (type == IT_INCLUDE_NEXT && file->dir)
709 dir = file->dir->next;
710 else if (angle_brackets)
711 dir = pfile->bracket_include;
712 else if (type == IT_CMDLINE)
713 /* -include and -imacros use the #include "" chain with the
714 preprocessor's cwd prepended. */
715 return make_cpp_dir (pfile, "./", false);
716 else if (pfile->quote_ignores_source_dir)
717 dir = pfile->quote_include;
718 else
719 return make_cpp_dir (pfile, dir_name_of_file (file), pfile->map->sysp);
720
721 if (dir == NULL)
722 cpp_error (pfile, CPP_DL_ERROR,
723 "no include path in which to search for %s", fname);
724
725 return dir;
726 }
看到对由–imacros引入的文件,起始的查找点是当前目录,不过这只是建议,在上面_cpp_stack_include中760行的_cpp_find_file将按照dir链表查找,直到找出一个结果。
在708行,处理的是#include_next指示,它用在一个头文件中来包含另一个文件,它使得对新的头文件的查找,从当前头文件所在目录后的目录开始。例如,如果一个头文件的正常查找次序是目录A,B,C,D和E,如果当前头文件在目录B中找到,当前头文件中的#include_next指示将使得,在目录C,D和E中,查找这个新的头文件。
还要注意,对于普通的引号形式的包含(不是尖括号形式,不是由命令行引入,并且没有设置quote_ignores_source_dir),查找将从当前文件所在的目录开始(719行)。
当头文件被找到时,760行的_cpp_stack_file将读入文件,参考读入文件一节。
由-imacros包含的文件仅提取宏定义。因此cpp_scan_nooutput反复调用cpp_get_token读入符号并将其丢弃,直到到达文件末尾;宏定义则在读入过程中作为#define指示被处理。cpp_get_token也是预处理器的核心部分,是预处理的主要场所。因此,在这里我们花力气研究它。
1136 void
1137 cpp_scan_nooutput (cpp_reader *pfile) in cppmacro.c
1138 {
1139 /* Request a CPP_EOF token at the end of this file, rather than
1140 transparently continuing with the including file. */
1141 pfile->buffer->return_at_eof = true;
1142
1143 if (CPP_OPTION (pfile, traditional))
1144 while (_cpp_read_logical_line_trad (pfile))
1145 ;
1146 else
1147 while (cpp_get_token (pfile)->type != CPP_EOF)
1148 ;
1149 }
cpp_reader中的context域是一个链表,它构成一系列相对独立的上下文。如果链表中仅有一个节点,这就是所谓的基本上下文,函数直接从文件中提取符号;而其他节点则包含一组的符号,代表特定的处理上下文,这样的例子有,宏展开、宏实参替换等。由这个context链表及精心设计的cpp_hashnode,cpp_get_token实现了看到宏的同时(非定义处)即将其展开。
1049 const cpp_token *
1050 cpp_get_token (cpp_reader *pfile) in cppmacro.c
1051 {
1052 const cpp_token *result;
1053
1054 for (;;)
1055 {
1056 cpp_hashnode *node;
1057 cpp_context *context = pfile->context;
1058
1059 /* Context->prev == 0 <=> base context. */
1060 if (!context->prev)
1061 result = _cpp_lex_token (pfile);
1062 else if (FIRST (context).token != LAST (context).token)
1063 {
1064 if (context->direct_p)
1065 result = FIRST (context).token++;
1066 else
1067 result = *FIRST (context).ptoken++;
1068
1069 if (result->flags & PASTE_LEFT)
1070 {
1071 paste_all_tokens (pfile, result);
1072 if (pfile->state.in_directive)
1073 continue;
1074 return padding_token (pfile, result);
1075 }
1076 }
1077 else
1078 {
1079 _cpp_pop_context (pfile);
1080 if (pfile->state.in_directive)
1081 continue;
1082 return &pfile->avoid_paste;
1083 }
上面1062行,FIRST指向上下文中下一个待读取的符号,LAST指向最后一个符号之后的位置;如果2者相等,表明该上下文已经处理完成,通过1079行的_cpp_pop_context删除该上下文。1072及1080行的in_directive如果非0,表示我们正在处理指示(比如,#define,#ifdef等)。通常,处理完一个上下文时,其结果将被放入新的上下文中。
1082行的avoid_paste,其type是CPP_PADDING,其val.source是NULL,而1074行的padding_token则有如下定义:
900 static const cpp_token *
901 padding_token (cpp_reader *pfile, const cpp_token *source) in cppmacro.c
902 {
903 cpp_token *result = _cpp_temp_token (pfile);
904
905 result->type = CPP_PADDING;
906 result->val.source = source;
907 result->flags = 0;
908 return result;
909 }
CPP_PADDING代表空白符号。以符号粘贴为例,当前gcc的实现是这样的。比如,宏的定义为:#define appA(PRE, A) PRE##A,随后调用appA (super, man)。gcc的预处理器为appA的展开体(super和man)压入一个新的上下文,并将super的flags置为PASTE_LEFT,随后返回一个CPP_PADDING(见下面cpp_get_token的1103行)。调用cpp_get_token的程序一般会直接丢掉这个CPP_PADDING,继续从刚压入的新上下文中读入符号,这时上面1069行的条件满足,super和man被paste_all_tokens粘贴成superman,并压入另一个新的上下文,同时再次返回一个CPP_PADDING,这个空白符依然将被丢掉。接着从这个新的上下文读入粘贴后的superman;在下一次读入时,由于该上下文已经空了,在1082行移除这个上下文,并返回一个空白符。现在,我们回到了宏展开体的上下文中,它也已经是空的了,因而在再下一次读入时,同样返回一个空白符,并回到基本上下文中。由此可见,这是一种强有力的隔离手段,能很好地保护上下文产生的结果(虽然在当前gcc里,这个保护似乎过分强大了)。而在in_directive非0的情况下(1072、1080及1101行),cpp_get_token并不返回空白符,而是直接continue,继续读取下一个符号。
为了方便起见,我们把_cpp_lex_token的代码,在下面重新显示一次。
691 const cpp_token *
692 _cpp_lex_token (cpp_reader *pfile) in cpplex.c
693 {
694 cpp_token *result;
695
696 for (;;)
697 {
698 if (pfile->cur_token == pfile->cur_run->limit)
699 {
700 pfile->cur_run = next_tokenrun (pfile->cur_run);
701 pfile->cur_token = pfile->cur_run->base;
702 }
703
704 if (pfile->lookaheads)
705 {
706 pfile->lookaheads--;
707 result = pfile->cur_token++;
708 }
709 else
710 result = _cpp_lex_direct (pfile);
711
712 if (result->flags & BOL)
713 {
714 /* Is this a directive. If _cpp_handle_directive returns
715 false, it is an assembler #. */
716 if (result->type == CPP_HASH
717 /* 6.10.3 p 11: Directives in a list of macro arguments
718 gives undefined behavior. This implementation
719 handles the directive as normal. */
720 && pfile->state.parsing_args != 1
721 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
722 continue;
723 if (pfile->cb.line_change && !pfile->state.skipping)
724 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
725 }
726
727 /* We don't skip tokens in directives. */
728 if (pfile->state.in_directive)
729 break;
730
731 /* Outside a directive, invalidate controlling macros. At file
732 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
733 get here and MI optimization works. */
734 pfile->mi_valid = false;
735
736 if (!pfile->state.skipping || result->type == CPP_EOF)
737 break;
738 }
739
740 return result;
741 }
上面的720行,域parsing_args表明我们处于解析宏实参的哪个阶段。不在解析宏实参时,其值为0;而当进行宏替换并且正在查找左括号时,其值为1;当左括号找到后(正在收集实参),其值为2。为了理解716行开始的条件,考虑下面的例子:
#define FUN(c) c
int main() {
FUN(
#ifdef SMALL
12,
#else
24
#endif
);
}
在721行,当看到定义FUN的#define时,_cpp_handle_directive将被调用。如果#之前有空白(即指示有缩进,对应721行的PREV_WHITE标志),参数indented是非0值;在打开传统C警告(CPP_WTRADITIONAL)时,directive_diagnostics会据此给出警告。在后面我们还会看到,当看到上面例子中的#ifdef时,_cpp_handle_directive将再一次被调用。
333 int
334 _cpp_handle_directive (cpp_reader *pfile, int indented) in cpplib.c
335 {
336 const directive *dir = 0;
337 const cpp_token *dname;
338 bool was_parsing_args = pfile->state.parsing_args;
339 int skip = 1;
340
341 if (was_parsing_args)
342 {
343 if (CPP_OPTION (pfile, pedantic))
344 cpp_error (pfile, CPP_DL_PEDWARN,
345 "embedding a directive within macro arguments is not portable");
346 pfile->state.parsing_args = 0;
347 pfile->state.prevent_expansion = 0;
348 }
349 start_directive (pfile);
350 dname = _cpp_lex_token (pfile);
351
352 if (dname->type == CPP_NAME)
353 {
354 if (dname->val.node->is_directive)
355 dir = &dtable[dname->val.node->directive_index];
356 }
357 /* We do not recognize the # followed by a number extension in
358 assembler code. */
359 else if (dname->type == CPP_NUMBER && CPP_OPTION (pfile, lang) != CLK_ASM)
360 {
361 dir = &linemarker_dir;
362 if (CPP_PEDANTIC (pfile) && ! CPP_OPTION (pfile, preprocessed)
363 && ! pfile->state.skipping)
364 cpp_error (pfile, CPP_DL_PEDWARN,
365 "style of line directive is a GCC extension");
366 }
回忆directive类型的dtable是根据DIRECTIVE_TABLE来初始化的(参考_cpp_init_directives,相应的hashnode被创建并保存入ident_hash,因此在354行,dname的val.node 就是这个标识符)。
作为GCC的一个扩展,行标志(linemarker)具有形式#linenum filename flags,它被插入输出需要的地方(但不会在一个字符串或字符常量中),表示下面跟着的行来自文件filename从linenum行开始的地方。为了处理它,在系统中有一个全局的directive实例——linemarker_dir(上面361行),它包含了操作行标志的句柄——do_linemarker。注意到在362行,cpp_reader中opts的preprocessed如果非0,表示我们正在查看已经被预处理的代码(或者由选项-fpreprocessed设置来禁止预处理,即便命令行上的文件所具有的后缀,提示它需要被预处理),这时行标志是预期会出现的,不需要警告;而cpp_reader中opts的pedantic如果非0,表示给出所有ANSI标准所要求的诊断消息。因此,只有在pedantic打开、代码未经预处理、并且不在被跳过的条件块中时,才会给出“style of line directive is a GCC extension”的警告。
_cpp_handle_directive (continue)
368 if (dir)
369 {
370 /* If we have a directive that is not an opening conditional,
371 invalidate any control macro. */
372 if (! (dir->flags & IF_COND))
373 pfile->mi_valid = false;
374
375 /* Kluge alert. In order to be sure that code like this
376
377 #define HASH #
378 HASH define foo bar
379
380 does not cause '#define foo bar' to get executed when
381 compiled with -save-temps, we recognize directives in
382 -fpreprocessed mode only if the # is in column 1. cppmacro.c
383 puts a space in front of any '#' at the start of a macro. */
384 if (CPP_OPTION (pfile, preprocessed)
385 && (indented || !(dir->flags & IN_I)))
386 {
387 skip = 0;
388 dir = 0;
389 }
390 else
391 {
392 /* In failed conditional groups, all non-conditional
393 directives are ignored. Before doing that, whether
394 skipping or not, we should lex angle-bracketed headers
395 correctly, and maybe output some diagnostics. */
396 pfile->state.angled_headers = dir->flags & INCL;
397 pfile->state.directive_wants_padding = dir->flags & INCL;
398 if (!CPP_OPTION (pfile, preprocessed))
399 directive_diagnostics (pfile, dir, indented);
400 if (pfile->state.skipping && !(dir->flags & COND))
401 dir = 0;
402 }
403 }
404 else if (dname->type == CPP_EOF)
405 ; /* CPP_EOF is the "null directive". */
406 else
407 {
408 /* An unknown directive. Don't complain about it in assembly
409 source: we don't know where the comments are, and # may
410 introduce assembler pseudo-ops. Don't complain about invalid
411 directives in skipped conditional groups (6.10 p4). */
412 if (CPP_OPTION (pfile, lang) == CLK_ASM)
413 skip = 0;
414 else if (!pfile->state.skipping)
415 cpp_error (pfile, CPP_DL_ERROR, "invalid preprocessing directive #%s",
416 cpp_token_as_text (pfile, dname));
417 }
如果找到了指示对象,dir则不是NULL。在预处理中(非-fpreprocessed模式,见398行),这个指示要由directive_diagnostics诊断,以发现不合适的用法并给出警告。这个函数相当直观。
297 static void
298 directive_diagnostics (cpp_reader *pfile, const directive *dir, int indented) in cpplib.c
299 {
300 /* Issue -pedantic warnings for extensions. */
301 if (CPP_PEDANTIC (pfile)
302 && ! pfile->state.skipping
303 && dir->origin == EXTENSION)
304 cpp_error (pfile, CPP_DL_PEDWARN, "#%s is a GCC extension", dir->name);
305
306 /* Traditionally, a directive is ignored unless its # is in
307 column 1. Therefore in code intended to work with K+R
308 compilers, directives added by C89 must have their #
309 indented, and directives present in traditional C must not.
310 This is true even of directives in skipped conditional
311 blocks. #elif cannot be used at all. */
312 if (CPP_WTRADITIONAL (pfile))
313 {
314 if (dir == &dtable [T_ELIF])
315 cpp_error (pfile, CPP_DL_WARNING,
316 "suggest not using #elif in traditional C");
317 else if (indented && dir->origin == KANDR)
318 cpp_error (pfile, CPP_DL_WARNING,
319 "traditional C ignores #%s with the # indented",
320 dir->name);
321 else if (!indented && dir->origin != KANDR)
322 cpp_error (pfile, CPP_DL_WARNING,
323 "suggest hiding #%s from traditional C with an indented #",
324 dir->name);
325 }
326 }
dir(可能为NULL)被存入cpp_reader的directive域。如果指示是有效的(dir非NULL),则通过调用其操作句柄,该指示得到执行。对于我们的案例,这是do_define,其核心函数是_cpp_create_definition(参考创建宏定义 – ISO模式一节)。
_cpp_handle_directive (continue)
419 pfile->directive = dir;
420 if (CPP_OPTION (pfile, traditional))
421 prepare_directive_trad (pfile);
422
423 if (dir)
424 pfile->directive->handler (pfile);
425 else if (skip == 0)
426 _cpp_backup_tokens (pfile, 1);
427
428 end_directive (pfile, skip);
429 if (was_parsing_args)
430 {
431 /* Restore state when within macro args. */
432 pfile->state.parsing_args = 2;
433 pfile->state.prevent_expansion = 1;
434 }
435 return skip;
436 }
如果_cpp_handle_directive把这一行作为指示处理了,它返回一个非0值;否则返回0,表示这个#不应被当作指示(例如在-fpreprocessed模式下不予识别的指示,或汇编代码中的#),此时已读入的符号被退回(426行)。因此,对于正常的情况,当从上面_cpp_lex_token 721行处的_cpp_handle_directive返回后,该函数继续提取跟在宏定义后的第一个符号。这次这个符号不会再被认为是指示,它将返回给cpp_get_token。
cpp_get_token (continue)
1085 if (pfile->state.in_directive && result->type == CPP_COMMENT)
1086 continue;
1087
1088 if (result->type != CPP_NAME)
1089 break;
1090
1091 node = result->val.node;
1092
1093 if (node->type != NT_MACRO || (result->flags & NO_EXPAND))
1094 break;
1095
1096 if (!(node->flags & NODE_DISABLED))
1097 {
1098 if (!pfile->state.prevent_expansion
1099 && enter_macro_context (pfile, node))
1100 {
1101 if (pfile->state.in_directive)
1102 continue;
1103 return padding_token (pfile, result);
1104 }
1105 }
1106 else
1107 {
1108 /* Flag this token as always unexpandable. FIXME: move this
1109 to collect_args()?. */
1110 cpp_token *t = _cpp_temp_token (pfile);
1111 t->type = result->type;
1112 t->flags = result->flags | NO_EXPAND;
1113 t->val.str = result->val.str;
1114 result = t;
1115 }
1116
1117 break;
1118 }
1119
1120 return result;
1121 }