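The op_stack field of cpp_reader is the expression parser stack (see the comment at line 199 below). It holds entries for the tokens of the #if or #elif expression being evaluated, which is also how the multiple-include optimization (using #if !defined and #endif to enclose the contents of a header) is handled.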
cpp_create_reader (continued)
199 /* The expression parser stack. */
200 _cpp_expand_op_stack (pfile);
200
201 /* Initialize the buffer obstack. */
202 _obstack_begin (&pfile->buffer_ob, 0, 0,
203 (void *(*) (long)) xmalloc,
204 (void (*) (void *)) free);
205
206 _cpp_init_files (pfile);
207
208 _cpp_init_hashtable (pfile, table);
209
210 return pfile;
211 }
The stack is initialized and expanded by _cpp_expand_op_stack.
970 struct op *
971 _cpp_expand_op_stack (cpp_reader *pfile) in cppexp.c
972 {
973 size_t old_size = (size_t) (pfile->op_limit - pfile->op_stack);
974 size_t new_size = old_size * 2 + 20;
975
976 pfile->op_stack = xrealloc (pfile->op_stack, new_size * sizeof (struct op));
977 pfile->op_limit = pfile->op_stack + new_size;
978
979 return pfile->op_stack + old_size;
980 }
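Initially the stack is empty, so this first call gives it room for 20 entries (0 * 2 + 20); each later call grows it to twice its old size plus 20. The definition of struct op is given below.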
31 struct op in cppexp.c
32 {
33 const cpp_token *token; /* The token forming op (for diagnostics). */
34 cpp_num value; /* The value logically "right" of op. */
35 enum cpp_ttype op;
36 };
The token field of op holds the token that forms the operator, which is used for diagnostics. The value field records the token’s value (if it has one), which is key for the semantic action. The op field holds the token type, which is also key for semantic analysis.
Go deeper into op.
165 #define PREV_WHITE (1 << 0) /* If whitespace before this token. */ in cpplib.h
166 #define DIGRAPH (1 << 1) /* If it was a digraph. */
167 #define STRINGIFY_ARG (1 << 2) /* If macro argument to be stringified. */
168 #define PASTE_LEFT (1 << 3) /* If on LHS of a ## operator. */
169 #define NAMED_OP (1 << 4) /* C++ named operators. */
170 #define NO_EXPAND (1 << 5) /* Do not macro-expand this token. */
171 #define BOL (1 << 6) /* Token at beginning of line. */
The above macros are used in the flags field of cpp_token to indicate certain characteristics of a token. The definition of cpp_token is as below.
175 struct cpp_token in cpplib.h
176 {
177 fileline line; /* Logical line of first char of token. */
178 unsigned short col; /* Column of first char of token. */
179 ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */
180 unsigned char flags; /* flags - see above */
181
182 union
183 {
184 cpp_hashnode *node; /* An identifier. */
185 const cpp_token *source; /* Inherit padding from this token. */
186 struct cpp_string str; /* A string, or number. */
187 unsigned int arg_no; /* Argument no. for a CPP_MACRO_ARG. */
188 } val;
189 };
The value field of op has type cpp_num, which is defined as below. Notice that cpp_num_part in the struct is the widest integer type on the host machine, so a value is stored as a high part and a low part.
4929 struct cpp_num in cpplib.h
4930 {
4931 cpp_num_part high;
4932 cpp_num_part low;
4933 bool unsignedp; /* True if value should be treated as unsigned. */
4934 bool overflow; /* True if the most recent calculation overflowed. */
4935 };
The type cpp_ttype used at line 179 above is defined as below. Pay attention to the accompanying definitions of OP and TK.
143 #define OP(e, s) e, in cpplib.h
144 #define TK(e, s) e,
145 enum cpp_ttype
146 {
147 TTYPE_TABLE
148 N_TTYPES
149 };
150 #undef OP
151 #undef TK
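The content of TTYPE_TABLE at line 147 is as below; it lists all possible token types. (In this listing the trailing "/" on each line stands for the backslash line-continuation character of the original source.)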
60 #define TTYPE_TABLE / in cpplib.h
61 OP(CPP_EQ = 0, "=") /
62 OP (CPP_NOT, "!") /
63 OP (CPP_GREATER, ">") /* compare */ /
64 OP (CPP_LESS, "<") /
65 OP (CPP_PLUS, "+") /* math */ /
66 OP (CPP_MINUS, "-") /
67 OP (CPP_MULT, "*") /
68 OP (CPP_DIV, "/") /
69 OP (CPP_MOD, "%") /
70 OP (CPP_AND, "&") /* bit ops */ /
71 OP (CPP_OR, "|") /
72 OP (CPP_XOR, "^") /
73 OP (CPP_RSHIFT, ">>") /
74 OP (CPP_LSHIFT, "<<") /
75 OP (CPP_MIN, "<?") /* extension */ /
76 OP (CPP_MAX, ">?") /
77 /
78 OP (CPP_COMPL, "~") /
79 OP (CPP_AND_AND, "&&") /* logical */ /
80 OP (CPP_OR_OR, "||") /
81 OP (CPP_QUERY, "?") /
82 OP (CPP_COLON, ":") /
83 OP (CPP_COMMA, ",") /* grouping */ /
84 OP (CPP_OPEN_PAREN, "(") /
85 OP (CPP_CLOSE_PAREN, ")") /
86 TK(CPP_EOF, SPELL_NONE) /
87 OP (CPP_EQ_EQ, "==") /* compare */ /
88 OP (CPP_NOT_EQ, "!=") /
89 OP (CPP_GREATER_EQ, ">=") /
90 OP (CPP_LESS_EQ, "<=") /
91 /
92 /* These two are unary + / - in preprocessor expressions. */ /
93 OP (CPP_PLUS_EQ, "+=") /* math */ /
94 OP (CPP_MINUS_EQ, "-=") /
95 /
96 OP (CPP_MULT_EQ, "*=") /
97 OP (CPP_DIV_EQ, "/=") /
98 OP (CPP_MOD_EQ, "%=") /
99 OP (CPP_AND_EQ, "&=") /* bit ops */ /
100 OP (CPP_OR_EQ, "|=") /
101 OP (CPP_XOR_EQ, "^=") /
102 OP (CPP_RSHIFT_EQ, ">>=") /
103 OP (CPP_LSHIFT_EQ, "<<=") /
104 OP (CPP_MIN_EQ, "<?=") /* extension */ /
105 OP (CPP_MAX_EQ, ">?=") /
106 /* Digraphs together, beginning with CPP_FIRST_DIGRAPH. */ /
107 OP (CPP_HASH, "#") /* digraphs */ /
108 OP (CPP_PASTE, "##") /
109 OP (CPP_OPEN_SQUARE, "[") /
110 OP (CPP_CLOSE_SQUARE, "]") /
111 OP (CPP_OPEN_BRACE, "{") /
112 OP (CPP_CLOSE_BRACE, "}") /
113 /* The remainder of the punctuation. Order is not significant. */ /
114 OP (CPP_SEMICOLON, ";") /* structure */ /
115 OP (CPP_ELLIPSIS, "...") /
116 OP (CPP_PLUS_PLUS, "++") /* increment */ /
117 OP (CPP_MINUS_MINUS, "--") /
118 OP (CPP_DEREF, "->") /* accessors */ /
119 OP (CPP_DOT, ".") /
120 OP (CPP_SCOPE, "::") /
121 OP (CPP_DEREF_STAR, "->*") /
122 OP (CPP_DOT_STAR, ".*") /
123 OP (CPP_ATSIGN, "@") /* used in Objective-C */ /
124 /
125 TK (CPP_NAME, SPELL_IDENT) /* word */ /
126 TK (CPP_AT_NAME, SPELL_IDENT) /* @word - Objective-C */ /
127 TK (CPP_NUMBER, SPELL_LITERAL) /* 34_be+ta */ /
128 /
129 TK (CPP_CHAR, SPELL_LITERAL) /* 'char' */ /
130 TK (CPP_WCHAR, SPELL_LITERAL) /* L'char' */ /
131 TK (CPP_OTHER, SPELL_LITERAL) /* stray punctuation */ /
132 /
133 TK (CPP_STRING, SPELL_LITERAL) /* "string" */ /
134 TK (CPP_WSTRING, SPELL_LITERAL) /* L"string" */ /
135 TK (CPP_OBJC_STRING, SPELL_LITERAL) /* @"string" - Objective-C */ /
136 TK (CPP_HEADER_NAME, SPELL_LITERAL) /* <stdio.h> in #include */ /
137 /
138 TK (CPP_COMMENT, SPELL_LITERAL) /* Only if output comments. */ /
139 /* SPELL_LITERAL happens to DTRT. */ /
140 TK (CPP_MACRO_ARG, SPELL_NONE) /* Macro argument. */ /
141 TK (CPP_PADDING, SPELL_NONE) /* Whitespace for cpp0. */
In the definition, OP should be the abbreviation of “operator”, and TK should be the abbreviation of “token”.
cpp_reader uses hash tables to manage the files being compiled and the directories that are searched for header files. _cpp_init_files initializes these hash tables.
930 void
931 _cpp_init_files (cpp_reader *pfile) in cppfiles.c
932 {
933 pfile->file_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
934 NULL, xcalloc, free);
935 pfile->dir_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
936 NULL, xcalloc, free);
937 allocate_file_hash_entries (pfile);
938 }
The last step of cpp_create_reader is invoking below _cpp_init_hashtable.
Here, ident_hash is passed as the argument table at the invocation. It has entries for all identifiers: either macros defined by #define commands (type NT_MACRO), assertions created with #assert (NT_ASSERTION), or neither of the above (NT_VOID). Builtin macros like __LINE__ are flagged NODE_BUILTIN. Poisoned identifiers are flagged NODE_POISONED. NODE_OPERATOR (C++ only) indicates an identifier that behaves like an operator such as "xor". NODE_DIAGNOSTIC is for speed in lex_token: it indicates a diagnostic may be required for this node. Currently this only applies to __VA_ARGS__ and poisoned identifiers.
47 void
48 _cpp_init_hashtable (cpp_reader *pfile, hash_table *table) in cpphash.c
49 {
50 struct spec_nodes *s;
51
52 if (table == NULL)
53 {
54 pfile->our_hashtable = 1;
55 table = ht_create (13); /* 8K (=2^13) entries. */
56 table->alloc_node = (hashnode (*) (hash_table *)) alloc_node;
57
58 _obstack_begin (&pfile->hash_ob, 0, 0,
59 (void *(*) (long)) xmalloc,
60 (void (*) (void *)) free);
61 }
62
63 table->pfile = pfile;
64 pfile->hash_table = table;
65
66 /* Now we can initialize things that use the hash table. */
67 _cpp_init_directives (pfile);
68 _cpp_init_internal_pragmas (pfile);
69
70 s = &pfile->spec_nodes;
71 s->n_defined = cpp_lookup (pfile, DSC("defined"));
72 s->n_true = cpp_lookup (pfile, DSC ("true"));
73 s->n_false = cpp_lookup (pfile, DSC ("false"));
74 s->n__VA_ARGS__ = cpp_lookup (pfile, DSC ("__VA_ARGS__"));
75 s->n__VA_ARGS__->flags |= NODE_DIAGNOSTIC;
76 }
C/C++ define a series of preprocessor directives. _cpp_init_directives ensures that a hashnode for each directive is present in the hash_table belonging to cpp_reader, and marks each node with is_directive and its index in dtable.
1983 void
1984 _cpp_init_directives (cpp_reader *pfile) in cpplib.c
1985 {
1986 unsigned int i;
1987 cpp_hashnode *node;
1988
1989 for (i = 0; i < (unsigned int) N_DIRECTIVES; i++)
1990 {
1991 node = cpp_lookup (pfile, dtable[i].name, dtable[i].length);
1992 node->is_directive = 1;
1993 node->directive_index = i;
1994 }
1995 }
At line 1991 above, dtable is used; it is initialized from the content of DIRECTIVE_TABLE in the following way.
179 #define D(name, t, origin, flags) \ in cpplib.c
180 { do_##name, (const uchar *) #name, \
181 sizeof #name - 1, origin, flags },
182 static const directive dtable[] =
183 {
184 DIRECTIVE_TABLE
185 };
186 #undef D
187 #undef DIRECTIVE_TABLE
Each directive is described by a structure with the following definition. Note that its handler field points to the function that handles the directive.
84 struct directive in cpplib.c
85 {
86 directive_handler handler; /* Function to handle directive. */
87 const uchar *name; /* Name of directive. */
88 unsigned short length; /* Length of name. */
89 unsigned char origin; /* Origin of directive. */
90 unsigned char flags; /* Flags describing this directive. */
91 };
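Each entry in DIRECTIVE_TABLE is then expanded by the macro D, which is defined at line 179 above. Taking the first line as an example, after expansion it becomes: { do_define, (const uchar *) "define", sizeof "define" - 1, KANDR, IN_I },. (In the listing below the trailing "/" on each line stands for the backslash line-continuation character of the original source.)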
143 #define DIRECTIVE_TABLE / in cpplib.c
144 D(define, T_DEFINE = 0, KANDR, IN_I) /* 270554 */ /
145 D(include, T_INCLUDE, KANDR, INCL | EXPAND) /* 52262 */ /
146 D(endif, T_ENDIF, KANDR, COND) /* 45855 */ /
147 D(ifdef, T_IFDEF, KANDR, COND | IF_COND) /* 22000 */ /
148 D(if, T_IF, KANDR, COND | IF_COND | EXPAND) /* 18162 */ /
149 D(else, T_ELSE, KANDR, COND) /* 9863 */ /
150 D(ifndef, T_IFNDEF, KANDR, COND | IF_COND) /* 9675 */ /
151 D(undef, T_UNDEF, KANDR, IN_I) /* 4837 */ /
152 D(line, T_LINE, KANDR, EXPAND) /* 2465 */ /
153 D(elif, T_ELIF, STDC89, COND | EXPAND) /* 610 */ /
154 D(error, T_ERROR, STDC89, 0) /* 475 */ /
155 D(pragma, T_PRAGMA, STDC89, IN_I) /* 195 */ /
156 D(warning, T_WARNING, EXTENSION, 0) /* 22 */ /
157 D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) /* 19 */ /
158 D(ident, T_IDENT, EXTENSION, IN_I) /* 11 */ /
159 D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* 0 ObjC */ /
160 D(assert, T_ASSERT, EXTENSION, 0) /* 0 SVR4 */ /
161 D(unassert, T_UNASSERT, EXTENSION, 0) /* 0 SVR4 */ /
162 D(sccs, T_SCCS, EXTENSION, 0) /* 0 SVR4? */
The second column of the above lines is expanded into an enum type elsewhere in cpplib.c in a similar way. The third and fourth columns are macros that are already defined. The macros in the third column indicate the origin of the directives; the following are defined.
KANDR: directives come from traditional (K&R) C
STDC89: directives come from the 1989 C standard
EXTENSION: directives are extensions
The macros in the fourth column indicate characteristics of the directives; the following are defined.
COND: indicates a conditional
IF_COND: an opening conditional
INCL: means to treat "..." and <...> as q-char and h-char sequences respectively
IN_I: means this directive should be handled even if -fpreprocessed is in effect (these are the directives with callback hooks)
EXPAND: is set on directives that are always macro-expanded
Next, _cpp_init_internal_pragmas registers the #pragma directives that the preprocessor itself handles. [4] gives a detailed explanation.
#pragma GCC dependency
#pragma GCC dependency allows you to check the relative dates of the current file and another file. If the other file is more recent than the current file, a warning is issued. This is useful if the current file is derived from the other file, and should be regenerated. The other file is searched for using the normal include search path. Optional trailing text can be used to give more information in the warning message.
#pragma GCC dependency "parse.y"
#pragma GCC dependency "/usr/include/time.h" rerun fixincludes
#pragma GCC poison
Sometimes, there is an identifier that you want to remove completely from your program, and make sure that it never creeps back in. To enforce this, you can poison the identifier with this pragma. #pragma GCC poison is followed by a list of identifiers to poison. If any of those identifiers appears anywhere in the source after the directive, it is a hard error. For example,
#pragma GCC poison printf sprintf fprintf
sprintf (some_string, "hello");
will produce an error.
If a poisoned identifier appears as part of the expansion of a macro which was defined before the identifier was poisoned, it will not cause an error. This lets you poison an identifier without worrying about system headers defining macros that use it. For example,
#define strrchr rindex
#pragma GCC poison rindex
strrchr (some_string, 'h');
will not produce an error.
#pragma GCC system_header
This pragma takes no arguments. It causes the rest of the code in the current file to be treated as if it came from a system header. All warnings, other than those generated by '#warning', are suppressed while GCC is processing a system header. Macros defined in a system header are immune to a few warnings wherever they are expanded. This immunity is granted on an ad-hoc basis, when we find that a warning generates lots of false positives because of code in macros defined in system headers.
#pragma once is a non-standard but widely supported preprocessor directive designed to cause the current source file to be included only once in a single compilation. http://en.wikipedia.org/wiki/Pragma_once gives more details.
1048 void
1049 _cpp_init_internal_pragmas (cpp_reader *pfile) in cpplib.c
1050 {
1051 /* Pragmas in the global namespace. */
1052 cpp_register_pragma (pfile, 0, "once", do_pragma_once);
1053
1054 /* New GCC-specific pragmas should be put in the GCC namespace. */
1055 cpp_register_pragma (pfile, "GCC", "poison", do_pragma_poison);
1056 cpp_register_pragma (pfile, "GCC", "system_header", do_pragma_system_header);
1057 cpp_register_pragma (pfile, "GCC", "dependency", do_pragma_dependency);
1058 }
GCC defines struct pragma_entry to describe a #pragma.
49 typedef void (*pragma_cb) (cpp_reader *); in cpplib.c
50 struct pragma_entry
51 {
52 struct pragma_entry *next;
53 const cpp_hashnode *pragma; /* Name and length. */
54 int is_nspace;
55 union {
56 pragma_cb handler;
57 struct pragma_entry *space;
58 } u;
59 };
An entry is one of two kinds. For an actual #pragma, is_nspace at line 54 is 0 and handler at line 56 is used; it is a function pointer that provides the functionality of the #pragma. For a pragma namespace (such as GCC in #pragma GCC poison), is_nspace at line 54 is 1 and space at line 57 is used to link all the #pragma entries that are registered within that namespace.
The preprocessor records each #pragma in a struct pragma_entry, and all of them are saved in the pragmas field of parse_in (the cpp_reader), so that a #pragma can be handled as soon as it is seen.
1005 void
1006 cpp_register_pragma (cpp_reader *pfile, const char *space, in cpplib.c
1007 const char *name, pragma_cb handler)
1008 {
1009 struct pragma_entry **chain = &pfile->pragmas;
1010 struct pragma_entry *entry;
1011 const cpp_hashnode *node;
1012
1013 if (!handler)
1014 abort ();
1015
1016 if (space)
1017 {
1018 node = cpp_lookup (pfile, U space, strlen (space));
1019 entry = lookup_pragma_entry (*chain, node);
1020 if (!entry)
1021 entry = insert_pragma_entry (pfile, chain, node, NULL);
1022 else if (!entry->is_nspace)
1023 goto clash;
1024 chain = &entry->u.space;
1025 }
1026
1027 /* Check for duplicates. */
1028 node = cpp_lookup (pfile, U name, strlen (name));
1029 entry = lookup_pragma_entry (*chain, node);
1030 if (entry)
1031 {
1032 if (entry->is_nspace)
1033 clash:
1034 cpp_error (pfile, CPP_DL_ICE,
1035 "registering \"%s\" as both a pragma and a pragma namespace",
1036 NODE_NAME (node));
1037 else if (space)
1038 cpp_error (pfile, CPP_DL_ICE, "#pragma %s %s is already registered",
1039 space, name);
1040 else
1041 cpp_error (pfile, CPP_DL_ICE, "#pragma %s is already registered", name);
1042 }
1043 else
1044 insert_pragma_entry (pfile, chain, node, handler);
1045 }
Because there are only a few kinds of #pragma, a simple null-terminated linked list is good enough. Notice that for directives like #pragma GCC dependency, GCC forms a namespace, and dependency, poison, and system_header are within this namespace. Thus the node corresponding to GCC is a branch containing the nodes for its contents.
965 static struct pragma_entry *
966 lookup_pragma_entry (struct pragma_entry *chain, const cpp_hashnode *pragma) in cpplib.c
967 {
968 while (chain && chain->pragma != pragma)
969 chain = chain->next;
970
971 return chain;
972 }
In insert_pragma_entry, notice that the argument pragma is a pointer to a cpp_hashnode, which is the identifier node for the pragma's name in the ident_hash table. The new entry is inserted at the head of the chain.
977 static struct pragma_entry *
978 insert_pragma_entry (cpp_reader *pfile, struct pragma_entry **chain, in cpplib.c
979 const cpp_hashnode *pragma, pragma_cb handler)
980 {
981 struct pragma_entry *new;
982
983 new = (struct pragma_entry *)
984 _cpp_aligned_alloc (pfile, sizeof (struct pragma_entry));
985 new->pragma = pragma;
986 if (handler)
987 {
988 new->is_nspace = 0;
989 new->u.handler = handler;
990 }
991 else
992 {
993 new->is_nspace = 1;
994 new->u.space = NULL;
995 }
996
997 new->next = *chain;
998 *chain = new;
999 return new;
1000 }
Back in _cpp_init_hashtable, the spec_nodes field of cpp_reader records special identifiers of the language. These are the “defined”, “true”, “false”, and “__VA_ARGS__” nodes, which are unique throughout the system, so we just keep pointers to them in spec_nodes as below.
247 struct spec_nodes in cpphash.h
248 {
249 cpp_hashnode *n_defined; /* defined operator */
250 cpp_hashnode *n_true; /* C++ keyword true */
251 cpp_hashnode *n_false; /* C++ keyword false */
252 cpp_hashnode *n__VA_ARGS__; /* C99 vararg macros */
253 };
587 #define DSC(str) (const uchar *)str, sizeof str - 1 in cpphash.h
With the above DSC definition, the unique nodes for “defined”, “true”, “false”, and “__VA_ARGS__” are generated as the last step of the function.