回到do_compile,下一个被调用的是lang_dependent_init。
do_compile (continue)
4650 /* Language-dependent initialization. Returns true on success. */
4651 if (lang_dependent_init (main_input_filename))
4652 {
4653 if (flag_unit_at_a_time)
4654 {
4655 open_dump_file (DFI_cgraph, NULL);
4656 cgraph_dump_file = rtl_dump_file;
4657 rtl_dump_file = NULL;
4658 }
注意到参数name指向main_input_filename,该值在handle_options中被设置。
4525 static int
4526 lang_dependent_init (const char *name) in toplev.c
4527 {
4528 if (dump_base_name == 0)
4529 dump_base_name = name ? name : "gccdump";
4530
4531 /* Other front-end initialization. */
4532 if ((*lang_hooks.init) () == 0)
4533 return 0;
对于C++,lang_hooks的init钩子是cxx_init。正如其名,它执行C++所需的初始化工作。
384 bool
385 cxx_init (void) in lex.c
386 {
387 static const enum tree_code stmt_codes[] = {
388 c_common_stmt_codes,
389 cp_stmt_codes
390 };
391
392 INIT_STATEMENT_CODES (stmt_codes);
在上面的388行,c_common_stmt_codes具有如下定义,它表示通用的C语句类型。
1150 #define c_common_stmt_codes / in c-common.h
1151 CLEANUP_STMT, EXPR_STMT, COMPOUND_STMT, /
1152 DECL_STMT, IF_STMT, FOR_STMT, /
1153 WHILE_STMT, DO_STMT, RETURN_STMT, /
1154 BREAK_STMT, CONTINUE_STMT, SCOPE_STMT, /
1155 SWITCH_STMT, GOTO_STMT, LABEL_STMT, /
1156 ASM_STMT, FILE_STMT, CASE_LABEL
而cp_stmt_codes则有如下定义,这是特定于C++的。
895 #define cp_stmt_codes / in cp-tree.h
896 CTOR_INITIALIZER, TRY_BLOCK, HANDLER, /
897 EH_SPEC_BLOCK, USING_STMT, TAG_DEFN
在392行,INIT_STATEMENT_CODES根据stmt_codes中列出的语句编码来设置statement_code_p:先将整个statement_code_p数组清零,再把这些编码所对应的项置为true。
1164 #define INIT_STATEMENT_CODES(STMT_CODES) /
1165 do { /
1166 unsigned int i; /
1167 memset (&statement_code_p, 0, sizeof (statement_code_p)); /
1168 for (i = 0; i < ARRAY_SIZE (STMT_CODES); i++) /
1169 statement_code_p[STMT_CODES[i]] = true; /
1170 } while (0)
对于C++,特定的字符串被保留用于为语言携带特定的语义。通常为了使词法分析更方便高效,输入的字符串将首先被检查是否是保留字,而不是使用有限状态机(DFA)来识别关键字。
cxx_init (continue)
394 /* We cannot just assign to input_filename because it has already
395 been initialized and will be used later as an N_BINCL for stabs+
396 debugging. */
397 push_srcloc ("<internal>", 0);
398
399 init_reswords ();
首先push_srcloc在input_file_stack中压入节点“<internal>”。
349 void
350 init_reswords (void) in lex.c
351 {
352 unsigned int i;
353 tree id;
354 int mask = ((flag_no_asm ? D_ASM : 0)
355 | (flag_no_gnu_keywords ? D_EXT : 0));
356
357 ridpointers = ggc_calloc ((int) RID_MAX, sizeof (tree));
358 for (i = 0; i < ARRAY_SIZE (reswords); i++)
359 {
360 id = get_identifier (reswords[i].word);
361 C_RID_CODE (id) = reswords[i].rid;
362 ridpointers [(int) reswords[i].rid] = id;
363 if (! (reswords[i].disable & mask))
364 C_IS_RESERVED_WORD (id) = 1;
365 }
366 }
这里ridpointers是“reserved id pointers”的缩写,它是一个tree对象数组(见357行,按sizeof (tree)分配了RID_MAX个元素)。reswords的类型是resword。
231 struct resword in lex.c
232 {
233 const char *const word;
234 ENUM_BITFIELD(rid) const rid : 16;
235 const unsigned int disable : 16;
236 };
用于C++的保留字定义如下:
245 static const struct resword reswords[] = in lex.c
246 {
247 { "_Complex", RID_COMPLEX, 0 },
248 { "__FUNCTION__", RID_FUNCTION_NAME, 0 },
249 { "__PRETTY_FUNCTION__", RID_PRETTY_FUNCTION_NAME, 0 },
250 { "__alignof", RID_ALIGNOF, 0 },
251 { "__alignof__", RID_ALIGNOF, 0 },
252 { "__asm", RID_ASM, 0 },
253 { "__asm__", RID_ASM, 0 },
254 { "__attribute", RID_ATTRIBUTE, 0 },
255 { "__attribute__", RID_ATTRIBUTE, 0 },
256 { "__builtin_va_arg", RID_VA_ARG, 0 },
257 { "__complex", RID_COMPLEX, 0 },
258 { "__complex__", RID_COMPLEX, 0 },
259 { "__const", RID_CONST, 0 },
260 { "__const__", RID_CONST, 0 },
261 { "__extension__", RID_EXTENSION, 0 },
262 { "__func__", RID_C99_FUNCTION_NAME, 0 },
263 { "__imag", RID_IMAGPART, 0 },
264 { "__imag__", RID_IMAGPART, 0 },
265 { "__inline", RID_INLINE, 0 },
266 { "__inline__", RID_INLINE, 0 },
267 { "__label__", RID_LABEL, 0 },
268 { "__null", RID_NULL, 0 },
269 { "__offsetof", RID_OFFSETOF, 0 },
270 { "__offsetof__", RID_OFFSETOF, 0 },
271 { "__real", RID_REALPART, 0 },
272 { "__real__", RID_REALPART, 0 },
273 { "__restrict", RID_RESTRICT, 0 },
274 { "__restrict__", RID_RESTRICT, 0 },
275 { "__signed", RID_SIGNED, 0 },
276 { "__signed__", RID_SIGNED, 0 },
277 { "__thread", RID_THREAD, 0 },
278 { "__typeof", RID_TYPEOF, 0 },
279 { "__typeof__", RID_TYPEOF, 0 },
280 { "__volatile", RID_VOLATILE, 0 },
281 { "__volatile__", RID_VOLATILE, 0 },
282 { "asm", RID_ASM, D_ASM },
283 { "auto", RID_AUTO, 0 },
284 { "bool", RID_BOOL, 0 },
285 { "break", RID_BREAK, 0 },
286 { "case", RID_CASE, 0 },
287 { "catch", RID_CATCH, 0 },
288 { "char", RID_CHAR, 0 },
289 { "class", RID_CLASS, 0 },
290 { "const", RID_CONST, 0 },
291 { "const_cast", RID_CONSTCAST, 0 },
292 { "continue", RID_CONTINUE, 0 },
293 { "default", RID_DEFAULT, 0 },
294 { "delete", RID_DELETE, 0 },
295 { "do", RID_DO, 0 },
296 { "double", RID_DOUBLE, 0 },
297 { "dynamic_cast", RID_DYNCAST, 0 },
298 { "else", RID_ELSE, 0 },
299 { "enum", RID_ENUM, 0 },
300 { "explicit", RID_EXPLICIT, 0 },
301 { "export", RID_EXPORT, 0 },
302 { "extern", RID_EXTERN, 0 },
303 { "false", RID_FALSE, 0 },
304 { "float", RID_FLOAT, 0 },
305 { "for", RID_FOR, 0 },
306 { "friend", RID_FRIEND, 0 },
307 { "goto", RID_GOTO, 0 },
308 { "if", RID_IF, 0 },
309 { "inline", RID_INLINE, 0 },
310 { "int", RID_INT, 0 },
311 { "long", RID_LONG, 0 },
312 { "mutable", RID_MUTABLE, 0 },
313 { "namespace", RID_NAMESPACE, 0 },
314 { "new", RID_NEW, 0 },
315 { "operator", RID_OPERATOR, 0 },
316 { "private", RID_PRIVATE, 0 },
317 { "protected", RID_PROTECTED, 0 },
318 { "public", RID_PUBLIC, 0 },
319 { "register", RID_REGISTER, 0 },
320 { "reinterpret_cast", RID_REINTCAST, 0 },
321 { "return", RID_RETURN, 0 },
322 { "short", RID_SHORT, 0 },
323 { "signed", RID_SIGNED, 0 },
324 { "sizeof", RID_SIZEOF, 0 },
325 { "static", RID_STATIC, 0 },
326 { "static_cast", RID_STATCAST, 0 },
327 { "struct", RID_STRUCT, 0 },
328 { "switch", RID_SWITCH, 0 },
329 { "template", RID_TEMPLATE, 0 },
330 { "this", RID_THIS, 0 },
331 { "throw", RID_THROW, 0 },
332 { "true", RID_TRUE, 0 },
333 { "try", RID_TRY, 0 },
334 { "typedef", RID_TYPEDEF, 0 },
335 { "typename", RID_TYPENAME, 0 },
336 { "typeid", RID_TYPEID, 0 },
337 { "typeof", RID_TYPEOF, D_ASM|D_EXT },
338 { "union", RID_UNION, 0 },
339 { "unsigned", RID_UNSIGNED, 0 },
340 { "using", RID_USING, 0 },
341 { "virtual", RID_VIRTUAL, 0 },
342 { "void", RID_VOID, 0 },
343 { "volatile", RID_VOLATILE, 0 },
344 { "wchar_t", RID_WCHAR, 0 },
345 { "while", RID_WHILE, 0 },
346
347 };
注意282和337行,这些保留字是GNU的扩展。并留意在init_reswords的354和363行,如何确定这些字符串是否被用作保留字。另上面的C_IS_RESERVED_WORD访问tree_common中的lang_flag_5域。
而在init_reswords的360行,get_identifier将识别符插入ident_hash,这个识别符与相应的ridpointers绑定。
100 tree
101 get_identifier (const char *text) in stringpool.c
102 {
103 hashnode ht_node = ht_lookup (ident_hash,
104 (const unsigned char *) text,
105 strlen (text), HT_ALLOC);
106
107 /* ht_node can't be NULL here. */
108 return HT_IDENT_TO_GCC_IDENT (ht_node);
109 }
在前面的章节已经看到,在系统中,标识符节点都会被保存在哈希表ident_hash内,然而对于tree_list类型的树节点(从父节点通过TREE_CHAIN访问),这个哈希表不够好,因为tree_list是串起来的树节点串。它需要另一个哈希表。这就是list_hash_table,它具有以下的定义。
649 static GTY ((param_is (union tree_node))) htab_t list_hash_table; in cp/tree.c
cxx_init (continue)
400 init_tree ();
init_tree就是初始化这个哈希表之处。
2216 void
2217 init_tree (void) in cp/tree.c
2218 {
2219 list_hash_table = htab_create_ggc (31, list_hash, list_hash_eq, NULL);
2220 }
前端为C++语义提供了钩子lang_expand_stmt,在此init_cp_semantics把该钩子与特定的函数绑定。
cxx_init (continue)
401 init_cp_semantics ();
3088 void
3089 init_cp_semantics (void) in cp/semantics.c
3090 {
3091 lang_expand_stmt = cp_expand_stmt;
3092 }
C++将某些保留字用作操作符,同时一些操作符字符没有出现在保留字中(例如,+,-等)。因此需要为这些特殊字符串收集数据。
cxx_init (continue)
402 init_operators ();
166 static void
167 init_operators (void) in cp/lex.c
168 {
169 tree identifier;
170 char buffer[256];
171 struct operator_name_info_t *oni;
172
173 #define DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, ASSN_P) /
174 sprintf (buffer, ISALPHA (NAME[0]) ? "operator %s" : "operator%s", NAME); /
175 identifier = get_identifier (buffer); /
176 IDENTIFIER_OPNAME_P (identifier) = 1; /
177 /
178 oni = (ASSN_P /
179 ? &assignment_operator_name_info[(int) CODE] /
180 : &operator_name_info[(int) CODE]); /
181 oni->identifier = identifier; /
182 oni->name = NAME; /
183 oni->mangled_name = MANGLING; /
184 oni->arity = ARITY;
185
186 #include "operators.def"
187 #undef DEF_OPERATOR
assignment_operator_name_info及operator_name_info提供了操作符名及其信息间的映射。它们具有以下的operator_name_info_t类型。
3509 typedef struct operator_name_info_t GTY(()) in cp-tree.h
3510 {
3511 /* The IDENTIFIER_NODE for the operator. */
3512 tree identifier;
3513 /* The name of the operator. */
3514 const char *name;
3515 /* The mangled name of the operator. */
3516 const char *mangled_name;
3517 /* The arity of the operator. */
3518 int arity;
3519 } operator_name_info_t;
C++的操作符定义在一个名为operators.def的文件里,其中我们可以看到以下的代码片段。注意到定义在173行的DEF_OPERATOR将在文件内被展开,因为这个文件在186行被包含。
70 #define DEF_SIMPLE_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operators.def
71 DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, 0)
77 #define DEF_ASSN_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operators.def
78 DEF_OPERATOR (NAME, CODE, MANGLING, ARITY, 1)
81 DEF_SIMPLE_OPERATOR ("new", NEW_EXPR, "nw", -1)
139 DEF_ASSN_OPERATOR ("=", NOP_EXPR, "aS", 2)
上面参数的含义显示如下:
NAME:C字符串形式的操作符名字,但不包含开头的`operator' 部分。这是在源程序中将给出的名字。例如,对于`operator +',这个将是`+'。
CODE:这个操作符的tree_code。例如,对于`operator +',这将是PLUS_EXPR。
MANGLING:在新ABI下,C字符串形式的操作符的修饰前缀。例如,对于`operator +',这将是"pl"。
ARITY:操作符的元数,或者-1如果允许任意元数(如`operator ()')。操作符++(postincrement)及--(postdecrement)被视为2元的。
ASSN_P:布尔值。如果非0,这是个赋值操作符。
init_operators (continue)
189 operator_name_info[(int) ERROR_MARK].identifier
190 = get_identifier ("<invalid operator>");
191
192 /* Handle some special cases. These operators are not defined in
193 the language, but can be produced internally. We may need them
194 for error-reporting. (Eventually, we should ensure that this
195 does not happen. Error messages involving these operators will
196 be confusing to users.) */
197
198 operator_name_info [(int) INIT_EXPR].name
199 = operator_name_info [(int) MODIFY_EXPR].name;
200 operator_name_info [(int) EXACT_DIV_EXPR].name = "(exact /)";
201 operator_name_info [(int) CEIL_DIV_EXPR].name = "(ceiling /)";
202 operator_name_info [(int) FLOOR_DIV_EXPR].name = "(floor /)";
203 operator_name_info [(int) ROUND_DIV_EXPR].name = "(round /)";
204 operator_name_info [(int) CEIL_MOD_EXPR].name = "(ceiling %)";
205 operator_name_info [(int) FLOOR_MOD_EXPR].name = "(floor %)";
206 operator_name_info [(int) ROUND_MOD_EXPR].name = "(round %)";
207 operator_name_info [(int) ABS_EXPR].name = "abs";
208 operator_name_info [(int) TRUTH_AND_EXPR].name = "strict &&";
209 operator_name_info [(int) TRUTH_OR_EXPR].name = "strict ||";
210 operator_name_info [(int) IN_EXPR].name = "in";
211 operator_name_info [(int) RANGE_EXPR].name = "...";
212 operator_name_info [(int) CONVERT_EXPR].name = "+";
213
214 assignment_operator_name_info [(int) EXACT_DIV_EXPR].name
215 = "(exact /=)";
216 assignment_operator_name_info [(int) CEIL_DIV_EXPR].name
217 = "(ceiling /=)";
218 assignment_operator_name_info [(int) FLOOR_DIV_EXPR].name
219 = "(floor /=)";
220 assignment_operator_name_info [(int) ROUND_DIV_EXPR].name
221 = "(round /=)";
222 assignment_operator_name_info [(int) CEIL_MOD_EXPR].name
223 = "(ceiling %=)";
224 assignment_operator_name_info [(int) FLOOR_MOD_EXPR].name
225 = "(floor %=)";
226 assignment_operator_name_info [(int) ROUND_MOD_EXPR].name
227 = "(round %=)";
228 }
在余下的代码中,正如注释所解释,这些操作符在系统内部生成,并且与目标语言无关,这种情形下,仅name域被设置。
在C++编译器内部,标识符名都是经过修饰的,这样才可能同时使用同名的变量,类定义,函数声明,等等。C++标准并没有规定修饰的做法。不过各编译器的实现均大同小异。这里,首先需要初始化相应的机制。另外,下面的current_function_decl总是指向当前被编译的函数,而class_type_node则是编译器用来标记类节点的特殊记号。
cxx_init (continue)
403 init_method ();
404 init_error ();
405
406 current_function_decl = NULL;
407
408 class_type_node = ridpointers[(int) RID_CLASS];
71 void
72 init_method (void) in method.c
73 {
74 init_mangle ();
75 }
GNU的C++修饰名规则中,替代命名规则是比较特殊的。例如:
template <class A> class T {...};
class X {...}; class Y {...};
T<X> t1; T<Y> t2;
对于模板类T的具现T<X>和T<Y>(即t1、t2的类型),它们被视为2个完全不同的类,其修饰名应该有所反映。在GNU C++编译器中,这通过替代命名规则来生成不同的修饰名:首先类X和Y,根据其声明/定义出现的次序,予以编号;在T<X>中的X部分,由“S”+“X序号”+“_”来命名。这需要记录所有的类名字,为此,编译器定义了如下数据结构:
95 static struct globals in mangle.c
96 {
97 /* The name in which we're building the mangled name. */
98 struct obstack name_obstack;
99
100 /* An array of the current substitution candidates, in the order
101 we've seen them. */
102 varray_type substitutions;
103
104 /* The entity that is being mangled. */
105 tree entity;
106
107 /* True if the mangling will be different in a future version of the
108 ABI. */
109 bool need_abi_warning;
110 } G;
102行的substitutions就是保持替代对象的数组,其索引用作其序号。而name_obstack则是用于分配、保持修饰名的内存块。
2417 void
2418 init_mangle (void) in mangle.c
2419 {
2420 gcc_obstack_init (&G.name_obstack);
2421
2422 /* Cache these identifiers for quick comparison when checking for
2423 standard substitutions. */
2424 subst_identifiers[SUBID_ALLOCATOR] = get_identifier ("allocator");
2425 subst_identifiers[SUBID_BASIC_STRING] = get_identifier ("basic_string");
2426 subst_identifiers[SUBID_CHAR_TRAITS] = get_identifier ("char_traits");
2427 subst_identifiers[SUBID_BASIC_ISTREAM] = get_identifier ("basic_istream");
2428 subst_identifiers[SUBID_BASIC_OSTREAM] = get_identifier ("basic_ostream");
2429 subst_identifiers[SUBID_BASIC_IOSTREAM] = get_identifier ("basic_iostream");
2430 }
在2420行,由gcc_obstack_init创建这个name_obstack对象。
33 #define gcc_obstack_init(OBSTACK) / in defaults.h
34 _obstack_begin ((OBSTACK), OBSTACK_CHUNK_SIZE, 0, /
35 obstack_chunk_alloc, /
36 obstack_chunk_free)
以下则是替代规则中的特例,为此需要创建subst_identifiers对象。这个规则可以大大缩短修饰名的长度。
::std = St
::std::allocator = Sa
::std::basic_string = Sb
::std::basic_string<char, ::std::char_traits<char>, ::std::allocator<char> > = Ss
::std::basic_istream<char, ::std::char_traits<char> > = Si
::std::basic_ostream<char, ::std::char_traits<char> > = So
::std::basic_iostream<char, ::std::char_traits<char> > = Sd