Back in do_compile, the next call is to lang_dependent_init.
do_compile (continue)
4650 /* Language-dependent initialization. Returns true on success. */
4651 if (lang_dependent_init (main_input_filename))
4652 {
4653 if (flag_unit_at_a_time)
4654 {
4655 open_dump_file (DFI_cgraph, NULL);
4656 cgraph_dump_file = rtl_dump_file;
4657 rtl_dump_file = NULL;
4658 }
Notice that the parameter name of the function receives main_input_filename, which was set in handle_options.
4525 static int
4526 lang_dependent_init (const char *name) in toplev.c
4527 {
4528 if (dump_base_name == 0)
4529 dump_base_name = name ? name : "gccdump";
4530
4531 /* Other front-end initialization. */
4532 if ((*lang_hooks.init) () == 0)
4533 return 0;
For C++, the init hook of lang_hooks is cxx_init. As its name suggests, it performs the initialization of the C++ front-end.
384 bool
385 cxx_init (void) in lex.c
386 {
387 static const enum tree_code stmt_codes[] = {
388 c_common_stmt_codes,
389 cp_stmt_codes
390 };
391
392 INIT_STATEMENT_CODES (stmt_codes);
Above, at line 388, c_common_stmt_codes has the following definition; it lists the statement codes shared by the C-family languages.
1150 #define c_common_stmt_codes / in c-common.h
1151 CLEANUP_STMT, EXPR_STMT, COMPOUND_STMT, /
1152 DECL_STMT, IF_STMT, FOR_STMT, /
1153 WHILE_STMT, DO_STMT, RETURN_STMT, /
1154 BREAK_STMT, CONTINUE_STMT, SCOPE_STMT, /
1155 SWITCH_STMT, GOTO_STMT, LABEL_STMT, /
1156 ASM_STMT, FILE_STMT, CASE_LABEL
And cp_stmt_codes has the following definition, which lists the statement codes specific to C++.
895 #define cp_stmt_codes / in cp-tree.h
896 CTOR_INITIALIZER, TRY_BLOCK, HANDLER, /
897 EH_SPEC_BLOCK, USING_STMT, TAG_DEFN
At line 392, INIT_STATEMENT_CODES clears statement_code_p and then sets the entry to true for every tree code listed in stmt_codes, i.e. it records which tree codes are statement codes.
1164 #define INIT_STATEMENT_CODES(STMT_CODES) /
1165 do { /
1166 unsigned int i; /
1167 memset (&statement_code_p, 0, sizeof (statement_code_p)); /
1168 for (i = 0; i < ARRAY_SIZE (STMT_CODES); i++) /
1169 statement_code_p[STMT_CODES[i]] = true; /
1170 } while (0)
For C++, certain literal strings are reserved because they carry special semantic meaning in the language. Normally, to make lexical analysis easier and more efficient, an input string is first checked against the table of reserved words, instead of using a DFA to recognize the reserved words.
cxx_init (continue)
394 /* We cannot just assign to input_filename because it has already
395 been initialized and will be used later as an N_BINCL for stabs+
396 debugging. */
397 push_srcloc ("<internal>", 0);
398
399 init_reswords ();
First, push_srcloc pushes a node for “<internal>” onto input_file_stack. Then init_reswords registers the reserved words.
349 void
350 init_reswords (void) in lex.c
351 {
352 unsigned int i;
353 tree id;
354 int mask = ((flag_no_asm ? D_ASM : 0)
355 | (flag_no_gnu_keywords ? D_EXT : 0));
356
357 ridpointers = ggc_calloc ((int) RID_MAX, sizeof (tree));
358 for (i = 0; i < ARRAY_SIZE (reswords); i++)
359 {
360 id = get_identifier (reswords[i].word);
361 C_RID_CODE (id) = reswords[i].rid;
362 ridpointers [(int) reswords[i].rid] = id;
363 if (! (reswords[i].disable & mask))
364 C_IS_RESERVED_WORD (id) = 1;
365 }
366 }
Here ridpointers is an abbreviation of “reserved id pointers”; it is an array of tree nodes (one identifier node per RID code). reswords is an array of struct resword.
231 struct resword in lex.c
232 {
233 const char *const word;
234 ENUM_BITFIELD(rid) const rid : 16;
235 const unsigned int disable : 16;
236 };
The reserved words for C++ are defined as follows:
245 static const struct resword reswords[] = in lex.c
246 {
247 { "_Complex", RID_COMPLEX, 0 },
248 { "__FUNCTION__", RID_FUNCTION_NAME, 0 },
249 { "__PRETTY_FUNCTION__", RID_PRETTY_FUNCTION_NAME, 0 },
250 { "__alignof", RID_ALIGNOF, 0 },
251 { "__alignof__", RID_ALIGNOF, 0 },
252 { "__asm", RID_ASM, 0 },
253 { "__asm__", RID_ASM, 0 },
254 { "__attribute", RID_ATTRIBUTE, 0 },
255 { "__attribute__", RID_ATTRIBUTE, 0 },
256 { "__builtin_va_arg", RID_VA_ARG, 0 },
257 { "__complex", RID_COMPLEX, 0 },
258 { "__complex__", RID_COMPLEX, 0 },
259 { "__const", RID_CONST, 0 },
260 { "__const__", RID_CONST, 0 },
261 { "__extension__", RID_EXTENSION, 0 },
262 { "__func__", RID_C99_FUNCTION_NAME, 0 },
263 { "__imag", RID_IMAGPART, 0 },
264 { "__imag__", RID_IMAGPART, 0 },
265 { "__inline", RID_INLINE, 0 },
266 { "__inline__", RID_INLINE, 0 },
267 { "__label__", RID_LABEL, 0 },
268 { "__null", RID_NULL, 0 },
269 { "__offsetof", RID_OFFSETOF, 0 },
270 { "__offsetof__", RID_OFFSETOF, 0 },
271 { "__real", RID_REALPART, 0 },
272 { "__real__", RID_REALPART, 0 },
273 { "__restrict", RID_RESTRICT, 0 },
274 { "__restrict__", RID_RESTRICT, 0 },
275 { "__signed", RID_SIGNED, 0 },
276 { "__signed__", RID_SIGNED, 0 },
277 { "__thread", RID_THREAD, 0 },
278 { "__typeof", RID_TYPEOF, 0 },
279 { "__typeof__", RID_TYPEOF, 0 },
280 { "__volatile", RID_VOLATILE, 0 },
281 { "__volatile__", RID_VOLATILE, 0 },
282 { "asm", RID_ASM, D_ASM },
283 { "auto", RID_AUTO, 0 },
284 { "bool", RID_BOOL, 0 },
285 { "break", RID_BREAK, 0 },
286 { "case", RID_CASE, 0 },
287 { "catch", RID_CATCH, 0 },
288 { "char", RID_CHAR, 0 },
289 { "class", RID_CLASS, 0 },
290 { "const", RID_CONST, 0 },
291 { "const_cast", RID_CONSTCAST, 0 },
292 { "continue", RID_CONTINUE, 0 },
293 { "default", RID_DEFAULT, 0 },
294 { "delete", RID_DELETE, 0 },
295 { "do", RID_DO, 0 },
296 { "double", RID_DOUBLE, 0 },
297 { "dynamic_cast", RID_DYNCAST, 0 },
298 { "else", RID_ELSE, 0 },
299 { "enum", RID_ENUM, 0 },
300 { "explicit", RID_EXPLICIT, 0 },
301 { "export", RID_EXPORT, 0 },
302 { "extern", RID_EXTERN, 0 },
303 { "false", RID_FALSE, 0 },
304 { "float", RID_FLOAT, 0 },
305 { "for", RID_FOR, 0 },
306 { "friend", RID_FRIEND, 0 },
307 { "goto", RID_GOTO, 0 },
308 { "if", RID_IF, 0 },
309 { "inline", RID_INLINE, 0 },
310 { "int", RID_INT, 0 },
311 { "long", RID_LONG, 0 },
312 { "mutable", RID_MUTABLE, 0 },
313 { "namespace", RID_NAMESPACE, 0 },
314 { "new", RID_NEW, 0 },
315 { "operator", RID_OPERATOR, 0 },
316 { "private", RID_PRIVATE, 0 },
317 { "protected", RID_PROTECTED, 0 },
318 { "public", RID_PUBLIC, 0 },
319 { "register", RID_REGISTER, 0 },
320 { "reinterpret_cast", RID_REINTCAST, 0 },
321 { "return", RID_RETURN, 0 },
322 { "short", RID_SHORT, 0 },
323 { "signed", RID_SIGNED, 0 },
324 { "sizeof", RID_SIZEOF, 0 },
325 { "static", RID_STATIC, 0 },
326 { "static_cast", RID_STATCAST, 0 },
327 { "struct", RID_STRUCT, 0 },
328 { "switch", RID_SWITCH, 0 },
329 { "template", RID_TEMPLATE, 0 },
330 { "this", RID_THIS, 0 },
331 { "throw", RID_THROW, 0 },
332 { "true", RID_TRUE, 0 },
333 { "try", RID_TRY, 0 },
334 { "typedef", RID_TYPEDEF, 0 },
335 { "typename", RID_TYPENAME, 0 },
336 { "typeid", RID_TYPEID, 0 },
337 { "typeof", RID_TYPEOF, D_ASM|D_EXT },
338 { "union", RID_UNION, 0 },
339 { "unsigned", RID_UNSIGNED, 0 },
340 { "using", RID_USING, 0 },
341 { "virtual", RID_VIRTUAL, 0 },
342 { "void", RID_VOID, 0 },
343 { "volatile", RID_VOLATILE, 0 },
344 { "wchar_t", RID_WCHAR, 0 },
345 { "while", RID_WHILE, 0 },
346
347 };
Pay attention to lines 282 and 337: these reserved words are treated as GNU extensions and are the only entries with a non-zero disable field. Lines 354 and 363 in init_reswords show how the mask built from flag_no_asm and flag_no_gnu_keywords determines whether these strings should be used as reserved words or not. C_IS_RESERVED_WORD above accesses the lang_flag_5 field in tree_common.
At line 360 in init_reswords above, get_identifier inserts the identifier into ident_hash; the resulting node is then bound to the corresponding entry of ridpointers.
100 tree
101 get_identifier (const char *text) in stringpool.c
102 {
103 hashnode ht_node = ht_lookup (ident_hash,
104 (const unsigned char *) text,
105 strlen (text), HT_ALLOC);
106
107 /* ht_node can't be NULL here. */
108 return HT_IDENT_TO_GCC_IDENT (ht_node);
109 }
As we have seen in an earlier section, identifier tree nodes are kept in the hashtable ident_hash. However, for nodes of tree_list (accessed by TREE_CHAIN from the parent node), that hashtable is not good enough, because a tree_list is a list of tree nodes chained together. It needs another hashtable, list_hash_table, which is defined as below.
649 static GTY ((param_is (union tree_node))) htab_t list_hash_table; in cp/tree.c
cxx_init (continue)
400 init_tree ();
init_tree is the place to create this hashtable.
2216 void
2217 init_tree (void) in cp/tree.c
2218 {
2219 list_hash_table = htab_create_ggc (31, list_hash, list_hash_eq, NULL);
2220 }
The front-end provides a language hook (lang_expand_stmt) for C++ semantics; here init_cp_semantics binds this hook to cp_expand_stmt.
cxx_init (continue)
401 init_cp_semantics ();
3088 void
3089 init_cp_semantics (void) in cp/semantics.c
3090 {
3091 lang_expand_stmt = cp_expand_stmt;
3092 }
C++ uses some reserved words as operators, and some operators consist of characters that do not appear in any reserved word (e.g. +, -, etc.). These special literal strings need to be collected together with their specific data.
cxx_init (continue)
402 init_operators ();
166 static void
167 init_operators (void) in cp/lex.c
168 {
169 tree identifier;
170 char buffer[256];
171 struct operator_name_info_t *oni;
172
173 #define DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, ASSN_P) /
174 sprintf (buffer, ISALPHA (NAME[0]) ? "operator %s" : "operator%s", NAME); /
175 identifier = get_identifier (buffer); /
176 IDENTIFIER_OPNAME_P (identifier) = 1; /
177 /
178 oni = (ASSN_P /
179 ? &assignment_operator_name_info[(int) CODE] /
180 : &operator_name_info[(int) CODE]); /
181 oni->identifier = identifier; /
182 oni->name = NAME; /
183 oni->mangled_name = MANGLING; /
184 oni->arity = ARITY;
185
186 #include "operators.def"
187 #undef DEF_OPERATOR
assignment_operator_name_info and operator_name_info provide the mapping between operator tree codes and their information. Their elements are of type operator_name_info_t, defined as follows.
3509 typedef struct operator_name_info_t GTY(()) in cp-tree.h
3510 {
3511 /* The IDENTIFIER_NODE for the operator. */
3512 tree identifier;
3513 /* The name of the operator. */
3514 const char *name;
3515 /* The mangled name of the operator. */
3516 const char *mangled_name;
3517 /* The arity of the operator. */
3518 int arity;
3519 } operator_name_info_t;
The C++ operators are defined in a file named operators.def, in which we can find the following code snippet. Notice that DEF_OPERATOR, defined at line 173, is expanded in that file because the file is included at line 186.
70 #define DEF_SIMPLE_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def
71 DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, 0)
77 #define DEF_ASSN_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def
78 DEF_OPERATOR (NAME, CODE, MANGLING, ARITY, 1)
81 DEF_SIMPLE_OPERATOR ("new", NEW_EXPR, "nw", -1)
139 DEF_ASSN_OPERATOR ("=", NOP_EXPR, "aS", 2)
The meanings of the parameters above are given below:
NAME: The name of the operator, as a C string, but without the preceding `operator'. This is the name that would be given in the source program. For `operator +', for example, this would be `+'.
CODE: The tree_code for this operator. For `operator +', for example, this would be PLUS_EXPR.
MANGLING: The mangling prefix for the operator, as a C string, and as mangled under the new ABI. For `operator +', for example, this would be "pl".
ARITY: The arity of the operator, or -1 if any arity is allowed. (As for `operator ()') Postincrement and postdecrement operators are marked as binary.
ASSN_P: A boolean value. If nonzero, this is an assignment operator.
init_operators (continue)
189 operator_name_info[(int) ERROR_MARK].identifier
190 = get_identifier ("<invalid operator>");
191
192 /* Handle some special cases. These operators are not defined in
193 the language, but can be produced internally. We may need them
194 for error-reporting. (Eventually, we should ensure that this
195 does not happen. Error messages involving these operators will
196 be confusing to users.) */
197
198 operator_name_info [(int) INIT_EXPR].name
199 = operator_name_info [(int) MODIFY_EXPR].name;
200 operator_name_info [(int) EXACT_DIV_EXPR].name = "(ceiling /)";
201 operator_name_info [(int) CEIL_DIV_EXPR].name = "(ceiling /)";
202 operator_name_info [(int) FLOOR_DIV_EXPR].name = "(floor /)";
203 operator_name_info [(int) ROUND_DIV_EXPR].name = "(round /)";
204 operator_name_info [(int) CEIL_MOD_EXPR].name = "(ceiling %)";
205 operator_name_info [(int) FLOOR_MOD_EXPR].name = "(floor %)";
206 operator_name_info [(int) ROUND_MOD_EXPR].name = "(round %)";
207 operator_name_info [(int) ABS_EXPR].name = "abs";
208 operator_name_info [(int) TRUTH_AND_EXPR].name = "strict &&";
209 operator_name_info [(int) TRUTH_OR_EXPR].name = "strict ||";
210 operator_name_info [(int) IN_EXPR].name = "in";
211 operator_name_info [(int) RANGE_EXPR].name = "...";
212 operator_name_info [(int) CONVERT_EXPR].name = "+";
213
214 assignment_operator_name_info [(int) EXACT_DIV_EXPR].name
215 = "(exact /=)";
216 assignment_operator_name_info [(int) CEIL_DIV_EXPR].name
217 = "(ceiling /=)";
218 assignment_operator_name_info [(int) FLOOR_DIV_EXPR].name
219 = "(floor /=)";
220 assignment_operator_name_info [(int) ROUND_DIV_EXPR].name
221 = "(round /=)";
222 assignment_operator_name_info [(int) CEIL_MOD_EXPR].name
223 = "(ceiling %=)";
224 assignment_operator_name_info [(int) FLOOR_MOD_EXPR].name
225 = "(floor %=)";
226 assignment_operator_name_info [(int) ROUND_MOD_EXPR].name
227 = "(round %=)";
228 }
As the comment explains, the operators in the rest of the code are produced internally by the compiler and are not defined in the language; note that only the name field is set for them.
In the C++ compiler, all identifiers are mangled. In this way, variables, class definitions, function declarations, etc. with the same name can be used simultaneously. Here, the mangling facility first needs to be initialized. Besides, current_function_decl below always refers to the function currently being compiled, and class_type_node is the node the compiler uses to mark tree nodes of classes.
cxx_init (continue)
403 init_method ();
404 init_error ();
405
406 current_function_decl = NULL;
407
408 class_type_node = ridpointers[(int) RID_CLASS];
71 void
72 init_method (void) in method.c
73 {
74 init_mangle ();
75 }
Among the mangling rules of GNU C++, the rule for substitution is a special one. For instance:
template <class A> class T {...};
class X {...}; class Y {...};
T<X> t1; T<Y> t2;
The instantiations T<X> and T<Y> of class template T (the types of t1 and t2) are regarded as different types, and their mangled names should reflect this fact. In the GNU C++ compiler this is done by the substitution rule, which generates distinct mangled names: first, classes X and Y are assigned sequence numbers according to their order of appearance in the source code; then the X part of T<X> is named by “S” + “sequence number of X” + “_”. This requires recording the names of all user-defined types; for this purpose, the compiler defines the following data structure.
95 static struct globals in mangle.c
96 {
97 /* The name in which we're building the mangled name. */
98 struct obstack name_obstack;
99
100 /* An array of the current substitution candidates, in the order
101 we've seen them. */
102 varray_type substitutions;
103
104 /* The entity that is being mangled. */
105 tree entity;
106
107 /* True if the mangling will be different in a future version of the
108 ABI. */
109 bool need_abi_warning;
110 } G;
substitutions at line 102 is the array of substitution candidates, whose index is used as the sequence number. name_obstack is the memory in which the mangled name is built.
2417 void
2418 init_mangle (void) in mangle.c
2419 {
2420 gcc_obstack_init (&G.name_obstack);
2421
2422 /* Cache these identifiers for quick comparison when checking for
2423 standard substitutions. */
2424 subst_identifiers[SUBID_ALLOCATOR] = get_identifier ("allocator");
2425 subst_identifiers[SUBID_BASIC_STRING] = get_identifier ("basic_string");
2426 subst_identifiers[SUBID_CHAR_TRAITS] = get_identifier ("char_traits");
2427 subst_identifiers[SUBID_BASIC_ISTREAM] = get_identifier ("basic_istream");
2428 subst_identifiers[SUBID_BASIC_OSTREAM] = get_identifier ("basic_ostream");
2429 subst_identifiers[SUBID_BASIC_IOSTREAM] = get_identifier ("basic_iostream");
2430 }
At line 2420, gcc_obstack_init creates the instance for name_obstack.
33 #define gcc_obstack_init(OBSTACK) / in defaults.h
34 _obstack_begin ((OBSTACK), OBSTACK_CHUNK_SIZE, 0, /
35 obstack_chunk_alloc, /
36 obstack_chunk_free)
Below are the exceptions to the substitution rule; they are the reason subst_identifiers exists. These special rules can shorten mangled names considerably.
::std = St
::std::allocator = Sa
::std::basic_string = Sb
::std::basic_string<char, ::std::char_traits<char>, ::std::allocator<char> > = Ss
::std::basic_istream<char, ::std::char_traits<char> > = Si
::std::basic_ostream<char, ::std::char_traits<char> > = So
::std::basic_iostream<char, ::std::char_traits<char> > = Sd