Studying note of GCC-3.4.6 source (51)

4.3. Language dependent initialization

Back in do_compile, the next step is the call to lang_dependent_init.

 

do_compile (continue)

 

4650     /* Language-dependent initialization. Returns true on success.  */

4651     if (lang_dependent_init (main_input_filename))

4652      {

4653       if (flag_unit_at_a_time)

4654        {

4655          open_dump_file (DFI_cgraph, NULL);

4656         cgraph_dump_file = rtl_dump_file;

4657          rtl_dump_file = NULL;

4658       }

 

Notice that the parameter name of lang_dependent_init (shown below) receives main_input_filename, which was set in handle_options.

 

4525 static int

4526 lang_dependent_init (const char *name)                                                       in toplev.c

4527 {

4528   if (dump_base_name == 0)

4529     dump_base_name = name ? name : "gccdump";

4530

4531   /* Other front-end initialization.  */

4532   if ((*lang_hooks.init) () == 0)

4533     return 0;

4.3.1. Front-end initialization

For C++, the init hook of lang_hooks is cxx_init. As its name suggests, it performs the initialization of the C++ front-end.

 

384  bool

385  cxx_init (void)                                                                                                 in lex.c

386  {

387    static const enum tree_code stmt_codes[] = {

388      c_common_stmt_codes,

389      cp_stmt_codes

390    };

391 

392    INIT_STATEMENT_CODES (stmt_codes);

4.3.1.1.    stmt_codes

Above, at line 388, c_common_stmt_codes has the following definition; it lists the statement codes common to the C-family languages.

 

1150 #define c_common_stmt_codes                         /                                  in c-common.h

1151    CLEANUP_STMT, EXPR_STMT, COMPOUND_STMT,    /

1152    DECL_STMT,        IF_STMT,      FOR_STMT,   /

1153    WHILE_STMT,             DO_STMT,     RETURN_STMT,   /

1154    BREAK_STMT,            CONTINUE_STMT,      SCOPE_STMT,      /

1155    SWITCH_STMT,           GOTO_STMT,       LABEL_STMT,      /

1156    ASM_STMT,         FILE_STMT,  CASE_LABEL

 

And cp_stmt_codes has the following definition, which lists the statement codes specific to C++.

 

895  #define cp_stmt_codes                               /                                                in cp-tree.h

896     CTOR_INITIALIZER,    TRY_BLOCK,       HANDLER,    /

897     EH_SPEC_BLOCK,       USING_STMT,      TAG_DEFN

 

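At line 392, INIT_STATEMENT_CODES uses stmt_codes to fill the array statement_code_p: the whole array is first cleared, and then the entry indexed by each statement code listed in stmt_codes is set to true.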

 

1164 #define INIT_STATEMENT_CODES(STMT_CODES)                   /

1165   do {                                                    /

1166     unsigned int i;                                         /

1167     memset (&statement_code_p, 0, sizeof (statement_code_p));    /

1168     for (i = 0; i < ARRAY_SIZE (STMT_CODES); i++)        /

1169       statement_code_p[STMT_CODES[i]] = true;               /

1170   } while (0)

4.3.1.2.    Initialize reserved words for C++

In C++, certain literal strings are reserved because they carry special semantic meaning in the language. Normally, to make lexical analysis easier and more efficient, an input identifier is first checked to see whether it is a reserved word, instead of using a DFA to recognize the reserved words.

 

cxx_init (continue)

 

394    /* We cannot just assign to input_filename because it has already

395      been initialized and will be used later as an N_BINCL for stabs+

396      debugging.  */

397    push_srcloc ("<internal>", 0);

398 

399    init_reswords ();

 

First, push_srcloc pushes a node for “<internal>” onto input_file_stack.

 

349    void

350    init_reswords (void)                                                                                       in lex.c

351    {

352      unsigned int i;

353      tree id;

354      int mask = ((flag_no_asm ? D_ASM : 0)

355               | (flag_no_gnu_keywords ? D_EXT : 0));

356   

357      ridpointers = ggc_calloc ((int) RID_MAX, sizeof (tree));

358      for (i = 0; i < ARRAY_SIZE (reswords); i++)

359      {

360        id = get_identifier (reswords[i].word);

361        C_RID_CODE (id) = reswords[i].rid;

362        ridpointers [(int) reswords[i].rid] = id;

363        if (! (reswords[i].disable & mask))

364          C_IS_RESERVED_WORD (id) = 1;

365      }

366    }

 

Here ridpointers is an abbreviation of “reserved id pointers”; it is an array of tree nodes (allocated at line 357 with RID_MAX entries) indexed by rid code. reswords is an array of struct resword.

 

231    struct resword                                                                                                 in lex.c

232    {

233      const char *const word;

234      ENUM_BITFIELD(rid) const rid : 16;

235      const unsigned int disable   : 16;

236    };

 

The reserved words for C++ are defined as follows:

 

245    static const struct resword reswords[] =                                                             in lex.c

246    {

247      { "_Complex",        RID_COMPLEX,   0 },

248      { "__FUNCTION__",     RID_FUNCTION_NAME, 0 },

249      { "__PRETTY_FUNCTION__", RID_PRETTY_FUNCTION_NAME, 0 },

250      { "__alignof",        RID_ALIGNOF,    0 },

251      { "__alignof__",     RID_ALIGNOF,    0 },

252      { "__asm",             RID_ASM,     0 },

253      { "__asm__",          RID_ASM,     0 },

254      { "__attribute",       RID_ATTRIBUTE, 0 },

255      { "__attribute__",    RID_ATTRIBUTE, 0 },

256      { "__builtin_va_arg",     RID_VA_ARG,      0 },

257      { "__complex",       RID_COMPLEX,   0 },

258      { "__complex__",   RID_COMPLEX,   0 },

259      { "__const",            RID_CONST, 0 },

260      { "__const__", RID_CONST, 0 },

261      { "__extension__",  RID_EXTENSION,       0 },

262      { "__func__",         RID_C99_FUNCTION_NAME,   0 },

263      { "__imag",            RID_IMAGPART,  0 },

264      { "__imag__",        RID_IMAGPART,  0 },

265      { "__inline",           RID_INLINE, 0 },

266      { "__inline__",       RID_INLINE, 0 },

267      { "__label__",  RID_LABEL, 0 },

268      { "__null",             RID_NULL,   0 },

269      { "__offsetof",       RID_OFFSETOF,   0 },

270      { "__offsetof__",     RID_OFFSETOF,   0 },

271      { "__real",              RID_REALPART,  0 },

272      { "__real__",          RID_REALPART,  0 },

273      { "__restrict",  RID_RESTRICT,   0 },

274      { "__restrict__",      RID_RESTRICT,   0 },

275      { "__signed",          RID_SIGNED,      0 },

276      { "__signed__",      RID_SIGNED,      0 },

277      { "__thread",          RID_THREAD,     0 },

278      { "__typeof",          RID_TYPEOF,      0 },

279      { "__typeof__",      RID_TYPEOF,      0 },

280      { "__volatile", RID_VOLATILE,   0 },

281      { "__volatile__",     RID_VOLATILE,   0 },

282      { "asm",          RID_ASM,     D_ASM },

283      { "auto",         RID_AUTO,   0 },

284      { "bool",         RID_BOOL,   0 },

285      { "break",        RID_BREAK, 0 },

286      { "case",          RID_CASE,   0 },

287      { "catch",        RID_CATCH, 0 },

288      { "char",         RID_CHAR,   0 },

289      { "class",         RID_CLASS,  0 },

290      { "const",        RID_CONST, 0 },

291      { "const_cast", RID_CONSTCAST,       0 },

292      { "continue",          RID_CONTINUE,  0 },

293      { "default",             RID_DEFAULT,    0 },

294      { "delete",              RID_DELETE,      0 },

295      { "do",            RID_DO,              0 },

296      { "double",             RID_DOUBLE,     0 },

297      { "dynamic_cast",   RID_DYNCAST,   0 },

298      { "else",          RID_ELSE,    0 },

299      { "enum",        RID_ENUM,  0 },

300      { "explicit",            RID_EXPLICIT,    0 },

301      { "export",             RID_EXPORT,      0 },

302      { "extern",              RID_EXTERN,      0 },

303      { "false",         RID_FALSE,  0 },

304      { "float",         RID_FLOAT,  0 },

305      { "for",           RID_FOR,      0 },

306      { "friend",              RID_FRIEND,       0 },

307      { "goto",         RID_GOTO,   0 },

308      { "if",             RID_IF,         0 },

309      { "inline",              RID_INLINE, 0 },

310      { "int",            RID_INT,       0 },

311       { "long",         RID_LONG,   0 },

312      { "mutable",           RID_MUTABLE,   0 },

313      { "namespace",       RID_NAMESPACE,     0 },

314      { "new",          RID_NEW,     0 },

315      { "operator",           RID_OPERATOR,  0 },

316      { "private",             RID_PRIVATE,      0 },

317      { "protected",  RID_PROTECTED,      0 },

318      { "public",              RID_PUBLIC,       0 },

319      { "register",            RID_REGISTER,   0 },

320      { "reinterpret_cast", RID_REINTCAST, 0 },

321      { "return",              RID_RETURN,      0 },

322      { "short",        RID_SHORT,  0 },

323      { "signed",             RID_SIGNED,      0 },

324      { "sizeof",              RID_SIZEOF, 0 },

325      { "static",        RID_STATIC, 0 },

326      { "static_cast", RID_STATCAST,   0 },

327      { "struct",        RID_STRUCT,       0 },

328      { "switch",             RID_SWITCH,      0 },

329      { "template",          RID_TEMPLATE,  0 },

330      { "this",          RID_THIS,     0 },

331      { "throw",              RID_THROW,       0 },

332      { "true",          RID_TRUE,   0 },

333      { "try",           RID_TRY,      0 },

334      { "typedef",            RID_TYPEDEF,    0 },

335      { "typename",         RID_TYPENAME,       0 },

336      { "typeid",              RID_TYPEID,       0 },

337      { "typeof",             RID_TYPEOF,      D_ASM|D_EXT },

338      { "union",              RID_UNION, 0 },

339      { "unsigned",          RID_UNSIGNED, 0 },

340      { "using",        RID_USING,  0 },

341      { "virtual",             RID_VIRTUAL,     0 },

342      { "void",         RID_VOID,    0 },

343      { "volatile",            RID_VOLATILE,   0 },

344      { "wchar_t",          RID_WCHAR,       0 },

345      { "while",        RID_WHILE, 0 },

346   

347    };

 

Pay attention to lines 282 and 337: these reserved words are GNU extensions. Lines 354 and 363 in init_reswords show how it is decided whether such a string is treated as a reserved word: an entry is marked as reserved only if none of the bits in its disable field are set in mask. C_IS_RESERVED_WORD above accesses the lang_flag_5 field in tree_common.

Above, at line 360 in init_reswords, get_identifier inserts the identifier into ident_hash; the resulting node is then bound to the corresponding entry of ridpointers.

 

100  tree

101  get_identifier (const char *text)                                                            in stringpool.c

102  {

103    hashnode ht_node = ht_lookup (ident_hash,

104                       (const unsigned char *) text,

105                       strlen (text), HT_ALLOC);

106 

107   /* ht_node can't be NULL here.  */

108    return HT_IDENT_TO_GCC_IDENT (ht_node);

109  }

4.3.1.3.    Initialize hashtable for tree_list

As we have seen in the previous section, identifier nodes are kept in the hashtable ident_hash. However, for nodes of tree_list (accessed via TREE_CHAIN from the parent node), that hashtable is not good enough, because a tree_list is a list of tree nodes chained together. It needs another hashtable. That is list_hash_table, which is defined as below.

 

649    static GTY ((param_is (union tree_node))) htab_t list_hash_table;                in cp/tree.c

 

cxx_init (continue)

 

400    init_tree ();

 

init_tree is the place to create this hashtable.

 

2216 void

2217 init_tree (void)                                                                                          in cp/tree.c

2218 {

2219   list_hash_table = htab_create_ggc (31, list_hash, list_hash_eq, NULL);

2220 }

4.3.1.4.    Initialize related to C++ semantics

The front-end provides a language hook (lang_expand_stmt) for C++ semantics; here init_cp_semantics binds this hook to the function cp_expand_stmt.

 

cxx_init (continue)

 

401    init_cp_semantics ();

 

3088 void

3089 init_cp_semantics (void)                                                                     in cp/semantics.c

3090 {

3091   lang_expand_stmt = cp_expand_stmt;

3092 }

4.3.1.5.    Initialize data for operators

C++ uses some reserved words as operators, and some operators are characters that do not appear among the reserved words (e.g. +, -, etc.). The compiler needs to collect these special literal strings together with their associated data.

 

cxx_init (continue)

 

402    init_operators ();

 

166    static void

167    init_operators (void)                                                                                in cp/lex.c

168    {

169      tree identifier;

170      char buffer[256];

171      struct operator_name_info_t *oni;

172   

173    #define DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, ASSN_P)            /

174      sprintf (buffer, ISALPHA (NAME[0]) ? "operator %s" : "operator%s", NAME); /

175      identifier = get_identifier (buffer);                                 /

176      IDENTIFIER_OPNAME_P (identifier) = 1;                                 /

177                                                                     /

178      oni = (ASSN_P                                                            /

179           ? &assignment_operator_name_info[(int) CODE]                        /

180           : &operator_name_info[(int) CODE]);                          /

181      oni->identifier = identifier;                                           /

182      oni->name = NAME;                                                    /

183      oni->mangled_name = MANGLING;                   /

184      oni->arity = ARITY;

185   

186    #include "operators.def"

187    #undef DEF_OPERATOR

 

assignment_operator_name_info and operator_name_info provide the mapping between operators and their information. Their elements are of type operator_name_info_t, defined as follows.

 

3509 typedef struct operator_name_info_t GTY(())                                               in cp-tree.h

3510 {

3511   /* The IDENTIFIER_NODE for the operator.  */

3512   tree identifier;

3513   /* The name of the operator.  */

3514   const char *name;

3515   /* The mangled name of the operator.  */

3516   const char *mangled_name;

3517   /* The arity of the operator.  */

3518   int arity;

3519 } operator_name_info_t;

 

C++ has its operators defined in a file named operators.def, in which we can find the following code snippet. Notice that DEF_OPERATOR, defined at line 173, is expanded inside that file because the file is included at line 186.

 

70      #define DEF_SIMPLE_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def

71        DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, 0)

 

77      #define DEF_ASSN_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def

78        DEF_OPERATOR (NAME, CODE, MANGLING, ARITY, 1)

 

81      DEF_SIMPLE_OPERATOR ("new", NEW_EXPR, "nw", -1)

 

139    DEF_ASSN_OPERATOR ("=", NOP_EXPR, "aS", 2)

 

The meanings of the parameters above are given below:

NAME: The name of the operator, as a C string, but without the preceding `operator'. This is the name that would be given in the source program. For `operator +', for example, this would be `+'.

CODE: The tree_code for this operator. For `operator +', for example, this would be PLUS_EXPR.

MANGLING: The mangling prefix for the operator, as a C string, and as mangled under the new ABI. For `operator +', for example, this would be "pl".

ARITY: The arity of the operator, or -1 if any arity is allowed. (As for `operator ()') Postincrement and postdecrement operators are marked as binary.

ASSN_P: A boolean value. If nonzero, this is an assignment operator.

 

init_operators (continue)

 

189      operator_name_info[(int) ERROR_MARK].identifier

190                       = get_identifier ("<invalid operator>");

191   

192      /* Handle some special cases. These operators are not defined in

193        the language, but can be produced internally. We may need them

194        for error-reporting. (Eventually, we should ensure that this

195        does not happen. Error messages involving these operators will

196        be confusing to users.)  */

197   

198      operator_name_info [(int) INIT_EXPR].name

199          = operator_name_info [(int) MODIFY_EXPR].name;

200      operator_name_info [(int) EXACT_DIV_EXPR].name = "(ceiling /)";

201      operator_name_info [(int) CEIL_DIV_EXPR].name = "(ceiling /)";

202      operator_name_info [(int) FLOOR_DIV_EXPR].name = "(floor /)";

203      operator_name_info [(int) ROUND_DIV_EXPR].name = "(round /)";

204      operator_name_info [(int) CEIL_MOD_EXPR].name = "(ceiling %)";

205      operator_name_info [(int) FLOOR_MOD_EXPR].name = "(floor %)";

206      operator_name_info [(int) ROUND_MOD_EXPR].name = "(round %)";

207      operator_name_info [(int) ABS_EXPR].name = "abs";

208      operator_name_info [(int) TRUTH_AND_EXPR].name = "strict &&";

209      operator_name_info [(int) TRUTH_OR_EXPR].name = "strict ||";

210      operator_name_info [(int) IN_EXPR].name = "in";

211       operator_name_info [(int) RANGE_EXPR].name = "...";

212      operator_name_info [(int) CONVERT_EXPR].name = "+";

213   

214      assignment_operator_name_info [(int) EXACT_DIV_EXPR].name

215        = "(exact /=)";

216      assignment_operator_name_info [(int) CEIL_DIV_EXPR].name

217        = "(ceiling /=)";

218      assignment_operator_name_info [(int) FLOOR_DIV_EXPR].name

219        = "(floor /=)";

220      assignment_operator_name_info [(int) ROUND_DIV_EXPR].name

221        = "(round /=)";

222      assignment_operator_name_info [(int) CEIL_MOD_EXPR].name

223        = "(ceiling %=)";

224      assignment_operator_name_info [(int) FLOOR_MOD_EXPR].name

225        = "(floor %=)";

226      assignment_operator_name_info [(int) ROUND_MOD_EXPR].name

227        = "(round %=)";

228    }

 

In the rest of the code, as the comment explains, the operators handled are produced internally by the compiler and are not defined in the language; note that only the name field is set in these cases.

4.3.1.6.    Initialize data for name mangling

In the C++ compiler, identifiers are all mangled. In this way, variables, class definitions, function declarations, etc. of the same name can be used simultaneously. Here, the mangling facility first needs to be initialized. Besides, current_function_decl below always refers to the function currently being compiled, and class_type_node is the node the compiler uses to mark the tree node of a class; it is set to the identifier node of the reserved word “class”.

 

cxx_init (continue)

 

403    init_method ();

404    init_error ();

405 

406    current_function_decl = NULL;

407 

408    class_type_node = ridpointers[(int) RID_CLASS];

 

71      void

72      init_method (void)                                                                                  in method.c

73      {

74        init_mangle ();

75      }

 

Among the mangling rules of GNU C++, the substitution rule is a special one. For instance:

template <class A> class T {...};

class X {...}; class Y {...};

T<X> t1; T<Y> t2;

The instantiations T<X> and T<Y> of class template T (the types of t1 and t2) are regarded as different types, and their mangled names should reflect this fact. In the GNU C++ compiler this is done by the substitution rule, which generates distinct mangled names: first, classes X and Y are assigned sequence numbers according to their order of appearance in the source code; then the X part of T<X> is named by “S” + “sequence No. of X” + “_”. This requires recording the names of all user-defined types; for this purpose, the compiler defines the following data structure.

 

95      static struct globals                                                                                   in mangle.c

96      {

97        /* The name in which we're building the mangled name.  */

98        struct obstack name_obstack;

99     

100      /* An array of the current substitution candidates, in the order

101        we've seen them.  */

102      varray_type substitutions;

103   

104      /* The entity that is being mangled.  */

105      tree entity;

106   

107      /* True if the mangling will be different in a future version of the

108        ABI.  */

109      bool need_abi_warning;

110     } G;

 

substitutions at line 102 is the array of substitution candidates, whose index is used as the sequence number. And name_obstack is the obstack in which the mangled name is built.

 

2417 void

2418 init_mangle (void)                                                                                    in mangle.c

2419 {

2420   gcc_obstack_init (&G.name_obstack);

2421

2422   /* Cache these identifiers for quick comparison when checking for

2423     standard substitutions.  */

2424   subst_identifiers[SUBID_ALLOCATOR] = get_identifier ("allocator");

2425   subst_identifiers[SUBID_BASIC_STRING] = get_identifier ("basic_string");

2426   subst_identifiers[SUBID_CHAR_TRAITS] = get_identifier ("char_traits");

2427   subst_identifiers[SUBID_BASIC_ISTREAM] = get_identifier ("basic_istream");

2428   subst_identifiers[SUBID_BASIC_OSTREAM] = get_identifier ("basic_ostream");

2429   subst_identifiers[SUBID_BASIC_IOSTREAM] = get_identifier ("basic_iostream");

2430 }

 

At line 2420, gcc_obstack_init creates the instance for name_obstack.

 

33      #define gcc_obstack_init(OBSTACK)                     /                                         in defaults.h

34        _obstack_begin ((OBSTACK), OBSTACK_CHUNK_SIZE, 0,    /

35                    obstack_chunk_alloc,               /

36                    obstack_chunk_free)

 

Below are the exceptions to the substitution rule; they are the reason subst_identifiers exists. These special rules can shorten mangled names considerably.

::std = St

::std::allocator = Sa

::std::basic_string = Sb

::std::basic_string<char, ::std::char_traits<char>, ::std::allocator<char> > = Ss

::std::basic_istream<char, ::std::char_traits<char> > = Si

::std::basic_ostream<char, ::std::char_traits<char> > = So

::std::basic_iostream<char, ::std::char_traits<char> > = Sd

 

你可能感兴趣的:(function,struct,tree,basic,Class,compiler)