GCC-3.4.6源代码学习笔记(51)

4.3. 语言相关的初始化

回到do_compile,下一个被调用的是lang_dependent_init

 

do_compile (continue)

 

4650     /* Language-dependent initialization. Returns true on success.  */

4651     if (lang_dependent_init (main_input_filename))

4652     {

4653       if (flag_unit_at_a_time)

4654        {

4655          open_dump_file (DFI_cgraph, NULL);

4656         cgraph_dump_file = rtl_dump_file;

4657          rtl_dump_file = NULL;

4658        }

 

注意到参数name指向main_input_filename,该值在handle_options中被设置。

 

4525 static int

4526 lang_dependent_init (const char *name)                                                       in toplev.c

4527 {

4528   if (dump_base_name == 0)

4529     dump_base_name = name ? name : "gccdump";

4530

4531   /* Other front-end initialization.  */

4532   if ((*lang_hooks.init) () == 0)

4533     return 0;

4.3.1. 前端部分的初始化

对于C++,lang_hooks的init钩子是cxx_init。正如其名,它执行C++所需的初始化工作。

 

384  bool

385  cxx_init (void)                                                                                                 in lex.c

386  {

387    static const enum tree_code stmt_codes[] = {

388      c_common_stmt_codes,

389      cp_stmt_codes

390    };

391 

392    INIT_STATEMENT_CODES (stmt_codes);

4.3.1.1.    stmt_codes

在上面的388行,c_common_stmt_codes具有如下定义,它表示通用的C语句类型。

 

1150 #define c_common_stmt_codes                         /                                  in c-common.h

1151    CLEANUP_STMT, EXPR_STMT, COMPOUND_STMT,    /

1152    DECL_STMT,        IF_STMT,      FOR_STMT,   /

1153    WHILE_STMT,             DO_STMT,     RETURN_STMT,   /

1154    BREAK_STMT,            CONTINUE_STMT,      SCOPE_STMT,      /

1155    SWITCH_STMT,           GOTO_STMT,       LABEL_STMT,      /

1156    ASM_STMT,         FILE_STMT,  CASE_LABEL

 

cp_stmt_codes则有如下定义,这是特定于C++的。

 

895  #define cp_stmt_codes                               /                                                in cp-tree.h

896     CTOR_INITIALIZER,    TRY_BLOCK,       HANDLER,    /

897     EH_SPEC_BLOCK,       USING_STMT,      TAG_DEFN

 

在392行,INIT_STATEMENT_CODES根据stmt_codes中列出的语句编码来设置statement_code_p数组:先将整个数组清零,再把stmt_codes中出现的每个编码所对应的元素置为true。

 

1164 #define INIT_STATEMENT_CODES(STMT_CODES)                   /

1165   do {                                                    /

1166     unsigned int i;                                         /

1167     memset (&statement_code_p, 0, sizeof (statement_code_p));    /

1168     for (i = 0; i < ARRAY_SIZE (STMT_CODES); i++)        /

1169       statement_code_p[STMT_CODES[i]] = true;               /

1170   } while (0)

4.3.1.2.    C++初始化关键字

对于C++,特定的字符串被保留用于为语言携带特定的语义。通常为了使词法分析更方便高效,输入的字符串将首先被检查是否是保留字,而不是使用有限状态机(DFA)来识别关键字。

 

cxx_init (continue)

 

394    /* We cannot just assign to input_filename because it has already

395      been initialized and will be used later as an N_BINCL for stabs+

396      debugging.  */

397    push_srcloc ("<internal>", 0);

398 

399    init_reswords ();

 

首先,push_srcloc在input_file_stack中压入节点“<internal>”。接着由init_reswords初始化保留字:

 

349    void

350    init_reswords (void)                                                                                       in lex.c

351    {

352      unsigned int i;

353      tree id;

354      int mask = ((flag_no_asm ? D_ASM : 0)

355               | (flag_no_gnu_keywords ? D_EXT : 0));

356   

357      ridpointers = ggc_calloc ((int) RID_MAX, sizeof (tree));

358      for (i = 0; i < ARRAY_SIZE (reswords); i++)

359      {

360        id = get_identifier (reswords[i].word);

361        C_RID_CODE (id) = reswords[i].rid;

362        ridpointers [(int) reswords[i].rid] = id;

363        if (! (reswords[i].disable & mask))

364          C_IS_RESERVED_WORD (id) = 1;

365      }

366    }

 

这里ridpointers是“reserved id pointers”的缩写,它是一个tree对象数组(见357行,按sizeof (tree)分配)。reswords的元素类型是resword:

 

231    struct resword                                                                                                 in lex.c

232    {

233      const char *const word;

234      ENUM_BITFIELD(rid) const rid : 16;

235      const unsigned int disable   : 16;

236    };

 

用于C++的保留字定义如下:

 

245    static const struct resword reswords[] =                                                             in lex.c

246    {

247      { "_Complex",        RID_COMPLEX,   0 },

248      { "__FUNCTION__",     RID_FUNCTION_NAME, 0 },

249      { "__PRETTY_FUNCTION__", RID_PRETTY_FUNCTION_NAME, 0 },

250      { "__alignof",        RID_ALIGNOF,    0 },

251      { "__alignof__",     RID_ALIGNOF,    0 },

252      { "__asm",             RID_ASM,     0 },

253      { "__asm__",          RID_ASM,     0 },

254      { "__attribute",       RID_ATTRIBUTE, 0 },

255      { "__attribute__",    RID_ATTRIBUTE, 0 },

256      { "__builtin_va_arg",     RID_VA_ARG,      0 },

257      { "__complex",       RID_COMPLEX,   0 },

258      { "__complex__",   RID_COMPLEX,   0 },

259      { "__const",            RID_CONST, 0 },

260      { "__const__", RID_CONST, 0 },

261      { "__extension__",  RID_EXTENSION,       0 },

262      { "__func__",         RID_C99_FUNCTION_NAME,   0 },

263      { "__imag",            RID_IMAGPART,  0 },

264      { "__imag__",        RID_IMAGPART,  0 },

265      { "__inline",           RID_INLINE, 0 },

266      { "__inline__",       RID_INLINE, 0 },

267      { "__label__",  RID_LABEL, 0 },

268      { "__null",             RID_NULL,   0 },

269      { "__offsetof",       RID_OFFSETOF,   0 },

270      { "__offsetof__",     RID_OFFSETOF,   0 },

271      { "__real",              RID_REALPART,  0 },

272      { "__real__",          RID_REALPART,  0 },

273      { "__restrict",  RID_RESTRICT,   0 },

274      { "__restrict__",      RID_RESTRICT,   0 },

275      { "__signed",          RID_SIGNED,      0 },

276      { "__signed__",      RID_SIGNED,      0 },

277      { "__thread",          RID_THREAD,     0 },

278      { "__typeof",          RID_TYPEOF,      0 },

279      { "__typeof__",      RID_TYPEOF,      0 },

280      { "__volatile", RID_VOLATILE,   0 },

281      { "__volatile__",     RID_VOLATILE,   0 },

282      { "asm",          RID_ASM,     D_ASM },

283      { "auto",         RID_AUTO,   0 },

284      { "bool",         RID_BOOL,   0 },

285      { "break",        RID_BREAK, 0 },

286      { "case",          RID_CASE,   0 },

287      { "catch",        RID_CATCH, 0 },

288      { "char",         RID_CHAR,   0 },

289      { "class",         RID_CLASS,  0 },

290      { "const",        RID_CONST, 0 },

291      { "const_cast", RID_CONSTCAST,       0 },

292      { "continue",          RID_CONTINUE,  0 },

293      { "default",             RID_DEFAULT,    0 },

294      { "delete",              RID_DELETE,      0 },

295      { "do",            RID_DO,              0 },

296      { "double",             RID_DOUBLE,     0 },

297      { "dynamic_cast",   RID_DYNCAST,   0 },

298      { "else",          RID_ELSE,    0 },

299      { "enum",        RID_ENUM,  0 },

300      { "explicit",            RID_EXPLICIT,    0 },

301      { "export",             RID_EXPORT,      0 },

302      { "extern",              RID_EXTERN,      0 },

303      { "false",         RID_FALSE,  0 },

304      { "float",         RID_FLOAT,  0 },

305      { "for",           RID_FOR,      0 },

306      { "friend",              RID_FRIEND,       0 },

307      { "goto",         RID_GOTO,   0 },

308      { "if",             RID_IF,         0 },

309      { "inline",              RID_INLINE, 0 },

310      { "int",            RID_INT,       0 },

311       { "long",         RID_LONG,   0 },

312      { "mutable",           RID_MUTABLE,   0 },

313      { "namespace",       RID_NAMESPACE,     0 },

314      { "new",          RID_NEW,     0 },

315      { "operator",           RID_OPERATOR,  0 },

316      { "private",             RID_PRIVATE,      0 },

317      { "protected",  RID_PROTECTED,      0 },

318      { "public",              RID_PUBLIC,       0 },

319      { "register",            RID_REGISTER,   0 },

320      { "reinterpret_cast", RID_REINTCAST, 0 },

321      { "return",              RID_RETURN,      0 },

322      { "short",        RID_SHORT,  0 },

323      { "signed",             RID_SIGNED,      0 },

324      { "sizeof",              RID_SIZEOF, 0 },

325      { "static",        RID_STATIC, 0 },

326      { "static_cast", RID_STATCAST,   0 },

327      { "struct",        RID_STRUCT,       0 },

328      { "switch",             RID_SWITCH,      0 },

329      { "template",          RID_TEMPLATE,  0 },

330      { "this",          RID_THIS,     0 },

331      { "throw",              RID_THROW,       0 },

332      { "true",          RID_TRUE,   0 },

333      { "try",           RID_TRY,      0 },

334      { "typedef",            RID_TYPEDEF,    0 },

335      { "typename",         RID_TYPENAME,       0 },

336      { "typeid",              RID_TYPEID,       0 },

337      { "typeof",             RID_TYPEOF,      D_ASM|D_EXT },

338      { "union",              RID_UNION, 0 },

339      { "unsigned",          RID_UNSIGNED, 0 },

340      { "using",        RID_USING,  0 },

341      { "virtual",             RID_VIRTUAL,     0 },

342      { "void",         RID_VOID,    0 },

343      { "volatile",            RID_VOLATILE,   0 },

344      { "wchar_t",          RID_WCHAR,       0 },

345      { "while",        RID_WHILE, 0 },

346   

347    };

 

注意282及337行,这些保留字是GNU的扩展。并留意在init_reswords的354~363行,如何确定这些字符串是否被用作保留字。另外,上面的C_IS_RESERVED_WORD访问tree_common中的lang_flag_5域。

而在init_reswords的360行,get_identifier将识别符插入ident_hash,这个识别符随后(362行)与相应的ridpointers元素绑定。

 

100  tree

101  get_identifier (const char *text)                                                            in stringpool.c

102  {

103    hashnode ht_node = ht_lookup (ident_hash,

104                       (const unsigned char *) text,

105                       strlen (text), HT_ALLOC);

106 

107   /* ht_node can't be NULL here.  */

108    return HT_IDENT_TO_GCC_IDENT (ht_node);

109  }

4.3.1.3.    初始化tree_list所用的哈希表

在前面的章节已经看到,在系统中,前端树节点都会被保存在哈希表ident_hash内,然而对于tree_list类型的树节点(从父节点通过TREE_CHAIN访问),这个哈希表不够好,因为tree_list是串起来的树节点串。它需要另一个哈希表。这就是list_hash_table,它具有以下的定义。

 

649    static GTY ((param_is (union tree_node))) htab_t list_hash_table;                in cp/tree.c

 

cxx_init (continue)

 

400    init_tree ();

 

init_tree就是初始化这个哈希表之处。

 

2216 void

2217 init_tree (void)                                                                                          in cp/tree.c

2218 {

2219   list_hash_table = htab_create_ggc (31, list_hash, list_hash_eq, NULL);

2220 }

4.3.1.4.    C++语义相关的初始化

前端为C++语义提供了钩子lang_expand_stmt,在此init_cp_semantics把该钩子与特定的函数绑定。

 

cxx_init (continue)

 

401    init_cp_semantics ();

 

3088 void

3089 init_cp_semantics (void)                                                                     in cp/semantics.c

3090 {

3091   lang_expand_stmt = cp_expand_stmt;

3092 }

4.3.1.5.    初始化操作符数据

C++将某些保留字用作操作符,同时一些操作符字符并没有出现在保留字中(例如,+、-等)。因此需要为这些特殊字符串收集数据。

 

cxx_init (continue)

 

402    init_operators ();

 

166    static void

167    init_operators (void)                                                                                in cp/lex.c

168    {

169      tree identifier;

170      char buffer[256];

171      struct operator_name_info_t *oni;

172   

173    #define DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, ASSN_P)            /

174      sprintf (buffer, ISALPHA (NAME[0]) ? "operator %s" : "operator%s", NAME); /

175      identifier = get_identifier (buffer);                                 /

176      IDENTIFIER_OPNAME_P (identifier) = 1;                                 /

177                                                                     /

178      oni = (ASSN_P                                                            /

179           ? &assignment_operator_name_info[(int) CODE]                        /

180           : &operator_name_info[(int) CODE]);                          /

181      oni->identifier = identifier;                                           /

182      oni->name = NAME;                                                    /

183      oni->mangled_name = MANGLING;                   /

184      oni->arity = ARITY;

185   

186    #include "operators.def"

187    #undef DEF_OPERATOR

 

assignment_operator_name_info和operator_name_info提供了操作符名及其信息间的映射。它们的元素具有以下的operator_name_info_t类型。

 

3509 typedef struct operator_name_info_t GTY(())                                               in cp-tree.h

3510 {

3511   /* The IDENTIFIER_NODE for the operator.  */

3512   tree identifier;

3513   /* The name of the operator.  */

3514   const char *name;

3515   /* The mangled name of the operator.  */

3516   const char *mangled_name;

3517   /* The arity of the operator.  */

3518   int arity;

3519 } operator_name_info_t;

 

C++的操作符定义在一个名为operators.def的文件里,其中我们可以看到以下的代码片段。注意到定义在173行的DEF_OPERATOR将在文件内被展开,因为这个文件在186行被包含。

 

70      #define DEF_SIMPLE_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def

71        DEF_OPERATOR(NAME, CODE, MANGLING, ARITY, 0)

 

77      #define DEF_ASSN_OPERATOR(NAME, CODE, MANGLING, ARITY)/ in operator.def

78        DEF_OPERATOR (NAME, CODE, MANGLING, ARITY, 1)

 

81      DEF_SIMPLE_OPERATOR ("new", NEW_EXPR, "nw", -1)

 

139    DEF_ASSN_OPERATOR ("=", NOP_EXPR, "aS", 2)

 

上面参数的含义显示如下:

NAME:C字符串形式的操作符名字,但不包含开头的`operator'部分。这是在源程序中将给出的名字。例如,对于`operator +',这个将是`+'。

CODE:这个操作符的tree_code。例如,对于`operator +',这将是PLUS_EXPR

MANGLING:在新ABI下,C字符串形式的操作符的修饰前缀。例如,对于`operator +',这将是"pl"

ARITY:操作符的元数;如果允许任意元数(如`operator ()'),则为-1。操作符++(postincrement)及--(postdecrement)被视为2元的。

ASSN_P:布尔值。如果非0,这是个赋值操作符。

 

init_operators (continue)

 

189      operator_name_info[(int) ERROR_MARK].identifier

190                       = get_identifier ("<invalid operator>");

191   

192      /* Handle some special cases. These operators are not defined in

193        the language, but can be produced internally. We may need them

194        for error-reporting. (Eventually, we should ensure that this

195        does not happen. Error messages involving these operators will

196        be confusing to users.)  */

197   

198      operator_name_info [(int) INIT_EXPR].name

199          = operator_name_info [(int) MODIFY_EXPR].name;

200      operator_name_info [(int) EXACT_DIV_EXPR].name = "(ceiling /)";

201      operator_name_info [(int) CEIL_DIV_EXPR].name = "(ceiling /)";

202      operator_name_info [(int) FLOOR_DIV_EXPR].name = "(floor /)";

203      operator_name_info [(int) ROUND_DIV_EXPR].name = "(round /)";

204      operator_name_info [(int) CEIL_MOD_EXPR].name = "(ceiling %)";

205      operator_name_info [(int) FLOOR_MOD_EXPR].name = "(floor %)";

206      operator_name_info [(int) ROUND_MOD_EXPR].name = "(round %)";

207      operator_name_info [(int) ABS_EXPR].name = "abs";

208      operator_name_info [(int) TRUTH_AND_EXPR].name = "strict &&";

209      operator_name_info [(int) TRUTH_OR_EXPR].name = "strict ||";

210      operator_name_info [(int) IN_EXPR].name = "in";

211       operator_name_info [(int) RANGE_EXPR].name = "...";

212      operator_name_info [(int) CONVERT_EXPR].name = "+";

213   

214      assignment_operator_name_info [(int) EXACT_DIV_EXPR].name

215        = "(exact /=)";

216      assignment_operator_name_info [(int) CEIL_DIV_EXPR].name

217        = "(ceiling /=)";

218      assignment_operator_name_info [(int) FLOOR_DIV_EXPR].name

219        = "(floor /=)";

220      assignment_operator_name_info [(int) ROUND_DIV_EXPR].name

221        = "(round /=)";

222      assignment_operator_name_info [(int) CEIL_MOD_EXPR].name

223        = "(ceiling %=)";

224      assignment_operator_name_info [(int) FLOOR_MOD_EXPR].name

225        = "(floor %=)";

226      assignment_operator_name_info [(int) ROUND_MOD_EXPR].name

227        = "(round %=)";

228    }

 

在余下的代码中,正如注释所解释,这些操作符并非由语言所定义,而是可能在编译器内部生成,保留它们是为了错误报告的需要。这种情形下,仅name域被设置。

4.3.1.6.    初始化名字修饰相关的数据结构

在C++编译器内部,标识符名都是经过修饰的,这样才可能同时使用同名的变量、类定义、函数声明等等。C++标准并没有规定修饰的做法,不过各编译器的实现均大同小异。这里,首先需要初始化相应的机制。另外,下面的current_function_decl总是指向当前被编译的函数,而class_type_node则是编译器用来标记类节点的特殊记号。

 

cxx_init (continue)

 

403    init_method ();

404    init_error ();

405 

406    current_function_decl = NULL;

407 

408    class_type_node = ridpointers[(int) RID_CLASS];

 

71      void

72      init_method (void)                                                                                  in method.c

73      {

74        init_mangle ();

75      }

 

在GNU C++的修饰名规则中,替代命名规则是比较特殊的。例如:

template <class A> class T {...};

class X {...}; class Y {...};

T<X> t1; T<Y> t2;

对于t1和t2所属的具现模板类T<X>和T<Y>,这是2个被视为完全不同的类,其修饰名应该有所反映。在GNU C++编译器中,这通过替代命名规则来生成不同的修饰名:首先类X和Y,根据其声明/定义出现的次序,予以编号;在T<X>中的X部分,由“S”+“X的序号”+“_”来命名。这需要记录所有的类名字,为此,编译器定义了如下数据结构:

 

95      static struct globals                                                                                   in mangle.c

96      {

97        /* The name in which we're building the mangled name.  */

98        struct obstack name_obstack;

99     

100      /* An array of the current substitution candidates, in the order

101        we've seen them.  */

102      varray_type substitutions;

103   

104      /* The entity that is being mangled.  */

105      tree entity;

106   

107      /* True if the mangling will be different in a future version of the

108        ABI.  */

109      bool need_abi_warning;

110     } G;

 

102行的substitutions就是保存替代对象的数组,其索引用作其序号。而name_obstack则是用于分配、保存修饰名的内存块。

 

2417 void

2418 init_mangle (void)                                                                                    in mangle.c

2419 {

2420   gcc_obstack_init (&G.name_obstack);

2421

2422   /* Cache these identifiers for quick comparison when checking for

2423     standard substitutions.  */

2424   subst_identifiers[SUBID_ALLOCATOR] = get_identifier ("allocator");

2425   subst_identifiers[SUBID_BASIC_STRING] = get_identifier ("basic_string");

2426   subst_identifiers[SUBID_CHAR_TRAITS] = get_identifier ("char_traits");

2427   subst_identifiers[SUBID_BASIC_ISTREAM] = get_identifier ("basic_istream");

2428   subst_identifiers[SUBID_BASIC_OSTREAM] = get_identifier ("basic_ostream");

2429   subst_identifiers[SUBID_BASIC_IOSTREAM] = get_identifier ("basic_iostream");

2430 }

 

在2420行,由gcc_obstack_init初始化这个name_obstack对象。

 

33      #define gcc_obstack_init(OBSTACK)                     /                                         in defaults.h

34        _obstack_begin ((OBSTACK), OBSTACK_CHUNK_SIZE, 0,    /

35                    obstack_chunk_alloc,               /

36                    obstack_chunk_free)

 

以下则是替代规则中的特例,为此需要创建subst_identifiers对象。这个规则可以大大缩短修饰名的长度。

::std = St

::std::allocator = Sa

::std::basic_string = Sb

::std::basic_string<char, ::std::char_traits<char>, ::std::allocator<char> > = Ss

::std::basic_istream<char, ::std::char_traits<char> > = Si

::std::basic_ostream<char, ::std::char_traits<char> > = So

::std::basic_iostream<char, ::std::char_traits<char> > = Sd

 

你可能感兴趣的:(GCC-3.4.6源代码学习笔记(51))