GCC-3.4.6源代码学习笔记(19)

3.2.1. 创建cpp_reader

在上面的205行,cpp_create_reader尝试创建一个cpp_reader对象。对于像C/C++这样需要预处理机制的语言,一般来说,编译器需要提供一个单独的预处理遍,对源程序进行预处理操作。而当前版本的GCC,应用cpp_reader对象,实现了在线的预处理展开,不再需要单独的预处理遍。

因为这个目的,cpp_reader的定义也变得相当复杂。在以后有关词法分析和解析器的章节中,我们再来详细研究它的作用。

 

602  struct cpp_reader                                                                                       in cpphash.h

603  {

604    /* Top of buffer stack.  */

605    cpp_buffer *buffer;

606 

607   /* Overlaid buffer (can be different after processing #include).  */

608    cpp_buffer *overlaid_buffer;

609 

610    /* Lexer state.  */

611     struct lexer_state state;

612 

613    /* Source line tracking.  */

614    struct line_maps line_maps;

615    const struct line_map *map;

616    fileline line;

617 

618    /* The line of the '#' of the current directive.  */

619    fileline directive_line;

620 

621    /* Memory buffers.  */

622    _cpp_buff *a_buff;           /* Aligned permanent storage.  */

623    _cpp_buff *u_buff;          /* Unaligned permanent storage.  */

624    _cpp_buff *free_buffs;     /* Free buffer chain.  */

625 

626    /* Context stack.  */

627    struct cpp_context base_context;

628    struct cpp_context *context;

629 

630    /* If in_directive, the directive if known.  */

631    const struct directive *directive;

632 

633    /* Search paths for include files.  */

634    struct cpp_dir *quote_include;         /* "" */

635    struct cpp_dir *bracket_include;       /* <> */

636    struct cpp_dir no_search_path;  /* No path.  */

637 

638    /* Chain of all hashed _cpp_file instances.  */

639    struct _cpp_file *all_files;

640 

641    struct _cpp_file *main_file;

642 

643    /* File and directory hash table.  */

644    struct htab *file_hash;

645    struct htab *dir_hash;

646    struct file_hash_entry *file_hash_entries;

647    unsigned int file_hash_entries_allocated, file_hash_entries_used;

648 

649    /* Nonzero means don't look for #include "foo" the source-file

650      directory.  */

651    bool quote_ignores_source_dir;

652 

653    /* Nonzero if any file has contained #pragma once or #import has

654      been used.  */

655    bool seen_once_only;

656 

657    /* Multiple include optimization.  */

658    const cpp_hashnode *mi_cmacro;

659    const cpp_hashnode *mi_ind_cmacro;

660    bool mi_valid;

661 

662    /* Lexing.  */

663    cpp_token *cur_token;

664    tokenrun base_run, *cur_run;

665    unsigned int lookaheads;

666 

667    /* Nonzero prevents the lexer from re-using the token runs.  */

668    unsigned int keep_tokens;

669 

670    /* Error counter for exit code.  */

671    unsigned int errors;

672 

673    /* Buffer to hold macro definition string.  */

674    unsigned char *macro_buffer;

675    unsigned int macro_buffer_len;

676 

677    /* Descriptor for converting from the source character set to the

678      execution character set.  */

679    struct cset_converter narrow_cset_desc;

680 

681    /* Descriptor for converting from the source character set to the

682      wide execution character set.  */

683    struct cset_converter wide_cset_desc;

684 

685    /* Date and time text. Calculated together if either is requested.  */

686    const uchar *date;

687    const uchar *time;

688 

689    /* EOF token, and a token forcing paste avoidance.  */

690    cpp_token avoid_paste;

691    cpp_token eof;

692 

693    /* Opaque handle to the dependencies of mkdeps.c.  */

694    struct deps *deps;

695 

696    /* Obstack holding all macro hash nodes. This never shrinks.

697      See cpphash.c */

698    struct obstack hash_ob;

699 

700    /* Obstack holding buffer and conditional structures. This is a

701      real stack.  See cpplib.c.  */

702    struct obstack buffer_ob;

703 

704    /* Pragma table - dynamic, because a library user can add to the

705      list of recognized pragmas.  */

706    struct pragma_entry *pragmas;

707 

708   /* Call backs to cpplib client.  */

709    struct cpp_callbacks cb;

710 

711     /* Identifier hash table.  */

712    struct ht *hash_table;

713 

714    /* Expression parser stack.  */

715    struct op *op_stack, *op_limit;

716 

717    /* User visible options.  */

718    struct cpp_options opts;

719 

720   /* Special nodes - identifiers with predefined significance to the

721      preprocessor.  */

722    struct spec_nodes spec_nodes;

723 

724    /* Whether cpplib owns the hashtable.  */

725    bool our_hashtable;

726 

727    /* Traditional preprocessing output buffer (a logical line).  */

728    struct

729    {

730      uchar *base;

731      uchar *limit;

732      uchar *cur;

733      fileline first_line;

734    } out;

735 

736    /* Used to save the original line number during traditional

737      preprocessing.  */

738    unsigned int saved_line;

739 

740    /* A saved list of the defined macros, for dependency checking

741      of precompiled headers.  */

742    struct cpp_savedstate *savedstate;

743  };

 

下面cpp_create_reader定义中的参数lang是c_lang类型,这个类型的定义如下:

 

154  enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_STDC89, CLK_STDC94,           

155             CLK_STDC99, CLK_GNUCXX, CLK_CXX98, CLK_ASM};

 

注意到在c_common_init_options的205行,对于C/C++前端,CLK_GNUCXX和CLK_GNUC89是仅有的可用值。而在cpp_create_reader的132行,在v2.7版本后,init_library不作任何事情。

 

126  cpp_reader *

127  cpp_create_reader (enum c_lang lang, hash_table *table)                               in cppinit.c

128  {

129    cpp_reader *pfile;

130 

131    /* Initialize this instance of the library if it hasn't been already.  */

132    init_library ();

133 

134    pfile = xcalloc (1, sizeof (cpp_reader));

135 

136    cpp_set_lang (pfile, lang);

 

在上面的136行,cpp_set_lang为cpp_reader对象设置了语言的信息。它有如下的定义。

 

92    void

93    cpp_set_lang (cpp_reader *pfile, enum c_lang lang)                                      in cppinit.c

94    {

95      const struct lang_flags *l = &lang_defaults[(int) lang];

96   

97      CPP_OPTION (pfile, lang) = lang;

98   

99      CPP_OPTION (pfile, c99)              = l->c99;

100    CPP_OPTION (pfile, cplusplus)      = l->cplusplus;

101    CPP_OPTION (pfile, extended_numbers)       = l->extended_numbers;

102    CPP_OPTION (pfile, std)        = l->std;

103    CPP_OPTION (pfile, trigraphs)       = l->std;

104    CPP_OPTION (pfile, cplusplus_comments)    = l->cplusplus_comments;

105    CPP_OPTION (pfile, digraphs)       = l->digraphs;

106  }

 

在上面95行的lang_defaults,是一个描述所支持语言特性的全局数组。具体来说,域c99如果为非0值,表示语言符合1999 C标准。域std如果为非0值,表示语言符合指定的C/C++标准。域extended_numbers如果为非0值,表示语言允许16进制的浮点数及用于数值常量的LL后缀。域cplusplus_comments如果为非0值,表示语言允许C++“//”风格的注释。域digraphs如果为非0值,表示语言支持ISO的复合字母序列(digraph sequence)。

 

69    struct lang_flags                                                                                        in cppinit.c

70    {

71      char c99;

72      char cplusplus;

73      char extended_numbers;

74      char std;

75      char cplusplus_comments;

76      char digraphs;

77    };

78   

79    static const struct lang_flags lang_defaults[] =                                               in cppinit.c

80    { /*              c99 c++ xnum std  //   digr  */

81      /* GNUC89 */  { 0,  0,  1,   0,   1,   1     },

82      /* GNUC99 */  { 1,  0,  1,   0,   1,   1     },

83      /* STDC89 */  { 0,  0,  0,   1,   0,   0     },

84      /* STDC94 */  { 0,  0,  0,   1,   0,   1     },

85      /* STDC99 */  { 1,  0,  1,   1,   1,   1     },

86     /* GNUCXX */  { 0,  1,  1,   0,   1,   1     },

87      /* CXX98  */  { 0,  1,  1,   1,   1,   1     },

88      /* ASM    */  { 0,  0,  1,   0,   1,   0     }

89    };

 

在cpp_set_lang的97行,CPP_OPTION被定义为如下的域选择子。

 

#define CPP_OPTION(PFILE, OPTION) ((PFILE)->opts.OPTION)        in cpphash.h

 

从这个定义,我们可以知道这个宏可被用于初始化cpp_reader的opts域。这个域具有cpp_options类型。它是一组用于控制词法分析和预处理的旗标。

 

211   struct cpp_options                                                                               in cpplib.h

212  {

213    /* Characters between tab stops.  */

214    unsigned int tabstop;

215 

216    /* The language we're preprocessing.  */

217    enum c_lang lang;

218 

219    /* Nonzero means use extra default include directories for C++.  */

220    unsigned char cplusplus;

221 

222   /* Nonzero means handle cplusplus style comments.  */

223    unsigned char cplusplus_comments;

224 

225   /* Nonzero means define __OBJC__, treat @ as a special token, and

226      use the OBJC[PLUS]_INCLUDE_PATH environment variable.  */

227    unsigned char objc;

228 

229    /* Nonzero means don't copy comments into the output file.  */

230    unsigned char discard_comments;

231 

232    /* Nonzero means don't copy comments into the output file during

233      macro expansion.  */

234    unsigned char discard_comments_in_macro_exp;

235 

236   /* Nonzero means process the ISO trigraph sequences.  */

237   unsigned char trigraphs;

238 

239    /* Nonzero means process the ISO digraph sequences.  */

240    unsigned char digraphs;

241 

242    /* Nonzero means to allow hexadecimal floats and LL suffixes.  */

243    unsigned char extended_numbers;

244 

245    /* Nonzero means print names of header files (-H).  */

246    unsigned char print_include_names;

247 

248    /* Nonzero means cpp_pedwarn causes a hard error.  */

249    unsigned char pedantic_errors;

250 

251    /* Nonzero means don't print warning messages.  */

252    unsigned char inhibit_warnings;

253 

254   /* Nonzero means complain about deprecated features.  */

255    unsigned char warn_deprecated;

256 

257    /* Nonzero means don't suppress warnings from system headers.  */

258    unsigned char warn_system_headers;

259 

260    /* Nonzero means don't print error messages. Has no option to

261      select it, but can be set by a user of cpplib (e.g. fix-header).  */

262    unsigned char inhibit_errors;

263 

264   /* Nonzero means warn if slash-star appears in a comment.  */

265    unsigned char warn_comments;

266 

267    /* Nonzero means warn if there are any trigraphs.  */

268    unsigned char warn_trigraphs;

269 

270   /* Nonzero means warn about multicharacter charconsts.  */

271    unsigned char warn_multichar;

272 

273    /* Nonzero means warn about various incompatibilities with

274      traditional C.  */

275    unsigned char warn_traditional;

276 

277   /* Nonzero means warn about long long numeric constants.  */

278    unsigned char warn_long_long;

279 

280    /* Nonzero means warn about text after an #endif (or #else).  */

281    unsigned char warn_endif_labels;

282 

283    /* Nonzero means warn about implicit sign changes owing to integer

284      promotions.  */

285    unsigned char warn_num_sign_change;

286 

287    /* Nonzero means turn warnings into errors.  */

288    unsigned char warnings_are_errors;

289 

290    /* Nonzero means we should look for header.gcc files that remap file

291      names.  */

292    unsigned char remap;

293 

294    /* Zero means dollar signs are punctuation.  */

295    unsigned char dollars_in_ident;

296 

297    /* True if we should warn about dollars in identifiers or numbers

298      for this translation unit.  */

299    unsigned char warn_dollars;

300 

301    /* Nonzero means warn if undefined identifiers are evaluated in an #if.  */

302    unsigned char warn_undef;

303 

304    /* Nonzero means warn of unused macros from the main file.  */

305    unsigned char warn_unused_macros;

306 

307    /* Nonzero for the 1999 C Standard, including corrigenda and amendments.  */

308    unsigned char c99;

309 

310    /* Nonzero if we are conforming to a specific C or C++ standard.  */

311     unsigned char std;

312 

313    /* Nonzero means give all the error messages the ANSI standard requires.  */

314    unsigned char pedantic;

315 

316    /* Nonzero means we're looking at already preprocessed code, so don't

317      bother trying to do macro expansion and whatnot.  */

318    unsigned char preprocessed;

319 

320   /* Print column number in error messages.  */

321    unsigned char show_column;

322 

323    /* Nonzero means handle C++ alternate operator names.  */

324    unsigned char operator_names;

325 

326    /* True for traditional preprocessing.  */

327    unsigned char traditional;

328 

329    /* Holds the name of the target (execution) character set.  */

330    const char *narrow_charset;

331 

332    /* Holds the name of the target wide character set.  */

333    const char *wide_charset;

334 

335   /* Holds the name of the input character set.  */

336    const char *input_charset;

337 

338   /* True to warn about precompiled header files we couldn't use.  */

339    bool warn_invalid_pch;

340 

341    /* True if dependencies should be restored from a precompiled header.  */

342    bool restore_pch_deps;

343 

344    /* Dependency generation.  */

345    struct

346    {

347      /* Style of header dependencies to generate.  */

348      enum {DEPS_NONE = 0, DEPS_USER, DEPS_SYSTEM } style;

349 

350      /* Assume missing files are generated files.  */

351      bool missing_files;

352 

353      /* Generate phony targets for each dependency apart from the first

354        one.  */

355      bool phony_targets;

356 

357      /* If true, no dependency is generated on the main file.  */

358      bool ignore_main_file;

359    } deps;

360 

361    /* Target-specific features set by the front end or client.  */

362 

363    /* Precision for target CPP arithmetic, target characters, target

364      ints and target wide characters, respectively.  */

365    size_t precision, char_precision, int_precision, wchar_precision;

366 

367    /* True means chars (wide chars) are unsigned.  */

368    bool unsigned_char, unsigned_wchar;

369 

370   /* True if the most significant byte in a word has the lowest

371      address in memory.  */

372    bool bytes_big_endian;

373 

374    /* Nonzero means __STDC__ should have the value 0 in system headers.  */

375    unsigned char stdc_0_in_system_headers;

376  };

 

接下来cpp_create_reader接着初始化opts的某些域,这些域都是很基本的,已预先确定的设置。同样在169行,把初始的头文件查找路径设置为空(“”)。

 

cpp_create_reader (continue)

 

137    CPP_OPTION (pfile, warn_multichar) = 1;

138    CPP_OPTION (pfile, discard_comments) = 1;

139    CPP_OPTION (pfile, discard_comments_in_macro_exp) = 1;

140    CPP_OPTION (pfile, show_column) = 1;

141    CPP_OPTION (pfile, tabstop) = 8;

142    CPP_OPTION (pfile, operator_names) = 1;

143    CPP_OPTION (pfile, warn_trigraphs) = 2;

144    CPP_OPTION (pfile, warn_endif_labels) = 1;

145    CPP_OPTION (pfile, warn_deprecated) = 1;

146    CPP_OPTION (pfile, warn_long_long) = !CPP_OPTION (pfile, c99);

147    CPP_OPTION (pfile, dollars_in_ident) = 1;

148    CPP_OPTION (pfile, warn_dollars) = 1;

149 

150    /* Default CPP arithmetic to something sensible for the host for the

151      benefit of dumb users like fix-header.  */

152    CPP_OPTION (pfile, precision) = CHAR_BIT * sizeof (long);

153    CPP_OPTION (pfile, char_precision) = CHAR_BIT;

154    CPP_OPTION (pfile, wchar_precision) = CHAR_BIT * sizeof (int);

155    CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);

156    CPP_OPTION (pfile, unsigned_char) = 0;

157    CPP_OPTION (pfile, unsigned_wchar) = 1;

158    CPP_OPTION (pfile, bytes_big_endian) = 1;  /* does not matter */

159 

160   /* Default to locale/UTF-8.  */

161    CPP_OPTION (pfile, narrow_charset) = _cpp_default_encoding ();

162    CPP_OPTION (pfile, wide_charset) = 0;

163    CPP_OPTION (pfile, input_charset) = _cpp_default_encoding ();

164 

165    /* A fake empty "directory" used as the starting point for files

166      looked up without a search path. Name cannot be '/' because we

167      don't want to prepend anything at all to filenames using it. All

168      other entries are correct zero-initialized.  */

169    pfile->no_search_path.name = (char *) "";

170 

171   /* Initialize the line map. Start at logical line 1, so we can use

172      a line number of zero for special states.  */

173    linemap_init (&pfile->line_maps);

174    pfile->line = 1;

 

宏和被包含文件会被预处理器展开,然后作为词法分析器的输入。当发现错误时,词法分析器不能知道错误在源文件中的位置,只能知道它在预处理结果中的位置。语法分析器也有同样的问题。因此要给出正确的出错消息,需要知道源文件和预处理结果间的映射关系。结构line_maps为此目的而定义。

 

58    struct line_maps                                                                                         in line-map.h

59    {

60      struct line_map *maps;

61      unsigned int allocated;

62      unsigned int used;

63   

64      /* The most recently listed include stack, if any, starts with

65        LAST_LISTED as the topmost including file. -1 indicates nothing

66        has been listed yet.  */

67      int last_listed;

68   

69      /* Depth of the include stack, including the current file.  */

70      unsigned int depth;

71   

72      /* If true, prints an include trace a la -H.  */

73      bool trace_includes;

74    };

 

39    /* The logical line FROM_LINE maps to physical source file TO_FILE at

40      line TO_LINE, and subsequently one-to-one until the next line_map

41      structure in the set. INCLUDED_FROM is an index into the set that

42      gives the line mapping at whose end the current one was included.

43      File(s) at the bottom of the include stack have this set to -1.

44      REASON is the reason for creation of this line map, SYSP is one for

45      a system header, two for a C system header file that therefore

46      needs to be extern "C" protected in C++, and zero otherwise.  */

47    struct line_map                                                                                          in line-map.h

48    {

49      const char *to_file;

50      unsigned int to_line;

51      source_location from_line;

52      int included_from;

53      ENUM_BITFIELD (lc_reason) reason : CHAR_BIT;

54      unsigned char sysp;

55    };

 

26    /* Reason for adding a line change with add_line_map (). LC_ENTER is

27      when including a new file, e.g. a #include directive in C.

28      LC_LEAVE is when reaching a file's end. LC_RENAME is when a file

29      name or line number changes for neither of the above reasons

30      (e.g. a #line directive in C).  */

31    enum lc_reason {LC_ENTER = 0, LC_LEAVE, LC_RENAME};

 

在后面的章节可以看到,逻辑位置与物理位置的映射实际上不简单,每包含一个文件,需要建立一个新的映射关系,而从包含文件返回,也需要重新建立一个映射关系。

在cpp_create_reader的173行,linemap_init被调用来初始化line_maps。

 

32    void

33    linemap_init (struct line_maps *set)

34    {

35      set->maps = 0;

36      set->allocated = 0;

37      set->used = 0;

38      set->last_listed = -1;

39      set->trace_includes = false;

40      set->depth = 0;

41    }

 

cpp_create_reader (continue)

 

176   /* Initialize lexer state.  */

177    pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);

178 

179    /* Set up static tokens.  */

180    pfile->avoid_paste.type = CPP_PADDING;

181    pfile->avoid_paste.val.source = NULL;

182    pfile->eof.type = CPP_EOF;

183    pfile->eof.flags = 0;

 

上面177行,cpp_reader的state域记录了词法分析器的状态。域avoid_paste是强制避免粘贴的符号(token),它具有cpp_token类型,下面我们会看到。域eof代表文件的结尾。

 

cpp_create_reader (continue)

 

185    /* Create a token buffer for the lexer.  */

186    _cpp_init_tokenrun (&pfile->base_run, 250);

187    pfile->cur_run = &pfile->base_run;

188    pfile->cur_token = pfile->base_run.base;

 

base_run是块大小为250个符号(token)的双向链表。一开始,只有一个块被分配出来。它具有tokenrun类型,其定义如下。

 

158  typedef struct tokenrun tokenrun;

159  struct tokenrun                                                                                          in cpphash.h

160  {

161    tokenrun *next, *prev;

162    cpp_token *base, *limit;

163  };

 

644  void

645  _cpp_init_tokenrun (tokenrun *run, unsigned int count)                                 in cpplex.c

646  {

647    run->base = xnewvec (cpp_token, count);

648    run->limit = run->base + count;

649    run->next = NULL;

650  }

 

cpp_create_reader (continue)

 

190    /* Initialize the base context.  */

191    pfile->context = &pfile->base_context;

192    pfile->base_context.macro = 0;

193    pfile->base_context.prev = pfile->base_context.next = 0;

 

cpp_reader中的域context是用于处理宏展开的缓存,它也是双向链表。它的定义如下。

 

172  struct cpp_context                                                                        in cpphash.h

173  {

174    /* Doubly-linked list.  */

175    cpp_context *next, *prev;

176 

177    union

178    {

179      /* For ISO macro expansion. Contexts other than the base context

180        are contiguous tokens. e.g. macro expansions, expanded

181        argument tokens.  */

182      struct

183      {

184        union utoken first;

185        union utoken last;

186      } iso;

187 

188      /* For traditional macro expansion.  */

189      struct

190      {

191        const uchar *cur;

192        const uchar *rlimit;

193      } trad;

194    } u;

195 

196   /* If non-NULL, a buffer used for storage related to this context.

197      When the context is popped, the buffer is released.  */

198    _cpp_buff *buff;

199 

200   /* For a macro context, the macro node, otherwise NULL.  */

201    cpp_hashnode *macro;

202 

203    /* True if utoken element is token, else ptoken.  */

204    bool direct_p;

205  };

 

177 ~ 194行的union用于处理传统(traditional)及ISO的宏展开。如果我们使用-traditional选项进行编译,编译器采用传统方式展开(用于处理标准化之前的语言)。传统和ISO宏展开的主要区别是,前者展开为文本(text)而后者展开为符号串。因此,我们看到182 ~ 186行,用于ISO宏的结构中,包含了utoken类型的union结构。

 

151  union utoken                                                                                             in cpphash.h

152  {

153    const cpp_token *token;

154    const cpp_token **ptoken;

155  };

 

cpp_create_reader (continue)

 

196   /* Aligned and unaligned storage.  */

197    pfile->a_buff = _cpp_get_buff (pfile, 0);

198    pfile->u_buff = _cpp_get_buff (pfile, 0);

 

a_buff和u_buff是cpp_reader用到的另2个缓存。其中u_buff用于处理字符串(’#’预处理操作符),a_buff用于其它处理。为了管理内存,cpp_reader使用free_buffs来保存从a_buff和u_buff释放的块。缓存具有如下的定义及操作。

 

130  struct _cpp_buff                                                                                        in cpphash.h

131  {

132    struct _cpp_buff *next;

133    unsigned char *base, *cur, *limit;

134  };

 

1423 _cpp_buff *

1424 _cpp_get_buff (cpp_reader *pfile, size_t min_size)                                       in cpplex.c

1425 {

1426   _cpp_buff *result, **p;

1427

1428   for (p = &pfile->free_buffs;; p = &(*p)->next)

1429   {

1430     size_t size;

1431

1432     if (*p == NULL)

1433        return new_buff (min_size);

1434     result = *p;

1435     size = result->limit - result->base;

1436     /* Return a buffer that's big enough, but don't waste one that's

1437       way too big.  */

1438     if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))

1439       break;

1440   }

1441

1442   *p = result->next;

1443   result->next = NULL;

1444   result->cur = result->base;

1445   return result;

1446 }

 

1391 static _cpp_buff *

1392 new_buff (size_t len)                                                                                 in cpplex.c

1393 {

1394   _cpp_buff  *result;

1395   unsigned char *base;

1396

1397   if (len < MIN_BUFF_SIZE)

1398     len = MIN_BUFF_SIZE;

1399   len = CPP_ALIGN (len);

1400

1401   base = xmalloc (len + sizeof (_cpp_buff));

1402   result = (_cpp_buff *) (base + len);

1403   result->base = base;

1404   result->cur = base;

1405   result->limit = base + len;

1406   result->next = NULL;

1407   return result;

1408 }

你可能感兴趣的:(struct,buffer,token,character,include,Comments)