Study notes on the GCC-3.4.6 source (19)

3.2.1. Create cpp_reader

At line 205 above, cpp_create_reader creates a cpp_reader object. For languages that require a preprocessor, such as C and C++, a compiler would usually provide a separate pass to preprocess the source. In the current version of GCC, however, cpp_reader implements on-the-fly preprocessing (the source is preprocessed as soon as it is read in), so a separate pass is no longer needed.

Because of this, the definition of cpp_reader is quite complex. We will come back to its functionality in the later chapters about the lexer and the parser.

 

602  struct cpp_reader                                                                                       in cpphash.h

603  {

604    /* Top of buffer stack.  */

605    cpp_buffer *buffer;

606 

607   /* Overlaid buffer (can be different after processing #include).  */

608    cpp_buffer *overlaid_buffer;

609 

610    /* Lexer state.  */

611     struct lexer_state state;

612 

613    /* Source line tracking.  */

614    struct line_maps line_maps;

615    const struct line_map *map;

616    fileline line;

617 

618    /* The line of the '#' of the current directive.  */

619    fileline directive_line;

620 

621    /* Memory buffers.  */

622    _cpp_buff *a_buff;           /* Aligned permanent storage.  */

623    _cpp_buff *u_buff;          /* Unaligned permanent storage.  */

624    _cpp_buff *free_buffs;     /* Free buffer chain.  */

625 

626    /* Context stack.  */

627    struct cpp_context base_context;

628    struct cpp_context *context;

629 

630    /* If in_directive, the directive if known.  */

631    const struct directive *directive;

632 

633    /* Search paths for include files.  */

634    struct cpp_dir *quote_include;         /* "" */

635    struct cpp_dir *bracket_include;       /* <> */

636    struct cpp_dir no_search_path;  /* No path.  */

637 

638    /* Chain of all hashed _cpp_file instances.  */

639    struct _cpp_file *all_files;

640 

641    struct _cpp_file *main_file;

642 

643    /* File and directory hash table.  */

644    struct htab *file_hash;

645    struct htab *dir_hash;

646    struct file_hash_entry *file_hash_entries;

647    unsigned int file_hash_entries_allocated, file_hash_entries_used;

648 

649    /* Nonzero means don't look for #include "foo" the source-file

650      directory.  */

651    bool quote_ignores_source_dir;

652 

653    /* Nonzero if any file has contained #pragma once or #import has

654      been used.  */

655    bool seen_once_only;

656 

657    /* Multiple include optimization.  */

658    const cpp_hashnode *mi_cmacro;

659    const cpp_hashnode *mi_ind_cmacro;

660    bool mi_valid;

661 

662    /* Lexing.  */

663    cpp_token *cur_token;

664    tokenrun base_run, *cur_run;

665    unsigned int lookaheads;

666 

667    /* Nonzero prevents the lexer from re-using the token runs.  */

668    unsigned int keep_tokens;

669 

670    /* Error counter for exit code.  */

671    unsigned int errors;

672 

673    /* Buffer to hold macro definition string.  */

674    unsigned char *macro_buffer;

675    unsigned int macro_buffer_len;

676 

677    /* Descriptor for converting from the source character set to the

678      execution character set.  */

679    struct cset_converter narrow_cset_desc;

680 

681    /* Descriptor for converting from the source character set to the

682      wide execution character set.  */

683    struct cset_converter wide_cset_desc;

684 

685    /* Date and time text. Calculated together if either is requested.  */

686    const uchar *date;

687    const uchar *time;

688 

689    /* EOF token, and a token forcing paste avoidance.  */

690    cpp_token avoid_paste;

691    cpp_token eof;

692 

693    /* Opaque handle to the dependencies of mkdeps.c.  */

694    struct deps *deps;

695 

696    /* Obstack holding all macro hash nodes. This never shrinks.

697      See cpphash.c */

698    struct obstack hash_ob;

699 

700    /* Obstack holding buffer and conditional structures. This is a

701      real stack.  See cpplib.c.  */

702    struct obstack buffer_ob;

703 

704    /* Pragma table - dynamic, because a library user can add to the

705      list of recognized pragmas.  */

706    struct pragma_entry *pragmas;

707 

708   /* Call backs to cpplib client.  */

709    struct cpp_callbacks cb;

710 

711     /* Identifier hash table.  */

712    struct ht *hash_table;

713 

714    /* Expression parser stack.  */

715    struct op *op_stack, *op_limit;

716 

717    /* User visible options.  */

718    struct cpp_options opts;

719 

720   /* Special nodes - identifiers with predefined significance to the

721      preprocessor.  */

722    struct spec_nodes spec_nodes;

723 

724    /* Whether cpplib owns the hashtable.  */

725    bool our_hashtable;

726 

727    /* Traditional preprocessing output buffer (a logical line).  */

728    struct

729    {

730      uchar *base;

731      uchar *limit;

732      uchar *cur;

733      fileline first_line;

734    } out;

735 

736    /* Used to save the original line number during traditional

737      preprocessing.  */

738    unsigned int saved_line;

739 

740    /* A saved list of the defined macros, for dependency checking

741      of precompiled headers.  */

742    struct cpp_savedstate *savedstate;

743  };

 

The parameter lang of cpp_create_reader, shown below, is of type c_lang, which is defined as follows:

 

154  enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_STDC89, CLK_STDC94,           

155             CLK_STDC99, CLK_GNUCXX, CLK_CXX98, CLK_ASM};

 

As we saw at line 205 in c_common_init_options, CLK_GNUCXX and CLK_GNUC89 are the only values applicable for the C and C++ front-ends. The call to init_library at line 132 in cpp_create_reader below does nothing after v2.7.

 

126  cpp_reader *

127  cpp_create_reader (enum c_lang lang, hash_table *table)                               in cppinit.c

128  {

129    cpp_reader *pfile;

130 

131    /* Initialize this instance of the library if it hasn't been already.  */

132    init_library ();

133 

134    pfile = xcalloc (1, sizeof (cpp_reader));

135 

136    cpp_set_lang (pfile, lang);

137 

At line 136 above, cpp_set_lang records information about the language in the cpp_reader object. It is defined as follows.

 

92    void

93    cpp_set_lang (cpp_reader *pfile, enum c_lang lang)                                      in cppinit.c

94    {

95      const struct lang_flags *l = &lang_defaults[(int) lang];

96   

97      CPP_OPTION (pfile, lang) = lang;

98   

99      CPP_OPTION (pfile, c99)              = l->c99;

100    CPP_OPTION (pfile, cplusplus)      = l->cplusplus;

101    CPP_OPTION (pfile, extended_numbers)       = l->extended_numbers;

102    CPP_OPTION (pfile, std)        = l->std;

103    CPP_OPTION (pfile, trigraphs)       = l->std;

104    CPP_OPTION (pfile, cplusplus_comments)    = l->cplusplus_comments;

105    CPP_OPTION (pfile, digraphs)       = l->digraphs;

106  }

 

lang_defaults, at line 95 above, is a file-scope array that describes the characteristics of the supported languages. In detail: the c99 field, if nonzero, means the language conforms to the 1999 C standard; the std field, if nonzero, means the language conforms to a specific C/C++ standard; the extended_numbers field, if nonzero, means the language allows hexadecimal floats and LL suffixes for numeric constants; the cplusplus_comments field, if nonzero, means the language allows the C++ style “//” comment; and the digraphs field, if nonzero, means the language supports the ISO digraph sequences. Note that line 103 of cpp_set_lang also initializes the trigraphs option from the std field, because lang_flags has no separate trigraphs field.

 

69    struct lang_flags                                                                                        in cppinit.c

70    {

71      char c99;

72      char cplusplus;

73      char extended_numbers;

74      char std;

75      char cplusplus_comments;

76      char digraphs;

77    };

78   

79    static const struct lang_flags lang_defaults[] =                                               in cppinit.c

80    { /*              c99 c++ xnum std  //   digr  */

81      /* GNUC89 */  { 0,  0,  1,   0,   1,   1     },

82      /* GNUC99 */  { 1,  0,  1,   0,   1,   1     },

83      /* STDC89 */  { 0,  0,  0,   1,   0,   0     },

84      /* STDC94 */  { 0,  0,  0,   1,   0,   1     },

85      /* STDC99 */  { 1,  0,  1,   1,   1,   1     },

86     /* GNUCXX */  { 0,  1,  1,   0,   1,   1     },

87      /* CXX98  */  { 0,  1,  1,   1,   1,   1     },

88      /* ASM    */  { 0,  0,  1,   0,   1,   0     }

89    };

 

The macro CPP_OPTION, used at line 97 in cpp_set_lang, is a field selector defined as follows:

 

#define CPP_OPTION(PFILE, OPTION) ((PFILE)->opts.OPTION)        in cpphash.h

 

From the definition, we can see that the macro accesses the opts field of cpp_reader, which is of type cpp_options, and so can be used to initialize it. cpp_options is the collection of flags that control the behavior of the lexer and the preprocessor.

 

211   struct cpp_options                                                                               in cpplib.h

212  {

213    /* Characters between tab stops.  */

214    unsigned int tabstop;

215 

216    /* The language we're preprocessing.  */

217    enum c_lang lang;

218 

219    /* Nonzero means use extra default include directories for C++.  */

220    unsigned char cplusplus;

221 

222   /* Nonzero means handle cplusplus style comments.  */

223    unsigned char cplusplus_comments;

224 

225   /* Nonzero means define __OBJC__, treat @ as a special token, and

226      use the OBJC[PLUS]_INCLUDE_PATH environment variable.  */

227    unsigned char objc;

228 

229    /* Nonzero means don't copy comments into the output file.  */

230    unsigned char discard_comments;

231 

232    /* Nonzero means don't copy comments into the output file during

233      macro expansion.  */

234    unsigned char discard_comments_in_macro_exp;

235 

236   /* Nonzero means process the ISO trigraph sequences.  */

237    unsigned char trigraphs;

238 

239    /* Nonzero means process the ISO digraph sequences.  */

240    unsigned char digraphs;

241 

242    /* Nonzero means to allow hexadecimal floats and LL suffixes.  */

243    unsigned char extended_numbers;

244 

245    /* Nonzero means print names of header files (-H).  */

246    unsigned char print_include_names;

247 

248    /* Nonzero means cpp_pedwarn causes a hard error.  */

249    unsigned char pedantic_errors;

250 

251    /* Nonzero means don't print warning messages.  */

252    unsigned char inhibit_warnings;

253 

254   /* Nonzero means complain about deprecated features.  */

255    unsigned char warn_deprecated;

256 

257    /* Nonzero means don't suppress warnings from system headers.  */

258    unsigned char warn_system_headers;

259 

260    /* Nonzero means don't print error messages. Has no option to

261      select it, but can be set by a user of cpplib (e.g. fix-header).  */

262    unsigned char inhibit_errors;

263 

264   /* Nonzero means warn if slash-star appears in a comment.  */

265    unsigned char warn_comments;

266 

267    /* Nonzero means warn if there are any trigraphs.  */

268    unsigned char warn_trigraphs;

269 

270   /* Nonzero means warn about multicharacter charconsts.  */

271    unsigned char warn_multichar;

272 

273    /* Nonzero means warn about various incompatibilities with

274      traditional C.  */

275    unsigned char warn_traditional;

276 

277   /* Nonzero means warn about long long numeric constants.  */

278    unsigned char warn_long_long;

279 

280    /* Nonzero means warn about text after an #endif (or #else).  */

281    unsigned char warn_endif_labels;

282 

283    /* Nonzero means warn about implicit sign changes owing to integer

284      promotions.  */

285    unsigned char warn_num_sign_change;

286 

287    /* Nonzero means turn warnings into errors.  */

288    unsigned char warnings_are_errors;

289 

290    /* Nonzero means we should look for header.gcc files that remap file

291      names.  */

292    unsigned char remap;

293 

294    /* Zero means dollar signs are punctuation.  */

295    unsigned char dollars_in_ident;

296 

297    /* True if we should warn about dollars in identifiers or numbers

298      for this translation unit.  */

299    unsigned char warn_dollars;

300 

301    /* Nonzero means warn if undefined identifiers are evaluated in an #if.  */

302    unsigned char warn_undef;

303 

304    /* Nonzero means warn of unused macros from the main file.  */

305    unsigned char warn_unused_macros;

306 

307    /* Nonzero for the 1999 C Standard, including corrigenda and amendments.  */

308    unsigned char c99;

309 

310    /* Nonzero if we are conforming to a specific C or C++ standard.  */

311     unsigned char std;

312 

313    /* Nonzero means give all the error messages the ANSI standard requires.  */

314    unsigned char pedantic;

315 

316    /* Nonzero means we're looking at already preprocessed code, so don't

317      bother trying to do macro expansion and whatnot.  */

318    unsigned char preprocessed;

319 

320   /* Print column number in error messages.  */

321    unsigned char show_column;

322 

323    /* Nonzero means handle C++ alternate operator names.  */

324    unsigned char operator_names;

325 

326    /* True for traditional preprocessing.  */

327    unsigned char traditional;

328 

329    /* Holds the name of the target (execution) character set.  */

330    const char *narrow_charset;

331 

332    /* Holds the name of the target wide character set.  */

333    const char *wide_charset;

334 

335   /* Holds the name of the input character set.  */

336    const char *input_charset;

337 

338   /* True to warn about precompiled header files we couldn't use.  */

339    bool warn_invalid_pch;

340 

341    /* True if dependencies should be restored from a precompiled header.  */

342    bool restore_pch_deps;

343 

344    /* Dependency generation.  */

345    struct

346    {

347      /* Style of header dependencies to generate.  */

348      enum {DEPS_NONE = 0, DEPS_USER, DEPS_SYSTEM } style;

349 

350      /* Assume missing files are generated files.  */

351      bool missing_files;

352 

353      /* Generate phony targets for each dependency apart from the first

354        one.  */

355      bool phony_targets;

356 

357      /* If true, no dependency is generated on the main file.  */

358      bool ignore_main_file;

359    } deps;

360 

361    /* Target-specific features set by the front end or client.  */

362 

363    /* Precision for target CPP arithmetic, target characters, target

364      ints and target wide characters, respectively.  */

365    size_t precision, char_precision, int_precision, wchar_precision;

366 

367    /* True means chars (wide chars) are unsigned.  */

368    bool unsigned_char, unsigned_wchar;

369 

370   /* True if the most significant byte in a word has the lowest

371      address in memory.  */

372    bool bytes_big_endian;

373 

374    /* Nonzero means __STDC__ should have the value 0 in system headers.  */

375    unsigned char stdc_0_in_system_headers;

376  };

 

cpp_create_reader continues by initializing some fields of opts; these are very basic settings that can be determined beforehand. The same goes for no_search_path, the fake empty “directory” used as the starting point for files looked up without a search path: its name is set to the empty string (“”) at line 169 below.

 

cpp_create_reader (continued)

 

137    CPP_OPTION (pfile, warn_multichar) = 1;

138    CPP_OPTION (pfile, discard_comments) = 1;

139    CPP_OPTION (pfile, discard_comments_in_macro_exp) = 1;

140    CPP_OPTION (pfile, show_column) = 1;

141    CPP_OPTION (pfile, tabstop) = 8;

142    CPP_OPTION (pfile, operator_names) = 1;

143    CPP_OPTION (pfile, warn_trigraphs) = 2;

144    CPP_OPTION (pfile, warn_endif_labels) = 1;

145    CPP_OPTION (pfile, warn_deprecated) = 1;

146    CPP_OPTION (pfile, warn_long_long) = !CPP_OPTION (pfile, c99);

147    CPP_OPTION (pfile, dollars_in_ident) = 1;

148    CPP_OPTION (pfile, warn_dollars) = 1;

149 

150    /* Default CPP arithmetic to something sensible for the host for the

151      benefit of dumb users like fix-header.  */

152    CPP_OPTION (pfile, precision) = CHAR_BIT * sizeof (long);

153    CPP_OPTION (pfile, char_precision) = CHAR_BIT;

154    CPP_OPTION (pfile, wchar_precision) = CHAR_BIT * sizeof (int);

155    CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);

156    CPP_OPTION (pfile, unsigned_char) = 0;

157    CPP_OPTION (pfile, unsigned_wchar) = 1;

158    CPP_OPTION (pfile, bytes_big_endian) = 1;  /* does not matter */

159 

160   /* Default to locale/UTF-8.  */

161    CPP_OPTION (pfile, narrow_charset) = _cpp_default_encoding ();

162    CPP_OPTION (pfile, wide_charset) = 0;

163    CPP_OPTION (pfile, input_charset) = _cpp_default_encoding ();

164 

165    /* A fake empty "directory" used as the starting point for files

166      looked up without a search path. Name cannot be '/' because we

167      don't want to prepend anything at all to filenames using it. All

168      other entries are correct zero-initialized.  */

169    pfile->no_search_path.name = (char *) "";

170 

171   /* Initialize the line map. Start at logical line 1, so we can use

172      a line number of zero for special states.  */

173    linemap_init (&pfile->line_maps);

174    pfile->line = 1;

 

Macros and included files are expanded by the preprocessor, and the result is then fed into the lexer. When an error is found, the lexer cannot tell the location in the source file, only the location in the code after preprocessing. The same happens in the parser. So a mapping between the source file and the preprocessed output is required. The structure line_maps is provided for this purpose.

 

58    struct line_maps                                                                                         in line-map.h

59    {

60      struct line_map *maps;

61      unsigned int allocated;

62      unsigned int used;

63   

64      /* The most recently listed include stack, if any, starts with

65        LAST_LISTED as the topmost including file. -1 indicates nothing

66        has been listed yet.  */

67      int last_listed;

68   

69      /* Depth of the include stack, including the current file.  */

70      unsigned int depth;

71   

72      /* If true, prints an include trace a la -H.  */

73      bool trace_includes;

74    };

 

39    /* The logical line FROM_LINE maps to physical source file TO_FILE at

40      line TO_LINE, and subsequently one-to-one until the next line_map

41      structure in the set. INCLUDED_FROM is an index into the set that

42      gives the line mapping at whose end the current one was included.

43      File(s) at the bottom of the include stack have this set to -1.

44      REASON is the reason for creation of this line map, SYSP is one for

45      a system header, two for a C system header file that therefore

46      needs to be extern "C" protected in C++, and zero otherwise.  */

47    struct line_map                                                                                          in line-map.h

48    {

49      const char *to_file;

50      unsigned int to_line;

51      source_location from_line;

52      int included_from;

53      ENUM_BITFIELD (lc_reason) reason : CHAR_BIT;

54      unsigned char sysp;

55    };

 

26    /* Reason for adding a line change with add_line_map (). LC_ENTER is

27      when including a new file, e.g. a #include directive in C.

28      LC_LEAVE is when reaching a file's end. LC_RENAME is when a file

29      name or line number changes for neither of the above reasons

30      (e.g. a #line directive in C).  */

31    enum lc_reason {LC_ENTER = 0, LC_LEAVE, LC_RENAME};

 

In a later chapter, we will see that setting up the mapping from logical position to physical position is not simple. Every time a file is included, a new mapping must be set up; and when leaving an included file, a remapping is needed as well.

At line 173 in cpp_create_reader, linemap_init is used to initialize the line_maps.

 

32    void

33    linemap_init (struct line_maps *set)

34    {

35      set->maps = 0;

36      set->allocated = 0;

37      set->used = 0;

38      set->last_listed = -1;

39      set->trace_includes = false;

40      set->depth = 0;

41    }

 

cpp_create_reader (continued)

 

176   /* Initialize lexer state.  */

177    pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);

178 

179    /* Set up static tokens.  */

180    pfile->avoid_paste.type = CPP_PADDING;

181    pfile->avoid_paste.val.source = NULL;

182    pfile->eof.type = CPP_EOF;

183    pfile->eof.flags = 0;

 

The state field of cpp_reader, at line 177 above, records the lexer state. The avoid_paste field is a token that forces paste avoidance; it is of type cpp_token, which we will see later. The eof field stands for the end of the file.

 

cpp_create_reader (continued)

 

185    /* Create a token buffer for the lexer.  */

186    _cpp_init_tokenrun (&pfile->base_run, 250);

187    pfile->cur_run = &pfile->base_run;

188    pfile->cur_token = pfile->base_run.base;

 

base_run heads a doubly linked list of token blocks. At first, a single block of 250 tokens is allocated. It is of type tokenrun, which is defined as follows.

 

158  typedef struct tokenrun tokenrun;

159  struct tokenrun                                                                                          in cpphash.h

160  {

161    tokenrun *next, *prev;

162    cpp_token *base, *limit;

163  };

 

644  void

645  _cpp_init_tokenrun (tokenrun *run, unsigned int count)                                 in cpplex.c

646  {

647    run->base = xnewvec (cpp_token, count);

648    run->limit = run->base + count;

649    run->next = NULL;

650  }

 

cpp_create_reader (continued)

 

190    /* Initialize the base context.  */

191    pfile->context = &pfile->base_context;

192    pfile->base_context.macro = 0;

193    pfile->base_context.prev = pfile->base_context.next = 0;

 

The context field of cpp_reader is the buffer used for macro expansion; it is a doubly linked list too. Its type is defined as follows:

 

172  struct cpp_context                                                                        in cpphash.h

173  {

174    /* Doubly-linked list.  */

175    cpp_context *next, *prev;

176 

177    union

178    {

179      /* For ISO macro expansion. Contexts other than the base context

180        are contiguous tokens. e.g. macro expansions, expanded

181        argument tokens.  */

182      struct

183      {

184        union utoken first;

185        union utoken last;

186      } iso;

187 

188      /* For traditional macro expansion.  */

189      struct

190      {

191        const uchar *cur;

192        const uchar *rlimit;

193      } trad;

194    } u;

195 

196   /* If non-NULL, a buffer used for storage related to this context.

197      When the context is popped, the buffer is released.  */

198    _cpp_buff *buff;

199 

200   /* For a macro context, the macro node, otherwise NULL.  */

201    cpp_hashnode *macro;

202 

203    /* True if utoken element is token, else ptoken.  */

204    bool direct_p;

205  };

 

In lines 177~194, the union is used to handle both traditional and ISO macro expansion. Traditional expansion is used if we ask gcc to compile with the -traditional switch (to cope with the pre-standard language). The major difference between traditional and ISO macros is that the former expand to text rather than to a token sequence. So we see that, in lines 182~186, the struct used for ISO macros contains the union type utoken, which is defined as follows.

 

151  union utoken                                                                                             in cpphash.h

152  {

153    const cpp_token *token;

154    const cpp_token **ptoken;

155  };

 

cpp_create_reader (continued)

 

196   /* Aligned and unaligned storage.  */

197    pfile->a_buff = _cpp_get_buff (pfile, 0);

198    pfile->u_buff = _cpp_get_buff (pfile, 0);

 

a_buff and u_buff are two further buffers used by cpp_reader: u_buff is used for handling stringification (the preprocessing operator ‘#’), and a_buff is used for everything else. To ease memory management, cpp_reader uses free_buffs to hold the blocks freed from a_buff and u_buff. The buffer has the following definition and operations.

 

130  struct _cpp_buff                                                                                        in cpphash.h

131  {

132    struct _cpp_buff *next;

133    unsigned char *base, *cur, *limit;

134  };

 

1423 _cpp_buff *

1424 _cpp_get_buff (cpp_reader *pfile, size_t min_size)                                       in cpplex.c

1425 {

1426   _cpp_buff *result, **p;

1427

1428   for (p = &pfile->free_buffs;; p = &(*p)->next)

1429   {

1430     size_t size;

1431

1432     if (*p == NULL)

1433        return new_buff (min_size);

1434     result = *p;

1435     size = result->limit - result->base;

1436     /* Return a buffer that's big enough, but don't waste one that's

1437       way too big.  */

1438     if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))

1439        break;

1440   }

1441

1442   *p = result->next;

1443   result->next = NULL;

1444   result->cur = result->base;

1445   return result;

1446 }

 

1391 static _cpp_buff *

1392 new_buff (size_t len)                                                                                 in cpplex.c

1393 {

1394   _cpp_buff  *result;

1395   unsigned char *base;

1396

1397   if (len < MIN_BUFF_SIZE)

1398     len = MIN_BUFF_SIZE;

1399   len = CPP_ALIGN (len);

1400

1401   base = xmalloc (len + sizeof (_cpp_buff));

1402   result = (_cpp_buff *) (base + len);

1403   result->base = base;

1404   result->cur = base;

1405   result->limit = base + len;

1406   result->next = NULL;

1407   return result;

1408 }

你可能感兴趣的:(struct,buffer,token,character,include,Comments)