Studying note of GCC-3.4.6 source (37)

4.1.3.1.2.2.      Read in common file

Back in should_stack_file: for an ordinary (non-PCH) file, the whole file content is read into memory, into the buffer field of its _cpp_file.

 

should_stack_file (continue)

 

584      if (!read_file (pfile, file))

585        return false;

586   

587      /* Now we've read the file's contents, we can stack it if there

588        are no once-only files.  */

589      if (!pfile->seen_once_only)

590        return true;

 

For an ordinary file, reading in its content is quite straightforward.

 

523    static bool

524    read_file (cpp_reader *pfile, _cpp_file *file)                                              in cppfiles.c

525    {

526      /* If we already have its contents in memory, succeed immediately.  */

527      if (file->buffer_valid)

528        return true;

529   

530      /* If an earlier read failed for some reason don't try again.  */

531      if (file->dont_read || file->err_no)

532        return false;

533   

534      if (file->fd == -1 && !open_file (file))

535      {

536        open_file_failed (pfile, file, 0);

537        return false;

538      }

539   

540      file->dont_read = !read_file_guts (pfile, file);

541      close (file->fd);

542      file->fd = -1;

543   

544      return !file->dont_read;

545    }

 

A nonzero dont_read field in the _cpp_file above indicates that an earlier attempt to read the file failed. For such a file, no further attempt is made to open or read it.

 

450    static bool

451    read_file_guts (cpp_reader *pfile, _cpp_file *file)                                      in cppfiles.c

452    {

453      ssize_t size, total, count;

454      uchar *buf;

455      bool regular;

456     

457      if (S_ISBLK (file->st.st_mode))

458      {

459        cpp_error (pfile, CPP_DL_ERROR, "%s is a block device", file->path);

460        return false;

461      }

462   

463      regular = S_ISREG (file->st.st_mode);

464      if (regular)

465      {

466        /* off_t might have a wider range than ssize_t - in other words,

467          the max size of a file might be bigger than the address

468          space. We can't handle a file that large. (Anyone with

469          a single source file bigger than 2GB needs to rethink

470          their coding style.) Some systems (e.g. AIX 4.1) define

471          SSIZE_MAX to be much smaller than the actual range of the

472          type. Use INTTYPE_MAXIMUM unconditionally to ensure this

473          does not bite us.  */

474        if (file->st.st_size > INTTYPE_MAXIMUM (ssize_t))

475        {

476          cpp_error (pfile, CPP_DL_ERROR, "%s is too large", file->path);

477          return false;

478        }

479   

480        size = file->st.st_size;

481      }

482      else

483        /* 8 kilobytes is a sensible starting size. It ought to be bigger

484          than the kernel pipe buffer, and it's definitely bigger than

485          the majority of C source files.  */

486        size = 8 * 1024;

487   

488      buf = xmalloc (size + 1);

489      total = 0;

490      while ((count = read (file->fd, buf + total, size - total)) > 0)

491      {

492        total += count;

493   

494        if (total == size)

495        {

496          if (regular)

497            break;

498          size *= 2;

499          buf = xrealloc (buf, size + 1);

500        }

501      }

502   

503      if (count < 0)

504      {

505        cpp_errno (pfile, CPP_DL_ERROR, file->path);

506        return false;

507      }

508   

509      if (regular && total != size && STAT_SIZE_RELIABLE (file->st))

510        cpp_error (pfile, CPP_DL_WARNING,

511                 "%s is shorter than expected", file->path);

512   

513      file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),

514                                   buf, size, total, &file->st.st_size);

515      file->buffer_valid = true;

516   

517      return true;

518    }

 

_cpp_convert_input above converts the content, using the library function iconv, if the file uses a charset other than UTF-8 or UTF-EBCDIC. For details, refer to the source files cppcharset.c and strcasecmp.c.

If no file has been marked once-only by #pragma once or #import (i.e. seen_once_only is not set), should_stack_file returns at line 590 right after reading in the content. Otherwise, it must ensure that only one copy of the content is used.

 

should_stack_file (continue)

 

592      /* We may have read the file under a different name. Look

593        for likely candidates and compare file contents to be sure.  */

594      for (f = pfile->all_files; f; f = f->next_file)

595      {

596        if (f == file)

597          continue;

598   

599        if ((import || f->once_only)

600           && f->err_no == 0

601           && f->st.st_mtime == file->st.st_mtime

602           && f->st.st_size == file->st.st_size)

603        {

604          _cpp_file *ref_file;

605          bool same_file_p = false;

606   

607          if (f->buffer && !f->buffer_valid)

608          {

609            /* We already have a buffer but it is not valid, because

610              the file is still stacked. Make a new one.  */

611             ref_file = make_cpp_file (pfile, f->dir, f->name);

612            ref_file->path = f->path;

613          }

614          else

615            /* The file is not stacked anymore. We can reuse it.  */

616            ref_file = f;

617   

618          same_file_p = read_file (pfile, ref_file)

619             /* Size might have changed in read_file().  */

620             && ref_file->st.st_size == file->st.st_size

621             && !memcmp (ref_file->buffer,

622                                  file->buffer,

623                                  file->st.st_size);

624   

625          if (f->buffer && !f->buffer_valid)

626          {

627            ref_file->path = 0;

628            destroy_cpp_file (ref_file);

629          }

630   

631          if (same_file_p)

632            break;

633        }

634      }

635   

636      return f == NULL;

637    }

 

At line 594 above, all_files in cpp_reader records all files already read in for the current compilation unit, while at line 622 file is the file just read in. Then at line 636, if f is NULL, it means that no matching file was found in all_files of cpp_reader, so file should be stacked.

4.1.3.1.2.3.      Stack the file (exclude PCH file)

If should_stack_file returns true, the file needs to be stacked (note that a PCH file causes the function to return false, so it is not stacked). At lines 672 and 673 below, mi_valid and mi_cmacro are used for the multiple-include optimization; the details of the technique, taken from [5], are given below. We will see the corresponding steps in the source code in a later section:

Header files are often of the form

     #ifndef FOO

     #define FOO

     ...

     #endif

to prevent the compiler from processing them more than once. The preprocessor notices such header files, so that if the header file appears in a subsequent `#include' directive and `FOO' is defined, then it is ignored and it doesn't preprocess or even re-open the file a second time. This is referred to as the "multiple include optimization".

Under what circumstances is such an optimization valid? If the file were included a second time, it can only be optimized away if that inclusion would result in no tokens to return, and no relevant directives to process. Therefore the current implementation imposes requirements and makes some allowances as follows:

•        There must be no tokens outside the controlling `#if'-`#endif' pair, but whitespace and comments are permitted.

•        There must be no directives outside the controlling directive pair, but the "null directive" (a line containing nothing other than a single `#' and possibly whitespace) is permitted.

•        The opening directive must be of the form

          #ifndef FOO

     or

          #if !defined FOO     [equivalently, #if !defined(FOO)]

•        In the second form above, the tokens forming the `#if' expression must have come directly from the source file--no macro expansion must have been involved. This is because macro definitions can change, and tracking whether or not a relevant change has been made is not worth the implementation cost.

•        There can be no `#else' or `#elif' directives at the outer conditional block level, because they would probably contain something of interest to a subsequent pass.

First, when pushing a new file on the buffer stack, _stack_include_file sets the controlling macro mi_cmacro to `NULL', and sets mi_valid to `true'. This indicates that the preprocessor has not yet encountered anything that would invalidate the multiple-include optimization. As described in the next few paragraphs, these two variables having these values effectively indicates top-of-file.

When about to return a token that is not part of a directive, _cpp_lex_token sets mi_valid to `false'. This enforces the constraint that tokens outside the controlling conditional block invalidate the optimization.

The do_if, when appropriate, and do_ifndef directive handlers pass the controlling macro to the function push_conditional. Cpplib maintains a stack of nested conditional blocks, and after processing every opening conditional this function pushes an if_stack structure onto the stack. In this structure it records the controlling macro for the block, provided there is one and we're at top-of-file (as described above). If an `#elif' or `#else' directive is encountered, the controlling macro for that block is cleared to `NULL'. Otherwise, it survives until the `#endif' closing the block, upon which do_endif sets mi_valid to true and stores the controlling macro in mi_cmacro.

_cpp_handle_directive clears mi_valid when processing any directive other than an opening conditional and the null directive. With this, and requiring top-of-file to record a controlling macro, and no `#else' or `#elif' for it to survive and be copied to mi_cmacro by do_endif, we have enforced the absence of directives outside the main conditional block for the optimization to be on.

Note that whilst we are inside the conditional block, mi_valid is likely to be reset to `false', but this does not matter since the closing `#endif' restores it to `true' if appropriate.

Finally, since _cpp_lex_direct pops the file off the buffer stack at `EOF' without returning a token, if the `#endif' directive was not followed by any tokens, mi_valid is `true' and _cpp_pop_file_buffer remembers the controlling macro associated with the file. Subsequent calls to _stack_include_file result in no buffer being pushed if the controlling macro is defined, effecting the optimization.

A quick word on how we handle the following case:

     #if !defined FOO

_cpp_parse_expr and parse_defined take steps to see whether the three stages `!', `defined-expression' and `end-of-directive' occur in order in a `#if' expression. If so, they return the guard macro to do_if in the variable mi_ind_cmacro, and otherwise set it to `NULL'. enter_macro_context sets mi_valid to false, so if a macro was expanded whilst parsing any part of the expression, then the top-of-file test in push_conditional fails and the optimization is turned off.

 

_cpp_stack_file (continue)

 

652      sysp = MAX ((pfile->map ? pfile->map->sysp : 0),

653               (file->dir ? file->dir->sysp : 0));

654   

655      /* Add the file to the dependencies on its first inclusion.  */

656      if (CPP_OPTION (pfile, deps.style) > !!sysp && !file->stack_count)

657      {

658        if (!file->main_file || !CPP_OPTION (pfile, deps.ignore_main_file))

659          deps_add_dep (pfile->deps, file->path);

660      }

661   

662      /* Clear buffer_valid since _cpp_clean_line messes it up.  */

663      file->buffer_valid = false;

664      file->stack_count++;

665   

666      /* Stack the buffer.  */

667      buffer = cpp_push_buffer (pfile, file->buffer, file->st.st_size,

668                           CPP_OPTION (pfile, preprocessed));

669      buffer->file = file;

670   

671      /* Initialize controlling macro state.  */

672      pfile->mi_valid = true;

673      pfile->mi_cmacro = 0;

674   

675      /* Generate the call back.  */

676      _cpp_do_file_change (pfile, LC_ENTER, file->path, 1, sysp);

677   

678      return true;

679    }

4.1.3.1.2.3.1.              Do file change

Once the (non-PCH) file has been read into the buffer, it is ready to be consumed by the lexer (which also provides some preprocessor functionality, such as macro expansion), whose output will in turn be syntactically analyzed by the parser. But before switching buffers, some processing is needed.

 

906    void

907    _cpp_do_file_change (cpp_reader *pfile, enum lc_reason reason,                  in cpplib.c

908                     const char *to_file, unsigned int file_line,

909                     unsigned int sysp)

910    {

911       pfile->map = linemap_add (&pfile->line_maps, reason, sysp,

912                           pfile->line, to_file, file_line);

913   

914      if (pfile->cb.file_change)

915        pfile->cb.file_change (pfile, pfile->map);

916    }

 

Notice that the line field of pfile above records the current position in the file as a line number, and that the parameter set of linemap_add refers to line_maps of pfile. The parameters to_file and to_line refer to the included file (if the file is introduced by #include) or to the file itself. Here, as we are opening the file of interest for the first time, to_line is 1. The field used in line_maps indicates how many line-map entries have been recorded so far, and the field depth indicates the current position (nesting depth) in the include list.

 

76      const struct line_map *

77      linemap_add (struct line_maps *set, enum lc_reason reason,                        in line-map.c

78                unsigned int sysp, source_location from_line,

79                const char *to_file, unsigned int to_line)

80      {

81        struct line_map *map;

82     

83        if (set->used && from_line < set->maps[set->used - 1].from_line)

84          abort ();

85     

86        if (set->used == set->allocated)

87        {

88          set->allocated = 2 * set->allocated + 256;

89          set->maps = xrealloc (set->maps, set->allocated * sizeof (struct line_map));

90        }

91     

92        map = &set->maps[set->used++];

93     

94        if (to_file && *to_file == '\0')

95          to_file = "<stdin>";

96     

97        /* If we don't keep our line maps consistent, we can easily

98          segfault. Don't rely on the client to do it for us.  */

99        if (set->depth == 0)

100        reason = LC_ENTER;

101      else if (reason == LC_LEAVE)

102      {

103        struct line_map *from;

104        bool error;

105   

106        if (MAIN_FILE_P (map - 1))

107        {

108          if (to_file == NULL)

109          {

110             set->depth--;

111             set->used--;

112             return NULL;

113           }

114           error = true;

115           reason = LC_RENAME;

116           from = map - 1;

117         }

118         else

119         {

120          from = INCLUDED_FROM (set, map - 1);

121          error = to_file && strcmp (from->to_file, to_file);

122        }

123   

124        /* Depending upon whether we are handling preprocessed input or

125          not, this can be a user error or an ICE.  */

126        if (error)

127          fprintf (stderr, "line-map.c: file \"%s\" left but not entered\n",

128                to_file);

129   

130        /* A TO_FILE of NULL is special - we use the natural values.  */

131        if (error || to_file == NULL)

132        {

133          to_file = from->to_file;

134          to_line = LAST_SOURCE_LINE (from) + 1;

135          sysp = from->sysp;

136        }

137      }

138   

139      map->reason = reason;

140      map->sysp = sysp;

141      map->from_line = from_line;

142      map->to_file = to_file;

143      map->to_line = to_line;

144   

145      if (reason == LC_ENTER)

146      {

147        map->included_from = set->depth == 0 ? -1 : (int) (set->used - 2);

148        set->depth++;

149        if (set->trace_includes)

150          trace_include (set, map);

151      }

152      else if (reason == LC_RENAME)

153        map->included_from = map[-1].included_from;

154      else if (reason == LC_LEAVE)

155      {

156        set->depth--;

157        map->included_from = INCLUDED_FROM (set, map - 1)->included_from;

158      }

159   

160      return map;

161    }

 

See that at line 106 above, (map-1) is used as the parameter in MAIN_FILE_P.

 

120    #define MAIN_FILE_P(MAP) ((MAP)->included_from < 0)                             in line-map.h

 

And INCLUDED_FROM, used at line 120, is defined as:

 

117     #define INCLUDED_FROM(SET, MAP) (&(SET)->maps[(MAP)->included_from])

 

To demonstrate how line_maps and linemap_add works, assuming that we have following file stack:

Current file ← include file1 ← include file2 ← include file3

When we are handling file3, we have line_maps as below figure.

Figure 14 linemap_add, step 1

Then if we leave file3 back into file2, we have following line_maps.

 

 

 

 Figure 15 linemap_add, step 2

Then back to file1.

Figure 16 linemap_add, step 3

We can even leave the current file. Notice that in this case NULL is returned, the element at index 5 will be reused for the next “main file”, and the 5 elements before it track the mapping of all related files to the preprocessed output.

Figure 17 linemap_add, step 4

At line 914 in _cpp_do_file_change, if the front end also has something to do at the time of a file change, it needs to set the hook file_change in the cb structure of cpp_reader. The C++ front end binds the hook to the function below.

 

1500 static void

1501 cb_file_change (cpp_reader *pfile ATTRIBUTE_UNUSED,                          in c-opts.c

1502                const struct line_map *new_map)

1503 {

1504   if (flag_preprocess_only)

1505     pp_file_change (new_map);

1506   else

1507     fe_file_change (new_map);

1508

1509   if (new_map == 0 || (new_map->reason == LC_LEAVE && MAIN_FILE_P (new_map)))

1510     push_command_line_include ();

1511 }

 

Below, the macro NO_IMPLICIT_EXTERN_C is defined if the system header files support C++ as well as C. This macro inhibits the usual method of using system header files in C++, which is to pretend that the file's contents are enclosed in `extern "C" {...}'. This macro is defined on Linux. Also below, input_filename and input_line access the file and line fields of input_location respectively.

 

203    void

204    fe_file_change (const struct line_map *new_map)                                      in c-lex.c

205    {

206      if (new_map == NULL)

207      {

208        map = NULL;

209        return;

210      }

211    

212      if (new_map->reason == LC_ENTER)

213      {

214        /* Don't stack the main buffer on the input stack;

215          we already did in compile_file.  */

216        if (map != NULL)

217        {

218          int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);

219   

220          input_line = included_at;

221          push_srcloc (new_map->to_file, 1);

222          (*debug_hooks->start_source_file) (included_at, new_map->to_file);

223    #ifndef NO_IMPLICIT_EXTERN_C

   

231    #endif

232        }

233      }

234      else if (new_map->reason == LC_LEAVE)

235      {

236    #ifndef NO_IMPLICIT_EXTERN_C

   

243    #endif

244        pop_srcloc ();

245   

246        (*debug_hooks->end_source_file) (new_map->to_line);

247      }

248   

249      update_header_times (new_map->to_file);

250      in_system_header = new_map->sysp != 0;

251      input_filename = new_map->to_file;

252      input_line = new_map->to_line;

253      map = new_map;

254   

255      /* Hook for C++.  */

256      extract_interface_info ();

257    }

 

Then, for an included file, push_srcloc, pop_srcloc, start_source_file, and end_source_file are invoked for debugging and diagnostic purposes.

 

你可能感兴趣的:(File,buffer,input,include,optimization,preprocessor)