Back in should_stack_file: for an ordinary file, the whole file content is read into the buffer of its _cpp_file (file->buffer).
should_stack_file (continue)
584 if (!read_file (pfile, file))
585 return false;
586
587 /* Now we've read the file's contents, we can stack it if there
588 are no once-only files. */
589 if (!pfile->seen_once_only)
590 return true;
For an ordinary file, reading in its content is quite straightforward.
523 static bool
524 read_file (cpp_reader *pfile, _cpp_file *file) in cppfiles.c
525 {
526 /* If we already have its contents in memory, succeed immediately. */
527 if (file->buffer_valid)
528 return true;
529
530 /* If an earlier read failed for some reason don't try again. */
531 if (file->dont_read || file->err_no)
532 return false;
533
534 if (file->fd == -1 && !open_file (file))
535 {
536 open_file_failed (pfile, file, 0);
537 return false;
538 }
539
540 file->dont_read = !read_file_guts (pfile, file);
541 close (file->fd);
542 file->fd = -1;
543
544 return !file->dont_read;
545 }
A nonzero dont_read field in the _cpp_file above indicates that an earlier attempt to read the file failed; for such a file, no further attempt is made to open or read it.
450 static bool
451 read_file_guts (cpp_reader *pfile, _cpp_file *file) in cppfiles.c
452 {
453 ssize_t size, total, count;
454 uchar *buf;
455 bool regular;
456
457 if (S_ISBLK (file->st.st_mode))
458 {
459 cpp_error (pfile, CPP_DL_ERROR, "%s is a block device", file->path);
460 return false;
461 }
462
463 regular = S_ISREG (file->st.st_mode);
464 if (regular)
465 {
466 /* off_t might have a wider range than ssize_t - in other words,
467 the max size of a file might be bigger than the address
468 space. We can't handle a file that large. (Anyone with
469 a single source file bigger than 2GB needs to rethink
470 their coding style.) Some systems (e.g. AIX 4.1) define
471 SSIZE_MAX to be much smaller than the actual range of the
472 type. Use INTTYPE_MAXIMUM unconditionally to ensure this
473 does not bite us. */
474 if (file->st.st_size > INTTYPE_MAXIMUM (ssize_t))
475 {
476 cpp_error (pfile, CPP_DL_ERROR, "%s is too large", file->path);
477 return false;
478 }
479
480 size = file->st.st_size;
481 }
482 else
483 /* 8 kilobytes is a sensible starting size. It ought to be bigger
484 than the kernel pipe buffer, and it's definitely bigger than
485 the majority of C source files. */
486 size = 8 * 1024;
487
488 buf = xmalloc (size + 1);
489 total = 0;
490 while ((count = read (file->fd, buf + total, size - total)) > 0)
491 {
492 total += count;
493
494 if (total == size)
495 {
496 if (regular)
497 break;
498 size *= 2;
499 buf = xrealloc (buf, size + 1);
500 }
501 }
502
503 if (count < 0)
504 {
505 cpp_errno (pfile, CPP_DL_ERROR, file->path);
506 return false;
507 }
508
509 if (regular && total != size && STAT_SIZE_RELIABLE (file->st))
510 cpp_error (pfile, CPP_DL_WARNING,
511 "%s is shorter than expected", file->path);
512
513 file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
514 buf, size, total, &file->st.st_size);
515 file->buffer_valid = true;
516
517 return true;
518 }
Above, _cpp_convert_input converts the content, by means of the iconv library function, if the file uses a charset other than UTF-8 or UTF-EBCDIC. For the details, refer to the sources cppcharset.c and strcasecmp.c.
If no once-only file (one marked by #pragma once or brought in by #import) has been seen so far, should_stack_file returns at line 590 right after reading in the content. Otherwise, it must make sure that the same content has not already been read under a different name.
should_stack_file (continue)
592 /* We may have read the file under a different name. Look
593 for likely candidates and compare file contents to be sure. */
594 for (f = pfile->all_files; f; f = f->next_file)
595 {
596 if (f == file)
597 continue;
598
599 if ((import || f->once_only)
600 && f->err_no == 0
601 && f->st.st_mtime == file->st.st_mtime
602 && f->st.st_size == file->st.st_size)
603 {
604 _cpp_file *ref_file;
605 bool same_file_p = false;
606
607 if (f->buffer && !f->buffer_valid)
608 {
609 /* We already have a buffer but it is not valid, because
610 the file is still stacked. Make a new one. */
611 ref_file = make_cpp_file (pfile, f->dir, f->name);
612 ref_file->path = f->path;
613 }
614 else
615 /* The file is not stacked anymore. We can reuse it. */
616 ref_file = f;
617
618 same_file_p = read_file (pfile, ref_file)
619 /* Size might have changed in read_file(). */
620 && ref_file->st.st_size == file->st.st_size
621 && !memcmp (ref_file->buffer,
622 file->buffer,
623 file->st.st_size);
624
625 if (f->buffer && !f->buffer_valid)
626 {
627 ref_file->path = 0;
628 destroy_cpp_file (ref_file);
629 }
630
631 if (same_file_p)
632 break;
633 }
634 }
635
636 return f == NULL;
637 }
Above, at line 594, all_files in cpp_reader holds all files already read in for the current compilation unit, while at line 622, file is the file just read in. Then at line 636, if f is NULL, it means that no file in all_files of cpp_reader has the same content as file, so file should be stacked.
If should_stack_file returns true, the file needs to be stacked (recall that a PCH file causes the function to return false, so it is not stacked). At lines 672 and 673 below, mi_valid and mi_cmacro are used for the multiple-include optimization; the details of the technique are quoted below from [5]. We will see the corresponding steps in the source code in a later section:
Header files are often of the form #ifndef FOO #define FOO ... #endif to prevent the compiler from processing them more than once. The preprocessor notices such header files, so that if the header file appears in a subsequent `#include' directive and `FOO' is defined, then it is ignored and it doesn't preprocess or even re-open the file a second time. This is referred to as the "multiple include optimization". Under what circumstances is such an optimization valid? If the file were included a second time, it can only be optimized away if that inclusion would result in no tokens to return, and no relevant directives to process. Therefore the current implementation imposes requirements and makes some allowances as follows: There must be no tokens outside the controlling `#if'-`#endif' pair, but whitespace and comments are permitted. There must be no directives outside the controlling directive pair, but the "null directive" (a line containing nothing other than a single `#' and possibly whitespace) is permitted. The opening directive must be of the form #ifndef FOO or #if !defined FOO [equivalently, #if !defined(FOO)] In the second form above, the tokens forming the `#if' expression must have come directly from the source file--no macro expansion must have been involved. This is because macro definitions can change, and tracking whether or not a relevant change has been made is not worth the implementation cost. There can be no `#else' or `#elif' directives at the outer conditional block level, because they would probably contain something of interest to a subsequent pass. First, when pushing a new file on the buffer stack, _stack_include_file sets the controlling macro mi_cmacro to `NULL', and sets mi_valid to `true'. This indicates that the preprocessor has not yet encountered anything that would invalidate the multiple-include optimization. As described in the next few paragraphs, these two variables having these values effectively indicates top-of-file. 
When about to return a token that is not part of a directive, _cpp_lex_token sets mi_valid to `false'. This enforces the constraint that tokens outside the controlling conditional block invalidate the optimization. The do_if, when appropriate, and do_ifndef directive handlers pass the controlling macro to the function push_conditional. Cpplib maintains a stack of nested conditional blocks, and after processing every opening conditional this function pushes an if_stack structure onto the stack. In this structure it records the controlling macro for the block, provided there is one and we're at top-of-file (as described above). If an `#elif' or `#else' directive is encountered, the controlling macro for that block is cleared to `NULL'. Otherwise, it survives until the `#endif' closing the block, upon which do_endif sets mi_valid to true and stores the controlling macro in mi_cmacro. _cpp_handle_directive clears mi_valid when processing any directive other than an opening conditional and the null directive. With this, and requiring top-of-file to record a controlling macro, and no `#else' or `#elif' for it to survive and be copied to mi_cmacro by do_endif, we have enforced the absence of directives outside the main conditional block for the optimization to be on. Note that whilst we are inside the conditional block, mi_valid is likely to be reset to `false', but this does not matter since the closing `#endif' restores it to `true' if appropriate. Finally, since _cpp_lex_direct pops the file off the buffer stack at `EOF' without returning a token, if the `#endif' directive was not followed by any tokens, mi_valid is `true' and _cpp_pop_file_buffer remembers the controlling macro associated with the file. Subsequent calls to _stack_include_file result in no buffer being pushed if the controlling macro is defined, effecting the optimization. A quick word on how we handle the `#if !defined FOO' case.
_cpp_parse_expr and parse_defined take steps to see whether the three stages `!', `defined-expression' and `end-of-directive' occur in order in a `#if' expression. If so, they return the guard macro to do_if in the variable mi_ind_cmacro, and otherwise set it to `NULL'. enter_macro_context sets mi_valid to false, so if a macro was expanded whilst parsing any part of the expression, then the top-of-file test in push_conditional fails and the optimization is turned off.
_cpp_stack_file (continue)
652 sysp = MAX ((pfile->map ? pfile->map->sysp : 0),
653 (file->dir ? file->dir->sysp : 0));
654
655 /* Add the file to the dependencies on its first inclusion. */
656 if (CPP_OPTION (pfile, deps.style) > !!sysp && !file->stack_count)
657 {
658 if (!file->main_file || !CPP_OPTION (pfile, deps.ignore_main_file))
659 deps_add_dep (pfile->deps, file->path);
660 }
661
662 /* Clear buffer_valid since _cpp_clean_line messes it up. */
663 file->buffer_valid = false;
664 file->stack_count++;
665
666 /* Stack the buffer. */
667 buffer = cpp_push_buffer (pfile, file->buffer, file->st.st_size,
668 CPP_OPTION (pfile, preprocessed));
669 buffer->file = file;
670
671 /* Initialize controlling macro state. */
672 pfile->mi_valid = true;
673 pfile->mi_cmacro = 0;
674
675 /* Generate the call back. */
676 _cpp_do_file_change (pfile, LC_ENTER, file->path, 1, sysp);
677
678 return true;
679 }
4.1.3.1.2.3.1. Do file change
Once the (non-PCH) file has been read into the buffer, it is consumed by the lexer (which also provides some preprocessor functionality, such as macro expansion), whose output is in turn syntax-analyzed by the parser. Before switching buffers, however, some processing is needed.
906 void
907 _cpp_do_file_change (cpp_reader *pfile, enum lc_reason reason, in cpplib.c
908 const char *to_file, unsigned int file_line,
909 unsigned int sysp)
910 {
911 pfile->map = linemap_add (&pfile->line_maps, reason, sysp,
912 pfile->line, to_file, file_line);
913
914 if (pfile->cb.file_change)
915 pfile->cb.file_change (pfile, pfile->map);
916 }
Notice that the line field of pfile above records the current position in the file as a line number, and that the parameter set of linemap_add refers to the line_maps of pfile. The parameters to_file and to_line refer to the included file (if the file is introduced by #include) or to the file itself. Here, since we are opening the file of interest for the first time, to_line is 1. The field used in line_maps counts how many entries of the maps array are in use, and the field depth indicates the current nesting depth in the include chain.
76 const struct line_map *
77 linemap_add (struct line_maps *set, enum lc_reason reason, in line-map.c
78 unsigned int sysp, source_location from_line,
79 const char *to_file, unsigned int to_line)
80 {
81 struct line_map *map;
82
83 if (set->used && from_line < set->maps[set->used - 1].from_line)
84 abort ();
85
86 if (set->used == set->allocated)
87 {
88 set->allocated = 2 * set->allocated + 256;
89 set->maps = xrealloc (set->maps, set->allocated * sizeof (struct line_map));
90 }
91
92 map = &set->maps[set->used++];
93
94 if (to_file && *to_file == '\0')
95 to_file = "<stdin>";
96
97 /* If we don't keep our line maps consistent, we can easily
98 segfault. Don't rely on the client to do it for us. */
99 if (set->depth == 0)
100 reason = LC_ENTER;
101 else if (reason == LC_LEAVE)
102 {
103 struct line_map *from;
104 bool error;
105
106 if (MAIN_FILE_P (map - 1))
107 {
108 if (to_file == NULL)
109 {
110 set->depth--;
111 set->used--;
112 return NULL;
113 }
114 error = true;
115 reason = LC_RENAME;
116 from = map - 1;
117 }
118 else
119 {
120 from = INCLUDED_FROM (set, map - 1);
121 error = to_file && strcmp (from->to_file, to_file);
122 }
123
124 /* Depending upon whether we are handling preprocessed input or
125 not, this can be a user error or an ICE. */
126 if (error)
127 fprintf (stderr, "line-map.c: file \"%s\" left but not entered\n",
128 to_file);
129
130 /* A TO_FILE of NULL is special - we use the natural values. */
131 if (error || to_file == NULL)
132 {
133 to_file = from->to_file;
134 to_line = LAST_SOURCE_LINE (from) + 1;
135 sysp = from->sysp;
136 }
137 }
138
139 map->reason = reason;
140 map->sysp = sysp;
141 map->from_line = from_line;
142 map->to_file = to_file;
143 map->to_line = to_line;
144
145 if (reason == LC_ENTER)
146 {
147 map->included_from = set->depth == 0 ? -1 : (int) (set->used - 2);
148 set->depth++;
149 if (set->trace_includes)
150 trace_include (set, map);
151 }
152 else if (reason == LC_RENAME)
153 map->included_from = map[-1].included_from;
154 else if (reason == LC_LEAVE)
155 {
156 set->depth--;
157 map->included_from = INCLUDED_FROM (set, map - 1)->included_from;
158 }
159
160 return map;
161 }
Note that at line 106 above, (map - 1) is passed as the argument to MAIN_FILE_P.
120 #define MAIN_FILE_P(MAP) ((MAP)->included_from < 0) in line-map.h
And INCLUDED_FROM, used at line 120 above, is defined as:
117 #define INCLUDED_FROM(SET, MAP) (&(SET)->maps[(MAP)->included_from])
To demonstrate how line_maps and linemap_add work, assume that we have the following file stack:
Current file ← include file1 ← include file2 ← include file3
When we are handling file3, we have line_maps as below figure.
Figure 14 linemap_add, step 1
Then if we leave file3 back into file2, we have following line_maps.
Figure 15 linemap_add, step 2
Then back to file1.
Figure 16 linemap_add, step 3
And finally we leave the current file as well. Notice that in this case NULL is returned, the element at index 5 will be reused for the next “main file”, and the 5 elements before it track the mapping of all the related files to the preprocessed output.
Figure 17 linemap_add, step 4
At line 914 in _cpp_do_file_change, if the front-end also has something to do at the time of a file change, it needs to set the hook file_change in the cb structure of cpp_reader. The C++ front-end binds the hook to the function below.
1500 static void
1501 cb_file_change (cpp_reader *pfile ATTRIBUTE_UNUSED, in c-opts.c
1502 const struct line_map *new_map)
1503 {
1504 if (flag_preprocess_only)
1505 pp_file_change (new_map);
1506 else
1507 fe_file_change (new_map);
1508
1509 if (new_map == 0 || (new_map->reason == LC_LEAVE && MAIN_FILE_P (new_map)))
1510 push_command_line_include ();
1511 }
Below, the macro NO_IMPLICIT_EXTERN_C is defined if the system header files support C++ as well as C. This macro inhibits the usual method of using system header files in C++, which is to pretend that the file's contents are enclosed in `extern "C" {...}'. This macro is defined on Linux. Below, input_filename and input_line access the file and line fields of input_location, respectively.
203 void
204 fe_file_change (const struct line_map *new_map) in c-lex.c
205 {
206 if (new_map == NULL)
207 {
208 map = NULL;
209 return;
210 }
211
212 if (new_map->reason == LC_ENTER)
213 {
214 /* Don't stack the main buffer on the input stack;
215 we already did in compile_file. */
216 if (map != NULL)
217 {
218 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
219
220 input_line = included_at;
221 push_srcloc (new_map->to_file, 1);
222 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
223 #ifndef NO_IMPLICIT_EXTERN_C
…
231 #endif
232 }
233 }
234 else if (new_map->reason == LC_LEAVE)
235 {
236 #ifndef NO_IMPLICIT_EXTERN_C
…
243 #endif
244 pop_srcloc ();
245
246 (*debug_hooks->end_source_file) (new_map->to_line);
247 }
248
249 update_header_times (new_map->to_file);
250 in_system_header = new_map->sysp != 0;
251 input_filename = new_map->to_file;
252 input_line = new_map->to_line;
253 map = new_map;
254
255 /* Hook for C++. */
256 extract_interface_info ();
257 }
Then, for an included file, push_srcloc, pop_srcloc, start_source_file, and end_source_file are invoked for diagnostic and debug-information purposes.