Back to toplev_main. As we mentioned before, it is desirable that the front-end makes introducing a new language easy. In GCC, the front-end provides the framework for a language; all language-related processing is designed as callbacks, and these callbacks are bundled into a single structure, as follows.
212 struct lang_hooks in langhooks.h
213 {
214 /* String identifying the front end. e.g. "GNU C++". */
215 const char *name;
216
217 /* sizeof (struct lang_identifier), so make_node () creates
218 identifier nodes long enough for the language-specific slots. */
219 size_t identifier_size;
220
221 /* Determines the size of any language-specific 'x' or 'c' nodes.
222 Since it is called from make_node, the only information available
223 is the tree code. Expected to abort on unrecognized codes. */
224 size_t (*tree_size) (enum tree_code);
225
226 /* The first callback made to the front end, for simple
227 initialization needed before any calls to handle_option. Return
228 the language mask to filter the switch array with. */
229 unsigned int (*init_options) (unsigned int argc, const char **argv);
230
231 /* Callback used to perform language-specific initialization for the
232 global diagnostic context structure. */
233 void (*initialize_diagnostics) (struct diagnostic_context *);
234
235 /* Handle the switch CODE, which has real type enum opt_code from
236 options.h. If the switch takes an argument, it is passed in ARG
237 which points to permanent storage. The handler is responsible for
238 checking whether ARG is NULL, which indicates that no argument
239 was in fact supplied. For -f and -W switches, VALUE is 1 or 0
240 for the positive and negative forms respectively.
241
242 Return 1 if the switch is valid, 0 if invalid, and -1 if it's
243 valid and should not be treated as language-independent too. */
244 int (*handle_option) (size_t code, const char *arg, int value);
245
246 /* Return false to use the default complaint about a missing
247 argument, otherwise output a complaint and return true. */
248 bool (*missing_argument) (const char *opt, size_t code);
249
250 /* Called when all command line options have been parsed to allow
251 further processing and initialization
252
253 Should return true to indicate that a compiler back-end is
254 not required, such as with the -E option.
255
256 If errorcount is nonzero after this call the compiler exits
257 immediately and the finish hook is not called. */
258 bool (*post_options) (const char **);
259
260 /* Called after post_options to initialize the front end. Return
261 false to indicate that no further compilation be performed, in
262 which case the finish hook is called immediately. */
263 bool (*init) (void);
264
265 /* Called at the end of compilation, as a finalizer. */
266 void (*finish) (void);
267
268 /* Parses the entire file. The argument is nonzero to cause bison
269 parsers to dump debugging information during parsing. */
270 void (*parse_file) (int);
271
272 /* Called immediately after parsing to clear the binding stack. */
273 void (*clear_binding_stack) (void);
274
275 /* Called to obtain the alias set to be used for an expression or type.
276 Returns -1 if the language does nothing special for it. */
277 HOST_WIDE_INT (*get_alias_set) (tree);
278
279 /* Called with an expression that is to be processed as a constant.
280 Returns either the same expression or a language-independent
281 constant equivalent to its input. */
282 tree (*expand_constant) (tree);
283
284 /* Called by expand_expr for language-specific tree codes.
285 Fourth argument is actually an enum expand_modifier. */
286 rtx (*expand_expr) (tree, rtx, enum machine_mode, int, rtx *);
287
288 /* Prepare expr to be an argument of a TRUTH_NOT_EXPR or other logical
289 operation.
290
291 This preparation consists of taking the ordinary representation
292 of an expression expr and producing a valid tree boolean
293 expression describing whether expr is nonzero. We could simply
294 always do build_binary_op (NE_EXPR, expr, integer_zero_node, 1),
295 but we optimize comparisons, &&, ||, and !.
296
297 The result should be an expression of boolean type (if not an
298 error_mark_node). */
299 tree (*truthvalue_conversion) (tree);
300
301 /* Hook called by safe_from_p for language-specific tree codes. It is
302 up to the language front-end to install a hook if it has any such
303 codes that safe_from_p needs to know about. Since same_from_p will
304 recursively explore the TREE_OPERANDs of an expression, this hook
305 should not reexamine those pieces. This routine may recursively
306 call safe_from_p; it should always pass `0' as the TOP_P
307 parameter. */
308 int (*safe_from_p) (rtx, tree);
309
310 /* Function to finish handling an incomplete decl at the end of
311 compilation. Default hook is does nothing. */
312 void (*finish_incomplete_decl) (tree);
313
314 /* Function used by unsafe_for_reeval. A non-negative number is
315 returned directly from unsafe_for_reeval, a negative number falls
316 through. The default hook returns a negative number. */
317 int (*unsafe_for_reeval) (tree);
318
319 /* Mark EXP saying that we need to be able to take the address of
320 it; it should not be allocated in a register. Return true if
321 successful. */
322 bool (*mark_addressable) (tree);
323
324 /* Hook called by staticp for language-specific tree codes. */
325 int (*staticp) (tree);
326
327 /* Replace the DECL_LANG_SPECIFIC data, which may be NULL, of the
328 DECL_NODE with a newly GC-allocated copy. */
329 void (*dup_lang_specific_decl) (tree);
330
331 /* Called before its argument, an UNSAVE_EXPR, is to be
332 unsaved. Modify it in-place so that all the evaluate only once
333 things are cleared out. */
334 tree (*unsave_expr_now) (tree);
335
336 /* Called by expand_expr to build and return the cleanup-expression
337 for the passed TARGET_EXPR. Return NULL if there is none. */
338 tree (*maybe_build_cleanup) (tree);
339
340 /* Set the DECL_ASSEMBLER_NAME for a node. If it is the sort of
341 thing that the assembler should talk about, set
342 DECL_ASSEMBLER_NAME to an appropriate IDENTIFIER_NODE.
343 Otherwise, set it to the ERROR_MARK_NODE to ensure that the
344 assembler does not talk about it. */
345 void (*set_decl_assembler_name) (tree);
346
347 /* Return nonzero if fold-const is free to use bit-field
348 optimizations, for instance in fold_truthop(). */
349 bool (*can_use_bit_fields_p) (void);
350
351 /*Nonzero if TYPE_READONLY and TREE_READONLY should always be honored.*/
352 bool honor_readonly;
353
354 /* Nonzero if this front end does not generate a dummy BLOCK between
355 the outermost scope of the function and the FUNCTION_DECL. See
356 is_body_block in stmt.c, and its callers. */
357 bool no_body_blocks;
358
359 /* The front end can add its own statistics to -fmem-report with
360 this hook. It should output to stderr. */
361 void (*print_statistics) (void);
362
363 /* Called by print_tree when there is a tree of class 'x' that it
364 doesn't know how to display. */
365 lang_print_tree_hook print_xnode;
366
367 /* Called to print language-dependent parts of a class 'd', class
368 't', and IDENTIFIER_NODE nodes. */
369 lang_print_tree_hook print_decl;
370 lang_print_tree_hook print_type;
371 lang_print_tree_hook print_identifier;
372
373 /* Computes the name to use to print a declaration. DECL is the
374 non-NULL declaration in question. VERBOSITY determines what
375 information will be printed: 0: DECL_NAME, demangled as
376 necessary. 1: and scope information. 2: and any other
377 information that might be interesting, such as function parameter
378 types in C++. */
379 const char *(*decl_printable_name) (tree decl, int verbosity);
380
381 /* Given a CALL_EXPR, return a function decl that is its target. */
382 tree (*lang_get_callee_fndecl) (tree);
383
384 /* Called by report_error_function to print out function name. */
385 void (*print_error_function) (struct diagnostic_context *, const char *);
386
387 /* Called from expr_size to calculate the size of the value of an
388 expression in a language-dependent way. Returns a tree for the size
389 in bytes. A frontend can call lhd_expr_size to get the default
390 semantics in cases that it doesn't want to handle specially. */
391 tree (*expr_size) (tree);
392
393 /* Called from uninitialized_vars_warning to find out if a variable is
394 uninitialized based on DECL_INITIAL. */
395 bool (*decl_uninit) (tree);
396
397 /* Pointers to machine-independent attribute tables, for front ends
398 using attribs.c. If one is NULL, it is ignored. Respectively, a
399 table of attributes specific to the language, a table of
400 attributes common to two or more languages (to allow easy
401 sharing), and a table of attributes for checking formats. */
402 const struct attribute_spec *attribute_table;
403 const struct attribute_spec *common_attribute_table;
404 const struct attribute_spec *format_attribute_table;
405
406 /* Function-related language hooks. */
407 struct lang_hooks_for_functions function;
408
409 struct lang_hooks_for_tree_inlining tree_inlining;
410
411 struct lang_hooks_for_callgraph callgraph;
412
413 struct lang_hooks_for_tree_dump tree_dump;
414
415 struct lang_hooks_for_decls decls;
416
417 struct lang_hooks_for_types types;
418
419 struct lang_hooks_for_rtl_expansion rtl_expand;
420
421 /* Whenever you add entries here, make sure you adjust langhooks-def.h
422 and langhooks.c accordingly. */
423 };
This is a big and complex but important data structure; it has a global instance, lang_hooks. Every front-end needs to set up this structure, which is initialized by the macro LANG_HOOKS_INITIALIZER. A front-end doesn’t need to provide the whole definition: for the parts a front-end doesn’t define, a default operation is assumed (commonly a harmless empty operation, or one that returns a false value). In the following, using the C++ front-end as the example, we will look at this functionality piece by piece.
No doubt different languages have different sets of compilation options, and their processing is front-end dependent. The routine decode_options processes the compilation options for the language. Note that this function is used for all front-ends; the language-specific work is done through the lang_hooks callbacks it invokes.
479 void
480 decode_options (unsigned int argc, const char **argv) in opts.c
481 {
482 unsigned int i, lang_mask;
483
484 /* Perform language-specific options initialization. */
485 lang_mask = (*lang_hooks.init_options) (argc, argv);
486
487 lang_hooks.initialize_diagnostics (global_dc);
C and C++ share the same init_options hook, c_common_init_options. Its major work is to create a reader (parse_in) that scans the source files of the program.
188 unsigned int
189 c_common_init_options (unsigned int argc, const char **argv) in c-opts.c
190 {
191 static const unsigned int lang_flags[] = {CL_C, CL_ObjC, CL_CXX, CL_ObjCXX};
192 unsigned int i, result;
193
194 /* This is conditionalized only because that is the way the front
195 ends used to do it. Maybe this should be unconditional? */
196 if (c_dialect_cxx ())
197 {
198 /* By default wrap lines at 80 characters. Is getenv
199 ("COLUMNS") preferable? */
200 diagnostic_line_cutoff (global_dc) = 80;
201 /* By default, emit location information once for every
202 diagnostic message. */
203 diagnostic_prefixing_rule (global_dc) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
204 }
205
206 parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89,
207 ident_hash);
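c_dialect_cxx, used at lines 196 and 206, is a very simple macro that checks whether the language being compiled is a C++ dialect. Because it tests the clk_cxx bit of c_language, it is true for both clk_cxx and clk_objcxx.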
254 #define c_dialect_cxx() (c_language & clk_cxx) in c-common.h
c_language is a variable of the enum type c_language_kind. It is defined by each front-end; for example, in the C++ front-end it is set to clk_cxx.
241 typedef enum c_language_kind in c-common.h
242 {
243 clk_c = 0, /* C90, C94 or C99 */
244 clk_objc = 1, /* clk_c with ObjC features. */
245 clk_cxx = 2, /* ANSI/ISO C++ */
246 clk_objcxx = 3 /* clk_cxx with ObjC features. */
247 }
248 c_language_kind;
249
250 /* To test for a specific language use c_language, defined by each
251 front end. For "ObjC features" or "not C++" use the macros. */
252 extern c_language_kind c_language; in c-common.h